11#![ allow( clippy:: struct_excessive_bools) ]
22
33use std:: {
4+ collections:: BTreeMap ,
45 env:: consts:: { EXE_EXTENSION , EXE_SUFFIX } ,
56 io:: { self , Read } ,
67 path:: { Path , PathBuf , MAIN_SEPARATOR_STR } ,
@@ -11,7 +12,11 @@ use thiserror::Error;
1112use tokio:: { task:: spawn_blocking, time:: Instant } ;
1213use zip:: ZipArchive ;
1314
14- use crate :: { descriptor:: OS , result:: RokitResult , sources:: ArtifactFormat } ;
15+ use crate :: {
16+ descriptor:: { Descriptor , OS } ,
17+ result:: RokitResult ,
18+ sources:: ArtifactFormat ,
19+ } ;
1520
1621#[ derive( Debug , Error ) ]
1722pub enum ExtractError {
@@ -61,6 +66,7 @@ struct Candidate {
6166 matched_file_inexact : bool , // Case-insensitive filename match
6267 has_exec_perms : bool , // Has executable permissions (UNIX only)
6368 has_exec_suffix : bool , // Has an executable suffix (e.g. `.exe`)
69+ has_descriptor : bool , // Has executable contents (any platform)
6470}
6571
6672impl Candidate {
@@ -70,11 +76,13 @@ impl Candidate {
7076 + u32:: from ( self . matched_file_inexact )
7177 + u32:: from ( self . has_exec_perms )
7278 + u32:: from ( self . has_exec_suffix )
79+ + u32:: from ( self . has_descriptor )
7380 }
7481
7582 fn find_best (
7683 entry_paths : impl AsRef < [ ( PathBuf , Option < u32 > ) ] > ,
7784 desired_file_path : impl AsRef < Path > ,
85+ mut read_file_contents : impl FnMut ( & Path ) -> Option < Vec < u8 > > ,
7886 ) -> Option < Self > {
7987 let entry_paths = entry_paths. as_ref ( ) ;
8088 let desired_file_path = desired_file_path. as_ref ( ) ;
@@ -97,6 +105,9 @@ impl Candidate {
97105
98106 let has_exec_perms = perms. is_some_and ( |perms| ( perms & 0o111 ) != 0 ) ;
99107 let has_exec_suffix = path. extension ( ) . is_some_and ( |ext| ext == EXE_EXTENSION ) ;
108+ let has_descriptor = read_file_contents ( path)
109+ . and_then ( Descriptor :: detect_from_executable)
110+ . is_some ( ) ;
100111
101112 Some ( Self {
102113 path : path. clone ( ) ,
@@ -105,6 +116,7 @@ impl Candidate {
105116 matched_file_inexact,
106117 has_exec_perms,
107118 has_exec_suffix,
119+ has_descriptor,
108120 } )
109121 } )
110122 . filter ( |c| c. priority ( ) > 0 ) // Filter out candidates with no matches at all
@@ -140,43 +152,51 @@ pub async fn extract_zip_file(
140152 // Reading a zip file is a potentially expensive operation, so
141153 // spawn it as a blocking task and use the tokio thread pool.
142154 spawn_blocking ( move || {
143- let mut found = None ;
144- let mut reader = io:: Cursor :: new ( & zip_contents) ;
145- let mut zip = ZipArchive :: new ( & mut reader) ?;
155+ let mut zip_cursor = io:: Cursor :: new ( & zip_contents) ;
156+ let mut zip_reader = ZipArchive :: new ( & mut zip_cursor) ?;
146157
147- // Gather paths and their permissions,
148- // avoiding reading the entire zip file
149- let entry_paths = zip
158+ // Gather simple path + permissions pairs to find candidates from
159+ let entry_paths = zip_reader
150160 . file_names ( )
151- . map ( |name| {
152- // NOTE: We don't need to sanitize the files names here
153- // since we only use them for matching *within the zip file*
154- ( PathBuf :: from ( name) , None :: < u32 > )
155- } )
161+ . map ( |name| ( PathBuf :: from ( name) , None :: < u32 > ) )
156162 . collect :: < Vec < _ > > ( ) ;
157163
158- // Find the best candidate to extract, if any
159- let best = Candidate :: find_best ( entry_paths, & desired_file_path) ;
160- if let Some ( candidate) = best {
161- if let Some ( path_str) = candidate. path . to_str ( ) {
162- if let Ok ( mut entry) = zip. by_name ( path_str) {
163- let mut bytes = Vec :: new ( ) ;
164- entry. read_to_end ( & mut bytes) ?;
165- found = Some ( bytes) ;
166- }
164+ // Lazily cache any files that we read, to ensure that we do not
165+ // try to read a file entry which has already been read to its end
166+ let mut read_file_cache = BTreeMap :: < _ , Vec < u8 > > :: new ( ) ;
167+ let mut read_file_contents = |path : & Path | {
168+ if let Some ( existing) = read_file_cache. get ( path) {
169+ Ok ( existing. clone ( ) )
170+ } else if let Ok ( mut entry) = zip_reader. by_name ( path. to_str ( ) . unwrap ( ) ) {
171+ let mut bytes = Vec :: new ( ) ;
172+ entry. read_to_end ( & mut bytes) ?;
173+ read_file_cache. insert ( path. to_path_buf ( ) , bytes. clone ( ) ) ;
174+ Ok ( bytes)
175+ } else {
176+ Err ( io:: Error :: new (
177+ io:: ErrorKind :: NotFound ,
178+ format ! ( "File not found: {}" , path. display( ) ) ,
179+ ) )
167180 }
168- if found. is_none ( ) {
169- tracing:: warn!(
170- path = ?candidate. path,
171- "found candidate path, but failed to extract file"
172- ) ;
181+ } ;
182+
183+ // Find the best candidate to extract, if any
184+ let best = Candidate :: find_best ( entry_paths, & desired_file_path, |path| {
185+ read_file_contents ( path) . ok ( )
186+ } ) ;
187+ let ( path, found) = match best {
188+ None => ( None , None ) ,
189+ Some ( candidate) => {
190+ let contents = read_file_contents ( & candidate. path ) ?;
191+ ( Some ( candidate. path ) , Some ( contents) )
173192 }
174- }
193+ } ;
175194
176195 tracing:: debug!(
177196 num_kilobytes,
178197 elapsed = ?start. elapsed( ) ,
179- found = found. is_some( ) ,
198+ found_any = found. is_some( ) ,
199+ found_path = path. map( |path| path. display( ) . to_string( ) ) ,
180200 "extracted zip file"
181201 ) ;
182202 Ok ( found)
@@ -203,58 +223,67 @@ pub async fn extract_tar_file(
203223 // Reading a tar file is a potentially expensive operation, so
204224 // spawn it as a blocking task and use the tokio thread pool.
205225 spawn_blocking ( move || {
206- let mut found = None ;
207-
208- /*
209- Gather paths and their permissions - note that we
210- need to read the tar file twice to be able to use
211- our find_best_candidate matching implementation...
212-
213- We can however use the `entries_with_seek` method
214- to avoid reading actual file contents into memory.
215- */
216- let mut entry_cursor = io:: Cursor :: new ( & tar_contents) ;
217- let mut entry_reader = TarArchive :: new ( & mut entry_cursor) ;
218- let entry_paths = entry_reader
226+ // Gather paths and references to their respective entries,
227+ // without reading actual file contents into memory just yet
228+ let mut tar_cursor = io:: Cursor :: new ( & tar_contents) ;
229+ let mut tar_reader = TarArchive :: new ( & mut tar_cursor) ;
230+ let mut tar_files = tar_reader
219231 . entries_with_seek ( ) ?
220232 . filter_map ( |entry| {
221233 let entry = entry. ok ( ) ?;
222234 if entry. header ( ) . entry_type ( ) . is_dir ( ) {
223235 return None ;
224236 }
225237 let path = entry. path ( ) . ok ( ) ?;
238+ Some ( ( path. to_path_buf ( ) , entry) )
239+ } )
240+ . collect :: < BTreeMap < PathBuf , _ > > ( ) ;
241+
242+ // Map to simple path + permissions pairs to find candidates from
243+ let entry_paths = tar_files
244+ . iter ( )
245+ . map ( |( path, entry) | {
226246 let perms = entry. header ( ) . mode ( ) . ok ( ) ;
227- Some ( ( path. to_path_buf ( ) , perms) )
247+ ( path. clone ( ) , perms)
228248 } )
229249 . collect :: < Vec < _ > > ( ) ;
230250
231- // Find the best candidate to extract, if any
232- let best = Candidate :: find_best ( entry_paths, & desired_file_path) ;
233- if let Some ( candidate) = best {
234- let contents_cursor = io:: Cursor :: new ( & tar_contents) ;
235- let mut contents_reader = TarArchive :: new ( contents_cursor) ;
236- for entry in contents_reader. entries_with_seek ( ) ? {
237- let mut entry = entry?;
238- let entry_path = entry. path ( ) ?;
239- if entry_path == candidate. path . as_path ( ) {
240- let mut bytes = Vec :: new ( ) ;
241- entry. read_to_end ( & mut bytes) ?;
242- found = Some ( bytes) ;
243- break ;
244- }
251+ // Lazily cache any files that we read, to ensure that we do not
252+ // try to read a file entry which has already been read to its end
253+ let mut read_file_cache = BTreeMap :: < _ , Vec < u8 > > :: new ( ) ;
254+ let mut read_file_contents = |path : & Path | {
255+ if let Some ( existing) = read_file_cache. get ( path) {
256+ Ok ( existing. clone ( ) )
257+ } else if let Some ( entry) = tar_files. get_mut ( path) {
258+ let mut bytes = Vec :: new ( ) ;
259+ entry. read_to_end ( & mut bytes) ?;
260+ read_file_cache. insert ( path. to_path_buf ( ) , bytes. clone ( ) ) ;
261+ Ok ( bytes)
262+ } else {
263+ Err ( io:: Error :: new (
264+ io:: ErrorKind :: NotFound ,
265+ format ! ( "File not found: {}" , path. display( ) ) ,
266+ ) )
245267 }
246- if found. is_none ( ) {
247- tracing:: warn!(
248- path = ?candidate. path,
249- "found candidate path, but failed to extract file"
250- ) ;
268+ } ;
269+
270+ // Find the best candidate to extract, if any
271+ let best = Candidate :: find_best ( entry_paths, & desired_file_path, |path| {
272+ read_file_contents ( path) . ok ( )
273+ } ) ;
274+ let ( path, found) = match best {
275+ None => ( None , None ) ,
276+ Some ( candidate) => {
277+ let contents = read_file_contents ( & candidate. path ) ?;
278+ ( Some ( candidate. path ) , Some ( contents) )
251279 }
252- }
280+ } ;
253281
254282 tracing:: debug!(
255283 num_kilobytes,
256284 elapsed = ?start. elapsed( ) ,
257- found = found. is_some( ) ,
285+ found_any = found. is_some( ) ,
286+ found_path = path. map( |path| path. display( ) . to_string( ) ) ,
258287 "extracted tar file"
259288 ) ;
260289 Ok ( found)
0 commit comments