Skip to content

Commit a052d03

Browse files
authored
Improved artifact executable detection (#82)
1 parent 85a12c3 commit a052d03

File tree

5 files changed

+122
-85
lines changed

5 files changed

+122
-85
lines changed

lib/descriptor/arch.rs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -47,13 +47,13 @@ impl Arch {
4747
Get the architecture of the current host system.
4848
*/
4949
#[must_use]
50-
pub fn current_system() -> Self {
51-
match CURRENT_ARCH {
52-
"aarch64" => Self::Arm64,
53-
"x86_64" => Self::X64,
54-
"x86" => Self::X86,
55-
"arm" => Self::Arm32,
56-
_ => panic!("Unsupported architecture: {CURRENT_ARCH}"),
50+
pub const fn current_system() -> Self {
51+
match CURRENT_ARCH.as_bytes() {
52+
b"aarch64" => Self::Arm64,
53+
b"x86_64" => Self::X64,
54+
b"x86" => Self::X86,
55+
b"arm" => Self::Arm32,
56+
_ => panic!("Unsupported architecture"),
5757
}
5858
}
5959

@@ -118,7 +118,7 @@ impl Arch {
118118
Get the architecture as a string, such as "x64" or "arm64".
119119
*/
120120
#[must_use]
121-
pub fn as_str(&self) -> &'static str {
121+
pub const fn as_str(&self) -> &'static str {
122122
match self {
123123
Self::Arm64 => "arm64",
124124
Self::X64 => "x64",

lib/descriptor/mod.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ impl Descriptor {
3838
Get the description for the current host system.
3939
*/
4040
#[must_use]
41-
pub fn current_system() -> Self {
41+
pub const fn current_system() -> Self {
4242
Self {
4343
os: OS::current_system(),
4444
arch: Some(Arch::current_system()),
@@ -85,23 +85,23 @@ impl Descriptor {
8585
Get the operating system of this description.
8686
*/
8787
#[must_use]
88-
pub fn os(&self) -> OS {
88+
pub const fn os(&self) -> OS {
8989
self.os
9090
}
9191

9292
/**
9393
Get the architecture of this description.
9494
*/
9595
#[must_use]
96-
pub fn arch(&self) -> Option<Arch> {
96+
pub const fn arch(&self) -> Option<Arch> {
9797
self.arch
9898
}
9999

100100
/**
101101
Get the preferred toolchain of this description.
102102
*/
103103
#[must_use]
104-
pub fn toolchain(&self) -> Option<Toolchain> {
104+
pub const fn toolchain(&self) -> Option<Toolchain> {
105105
self.toolchain
106106
}
107107

lib/descriptor/os.rs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,12 @@ impl OS {
3838
Get the operating system of the current host system.
3939
*/
4040
#[must_use]
41-
pub fn current_system() -> Self {
42-
match CURRENT_OS {
43-
"windows" => Self::Windows,
44-
"macos" => Self::MacOS,
45-
"linux" => Self::Linux,
46-
_ => panic!("Unsupported OS: {CURRENT_OS}"),
41+
pub const fn current_system() -> Self {
42+
match CURRENT_OS.as_bytes() {
43+
b"windows" => Self::Windows,
44+
b"macos" => Self::MacOS,
45+
b"linux" => Self::Linux,
46+
_ => panic!("Unsupported OS"),
4747
}
4848
}
4949

@@ -93,7 +93,7 @@ impl OS {
9393
Get the name of the operating system as a string.
9494
*/
9595
#[must_use]
96-
pub fn as_str(self) -> &'static str {
96+
pub const fn as_str(self) -> &'static str {
9797
match self {
9898
Self::Windows => "windows",
9999
Self::MacOS => "macos",

lib/descriptor/toolchain.rs

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,16 @@ impl Toolchain {
2121
Get the toolchain of the current host system.
2222
*/
2323
#[must_use]
24-
pub fn current_system() -> Option<Self> {
25-
None // TODO: Implement detection of the host toolchain
24+
pub const fn current_system() -> Option<Self> {
25+
if cfg!(target_env = "msvc") {
26+
Some(Self::Msvc)
27+
} else if cfg!(target_env = "gnu") {
28+
Some(Self::Gnu)
29+
} else if cfg!(target_env = "musl") {
30+
Some(Self::Musl)
31+
} else {
32+
None
33+
}
2634
}
2735

2836
/**
@@ -44,7 +52,7 @@ impl Toolchain {
4452
Get the name of the toolchain as a string.
4553
*/
4654
#[must_use]
47-
pub fn as_str(self) -> &'static str {
55+
pub const fn as_str(self) -> &'static str {
4856
match self {
4957
Self::Msvc => "msvc",
5058
Self::Gnu => "gnu",

lib/sources/extraction.rs

Lines changed: 92 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#![allow(clippy::struct_excessive_bools)]
22

33
use std::{
4+
collections::BTreeMap,
45
env::consts::{EXE_EXTENSION, EXE_SUFFIX},
56
io::{self, Read},
67
path::{Path, PathBuf, MAIN_SEPARATOR_STR},
@@ -11,7 +12,11 @@ use thiserror::Error;
1112
use tokio::{task::spawn_blocking, time::Instant};
1213
use zip::ZipArchive;
1314

14-
use crate::{descriptor::OS, result::RokitResult, sources::ArtifactFormat};
15+
use crate::{
16+
descriptor::{Descriptor, OS},
17+
result::RokitResult,
18+
sources::ArtifactFormat,
19+
};
1520

1621
#[derive(Debug, Error)]
1722
pub enum ExtractError {
@@ -61,6 +66,7 @@ struct Candidate {
6166
matched_file_inexact: bool, // Case-insensitive filename match
6267
has_exec_perms: bool, // Has executable permissions (UNIX only)
6368
has_exec_suffix: bool, // Has an executable suffix (e.g. `.exe`)
69+
has_descriptor: bool, // Has executable contents (any platform)
6470
}
6571

6672
impl Candidate {
@@ -70,11 +76,13 @@ impl Candidate {
7076
+ u32::from(self.matched_file_inexact)
7177
+ u32::from(self.has_exec_perms)
7278
+ u32::from(self.has_exec_suffix)
79+
+ u32::from(self.has_descriptor)
7380
}
7481

7582
fn find_best(
7683
entry_paths: impl AsRef<[(PathBuf, Option<u32>)]>,
7784
desired_file_path: impl AsRef<Path>,
85+
mut read_file_contents: impl FnMut(&Path) -> Option<Vec<u8>>,
7886
) -> Option<Self> {
7987
let entry_paths = entry_paths.as_ref();
8088
let desired_file_path = desired_file_path.as_ref();
@@ -97,6 +105,9 @@ impl Candidate {
97105

98106
let has_exec_perms = perms.is_some_and(|perms| (perms & 0o111) != 0);
99107
let has_exec_suffix = path.extension().is_some_and(|ext| ext == EXE_EXTENSION);
108+
let has_descriptor = read_file_contents(path)
109+
.and_then(Descriptor::detect_from_executable)
110+
.is_some();
100111

101112
Some(Self {
102113
path: path.clone(),
@@ -105,6 +116,7 @@ impl Candidate {
105116
matched_file_inexact,
106117
has_exec_perms,
107118
has_exec_suffix,
119+
has_descriptor,
108120
})
109121
})
110122
.filter(|c| c.priority() > 0) // Filter out candidates with no matches at all
@@ -140,43 +152,51 @@ pub async fn extract_zip_file(
140152
// Reading a zip file is a potentially expensive operation, so
141153
// spawn it as a blocking task and use the tokio thread pool.
142154
spawn_blocking(move || {
143-
let mut found = None;
144-
let mut reader = io::Cursor::new(&zip_contents);
145-
let mut zip = ZipArchive::new(&mut reader)?;
155+
let mut zip_cursor = io::Cursor::new(&zip_contents);
156+
let mut zip_reader = ZipArchive::new(&mut zip_cursor)?;
146157

147-
// Gather paths and their permissions,
148-
// avoiding reading the entire zip file
149-
let entry_paths = zip
158+
// Gather simple path + permissions pairs to find candidates from
159+
let entry_paths = zip_reader
150160
.file_names()
151-
.map(|name| {
152-
// NOTE: We don't need to sanitize the files names here
153-
// since we only use them for matching *within the zip file*
154-
(PathBuf::from(name), None::<u32>)
155-
})
161+
.map(|name| (PathBuf::from(name), None::<u32>))
156162
.collect::<Vec<_>>();
157163

158-
// Find the best candidate to extract, if any
159-
let best = Candidate::find_best(entry_paths, &desired_file_path);
160-
if let Some(candidate) = best {
161-
if let Some(path_str) = candidate.path.to_str() {
162-
if let Ok(mut entry) = zip.by_name(path_str) {
163-
let mut bytes = Vec::new();
164-
entry.read_to_end(&mut bytes)?;
165-
found = Some(bytes);
166-
}
164+
// Lazily cache any files that we read, to ensure that we do not
165+
// try to read a file entry which has already been read to its end
166+
let mut read_file_cache = BTreeMap::<_, Vec<u8>>::new();
167+
let mut read_file_contents = |path: &Path| {
168+
if let Some(existing) = read_file_cache.get(path) {
169+
Ok(existing.clone())
170+
} else if let Ok(mut entry) = zip_reader.by_name(path.to_str().unwrap()) {
171+
let mut bytes = Vec::new();
172+
entry.read_to_end(&mut bytes)?;
173+
read_file_cache.insert(path.to_path_buf(), bytes.clone());
174+
Ok(bytes)
175+
} else {
176+
Err(io::Error::new(
177+
io::ErrorKind::NotFound,
178+
format!("File not found: {}", path.display()),
179+
))
167180
}
168-
if found.is_none() {
169-
tracing::warn!(
170-
path = ?candidate.path,
171-
"found candidate path, but failed to extract file"
172-
);
181+
};
182+
183+
// Find the best candidate to extract, if any
184+
let best = Candidate::find_best(entry_paths, &desired_file_path, |path| {
185+
read_file_contents(path).ok()
186+
});
187+
let (path, found) = match best {
188+
None => (None, None),
189+
Some(candidate) => {
190+
let contents = read_file_contents(&candidate.path)?;
191+
(Some(candidate.path), Some(contents))
173192
}
174-
}
193+
};
175194

176195
tracing::debug!(
177196
num_kilobytes,
178197
elapsed = ?start.elapsed(),
179-
found = found.is_some(),
198+
found_any = found.is_some(),
199+
found_path = path.map(|path| path.display().to_string()),
180200
"extracted zip file"
181201
);
182202
Ok(found)
@@ -203,58 +223,67 @@ pub async fn extract_tar_file(
203223
// Reading a tar file is a potentially expensive operation, so
204224
// spawn it as a blocking task and use the tokio thread pool.
205225
spawn_blocking(move || {
206-
let mut found = None;
207-
208-
/*
209-
Gather paths and their permissions - note that we
210-
need to read the tar file twice to be able to use
211-
our find_best_candidate matching implementation...
212-
213-
We can however use the `entries_with_seek` method
214-
to avoid reading actual file contents into memory.
215-
*/
216-
let mut entry_cursor = io::Cursor::new(&tar_contents);
217-
let mut entry_reader = TarArchive::new(&mut entry_cursor);
218-
let entry_paths = entry_reader
226+
// Gather paths and references to their respective entries,
227+
// without reading actual file contents into memory just yet
228+
let mut tar_cursor = io::Cursor::new(&tar_contents);
229+
let mut tar_reader = TarArchive::new(&mut tar_cursor);
230+
let mut tar_files = tar_reader
219231
.entries_with_seek()?
220232
.filter_map(|entry| {
221233
let entry = entry.ok()?;
222234
if entry.header().entry_type().is_dir() {
223235
return None;
224236
}
225237
let path = entry.path().ok()?;
238+
Some((path.to_path_buf(), entry))
239+
})
240+
.collect::<BTreeMap<PathBuf, _>>();
241+
242+
// Map to simple path + permissions pairs to find candidates from
243+
let entry_paths = tar_files
244+
.iter()
245+
.map(|(path, entry)| {
226246
let perms = entry.header().mode().ok();
227-
Some((path.to_path_buf(), perms))
247+
(path.clone(), perms)
228248
})
229249
.collect::<Vec<_>>();
230250

231-
// Find the best candidate to extract, if any
232-
let best = Candidate::find_best(entry_paths, &desired_file_path);
233-
if let Some(candidate) = best {
234-
let contents_cursor = io::Cursor::new(&tar_contents);
235-
let mut contents_reader = TarArchive::new(contents_cursor);
236-
for entry in contents_reader.entries_with_seek()? {
237-
let mut entry = entry?;
238-
let entry_path = entry.path()?;
239-
if entry_path == candidate.path.as_path() {
240-
let mut bytes = Vec::new();
241-
entry.read_to_end(&mut bytes)?;
242-
found = Some(bytes);
243-
break;
244-
}
251+
// Lazily cache any files that we read, to ensure that we do not
252+
// try to read a file entry which has already been read to its end
253+
let mut read_file_cache = BTreeMap::<_, Vec<u8>>::new();
254+
let mut read_file_contents = |path: &Path| {
255+
if let Some(existing) = read_file_cache.get(path) {
256+
Ok(existing.clone())
257+
} else if let Some(entry) = tar_files.get_mut(path) {
258+
let mut bytes = Vec::new();
259+
entry.read_to_end(&mut bytes)?;
260+
read_file_cache.insert(path.to_path_buf(), bytes.clone());
261+
Ok(bytes)
262+
} else {
263+
Err(io::Error::new(
264+
io::ErrorKind::NotFound,
265+
format!("File not found: {}", path.display()),
266+
))
245267
}
246-
if found.is_none() {
247-
tracing::warn!(
248-
path = ?candidate.path,
249-
"found candidate path, but failed to extract file"
250-
);
268+
};
269+
270+
// Find the best candidate to extract, if any
271+
let best = Candidate::find_best(entry_paths, &desired_file_path, |path| {
272+
read_file_contents(path).ok()
273+
});
274+
let (path, found) = match best {
275+
None => (None, None),
276+
Some(candidate) => {
277+
let contents = read_file_contents(&candidate.path)?;
278+
(Some(candidate.path), Some(contents))
251279
}
252-
}
280+
};
253281

254282
tracing::debug!(
255283
num_kilobytes,
256284
elapsed = ?start.elapsed(),
257-
found = found.is_some(),
285+
found_any = found.is_some(),
286+
found_path = path.map(|path| path.display().to_string()),
258287
"extracted tar file"
259288
);
260289
Ok(found)

0 commit comments

Comments
 (0)