Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 107 additions & 4 deletions crates/pet-python-utils/src/env.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,27 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

use log::{error, trace};
use log::{error, trace, warn};
use pet_core::{arch::Architecture, env::PythonEnv, python_environment::PythonEnvironment};
use serde::{Deserialize, Serialize};
use std::{
path::{Path, PathBuf},
time::SystemTime,
process::Stdio,
thread,
time::{Duration, Instant, SystemTime},
};

use crate::{cache::create_cache, executable::new_silent_command};

const PYTHON_INFO_JSON_SEPARATOR: &str = "093385e9-59f7-4a16-a604-14bf206256fe";
const PYTHON_INFO_CMD:&str = "import json, sys; print('093385e9-59f7-4a16-a604-14bf206256fe');print(json.dumps({'version': '.'.join(str(n) for n in sys.version_info), 'sys_prefix': sys.prefix, 'executable': sys.executable, 'is64_bit': sys.maxsize > 2**32}))";

/// Maximum wall-clock time to wait for a spawned Python interpreter to print
/// its info JSON before we give up and kill it. Stale cached paths on Windows
/// (Store stubs, vanished network shares, EDR-stalled `CreateProcess`) can
/// otherwise block `resolve` for tens to hundreds of seconds (Fixes #463).
const RESOLVE_SPAWN_TIMEOUT: Duration = Duration::from_secs(15);

#[derive(Debug, Deserialize, Clone)]
pub struct InterpreterInfo {
pub version: String,
Expand Down Expand Up @@ -88,13 +96,66 @@ impl ResolvedPythonEnv {
}

fn get_interpreter_details(executable: &Path) -> Option<ResolvedPythonEnv> {
get_interpreter_details_with_timeout(executable, RESOLVE_SPAWN_TIMEOUT)
}

fn get_interpreter_details_with_timeout(
executable: &Path,
timeout: Duration,
) -> Option<ResolvedPythonEnv> {
// Spawn the python exe and get the version, sys.prefix and sys.executable.
let executable = executable.to_str()?;
let start = SystemTime::now();
trace!("Executing Python: {} -c {}", executable, PYTHON_INFO_CMD);
let result = new_silent_command(executable)
let mut child = match new_silent_command(executable)
.args(["-c", PYTHON_INFO_CMD])
.output();
.stdin(Stdio::null())
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.spawn()
{
Ok(child) => child,
Err(err) => {
error!(
"Failed to spawn Python to resolve info {:?}: {}",
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copilot generated:
The exit status from try_wait() is discarded (Ok(Some(_status)) => break). Consider checking status.success() here and returning None early with a diagnostic log (including stderr) for non-zero exits, rather than falling through to JSON parsing on potentially empty/garbled stdout.

[verified]

executable, err
);
return None;
}
};

// Poll for completion up to the timeout. A stale cached path on Windows
// (Store stub, vanished network share, EDR-stalled `CreateProcess`) can
// otherwise block `wait_with_output()` for tens to hundreds of seconds.
let deadline = Instant::now() + timeout;
loop {
match child.try_wait() {
Ok(Some(_status)) => break,
Ok(None) => {
if Instant::now() >= deadline {
warn!(
"Timed out after {:?} resolving Python via spawn for {:?}; killing child.",
timeout, executable
);
let _ = child.kill();
let _ = child.wait();
return None;
}
thread::sleep(Duration::from_millis(25));
}
Err(err) => {
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copilot generated:
[unverified] wait_with_output() reads stdout/stderr to EOF. If the spawned process is a wrapper (conda shim, pyenv shim) that forks a grandchild inheriting the piped stdout fd, EOF won't arrive until the grandchild closes the fd — re-introducing unbounded blocking after the poll loop. Consider taking child.stdout/child.stderr before the loop and draining with a bounded read, or using child.stdout.take() + manual read_to_end with a second deadline after the loop.

[verified]

error!(
"Failed to wait on Python interpreter spawn for {:?}: {}",
executable, err
);
let _ = child.kill();
let _ = child.wait();
return None;
}
}
}

let result = child.wait_with_output();
match result {
Ok(output) => {
let output = String::from_utf8(output.stdout).unwrap().trim().to_string();
Expand Down Expand Up @@ -143,3 +204,45 @@ fn get_interpreter_details(executable: &Path) -> Option<ResolvedPythonEnv> {
}
}
}

#[cfg(all(test, unix))]
mod tests {
use super::*;
use std::os::unix::fs::PermissionsExt;

/// Regression test for #463: a spawn that never exits must not block the
/// resolve path indefinitely. We use a shell script that sleeps far longer
/// than the test timeout and assert that the call returns None promptly
/// (well under the script's sleep duration).
#[test]
fn get_interpreter_details_times_out_on_hanging_executable() {
let tmp_dir = std::env::temp_dir().join(format!(
"pet_resolve_timeout_{}_{}",
std::process::id(),
std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_nanos()
));
std::fs::create_dir_all(&tmp_dir).unwrap();
let fake_exe = tmp_dir.join("hangs");
std::fs::write(&fake_exe, "#!/bin/sh\nsleep 60\n").unwrap();
let mut perms = std::fs::metadata(&fake_exe).unwrap().permissions();
perms.set_mode(0o755);
std::fs::set_permissions(&fake_exe, perms).unwrap();

let start = Instant::now();
let result = get_interpreter_details_with_timeout(&fake_exe, Duration::from_millis(200));
let elapsed = start.elapsed();

let _ = std::fs::remove_file(&fake_exe);
let _ = std::fs::remove_dir(&tmp_dir);

assert!(result.is_none(), "hanging spawn must return None");
assert!(
elapsed < Duration::from_secs(5),
"spawn must be killed near the timeout (took {:?})",
elapsed
);
}
}
Loading