Skip to content

Commit 3a81f94

Browse files
authored
Optimize unpack, str.__add__ and fastlocals (RustPython#7293)
* Remove intermediate Vec allocation in unpack_sequence fast path

  Push elements directly from tuple/list slice in reverse order instead of cloning into a temporary Vec first.

* Use read-only atomic load before swap in check_signals

  Add Relaxed load guard before the Acquire swap to avoid cache-line invalidation on every instruction dispatch when no signal is pending.

* Cache builtins downcast in ExecutingFrame for LOAD_GLOBAL

  Pre-compute builtins.downcast_ref::<PyDict>() at frame entry and reuse the cached reference in load_global_or_builtin and LoadBuildClass. Also add get_chain_exact to skip redundant exact_dict type checks.

* Add number Add slot to PyStr for direct str+str dispatch

  binary_op1 can now resolve str+str addition directly via the number slot instead of falling through to the sequence concat path.

* Guard FastLocals access in locals() with try_lock on state mutex

  Address CodeRabbit review: f_locals() could access fastlocals without synchronization when called from another thread. Use try_lock on the state mutex so concurrent access is properly serialized.

* Use exact type check for builtins_dict cache

  downcast_ref::<PyDict>() matches dict subclasses, causing get_chain_exact to bypass custom __getitem__ overrides. Use downcast_ref_if_exact to only fast-path exact dict types.

* Consolidate with_recursion in _cmp to single guard

  Move the recursion depth check to wrap the entire _cmp body instead of each individual call_cmp direction, reducing Cell read/write pairs and scopeguard overhead per comparison.
* Add opcode-level fast paths for FOR_ITER, COMPARE_OP, BINARY_OP

  - FOR_ITER: detect PyRangeIterator and bypass generic iterator protocol (atomic slot load + indirect call)
  - COMPARE_OP: inline int/float comparison for exact types, skip rich_compare dispatch and with_recursion overhead
  - BINARY_OP: inline int add/sub with i64 checked arithmetic to avoid BigInt heap allocation and binary_op1 dispatch

* Also check globals is exact dict for LOAD_GLOBAL fast path

  get_chain_exact bypasses __missing__ on dict subclasses. Move get_chain_exact to PyExact<PyDict> impl with debug_assert, and have get_chain delegate to it. Store builtins_dict as Option<&PyExact<PyDict>> to enforce exact type at compile time. Use PyRangeIterator::next_fast() instead of pub(crate) fields. Fix comment style issues.
1 parent 7b89d82 commit 3a81f94

6 files changed

Lines changed: 271 additions & 102 deletions

File tree

crates/vm/src/builtins/dict.rs

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use super::{
55
use crate::common::lock::LazyLock;
66
use crate::object::{Traverse, TraverseFn};
77
use crate::{
8-
AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyRefExact, PyResult,
8+
AsObject, Context, Py, PyExact, PyObject, PyObjectRef, PyPayload, PyRef, PyRefExact, PyResult,
99
TryFromObject, atomic_func,
1010
builtins::{
1111
PyTuple,
@@ -681,7 +681,10 @@ impl Py<PyDict> {
681681
let self_exact = self.exact_dict(vm);
682682
let other_exact = other.exact_dict(vm);
683683
if self_exact && other_exact {
684-
self.entries.get_chain(&other.entries, vm, key)
684+
// SAFETY: exact_dict checks passed
685+
let self_exact = unsafe { PyExact::ref_unchecked(self) };
686+
let other_exact = unsafe { PyExact::ref_unchecked(other) };
687+
self_exact.get_chain_exact(other_exact, key, vm)
685688
} else if let Some(value) = self.get_item_opt(key, vm)? {
686689
Ok(Some(value))
687690
} else {
@@ -690,6 +693,21 @@ impl Py<PyDict> {
690693
}
691694
}
692695

696+
impl PyExact<PyDict> {
697+
/// Look up `key` in `self`, falling back to `other`.
698+
/// Both dicts must be exact `dict` types (enforced by `PyExact`).
699+
pub(crate) fn get_chain_exact<K: DictKey + ?Sized>(
700+
&self,
701+
other: &Self,
702+
key: &K,
703+
vm: &VirtualMachine,
704+
) -> PyResult<Option<PyObjectRef>> {
705+
debug_assert!(self.class().is(vm.ctx.types.dict_type));
706+
debug_assert!(other.class().is(vm.ctx.types.dict_type));
707+
self.entries.get_chain(&other.entries, vm, key)
708+
}
709+
}
710+
693711
// Implement IntoIterator so that we can easily iterate dictionaries from rust code.
694712
impl IntoIterator for PyDictRef {
695713
type Item = (PyObjectRef, PyObjectRef);

crates/vm/src/builtins/range.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -613,6 +613,19 @@ pub struct PyRangeIterator {
613613
length: usize,
614614
}
615615

616+
impl PyRangeIterator {
617+
/// Advance and return next value without going through the iterator protocol.
618+
#[inline]
619+
pub(crate) fn next_fast(&self) -> Option<isize> {
620+
let index = self.index.fetch_add(1);
621+
if index < self.length {
622+
Some(self.start + (index as isize) * self.step)
623+
} else {
624+
None
625+
}
626+
}
627+
}
628+
616629
impl PyPayload for PyRangeIterator {
617630
#[inline]
618631
fn class(ctx: &Context) -> &'static Py<PyType> {

crates/vm/src/builtins/str.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1584,6 +1584,20 @@ impl AsMapping for PyStr {
15841584
impl AsNumber for PyStr {
15851585
fn as_number() -> &'static PyNumberMethods {
15861586
static AS_NUMBER: PyNumberMethods = PyNumberMethods {
1587+
add: Some(|a, b, vm| {
1588+
let Some(a) = a.downcast_ref::<PyStr>() else {
1589+
return Ok(vm.ctx.not_implemented());
1590+
};
1591+
let Some(b) = b.downcast_ref::<PyStr>() else {
1592+
return Ok(vm.ctx.not_implemented());
1593+
};
1594+
let bytes = a.as_wtf8().py_add(b.as_wtf8());
1595+
Ok(unsafe {
1596+
let kind = a.kind() | b.kind();
1597+
PyStr::new_str_unchecked(bytes.into(), kind)
1598+
}
1599+
.to_pyobject(vm))
1600+
}),
15871601
remainder: Some(|a, b, vm| {
15881602
if let Some(a) = a.downcast_ref::<PyStr>() {
15891603
a.__mod__(b.to_owned(), vm).to_pyresult(vm)

0 commit comments

Comments (0)