diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md
index c6ca020..c92994a 100644
--- a/.github/copilot-instructions.md
+++ b/.github/copilot-instructions.md
@@ -11,9 +11,9 @@ Concise, project-specific guidance for AI coding agents working on this repo. Fo
 - Support code in `utils/` (`utils.py`, `decode_utils.py`) and enums/models folders. Option dataclasses centralize behavioral switches; never scatter ad-hoc flags.
 
 ## 2. Key Behavioral Invariants
-- DO NOT mutate caller inputs—copy/normalize (`deepcopy` for mappings, index-projection for sequences) before traversal.
+- DO NOT mutate caller inputs—copy/normalize (shallow copy for mappings; deep-copy only when a callable filter may mutate; index-projection for sequences) before traversal.
 - Cycle detection in `encode._encode` must raise `ValueError("Circular reference detected")`—preserve side-channel algorithm.
-- Depth, list, and parameter limits are security/safety features: respect `depth`, `list_limit`, `parameter_limit`, and `strict_depth` / `raise_on_limit_exceeded` exactly as tests assert.
+- Depth, list, and parameter limits are security/safety features: respect `depth`, `max_depth`, `list_limit`, `parameter_limit`, and `strict_depth` / `raise_on_limit_exceeded` exactly as tests assert. `max_depth` is capped to the current recursion limit.
 - Duplicate key handling delegated to `Duplicates` enum: COMBINE → list accumulation; FIRST/LAST semantics enforced during merge.
 - List format semantics (`ListFormat` enum) change how prefixes are generated; COMMA + `comma_round_trip=True` must emit single-element marker for round-trip fidelity.
 - Charset sentinel logic: when `charset_sentinel=True`, prepend sentinel *before* payload; obey override rules when both charset and sentinel present.
@@ -39,6 +39,7 @@ Concise, project-specific guidance for AI coding agents working on this repo. Fo
 - When altering merge or list/index logic, adjust `Utils.merge` or decoding helpers—never inline merging elsewhere.
 - New list or formatting strategies: add Enum member with associated generator/formatter; augment tests to cover serialization/deserialization round trip.
 - Performance-sensitive paths: avoid repeated regex compilation or deep copies inside tight loops; reuse existing pre-processing structure (tokenize first, structure later).
+  - `Utils.merge` is internal and may reuse dict targets for performance; do not assume it preserves caller immutability.
 
 ## 6. Testing Strategy
 - Mirror existing parametric test style in `tests/unit/*_test.py`.
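
The first invariant above is the heart of this diff. A minimal standalone sketch of the intended copy/normalize behavior (`normalize_root` is a hypothetical name used only for illustration, not the library's API):

    import typing as t
    from copy import deepcopy


    def normalize_root(value: t.Any, filter_opt: t.Any = None) -> t.Mapping[str, t.Any]:
        """Copy/normalize a root value so the caller's object is never mutated."""
        if isinstance(value, t.Mapping):
            # A shallow copy protects the caller's top-level container; deep-copy
            # only when a callable filter might rewrite nested values in place.
            return deepcopy(value) if callable(filter_opt) else dict(value)
        if isinstance(value, (list, tuple)):
            # Index-projection: promote a sequence to a string-keyed mapping.
            return {str(i): item for i, item in enumerate(value)}
        return {}


    data = {"a": {"b": 1}}
    assert normalize_root(data) == data and normalize_root(data) is not data
    assert normalize_root(["x", "y"]) == {"0": "x", "1": "y"}
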
diff --git a/.gitignore b/.gitignore
index 2ef20ba..f28dd75 100644
--- a/.gitignore
+++ b/.gitignore
@@ -116,7 +116,6 @@ venv.bak/
 
 # AI related
 .junie
-AGENTS.md
 
 # VS Code specific
 .history
\ No newline at end of file
diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 0000000..6515853
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,34 @@
+# Repository Guidelines
+
+## Project Structure & Module Organization
+- `src/qs_codec/` contains the codec implementation, option models, and helpers; place new modules here and keep exports deliberate.
+- `tests/` mirrors the package layout with `*_test.py` files so every feature has a nearby regression check.
+- `docs/` builds the Sphinx site; refresh guides when behavior or options change.
+- `requirements_dev.txt` pins tooling and `tox.ini` mirrors the CI matrix—update both when adding dependencies.
+
+## Build, Test, and Development Commands
+- `python -m pip install -e .[dev]` installs the package alongside linting and typing extras.
+- `pytest -v --cov=src/qs_codec` drives the unit suite and produces the coverage XML consumed by codecov.
+- `tox -e python3.13` runs tests in an isolated interpreter; swap the env name to target other supported versions.
+- `tox -e linters` chains Black, isort, flake8, pylint, bandit, pyright, and mypy to catch style or security drift before review.
+
+## Coding Style & Naming Conventions
+- Format code with Black (120-char lines) and order imports with isort's Black profile, both configured in `pyproject.toml`.
+- Keep functions and modules in snake_case, reserve PascalCase for classes reflecting `qs` data structures, and type hint public APIs.
+- Respect docstring tone and option names from the JavaScript `qs` package to signal parity.
+
+## Testing Guidelines
+- Add or extend pytest cases under `tests/`, leaning on parametrization for the different encoder/decoder modes.
+- Preserve or raise the coverage level tracked in `coverage.xml`; CI flags regressions.
+- Name tests `test_{feature}_{scenario}` and refresh fixtures whenever query-string semantics shift.
+- When touching cross-language behavior, run `tests/comparison/compare_outputs.sh` to confirm parity with the Node reference.
+  - For encoding depth changes, cover `EncodeOptions.max_depth` (positive int/None) and cap-to-recursion behavior.
+
+## Commit & Pull Request Guidelines
+- Follow the emoji-prefixed summaries visible in `git log` (e.g., `:arrow_up: Bump actions/setup-python from 5 to 6 (#26)`), using the imperative mood.
+- Keep each commit focused; include a short body for impactful changes explaining compatibility or migration notes.
+- For PRs, push only after `tox` succeeds, link the driving issue, outline user-facing changes, and note the tests you ran (attach before/after snippets for docs tweaks).
+
+## Security & Compatibility Notes
+- Follow `SECURITY.md` for private vulnerability disclosure and avoid posting sensitive details in public threads.
+- This port tracks the npm `qs` package; document intentional divergences in both code and docs as soon as they occur.
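
The testing guidelines favor parametrized round-trip cases. A minimal sketch in that style (a hypothetical test, not part of the suite; the expected strings assume the default list format and percent-encoding):

    import pytest

    import qs_codec as qs


    @pytest.mark.parametrize(
        "data, encoded",
        [
            ({"a": "b"}, "a=b"),
            ({"a": ["b", "c"]}, "a%5B0%5D=b&a%5B1%5D=c"),
        ],
    )
    def test_encode_round_trips_default_options(data, encoded):
        # Encode, then decode back, to lock in serialization/deserialization parity.
        assert qs.encode(data) == encoded
        assert qs.decode(encoded) == data
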
diff --git a/README.rst b/README.rst
index 50cfb72..d0a2bdb 100644
--- a/README.rst
+++ b/README.rst
@@ -25,7 +25,7 @@ Highlights
 - Pluggable hooks: custom ``encoder``/``decoder`` callables; options to sort keys, filter output, and control percent-encoding (keys-only, values-only).
 - Nulls & empties: ``strict_null_handling`` and ``skip_nulls``; support for empty lists/arrays when desired.
 - Dates: ``serialize_date`` for ISO 8601 or custom (e.g., UNIX timestamp).
-- Safety limits: configurable nesting depth, parameter limit, and list index limit; optional strict-depth errors; duplicate-key strategies (combine/first/last).
+- Safety limits: configurable decode depth and encode max depth, parameter limit, and list index limit; optional strict-depth errors; duplicate-key strategies (combine/first/last).
 - Extras: numeric entity decoding (e.g. ``&#9786;`` → ☺), alternate delimiters/regex, and query-prefix helpers.
 
 Compatibility
@@ -458,6 +458,23 @@ Encoding can be disabled for keys by setting the
        qs.EncodeOptions(encode_values_only=True)
    ) == 'a=b&c[0]=d&c[1]=e%3Df&f[0][0]=g&f[1][0]=h'
 
+Maximum encoding depth
+^^^^^^^^^^^^^^^^^^^^^^
+
+You can cap how deep the encoder will traverse by setting the
+`max_depth `__
+option. If unset, the encoder derives a safe limit from the interpreter recursion limit; when set, the effective
+limit is capped to the current recursion limit to avoid ``RecursionError``.
+
+.. code:: python
+
+   import qs_codec as qs
+
+   try:
+       qs.encode({'a': {'b': {'c': 'd'}}}, qs.EncodeOptions(max_depth=2))
+   except ValueError as e:
+       assert str(e) == 'Maximum encoding depth exceeded'
+
 This encoding can also be replaced by a custom ``Callable`` in the
 `encoder `__
 option:
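
One consequence of the capping rule documented above: an oversized ``max_depth`` is accepted and silently clamped below the interpreter limit, so shallow input still encodes normally. A small illustration (the expected output assumes default percent-encoding):

    import qs_codec as qs

    # A huge explicit max_depth is clamped below sys.getrecursionlimit()
    # internally, so shallow input encodes normally and pathological input
    # raises ValueError instead of RecursionError.
    opts = qs.EncodeOptions(max_depth=10_000_000)
    assert qs.encode({"a": {"b": "c"}}, opts) == "a%5Bb%5D=c"
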
""" @@ -53,13 +55,15 @@ def encode(value: t.Any, options: EncodeOptions = EncodeOptions()) -> str: if value is None: return "" + filter_opt = options.filter + # Normalize the root into a mapping we can traverse deterministically: - # - Mapping -> deepcopy (avoid mutating caller containers) + # - Mapping -> shallow copy (deep-copy only when a callable filter may mutate) # - Sequence -> promote to {"0": v0, "1": v1, ...} # - Other -> empty (encodes to "") obj: t.Mapping[str, t.Any] if isinstance(value, t.Mapping): - obj = deepcopy(value) + obj = deepcopy(value) if callable(filter_opt) else dict(value) elif isinstance(value, (list, tuple)): obj = {str(i): item for i, item in enumerate(value)} else: @@ -73,7 +77,6 @@ def encode(value: t.Any, options: EncodeOptions = EncodeOptions()) -> str: # If an iterable filter is provided for the root, restrict emission to those keys. obj_keys: t.Optional[t.List[t.Any]] = None - filter_opt = options.filter if filter_opt is not None: if callable(filter_opt): # Callable filter may transform the root object. @@ -94,6 +97,7 @@ def encode(value: t.Any, options: EncodeOptions = EncodeOptions()) -> str: # Side channel for cycle detection across recursive calls. side_channel: WeakKeyDictionary = WeakKeyDictionary() + max_depth = _get_max_encode_depth(options.max_depth) # Encode each selected root key. for _key in obj_keys: @@ -126,6 +130,7 @@ def encode(value: t.Any, options: EncodeOptions = EncodeOptions()) -> str: encode_values_only=options.encode_values_only, charset=options.charset, add_query_prefix=options.add_query_prefix, + _max_depth=max_depth, ) # `_encode` yields either a flat list of `key=value` tokens or a single token. @@ -157,6 +162,15 @@ def encode(value: t.Any, options: EncodeOptions = EncodeOptions()) -> str: # Unique placeholder used as a key within the side-channel chain to pass context down recursion. _sentinel: WeakWrapper = WeakWrapper({}) +# Keep a safety buffer below Python's recursion limit to avoid RecursionError on deep inputs. +_DEPTH_MARGIN: int = 50 + + +def _get_max_encode_depth(max_depth: t.Optional[int]) -> int: + limit = max(0, sys.getrecursionlimit() - _DEPTH_MARGIN) + if max_depth is None: + return limit + return min(max_depth, limit) def _encode( @@ -181,6 +195,8 @@ def _encode( encode_values_only: bool = False, charset: t.Optional[Charset] = Charset.UTF8, add_query_prefix: bool = False, + _depth: int = 0, + _max_depth: t.Optional[int] = None, ) -> t.Union[t.List[t.Any], t.Tuple[t.Any, ...], t.Any]: """ Recursive worker that produces `key=value` tokens for a single subtree. @@ -217,6 +233,11 @@ def _encode( Returns: Either a list/tuple of tokens or a single token string. """ + if _max_depth is None: + _max_depth = _get_max_encode_depth(None) + if _depth > _max_depth: + raise ValueError("Maximum encoding depth exceeded") + # Establish a starting prefix for the top-most invocation (used when called directly). if prefix is None: prefix = "?" if add_query_prefix else "" @@ -425,6 +446,8 @@ def _encode( allow_dots=allow_dots, encode_values_only=encode_values_only, charset=charset, + _depth=_depth + 1, + _max_depth=_max_depth, ) # Flatten nested results into the `values` list. 
diff --git a/src/qs_codec/models/encode_options.py b/src/qs_codec/models/encode_options.py
index 7c2b25a..db9ef4b 100644
--- a/src/qs_codec/models/encode_options.py
+++ b/src/qs_codec/models/encode_options.py
@@ -117,6 +117,13 @@ def encoder(self, value: t.Optional[t.Callable[[t.Any, t.Optional[Charset], t.Op
     sort: t.Optional[t.Callable[[t.Any, t.Any], int]] = field(default=None)
     """Optional comparator for deterministic key ordering. Must return -1, 0, or +1."""
 
+    max_depth: t.Optional[int] = None
+    """Maximum nesting depth allowed during encoding.
+
+    When ``None``, the encoder derives a safe limit from the interpreter recursion limit (minus a safety margin).
+    When set, the effective limit is capped to the current recursion limit to avoid ``RecursionError``.
+    """
+
     def __post_init__(self) -> None:
         """Normalize interdependent options.
 
@@ -126,6 +133,9 @@
         """
         if not hasattr(self, "_encoder") or self._encoder is None:
             self._encoder = EncodeUtils.encode
+        if self.max_depth is not None:
+            if not isinstance(self.max_depth, int) or isinstance(self.max_depth, bool) or self.max_depth <= 0:
+                raise ValueError("max_depth must be a positive integer or None")
         # Default `encode_dot_in_keys` first, then mirror into `allow_dots` when unspecified.
         if self.encode_dot_in_keys is None:
             self.encode_dot_in_keys = False
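
One subtlety in the `__post_init__` validation above: `True` and `False` are instances of `int` in Python, so the explicit `isinstance(self.max_depth, bool)` test is what rejects `max_depth=True`. A quick standalone check of the same rule (`validate_max_depth` is an illustrative copy, not library API):

    def validate_max_depth(max_depth):
        """Mirrors the __post_init__ rule from the diff (standalone copy)."""
        if max_depth is None:
            return
        if not isinstance(max_depth, int) or isinstance(max_depth, bool) or max_depth <= 0:
            raise ValueError("max_depth must be a positive integer or None")


    # bool is a subclass of int, so isinstance(True, int) is True; the explicit
    # bool check is what rejects max_depth=True.
    assert isinstance(True, int)

    for bad in (0, -1, True, 1.5):
        try:
            validate_max_depth(bad)
        except ValueError:
            pass
        else:
            raise AssertionError(f"{bad!r} should have been rejected")

    validate_max_depth(5)     # positive ints pass
    validate_max_depth(None)  # None means "derive from the recursion limit"
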
diff --git a/src/qs_codec/models/weak_wrapper.py b/src/qs_codec/models/weak_wrapper.py
index ed4bbee..cb7d6d5 100644
--- a/src/qs_codec/models/weak_wrapper.py
+++ b/src/qs_codec/models/weak_wrapper.py
@@ -1,4 +1,4 @@
-"""Weakly wrap *any* object with identity equality and deep content hashing."""
+"""Weakly wrap *any* object with identity equality and stable hashing."""
 
 from __future__ import annotations
 
@@ -40,75 +40,12 @@ def _get_proxy(value: t.Any) -> "_Proxy":
     return proxy
 
 
-def _deep_hash(
-    obj: t.Any,
-    _seen: t.Optional[set[int]] = None,
-    _depth: int = 0,
-) -> int:
-    """Deterministic deep hash with cycle & depth protection.
-
-    - Raises ValueError("Circular reference detected") on cycles.
-    - Raises RecursionError when nesting exceeds 400.
-    - Produces equal hashes for equal-by-contents containers.
-    """
-    if _depth > 400:
-        raise RecursionError("Maximum hashing depth exceeded")
-
-    if _seen is None:
-        _seen = set()
-
-    # Track only containers by identity for cycle detection
-    def _enter(o: t.Any) -> int:
-        oid = id(o)
-        if oid in _seen:
-            raise ValueError("Circular reference detected")
-        _seen.add(oid)
-        return oid
-
-    def _leave(oid: int) -> None:
-        _seen.remove(oid)
-
-    if isinstance(obj, dict):
-        oid = _enter(obj)
-        try:
-            # Compute key/value deep hashes once and sort pairs for determinism
-            pairs = [(_deep_hash(k, _seen, _depth + 1), _deep_hash(v, _seen, _depth + 1)) for k, v in obj.items()]
-            pairs.sort()
-            kv_hashes = tuple(pairs)
-            return hash(("dict", kv_hashes))
-        finally:
-            _leave(oid)
-
-    if isinstance(obj, (list, tuple)):
-        oid = _enter(obj)
-        try:
-            elem_hashes = tuple(_deep_hash(x, _seen, _depth + 1) for x in obj)
-            tag = "list" if isinstance(obj, list) else "tuple"
-            return hash((tag, elem_hashes))
-        finally:
-            _leave(oid)
-
-    if isinstance(obj, set):
-        oid = _enter(obj)
-        try:
-            set_hashes = tuple(sorted(_deep_hash(x, _seen, _depth + 1) for x in obj))
-            return hash(("set", set_hashes))
-        finally:
-            _leave(oid)
-
-    # Fallback for scalars / unhashables
-    try:
-        return hash(obj)
-    except TypeError:
-        return hash(repr(obj))
-
-
 class WeakWrapper:
     """Wrapper suitable for use as a WeakKeyDictionary key.
 
     - Holds a *strong* reference to the proxy (keeps proxy alive while wrapper exists).
     - Exposes a weakref to the proxy via `_wref` so tests can observe/force GC.
-    - Equality is proxy identity; hash is a deep hash of the underlying value.
+    - Equality is proxy identity; hash is the proxy identity (stable across mutations).
     """
 
     __slots__ = ("_proxy", "_wref", "__weakref__")
@@ -145,6 +82,5 @@ def __eq__(self, other: object) -> bool:
         return self._proxy is other._proxy
 
     def __hash__(self) -> int:
-        """Return a deep hash of the wrapped value."""
-        # Uses your existing deep-hash helper (not shown here).
-        return _deep_hash(self.value)
+        """Return a stable hash based on the proxy identity."""
+        return hash(self._proxy)
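
The switch from deep content hashing to proxy-identity hashing matters because `WeakKeyDictionary` rehashes keys on lookup; a hash that changes when the wrapped object mutates would strand entries. A minimal model of the new behavior (`IdentityKey` is a simplified stand-in for `WeakWrapper`, hashing by `id` rather than by a cached proxy):

    import typing as t
    from weakref import WeakKeyDictionary


    class IdentityKey:
        """Hash by wrapped-object identity so mutation never changes the hash."""

        __slots__ = ("_obj", "__weakref__")

        def __init__(self, obj: t.Any) -> None:
            self._obj = obj

        def __eq__(self, other: object) -> bool:
            return isinstance(other, IdentityKey) and self._obj is other._obj

        def __hash__(self) -> int:
            return id(self._obj)


    payload: t.Dict[str, t.Any] = {"a": 1}
    side_channel: WeakKeyDictionary = WeakKeyDictionary()
    key = IdentityKey(payload)
    side_channel[key] = "seen"

    payload["b"] = 2  # mutating the payload no longer perturbs the key's hash
    assert side_channel.get(key) == "seen"
    assert side_channel.get(IdentityKey(payload)) == "seen"
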
diff --git a/src/qs_codec/utils/utils.py b/src/qs_codec/utils/utils.py
index f4f6f0c..1b829dd 100644
--- a/src/qs_codec/utils/utils.py
+++ b/src/qs_codec/utils/utils.py
@@ -17,7 +17,6 @@
 - Several routines use an object‑identity `visited` set to avoid infinite recursion when user inputs contain cycles.
 """
 
-import copy
 import typing as t
 from collections import deque
 from datetime import datetime, timedelta
@@ -218,9 +217,12 @@
                     _res.append(_el)
             return _res
 
-        # Prepare a mutable copy of the target we can merge into.
-        is_overflow_target = Utils.is_overflow(target)
-        merge_target: t.Dict[str, t.Any] = copy.deepcopy(target if isinstance(target, dict) else dict(target))
+        # Prepare a mutable target we can merge into; reuse dict targets for performance.
+        merge_target: t.Dict[str, t.Any]
+        if isinstance(target, dict):
+            merge_target = target
+        else:
+            merge_target = dict(target)
 
         # For overlapping keys, merge recursively; otherwise, take the new value.
         merged_updates: t.Dict[t.Any, t.Any] = {}
@@ -233,18 +235,16 @@
                 merged_updates[normalized_key] = Utils.merge(merge_target[normalized_key], value, options)
             else:
                 merged_updates[key] = value
-        merged = {
-            **merge_target,
-            **merged_updates,
-        }
-        return OverflowDict(merged) if is_overflow_target else merged
+        if merged_updates:
+            merge_target.update(merged_updates)
+        return merge_target
 
     @staticmethod
     def compact(root: t.Dict[str, t.Any]) -> t.Dict[str, t.Any]:
         """
         Remove all `Undefined` sentinels from a nested container in place.
 
-        Traversal is iterative (explicit stack) to avoid deep recursion, and a per‑object `visited` set prevents infinite
+        Traversal is iterative (explicit stack) to avoid deep recursion, and a per-object `visited` set prevents infinite
         loops on cyclic inputs.
 
         Args:
@@ -254,7 +254,7 @@
             The same `root` object for chaining.
         """
         # Depth‑first traversal without recursion.
-        stack: deque[t.Union[t.Dict, t.List]] = deque([root])
+        stack: t.Deque[t.Union[t.Dict, t.List]] = deque([root])
 
         # Track object identities to avoid revisiting in cycles.
         visited: t.Set[int] = {id(root)}
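
Because `merge` now writes into a dict target instead of deep-copying it, callers that need the original preserved must copy up front; the encode path does this in its normalize step. A sketch of the new contract, assuming the semantics shown in the diff:

    import copy

    from qs_codec.utils.utils import Utils

    target = {"a": {"b": 1}}
    result = Utils.merge(target, {"a": {"c": 2}})

    # The dict target is reused: the merge happens in place and returns it.
    assert result is target
    assert target == {"a": {"b": 1, "c": 2}}

    # Callers that need the original preserved must deep-copy first; a shallow
    # copy is not enough because nested dicts are merged in place too.
    original = {"a": {"b": 1}}
    Utils.merge(copy.deepcopy(original), {"a": {"c": 2}})
    assert original == {"a": {"b": 1}}
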
diff --git a/tests/unit/encode_options_test.py b/tests/unit/encode_options_test.py
index b3ef86d..d330f66 100644
--- a/tests/unit/encode_options_test.py
+++ b/tests/unit/encode_options_test.py
@@ -1,3 +1,5 @@
+import pytest
+
 from qs_codec import EncodeOptions
 from qs_codec.utils.encode_utils import EncodeUtils
 
@@ -27,3 +29,10 @@ def test_equality_detects_field_difference(self) -> None:
         lhs = EncodeOptions()
         rhs = EncodeOptions(allow_dots=True)
         assert lhs != rhs
+
+    def test_max_depth_must_be_positive(self) -> None:
+        for value in (0, -1, True, 1.5):
+            with pytest.raises(ValueError, match="max_depth must be a positive integer or None"):
+                EncodeOptions(max_depth=value)  # type: ignore[arg-type]
+
+        assert EncodeOptions(max_depth=5).max_depth == 5
diff --git a/tests/unit/encode_test.py b/tests/unit/encode_test.py
index 6e2a1a8..f42526a 100644
--- a/tests/unit/encode_test.py
+++ b/tests/unit/encode_test.py
@@ -856,6 +856,35 @@ def test_non_circular_duplicated_references_can_still_work(self) -> None:
             == "filters[$and][function]=gte&filters[$and][arguments][function]=hour_of_day&filters[$and][arguments]=0&filters[$and][function]=lte&filters[$and][arguments][function]=hour_of_day&filters[$and][arguments]=23"
         )
 
+    def test_encode_depth_guard_prevents_recursion_errors(self) -> None:
+        data: t.Dict[str, t.Any] = {}
+        current = data
+        for _ in range(5):
+            nxt: t.Dict[str, t.Any] = {}
+            current["a"] = nxt
+            current = nxt
+
+        with pytest.raises(ValueError, match="Maximum encoding depth exceeded"):
+            encode(data, options=EncodeOptions(max_depth=3))
+
+    def test_encode_depth_guard_caps_to_recursion_limit(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        import importlib
+
+        encode_module = importlib.import_module("qs_codec.encode")
+
+        limit = encode_module._DEPTH_MARGIN + 3
+        monkeypatch.setattr(encode_module.sys, "getrecursionlimit", lambda: limit)
+
+        data: t.Dict[str, t.Any] = {}
+        current = data
+        for _ in range(5):
+            nxt: t.Dict[str, t.Any] = {}
+            current["a"] = nxt
+            current = nxt
+
+        with pytest.raises(ValueError, match="Maximum encoding depth exceeded"):
+            encode(data, options=EncodeOptions(max_depth=10_000))
+
     @pytest.mark.parametrize(
         "data, options, expected",
         [
diff --git a/tests/unit/utils_test.py b/tests/unit/utils_test.py
index db3b118..bb02e73 100644
--- a/tests/unit/utils_test.py
+++ b/tests/unit/utils_test.py
@@ -1,6 +1,7 @@
 import copy
 import re
 import typing as t
+from types import MappingProxyType
 
 import pytest
 
@@ -513,6 +514,11 @@ def test_unescape_fallback(self, monkeypatch: pytest.MonkeyPatch) -> None:
     def test_merges_dict_with_list(self) -> None:
         assert Utils.merge({"0": "a"}, [Undefined(), "b"]) == {"0": "a", "1": "b"}
 
+    def test_merge_with_non_dict_mapping_target(self) -> None:
+        target = MappingProxyType({"a": {"b": 1}})
+        source = {"a": {"c": 2}}
+        assert Utils.merge(target, source) == {"a": {"b": 1, "c": 2}}
+
     def test_merges_two_dicts_with_the_same_key_and_different_values(self) -> None:
         assert Utils.merge({"foo": [{"a": "a", "b": "b"}, {"a": "aa"}]}, {"foo": [Undefined(), {"b": "bb"}]}) == {
             "foo": [{"a": "a", "b": "b"}, {"a": "aa", "b": "bb"}]
diff --git a/tests/unit/weakref_test.py b/tests/unit/weakref_test.py
index 84bc93d..9065cb8 100644
--- a/tests/unit/weakref_test.py
+++ b/tests/unit/weakref_test.py
@@ -51,22 +51,25 @@ def test_wrappers_for_different_objects_are_not_equal(self) -> None:
         w2 = WeakWrapper(v2)
         # Different identity → not equal
         assert w1 != w2
-        # hashes match because contents match
-        assert hash(w1) == hash(w2)
 
     def test_repr_includes_value_when_proxy_alive(self) -> None:
         wrapper = WeakWrapper({"k": "v"})
         text = repr(wrapper)
         assert text.startswith("WeakWrapper(") and "'v'" in text
 
-    def test_hash_handles_sets(self) -> None:
-        s1 = {"a", "b"}
-        s2 = {"b", "a"}
-        w1 = WeakWrapper(s1)
-        w2 = WeakWrapper(s2)
-        assert hash(w1) == hash(w2)
+    def test_value_property_returns_original_object(self) -> None:
+        payload: t.Dict[str, t.Any] = {"k": "v"}
+        wrapper = WeakWrapper(payload)
+        assert wrapper.value is payload
 
-    def test_hash_fallback_uses_repr_for_unhashable_object(self) -> None:
+    def test_hash_is_stable_for_mutable_sets(self) -> None:
+        s = {"a", "b"}
+        wrapper = WeakWrapper(s)
+        first = hash(wrapper)
+        s.add("c")
+        assert hash(wrapper) == first
+
+    def test_hash_handles_unhashable_object(self) -> None:
         class Unhashable:
             __hash__ = None  # type: ignore[assignment]
 
@@ -85,24 +88,27 @@ def test_eq_non_wrapper_returns_notimplemented(self) -> None:
         wrapper = WeakWrapper({})
         assert wrapper.__eq__(object()) is NotImplemented
 
-    def test_hash_detects_circular_references(self) -> None:
-        a: t.Dict[str, t.Any] = {}
-        a["self"] = a
-        wrapper = WeakWrapper(a)
-        with pytest.raises(ValueError, match="Circular reference detected"):
-            _ = hash(wrapper)
-
-    def test_hash_detects_excessive_depth(self) -> None:
-        # artificially create a super deep nested list
-        deep: t.List[t.Any] = []
-        current: t.List[t.Any] = deep
-        for _ in range(401):  # 400 is the limit
-            new_list: t.List[t.Any] = []
-            current.append(new_list)
-            current = new_list
-        wrapper = WeakWrapper(deep)
-        with pytest.raises(RecursionError):
-            _ = hash(wrapper)
+    def test_hash_is_identity_based_for_same_wrapped_object(self) -> None:
+        payload: t.Dict[str, t.Any] = {"a": 1}
+        wrapper1 = WeakWrapper(payload)
+        wrapper2 = WeakWrapper(payload)
+
+        first_hash = hash(wrapper1)
+        assert first_hash == hash(wrapper2)
+
+        payload["b"] = 2
+        assert hash(wrapper1) == first_hash
+        assert hash(wrapper2) == first_hash
+
+    def test_weakkey_lookups_survive_mutation(self) -> None:
+        payload: t.Dict[str, t.Any] = {"a": 1}
+        wrapper = WeakWrapper(payload)
+        wk: WeakKeyDictionary = WeakKeyDictionary()
+        wk[wrapper] = "ok"
+
+        payload["b"] = 2
+        assert wk.get(wrapper) == "ok"
+        assert wk.get(WeakWrapper(payload)) == "ok"
 
     def test_deleted_object_raises_reference_error_on_access(self) -> None:
         value: t.Dict[str, t.Any] = {"foo": "bar"}
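
The weak-reference lifecycle the last test exercises is also what keeps the encoder's side channel from leaking: once the wrapped object is collected, its entry disappears. A standalone sketch of that lifecycle (`Proxy` is a stand-in class; CPython's refcounting collects immediately, and the explicit `gc.collect()` covers other runtimes):

    import gc
    from weakref import WeakKeyDictionary, ref


    class Proxy:
        """Weak-referenceable stand-in for the wrapper's internal proxy."""

        __slots__ = ("__weakref__",)


    side_channel: WeakKeyDictionary = WeakKeyDictionary()
    proxy = Proxy()
    side_channel[proxy] = "context"
    observer = ref(proxy)

    assert len(side_channel) == 1
    del proxy
    gc.collect()

    assert observer() is None      # the weakref observes collection...
    assert len(side_channel) == 0  # ...and the side-channel entry vanishes with it
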