Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 26 additions & 18 deletions mypyc/codegen/emit.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,12 @@

from __future__ import annotations

import pprint
import sys
import textwrap
from collections.abc import Callable
from typing import Final

from mypyc.codegen.cstring import c_string_initializer
from mypyc.codegen.literals import Literals
from mypyc.codegen.literals import Literals, literal_sort_key
from mypyc.common import (
ATTR_PREFIX,
BITMAP_BITS,
Expand Down Expand Up @@ -237,24 +235,16 @@ def attr(self, name: str) -> str:
return ATTR_PREFIX + name

def object_annotation(self, obj: object, line: str) -> str:
"""Build a C comment with an object's string representation.
"""Build a C comment with a literal value's string representation.

If the comment exceeds the line length limit, it's wrapped into a
multiline string (with the extra lines indented to be aligned with
the first line's comment).
This is a debugging aid that makes generated C easier to read.

If it contains illegal characters, an empty string is returned."""
line_width = self._indent + len(line)
formatted = pprint.pformat(obj, compact=True, indent=1, width=max(90 - line_width, 20))
if any(x in formatted for x in ("/*", "*/", "\0")):
If it contains illegal characters or is too long, return an empty string.
"""
formatted = stable_literal_repr(obj)
if any(x in formatted for x in ("/*", "*/", "\0")) or len(formatted) >= 256:
return ""

if "\n" in formatted:
first_line, rest = formatted.split("\n", maxsplit=1)
comment_continued = textwrap.indent(rest, (line_width + 3) * " ")
return f" /* {first_line}\n{comment_continued} */"
else:
return f" /* {formatted} */"
return f" /* {formatted} */"

def emit_line(self, line: str = "", *, ann: object = None) -> None:
if line.startswith("}"):
Expand Down Expand Up @@ -1486,3 +1476,21 @@ def native_function_doc_initializer(func: FuncIR) -> str:
return "NULL"
docstring = f"{text_sig}\n--\n\n"
return c_string_initializer(docstring.encode("ascii", errors="backslashreplace"))


def stable_literal_repr(obj: object) -> str:
    """Render a literal value as a one-line, reproducible string.

    Frozensets and tuples are rendered member by member (recursively);
    every other value is rendered with plain repr(). Frozenset members are
    emitted in literal_sort_key order, because frozenset iteration order is
    hash-seed dependent and would otherwise make the output vary between
    runs.
    """
    if isinstance(obj, frozenset):
        if not obj:
            # Mirror repr(frozenset()) rather than producing "frozenset({})".
            return "frozenset()"
        members = [stable_literal_repr(member) for member in sorted(obj, key=literal_sort_key)]
        return f"frozenset({{{', '.join(members)}}})"

    if isinstance(obj, tuple):
        parts = [stable_literal_repr(member) for member in obj]
        if len(parts) == 1:
            # A one-element tuple needs the trailing comma to read as a tuple.
            return f"({parts[0]},)"
        return f"({', '.join(parts)})"

    return repr(obj)
24 changes: 20 additions & 4 deletions mypyc/codegen/literals.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ def record_literal(self, value: LiteralValue) -> None:
elif isinstance(value, frozenset):
frozenset_literals = self.frozenset_literals
if value not in frozenset_literals:
for item in value:
# Sort members so that we don't depend on frozenset iteration order.
for item in sorted(value, key=literal_sort_key):
assert _is_literal_value(item)
self.record_literal(item)
frozenset_literals[value] = len(frozenset_literals)
Expand Down Expand Up @@ -140,10 +141,14 @@ def encoded_tuple_values(self) -> list[str]:
return self._encode_collection_values(self.tuple_literals)

def encoded_frozenset_values(self) -> list[str]:
return self._encode_collection_values(self.frozenset_literals)
# Ensure deterministic frozenset item order by sorting items.
return self._encode_collection_values(self.frozenset_literals, sort_items=True)

def _encode_collection_values(
self, values: dict[tuple[object, ...], int] | dict[frozenset[object], int]
self,
values: dict[tuple[object, ...], int] | dict[frozenset[object], int],
*,
sort_items: bool = False,
) -> list[str]:
"""Encode tuple/frozenset values into a C array.

Expand All @@ -164,7 +169,8 @@ def _encode_collection_values(
for i in range(count):
value = value_by_index[i]
result.append(str(len(value)))
for item in value:
items = sorted(value, key=literal_sort_key) if sort_items else value
for item in items:
assert _is_literal_value(item)
index = self.literal_index(item)
result.append(str(index))
Expand Down Expand Up @@ -299,3 +305,13 @@ def _encode_complex_values(values: dict[complex, int]) -> list[str]:
result.append(float_to_c(value.real))
result.append(float_to_c(value.imag))
return result


def literal_sort_key(value: object) -> tuple[object, ...]:
    """Build a key that orders literal values deterministically.

    The key is a pair whose first element is a type tag. For tuples and
    frozensets the second element is a tuple of the members' keys (computed
    recursively); for any other value it is repr(value).
    """
    if not isinstance(value, (frozenset, tuple)):
        return (type(value).__name__, repr(value))

    member_keys = [literal_sort_key(member) for member in value]
    if isinstance(value, frozenset):
        # Frozenset iteration order is unpredictable, so order the member
        # keys before using them as part of this key.
        member_keys.sort()
        return ("frozenset", tuple(member_keys))
    return ("tuple", tuple(member_keys))
26 changes: 19 additions & 7 deletions mypyc/test/test_emit.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,23 @@ def test_reg(self) -> None:

def test_object_annotation(self) -> None:
assert self.emitter.object_annotation("hello, world", "line;") == " /* 'hello, world' */"
assert self.emitter.object_annotation(list(range(30)), "line;") == """\
/* [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
23, 24, 25, 26, 27, 28, 29] */"""
assert self.emitter.object_annotation(42, "line;") == " /* 42 */"
assert self.emitter.object_annotation((1, "x", None), "line;") == " /* (1, 'x', None) */"
# Annotations containing illegal C comment characters are dropped.
assert self.emitter.object_annotation("a /* b */ c", "line;") == ""

def test_object_annotation_frozenset_is_deterministic(self) -> None:
    """Check that frozenset annotations do not depend on member order."""
    annotate = self.emitter.object_annotation

    # A bare frozenset written with its members in either order.
    expected_flat = " /* frozenset({'cls', 'self'}) */"
    flat_first = annotate(frozenset({"self", "cls"}), "line;")
    flat_second = annotate(frozenset({"cls", "self"}), "line;")
    assert flat_first == flat_second
    assert flat_second == expected_flat

    # A frozenset nested inside a one-element tuple.
    expected_nested = " /* (frozenset({'a', 'b'}),) */"
    nested_first = annotate((frozenset({"b", "a"}),), "line;")
    nested_second = annotate((frozenset({"a", "b"}),), "line;")
    assert nested_first == nested_second
    assert nested_second == expected_nested

    # The empty frozenset has its own spelling.
    assert annotate(frozenset(), "line;") == " /* frozenset() */"

def test_emit_line(self) -> None:
emitter = self.emitter
Expand All @@ -55,11 +69,9 @@ def test_emit_line(self) -> None:
assert emitter.fragments == ["line;\n", "a {\n", " f();\n", "}\n"]
emitter = Emitter(self.context, {})
emitter.emit_line("CPyStatics[0];", ann="hello, world")
emitter.emit_line("CPyStatics[1];", ann=list(range(30)))
emitter.emit_line("CPyStatics[1];", ann=42)
assert emitter.fragments[0] == "CPyStatics[0]; /* 'hello, world' */\n"
assert emitter.fragments[1] == """\
CPyStatics[1]; /* [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
21, 22, 23, 24, 25, 26, 27, 28, 29] */\n"""
assert emitter.fragments[1] == "CPyStatics[1]; /* 42 */\n"

def test_emit_undefined_value_for_simple_type(self) -> None:
emitter = self.emitter
Expand Down
48 changes: 48 additions & 0 deletions mypyc/test/test_literals.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
_encode_int_values,
_encode_str_values,
format_str_literal,
literal_sort_key,
)


Expand Down Expand Up @@ -88,3 +89,50 @@ def test_tuple_literal(self) -> None:
"7", # Second tuple (length=4)
"0", # Third tuple (length=0)
]

def test_frozenset_literal_index_is_deterministic(self) -> None:
    """Check that member indexes do not depend on frozenset iteration order.

    Iteration order is hash-seed dependent, so index assignment must not
    rely on it if the generated code is to be reproducible.
    """
    first = Literals()
    first.record_literal(frozenset({"self", "cls"}))
    second = Literals()
    second.record_literal(frozenset({"cls", "self"}))

    for name in ("self", "cls"):
        assert first.literal_index(name) == second.literal_index(name)

    # Members are recorded in sorted order, so "cls" comes before "self".
    assert first.literal_index("cls") == 3
    assert first.literal_index("self") == 4

def test_frozenset_encoding_is_deterministic(self) -> None:
    """Check that equal frozensets encode identically however they are written."""
    encodings = []
    for members in ({"self", "cls"}, {"cls", "self"}):
        literals = Literals()
        literals.record_literal(frozenset(members))
        encodings.append(literals.encoded_frozenset_values())
    assert encodings[0] == encodings[1]

def test_literal_sort_key_is_total_over_types(self) -> None:
    """Check that values of mixed, mutually unorderable types can be sorted."""
    values = ["x", b"y", 1, None, (1, 2), frozenset({1, 2})]
    forward = sorted(values, key=literal_sort_key)
    backward = sorted(values[::-1], key=literal_sort_key)
    # The result must not depend on the order of the input.
    assert forward == backward

def test_literal_sort_key_with_frozenset(self) -> None:
    """Check that the sort key ignores the order in which frozenset members are written."""
    # Each pair holds two spellings of the same value.
    equal_pairs = [
        (frozenset({"a", "b"}), frozenset({"b", "a"})),
        ((frozenset({"a", "b"}),), (frozenset({"b", "a"}),)),
        (
            frozenset({"a", frozenset({"b", "c"})}),
            frozenset({frozenset({"c", "b"}), "a"}),
        ),
    ]
    for left, right in equal_pairs:
        assert literal_sort_key(left) == literal_sort_key(right)

def test_nested_frozenset_literal_index_is_deterministic(self) -> None:
    """Check that nested frozensets give the same indexes and encoding in any order."""
    first = Literals()
    first.record_literal(frozenset({frozenset({"a", "b"}), frozenset({"c", "d"})}))
    second = Literals()
    second.record_literal(frozenset({frozenset({"d", "c"}), frozenset({"b", "a"})}))

    for name in ("a", "b", "c", "d"):
        assert first.literal_index(name) == second.literal_index(name)
    assert first.encoded_frozenset_values() == second.encoded_frozenset_values()
Loading