diff --git a/CHANGELOG.md b/CHANGELOG.md index 596c9e12a..eb14fdbd8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ Full release notes with details on each version: [GitHub Releases](https://githu ## Unreleased +- Feat: Java standard-library types are no longer emitted as `references` noise. A skip list (`_JAVA_BUILTIN_TYPES`) now suppresses ubiquitous `java.lang`/`java.util`/`java.io`/`java.time`/`java.util.{stream,function,concurrent}`/`java.math`/`java.nio.file` type names (`String`, `List`, `Map`, `Optional`, `Integer`, `Exception`, …) at the type-ref walker — they never resolve to a project node, so edges to them were pure noise (mirrors `_GO_PREDECLARED_TYPES`/`_PYTHON_ANNOTATION_NOISE`). Nested user-type generic arguments still resolve: `List<Item>` drops the `List` container edge but keeps `Item`. User-defined types and boxed-scalar primitives are unaffected. - Fix: Elixir multi-alias brace form now emits imports edges (#1577, thanks @Synvoya). `alias Foo.{Bar, Baz}` produced no imports (the handler only matched a bare single alias); it now expands to one edge per member module. Single `alias`/`import`/`require`/`use` unchanged. - Fix: Fortran function invocations now emit `calls` edges (#1578, thanks @Synvoya). Only `call sub(...)` (subroutine) calls were captured; `y = f(x)` function calls (a `call_expression`) were dropped. Resolved against procedures defined in the file so array indexing (`arr(i)`, same `name(...)` syntax) can't fabricate a spurious call. - Fix: Rust enum variant payload types now emit `references` edges (#1579, thanks @Synvoya). `Click(Logger)` / `Resize { size: Dim }` referenced nothing — `enum_item` had no type-reference handler (struct/trait did). Both tuple and struct variant field types now resolve. 
diff --git a/graphify/extract.py b/graphify/extract.py index 994198851..000340cbc 100644 --- a/graphify/extract.py +++ b/graphify/extract.py @@ -868,6 +868,69 @@ def _java_type_parameters_in_scope(node, source: bytes) -> frozenset[str]: return frozenset(names) +# java.lang (auto-imported) plus the ubiquitous java.util / java.io / java.time / +# java.util.{stream,function,concurrent} / java.math / java.nio.file types that +# appear as field, parameter, return, and generic-argument annotations. They never +# resolve to a project node, so emitting `references` edges to them is pure noise +# (mirrors _GO_PREDECLARED_TYPES / _PYTHON_ANNOTATION_NOISE). Suppressed at the +# type-ref walker so they are never created as nodes or emitted as edges. The +# boxed-scalar/`void` primitives are already dropped by grammar node type above; +# these are the class/interface names the grammar reports as identifiers. +_JAVA_BUILTIN_TYPES = frozenset({ + # java.lang — core + "Object", "String", "CharSequence", "StringBuilder", "StringBuffer", + "Number", "Byte", "Short", "Integer", "Long", "Float", "Double", + "Boolean", "Character", "Void", "Class", "Enum", "Record", "Math", + "System", "Thread", "Runnable", "Comparable", "Iterable", "Cloneable", + "AutoCloseable", "Appendable", "Readable", "Process", "ProcessBuilder", + "Runtime", "Package", "ThreadLocal", "InheritableThreadLocal", + # java.lang — throwables + "Throwable", "Exception", "RuntimeException", "Error", + "IllegalArgumentException", "IllegalStateException", "NullPointerException", + "IndexOutOfBoundsException", "ArrayIndexOutOfBoundsException", + "ClassCastException", "NumberFormatException", "ArithmeticException", + "UnsupportedOperationException", "InterruptedException", + "CloneNotSupportedException", "SecurityException", "StackOverflowError", + "OutOfMemoryError", "AssertionError", + # java.util — collections & core + "Collection", "List", "ArrayList", "LinkedList", "Vector", "Stack", + "Set", "HashSet", "LinkedHashSet", 
"TreeSet", "SortedSet", "NavigableSet", + "EnumSet", "Map", "HashMap", "LinkedHashMap", "TreeMap", "SortedMap", + "NavigableMap", "Hashtable", "EnumMap", "Properties", "Queue", "Deque", + "ArrayDeque", "PriorityQueue", "Iterator", "ListIterator", "Comparator", + "Optional", "OptionalInt", "OptionalLong", "OptionalDouble", "Collections", + "Arrays", "Objects", "Date", "Calendar", "Random", "UUID", "Scanner", + "StringJoiner", "StringTokenizer", "BitSet", "Spliterator", "Locale", + "NoSuchElementException", "ConcurrentModificationException", + # java.util.stream + "Stream", "IntStream", "LongStream", "DoubleStream", "Collector", + "Collectors", + # java.util.function + "Function", "BiFunction", "Consumer", "BiConsumer", "Supplier", + "Predicate", "BiPredicate", "UnaryOperator", "BinaryOperator", + "IntFunction", "ToIntFunction", "ToLongFunction", "ToDoubleFunction", + # java.util.concurrent + "Callable", "Future", "CompletableFuture", "CompletionStage", "Executor", + "ExecutorService", "Executors", "ScheduledExecutorService", "TimeUnit", + "ConcurrentHashMap", "ConcurrentMap", "CopyOnWriteArrayList", + "BlockingQueue", "CountDownLatch", "Semaphore", "CyclicBarrier", + "AtomicInteger", "AtomicLong", "AtomicBoolean", "AtomicReference", + # java.time + "Instant", "Duration", "Period", "LocalDate", "LocalTime", "LocalDateTime", + "ZonedDateTime", "OffsetDateTime", "ZoneId", "ZoneOffset", "DayOfWeek", + "Month", "Year", "Clock", "DateTimeFormatter", + # java.io / java.nio.file + "IOException", "UncheckedIOException", "FileNotFoundException", "File", + "InputStream", "OutputStream", "Reader", "Writer", "BufferedReader", + "BufferedWriter", "InputStreamReader", "OutputStreamWriter", "FileReader", + "FileWriter", "PrintStream", "PrintWriter", "ByteArrayInputStream", + "ByteArrayOutputStream", "Serializable", "Closeable", "Path", "Paths", + "Files", + # java.math + "BigDecimal", "BigInteger", +}) + + def _java_collect_type_refs( node, source: bytes, @@ -885,19 +948,23 @@ def 
_java_collect_type_refs( return if t == "type_identifier": name = _read_text(node, source) - if name and name not in skip: + if name and name not in skip and name not in _JAVA_BUILTIN_TYPES: out.append((name, "generic_arg" if generic else "type")) return if t == "scoped_type_identifier": text = _read_text(node, source).rsplit(".", 1)[-1] - if text: + if text and text not in _JAVA_BUILTIN_TYPES: out.append((text, "generic_arg" if generic else "type")) return if t == "generic_type": for c in node.children: if c.type in ("type_identifier", "scoped_type_identifier"): text = _read_text(c, source).rsplit(".", 1)[-1] - if text and (c.type == "scoped_type_identifier" or text not in skip): + if ( + text + and text not in _JAVA_BUILTIN_TYPES + and (c.type == "scoped_type_identifier" or text not in skip) + ): out.append((text, "generic_arg" if generic else "type")) break for c in node.children: diff --git a/tests/test_java_type_resolution.py b/tests/test_java_type_resolution.py index ccd97d9bb..35229776e 100644 --- a/tests/test_java_type_resolution.py +++ b/tests/test_java_type_resolution.py @@ -150,6 +150,57 @@ def test_java_type_parameters_do_not_resolve_to_real_class(tmp_path: Path): assert ("Generic", "references", "T") not in references +def test_java_builtin_library_types_not_emitted_as_references(tmp_path: Path): + # Built-in / standard-library types (java.lang, java.util, …) used as field, + # parameter, or return types carry no useful graph meaning: they never resolve + # to a project node, so emitting `references` edges to them is pure noise. 
+ svc = _write( + tmp_path / "Svc.java", + "package com.app;\n" + "import java.util.List;\n" + "import java.util.Map;\n" + "public class Svc {\n" + " private String name;\n" + " private List ids;\n" + " public Map lookup(Long id) { return null; }\n" + " public java.util.Optional flag() { return null; }\n" + "}\n", + ) + result = extract([svc], cache_root=tmp_path) + + ref_targets = { + by_label + for (src, rel, by_label) in _label_edges(result, {"references"}) + } + for builtin in ( + "String", "Integer", "Map", "Object", "Long", + "List", "Optional", "Boolean", + ): + assert builtin not in ref_targets, ( + f"builtin/library type {builtin!r} should not be a references target" + ) + + +def test_java_user_types_still_emit_references(tmp_path: Path): + # Guard against over-skipping: a user-defined type sharing the field/return + # shape must still resolve to a real `references` edge. + dto = _write(tmp_path / "OrderDto.java", + "package com.app;\npublic class OrderDto {}\n") + svc = _write( + tmp_path / "OrderSvc.java", + "package com.app;\n" + "public class OrderSvc {\n" + " private java.util.List orders;\n" + " public OrderDto first() { return null; }\n" + "}\n", + ) + result = extract([dto, svc], cache_root=tmp_path) + ref_targets = { + by_label for (_, _, by_label) in _label_edges(result, {"references"}) + } + assert "OrderDto" in ref_targets, "user type OrderDto must still emit references" + + def test_java_cross_file_constructor_call_resolves(tmp_path: Path): # #1373: `new Foo(...)` in a method body must produce a cross-file edge to the # Foo definition. 
Foo is NOT used as a return type here, so the edge can only diff --git a/tests/test_languages.py b/tests/test_languages.py index c082e2d03..2363bfede 100644 --- a/tests/test_languages.py +++ b/tests/test_languages.py @@ -463,7 +463,9 @@ def test_java_record_component_type_references(tmp_path): result = extract_java(source) assert ("Order", "Payload") in _edge_labels(result, "references", "field") - assert ("Order", "List") in _edge_labels(result, "references", "field") + # `List` is a java.util library type: skipped as noise, so only its user-type + # generic argument (`Item`) survives, not the container itself. + assert ("Order", "List") not in _edge_labels(result, "references") assert ("Order", "Item") in _edge_labels(result, "references", "generic_arg") assert ("Order", "Attachment") in _edge_labels(result, "references", "field") diff --git a/uv.lock b/uv.lock index fcd87008b..5ef0d536a 100644 --- a/uv.lock +++ b/uv.lock @@ -1090,7 +1090,7 @@ wheels = [ [[package]] name = "graphifyy" -version = "0.9.3" +version = "0.9.4" source = { editable = "." } dependencies = [ { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },