Skip to content

Commit e9db220

Browse files
Scan files for packages faster
Add a new optional "Binary" step that enables scanning binary files for packages. Also add the package scan performance improvements from scancode-toolkit. Reference: aboutcode-org/scancode-toolkit#4064. Signed-off-by: Ayan Sinha Mahapatra <asmahapatra@aboutcode.org>
1 parent 8712d48 commit e9db220

3 files changed

Lines changed: 12 additions & 5 deletions

File tree

pyproject.toml

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,9 @@ dependencies = [
5858
# Docker
5959
"container-inspector==33.0.0",
6060
# ScanCode-toolkit
61-
"scancode-toolkit[packages]==32.4.1",
61+
"scancode-toolkit[packages]@git+https://github.com/aboutcode-org/scancode-toolkit.git@6b6a79b8a1c0b9789a466df4c5623ab723890a76",
6262
"extractcode[full]==31.0.0",
63-
"commoncode==32.3.0",
63+
"commoncode==32.4.0",
6464
"Beautifulsoup4[chardet]==4.13.4",
6565
"packageurl-python==0.17.6",
6666
# Workaround issue https://github.com/aboutcode-org/scancode.io/issues/1795
@@ -101,9 +101,7 @@ dependencies = [
101101
# AboutCode pipeline
102102
"aboutcode.pipeline==0.2.1",
103103
# ScoreCode
104-
"scorecode==0.0.4",
105-
# Workaround issue https://github.com/aboutcode-org/scancode.io/issues/1885
106-
"click==8.2.1"
104+
"scorecode==0.0.4"
107105
]
108106

109107
[project.optional-dependencies]

scanpipe/pipelines/inspect_packages.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,16 @@ def steps(cls):
4949
cls.collect_and_create_codebase_resources,
5050
cls.flag_empty_files,
5151
cls.flag_ignored_resources,
52+
cls.scan_binaries,
5253
cls.scan_for_application_packages,
5354
cls.resolve_dependencies,
5455
)
5556

57+
@optional_step("Binary")
58+
def scan_binaries(self):
59+
"""Scan binaries for package and dependency information."""
60+
self.scan_binaries = True
61+
5662
def scan_for_application_packages(self):
5763
"""
5864
Scan resources for package information to add DiscoveredPackage
@@ -61,6 +67,7 @@ def scan_for_application_packages(self):
6167
scancode.scan_for_application_packages(
6268
project=self.project,
6369
assemble=True,
70+
binary=self.scan_binaries or False,
6471
package_only=True,
6572
progress_logger=self.log,
6673
)

scanpipe/pipes/scancode.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -409,6 +409,7 @@ def scan_for_files(project, resource_qs=None, progress_logger=None):
409409
def scan_for_application_packages(
410410
project,
411411
assemble=True,
412+
binary=False,
412413
package_only=False,
413414
resource_qs=None,
414415
progress_logger=logger.info,
@@ -431,6 +432,7 @@ def scan_for_application_packages(
431432

432433
scan_func_kwargs = {
433434
"package_only": package_only,
435+
"binary": binary,
434436
}
435437

436438
# Collect detected Package data and save it to the CodebaseResource it was

0 commit comments

Comments
 (0)