diff --git a/AUTHORS.rst b/AUTHORS.rst
index 75b0533f921..6aab83e6717 100644
--- a/AUTHORS.rst
+++ b/AUTHORS.rst
@@ -105,3 +105,4 @@ The following organizations or individuals have contributed to ScanCode:
 - Yash Sharma @yasharmaster
 - Yunus Rahbar @yns88
 - Stefano Zacchiroli @zacchiro
+- Dibyajyoti Mandal @dibyajyoti-mandal
diff --git a/src/packagedcode/__init__.py b/src/packagedcode/__init__.py
index 9cc46d0e09b..559b85d6eeb 100644
--- a/src/packagedcode/__init__.py
+++ b/src/packagedcode/__init__.py
@@ -40,6 +40,7 @@
 from packagedcode import swift
 from packagedcode import win_pe
 from packagedcode import windows
+from packagedcode import pylock
 
 if on_linux:
     from packagedcode import msi
@@ -181,6 +182,9 @@
     pypi.PythonSetupPyHandler,
     pypi.SetupCfgHandler,
 
+    # Handler for pylock.toml
+    pylock.PylockTomlHandler,
+
     readme.ReadmeHandler,
 
     rpm.RpmArchiveHandler,
diff --git a/src/packagedcode/pylock.py b/src/packagedcode/pylock.py
new file mode 100644
index 00000000000..de10cacf1ca
--- /dev/null
+++ b/src/packagedcode/pylock.py
@@ -0,0 +1,269 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# ScanCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/nexB/scancode-toolkit for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+"""
+Detect and collect Python pylock.toml lockfile information.
+Support for PEP 751: A file format to record Python dependencies for installation reproducibility.
+See https://packaging.python.org/en/latest/specifications/pylock-toml/
+"""
+
+import logging
+import os
+import sys
+
+from packageurl import PackageURL
+
+from packagedcode import models
+from packagedcode.pypi import BaseExtractedPythonLayout, get_pypi_urls
+
+try:
+    import tomli as tomllib
+except ImportError:
+    import tomllib
+
+TRACE = os.environ.get("SCANCODE_DEBUG_PACKAGE", False)
+
+# Checksum algorithms that map one-to-one to PackageData checksum fields.
+CHECKSUM_FIELDS = ("md5", "sha1", "sha256", "sha512")
+
+# Top-level pylock.toml keys recorded in the lockfile extra_data, as
+# (TOML key, extra_data key) pairs.
+LOCKFILE_EXTRA_DATA_KEYS = (
+    ("lock-version", "lock_version"),
+    ("requires-python", "requires_python"),
+    ("created-by", "created_by"),
+)
+
+
+def logger_debug(*args):
+    pass
+
+
+logger = logging.getLogger(__name__)
+
+if TRACE:
+    logging.basicConfig(stream=sys.stdout)
+    logger.setLevel(logging.DEBUG)
+
+    def logger_debug(*args):
+        return logger.debug(
+            " ".join(a if isinstance(a, str) else repr(a) for a in args)
+        )
+
+
+def get_vcs_download_url(vcs):
+    """
+    Return a "<type>+<url>@<commit-id>" URL built from the ``vcs`` table of a
+    locked package, or None if any of these three values is missing.
+    """
+    vcs_type = vcs.get("type")
+    vcs_url = vcs.get("url")
+    commit_id = vcs.get("commit-id")
+    if vcs_type and vcs_url and commit_id:
+        return f"{vcs_type}+{vcs_url}@{commit_id}"
+
+
+def get_artifact_hashes(artifact):
+    """
+    Return a new mapping of the ``hashes`` of an ``artifact`` table, or an
+    empty mapping if they are missing or not a table.
+    """
+    hashes = artifact.get("hashes")
+    return dict(hashes) if isinstance(hashes, dict) else {}
+
+
+def get_artifact_url_and_hashes(package):
+    """
+    Return a (download_url, hashes) tuple for a locked ``package`` table.
+
+    Both values are always taken from the same file entry so that the
+    checksums describe the file found at the download URL. Entries are
+    considered in this order: sdist, wheels, archive. The first entry with a
+    URL wins. If no entry has a URL, return None and the hashes of the first
+    entry that has some.
+    """
+    candidates = [package.get("sdist")]
+    wheels = package.get("wheels")
+    if isinstance(wheels, list):
+        candidates.extend(wheels)
+    candidates.append(package.get("archive"))
+    # Ignore missing or malformed entries rather than failing on them.
+    artifacts = [entry for entry in candidates if isinstance(entry, dict)]
+
+    for artifact in artifacts:
+        url = artifact.get("url")
+        if url:
+            return url, get_artifact_hashes(artifact)
+
+    for artifact in artifacts:
+        hashes = get_artifact_hashes(artifact)
+        if hashes:
+            return None, hashes
+
+    return None, {}
+
+
+def get_declared_dependencies(package, package_type):
+    """
+    Return a list of DependentPackage mappings for the ``dependencies`` of a
+    locked ``package`` table, using ``package_type`` as the purl type.
+    Entries that are not tables or have no name are skipped.
+    """
+    declared = package.get("dependencies")
+    if not isinstance(declared, list):
+        return []
+
+    dependencies = []
+    for dep in declared:
+        if not isinstance(dep, dict):
+            continue
+
+        dep_name = dep.get("name")
+        if not dep_name:
+            continue
+
+        # When present, "version" is the locked version of the dependency: keep
+        # it in the purl so the entry points to the exact locked package.
+        dep_version = dep.get("version")
+        dep_purl = PackageURL(
+            type=package_type,
+            name=dep_name,
+            version=dep_version if isinstance(dep_version, str) else None,
+        )
+
+        dependency = models.DependentPackage(
+            purl=dep_purl.to_string(),
+            extracted_requirement=dep_version,
+            scope="dependencies",
+            is_runtime=True,
+            is_optional=False,
+            is_direct=True,
+            is_pinned=True,
+        )
+        dependencies.append(dependency.to_dict())
+
+    return dependencies
+
+
+class PylockTomlHandler(BaseExtractedPythonLayout):
+    """
+    Handle pylock.toml lockfiles as specified by PEP 751.
+    """
+
+    datasource_id = "pypi_pylock_toml"
+    path_patterns = ("*pylock.toml",)
+    default_package_type = "pypi"
+    default_primary_language = "Python"
+    description = "Python pylock.toml lockfile (PEP 751)"
+    documentation_url = (
+        "https://packaging.python.org/en/latest/specifications/pylock-toml/"
+    )
+
+    @classmethod
+    def parse(cls, location, package_only=False):
+        """
+        Yield one PackageData for the pylock.toml file at ``location``.
+
+        Each named and versioned ``[[packages]]`` entry is reported as a
+        pinned dependency of this package with its own resolved package.
+        Nothing is yielded if the lockfile lists no packages.
+        """
+        with open(location, "rb") as fp:
+            toml_data = tomllib.load(fp)
+
+        packages = toml_data.get("packages")
+        if not packages or not isinstance(packages, list):
+            return
+
+        dependencies = []
+
+        for package in packages:
+            if not isinstance(package, dict):
+                continue
+
+            name = package.get("name")
+            version = package.get("version")
+            if not name or not version:
+                continue
+
+            extra_data = {}
+            hash_data = {}
+
+            vcs = package.get("vcs")
+            if not isinstance(vcs, dict):
+                vcs = None
+
+            download_url = get_vcs_download_url(vcs) if vcs else None
+            if vcs:
+                extra_data["vcs"] = vcs
+
+            # Only use file entries when there is no VCS URL, so that checksums
+            # are never attached to a download URL they were not computed for.
+            if not download_url:
+                download_url, hash_data = get_artifact_url_and_hashes(package)
+
+            if hash_data:
+                extra_data["hashes"] = hash_data
+
+            markers = package.get("marker")
+            if markers:
+                extra_data["markers"] = markers
+
+            package_data = dict(
+                datasource_id=cls.datasource_id,
+                type=cls.default_package_type,
+                primary_language=cls.default_primary_language,
+                name=name,
+                version=version,
+                is_virtual=True,
+                dependencies=get_declared_dependencies(
+                    package=package,
+                    package_type=cls.default_package_type,
+                ),
+                extra_data=extra_data,
+                download_url=download_url,
+                **get_pypi_urls(name, version),
+            )
+
+            for algorithm in CHECKSUM_FIELDS:
+                checksum = hash_data.get(algorithm)
+                if checksum:
+                    package_data[algorithm] = checksum
+
+            resolved_package = models.PackageData.from_data(package_data, package_only)
+
+            dependency = models.DependentPackage(
+                purl=resolved_package.purl,
+                extracted_requirement=version,
+                scope="dependencies",
+                is_runtime=True,
+                is_optional=False,
+                is_direct=False,
+                is_pinned=True,
+                resolved_package=resolved_package.to_dict(),
+            )
+            dependencies.append(dependency.to_dict())
+
+        lockfile_extra_data = {}
+        for toml_key, data_key in LOCKFILE_EXTRA_DATA_KEYS:
+            value = toml_data.get(toml_key)
+            if value:
+                lockfile_extra_data[data_key] = value
+
+        root_package_data = dict(
+            datasource_id=cls.datasource_id,
+            type=cls.default_package_type,
+            primary_language=cls.default_primary_language,
+            name="pylock-toml-project",
+            version=None,
+            extra_data=lockfile_extra_data,
+            dependencies=dependencies,
+        )
+
+        yield models.PackageData.from_data(root_package_data, package_only)
diff --git a/tests/packagedcode/data/plugin/plugins_list_linux.txt b/tests/packagedcode/data/plugin/plugins_list_linux.txt
index e24512dfd91..55d083911a0 100755
--- a/tests/packagedcode/data/plugin/plugins_list_linux.txt
+++ b/tests/packagedcode/data/plugin/plugins_list_linux.txt
@@ -762,6 +762,13 @@ Package type: pypi
   description: Python poetry pyproject.toml
   path_patterns: '*pyproject.toml'
 --------------------------------------------
+Package type: pypi
+  datasource_id: pypi_pylock_toml
+  documentation URL: https://packaging.python.org/en/latest/specifications/pylock-toml/
+  primary language: Python
+  description: Python pylock.toml lockfile (PEP 751)
+  path_patterns: '*pylock.toml'
+--------------------------------------------
 Package type: pypi
   datasource_id: pypi_pyproject_toml
   documentation URL: https://packaging.python.org/en/latest/specifications/pyproject-toml/
diff --git a/tests/packagedcode/data/pylock/pylock.toml b/tests/packagedcode/data/pylock/pylock.toml
new file mode 100644
index 00000000000..c596890df1a
--- /dev/null
+++ b/tests/packagedcode/data/pylock/pylock.toml
@@ -0,0 +1,60 @@
+lock-version = '1.0'
+environments = ["sys_platform == 'win32'", "sys_platform == 'linux'"]
+requires-python = '== 3.12'
+created-by = 'mousebender'
+
+[[packages]]
+name = 'attrs'
+version = '25.1.0'
+requires-python = '>= 3.8'
+
+    [[packages.wheels]]
+    name = 'attrs-25.1.0-py3-none-any.whl'
+    upload-time = 2025-01-25T11:30:10.164985+00:00
+    url = 'https://files.pythonhosted.org/packages/fc/30/d4986a882011f9df997a55e6becd864812ccfcd821d64aac8570ee39f719/attrs-25.1.0-py3-none-any.whl'
+    size = 63152
+    hashes = {sha256 =
'c75a69e28a550a7e93789579c22aa26b0f5b83b75dc4e08fe092980051e1090a'} + + [[packages.attestation-identities]] + environment = 'release-pypi' + kind = 'GitHub' + repository = 'python-attrs/attrs' + workflow = 'pypi-package.yml' + +[[packages]] +name = 'cattrs' +version = '24.1.2' +requires-python = '>= 3.8' +dependencies = [ + {name = 'attrs'}, +] + + [[packages.wheels]] + name = 'cattrs-24.1.2-py3-none-any.whl' + upload-time = 2024-09-22T14:58:34.812643+00:00 + url = 'https://files.pythonhosted.org/packages/c8/d5/867e75361fc45f6de75fe277dd085627a9db5ebb511a87f27dc1396b5351/cattrs-24.1.2-py3-none-any.whl' + size = 66446 + hashes = {sha256 = '67c7495b760168d931a10233f979b28dc04daf853b30752246f4f8471c6d68d0'} + +[[packages]] +name = 'numpy' +version = '2.2.3' +requires-python = '>= 3.10' + + [[packages.wheels]] + name = 'numpy-2.2.3-cp312-cp312-win_amd64.whl' + upload-time = 2025-02-13T16:51:21.821880+00:00 + url = 'https://files.pythonhosted.org/packages/42/6e/55580a538116d16ae7c9aa17d4edd56e83f42126cb1dfe7a684da7925d2c/numpy-2.2.3-cp312-cp312-win_amd64.whl' + size = 12626357 + hashes = {sha256 = '83807d445817326b4bcdaaaf8e8e9f1753da04341eceec705c001ff342002e5d'} + + [[packages.wheels]] + name = 'numpy-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl' + upload-time = 2025-02-13T16:50:00.079662+00:00 + url = 'https://files.pythonhosted.org/packages/39/04/78d2e7402fb479d893953fb78fa7045f7deb635ec095b6b4f0260223091a/numpy-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl' + size = 16116679 + hashes = {sha256 = '3b787adbf04b0db1967798dba8da1af07e387908ed1553a0d6e74c084d1ceafe'} + +[tool.mousebender] +command = ['.', 'lock', '--platform', 'cpython3.12-windows-x64', '--platform', 'cpython3.12-manylinux2014-x64', 'cattrs', 'numpy'] +run-on = 2025-03-06T12:28:57.760769 \ No newline at end of file diff --git a/tests/packagedcode/data/pylock/pylock_toml-expected.json b/tests/packagedcode/data/pylock/pylock_toml-expected.json new file mode 100644 
index 00000000000..029f7d780cf --- /dev/null +++ b/tests/packagedcode/data/pylock/pylock_toml-expected.json @@ -0,0 +1,237 @@ +[ + { + "type": "pypi", + "namespace": null, + "name": "pylock-toml-project", + "version": null, + "qualifiers": {}, + "subpath": null, + "primary_language": "Python", + "description": null, + "release_date": null, + "parties": [], + "keywords": [], + "homepage_url": null, + "download_url": null, + "size": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha512": null, + "bug_tracking_url": null, + "code_view_url": null, + "vcs_url": null, + "copyright": null, + "holder": null, + "declared_license_expression": null, + "declared_license_expression_spdx": null, + "license_detections": [], + "other_license_expression": null, + "other_license_expression_spdx": null, + "other_license_detections": [], + "extracted_license_statement": null, + "notice_text": null, + "source_packages": [], + "file_references": [], + "is_private": false, + "is_virtual": false, + "extra_data": { + "lock_version": "1.0", + "requires_python": "== 3.12", + "created_by": "mousebender" + }, + "dependencies": [ + { + "purl": "pkg:pypi/attrs@25.1.0", + "extracted_requirement": "25.1.0", + "scope": "dependencies", + "is_runtime": true, + "is_optional": false, + "is_pinned": true, + "is_direct": false, + "resolved_package": { + "type": "pypi", + "namespace": null, + "name": "attrs", + "version": "25.1.0", + "qualifiers": {}, + "subpath": null, + "primary_language": "Python", + "description": null, + "release_date": null, + "parties": [], + "keywords": [], + "homepage_url": null, + "download_url": "https://files.pythonhosted.org/packages/fc/30/d4986a882011f9df997a55e6becd864812ccfcd821d64aac8570ee39f719/attrs-25.1.0-py3-none-any.whl", + "size": null, + "sha1": null, + "md5": null, + "sha256": "c75a69e28a550a7e93789579c22aa26b0f5b83b75dc4e08fe092980051e1090a", + "sha512": null, + "bug_tracking_url": null, + "code_view_url": null, + "vcs_url": null, + "copyright": null, 
+ "holder": null, + "declared_license_expression": null, + "declared_license_expression_spdx": null, + "license_detections": [], + "other_license_expression": null, + "other_license_expression_spdx": null, + "other_license_detections": [], + "extracted_license_statement": null, + "notice_text": null, + "source_packages": [], + "file_references": [], + "is_private": false, + "is_virtual": true, + "extra_data": { + "hashes": { + "sha256": "c75a69e28a550a7e93789579c22aa26b0f5b83b75dc4e08fe092980051e1090a" + } + }, + "dependencies": [], + "repository_homepage_url": "https://pypi.org/project/attrs", + "repository_download_url": "https://pypi.org/packages/source/a/attrs/attrs-25.1.0.tar.gz", + "api_data_url": "https://pypi.org/pypi/attrs/25.1.0/json", + "datasource_id": "pypi_pylock_toml", + "purl": "pkg:pypi/attrs@25.1.0" + }, + "extra_data": {} + }, + { + "purl": "pkg:pypi/cattrs@24.1.2", + "extracted_requirement": "24.1.2", + "scope": "dependencies", + "is_runtime": true, + "is_optional": false, + "is_pinned": true, + "is_direct": false, + "resolved_package": { + "type": "pypi", + "namespace": null, + "name": "cattrs", + "version": "24.1.2", + "qualifiers": {}, + "subpath": null, + "primary_language": "Python", + "description": null, + "release_date": null, + "parties": [], + "keywords": [], + "homepage_url": null, + "download_url": "https://files.pythonhosted.org/packages/c8/d5/867e75361fc45f6de75fe277dd085627a9db5ebb511a87f27dc1396b5351/cattrs-24.1.2-py3-none-any.whl", + "size": null, + "sha1": null, + "md5": null, + "sha256": "67c7495b760168d931a10233f979b28dc04daf853b30752246f4f8471c6d68d0", + "sha512": null, + "bug_tracking_url": null, + "code_view_url": null, + "vcs_url": null, + "copyright": null, + "holder": null, + "declared_license_expression": null, + "declared_license_expression_spdx": null, + "license_detections": [], + "other_license_expression": null, + "other_license_expression_spdx": null, + "other_license_detections": [], + 
"extracted_license_statement": null, + "notice_text": null, + "source_packages": [], + "file_references": [], + "is_private": false, + "is_virtual": true, + "extra_data": { + "hashes": { + "sha256": "67c7495b760168d931a10233f979b28dc04daf853b30752246f4f8471c6d68d0" + } + }, + "dependencies": [ + { + "purl": "pkg:pypi/attrs", + "extracted_requirement": null, + "scope": "dependencies", + "is_runtime": true, + "is_optional": false, + "is_pinned": true, + "is_direct": true, + "resolved_package": {}, + "extra_data": {} + } + ], + "repository_homepage_url": "https://pypi.org/project/cattrs", + "repository_download_url": "https://pypi.org/packages/source/c/cattrs/cattrs-24.1.2.tar.gz", + "api_data_url": "https://pypi.org/pypi/cattrs/24.1.2/json", + "datasource_id": "pypi_pylock_toml", + "purl": "pkg:pypi/cattrs@24.1.2" + }, + "extra_data": {} + }, + { + "purl": "pkg:pypi/numpy@2.2.3", + "extracted_requirement": "2.2.3", + "scope": "dependencies", + "is_runtime": true, + "is_optional": false, + "is_pinned": true, + "is_direct": false, + "resolved_package": { + "type": "pypi", + "namespace": null, + "name": "numpy", + "version": "2.2.3", + "qualifiers": {}, + "subpath": null, + "primary_language": "Python", + "description": null, + "release_date": null, + "parties": [], + "keywords": [], + "homepage_url": null, + "download_url": "https://files.pythonhosted.org/packages/42/6e/55580a538116d16ae7c9aa17d4edd56e83f42126cb1dfe7a684da7925d2c/numpy-2.2.3-cp312-cp312-win_amd64.whl", + "size": null, + "sha1": null, + "md5": null, + "sha256": "83807d445817326b4bcdaaaf8e8e9f1753da04341eceec705c001ff342002e5d", + "sha512": null, + "bug_tracking_url": null, + "code_view_url": null, + "vcs_url": null, + "copyright": null, + "holder": null, + "declared_license_expression": null, + "declared_license_expression_spdx": null, + "license_detections": [], + "other_license_expression": null, + "other_license_expression_spdx": null, + "other_license_detections": [], + 
"extracted_license_statement": null,
+          "notice_text": null,
+          "source_packages": [],
+          "file_references": [],
+          "is_private": false,
+          "is_virtual": true,
+          "extra_data": {
+            "hashes": {
+              "sha256": "83807d445817326b4bcdaaaf8e8e9f1753da04341eceec705c001ff342002e5d"
+            }
+          },
+          "dependencies": [],
+          "repository_homepage_url": "https://pypi.org/project/numpy",
+          "repository_download_url": "https://pypi.org/packages/source/n/numpy/numpy-2.2.3.tar.gz",
+          "api_data_url": "https://pypi.org/pypi/numpy/2.2.3/json",
+          "datasource_id": "pypi_pylock_toml",
+          "purl": "pkg:pypi/numpy@2.2.3"
+        },
+        "extra_data": {}
+      }
+    ],
+    "repository_homepage_url": null,
+    "repository_download_url": null,
+    "api_data_url": null,
+    "datasource_id": "pypi_pylock_toml",
+    "purl": "pkg:pypi/pylock-toml-project"
+  }
+]
\ No newline at end of file
diff --git a/tests/packagedcode/test_pylock.py b/tests/packagedcode/test_pylock.py
new file mode 100644
index 00000000000..a8c5fdfe940
--- /dev/null
+++ b/tests/packagedcode/test_pylock.py
@@ -0,0 +1,31 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# ScanCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/nexB/scancode-toolkit for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+import os
+
+from packages_test_utils import PackageTester
+
+from packagedcode import pylock
+
+
+class TestPylockTomlHandler(PackageTester):
+    """
+    Check pylock.toml parsing against the stored expected results.
+    """
+
+    # Test file locations are resolved relative to this data directory.
+    test_data_dir = os.path.join(os.path.dirname(__file__), "data")
+
+    def test_pylock_parse(self):
+        # Parse the sample lockfile and compare it with the expected JSON,
+        # without regenerating the fixture.
+        lockfile_loc = self.get_test_loc("pylock/pylock.toml")
+        expected_loc = self.get_test_loc("pylock/pylock_toml-expected.json")
+        packages_data = pylock.PylockTomlHandler.parse(lockfile_loc)
+        self.check_packages_data(packages_data, expected_loc, regen=False)