Skip to content
Merged
1 change: 1 addition & 0 deletions component_catalog/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -884,6 +884,7 @@ class PackageAdmin(
"parties",
"datasource_id",
"file_references",
"package_content",
)
},
),
Expand Down
2 changes: 2 additions & 0 deletions component_catalog/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -618,6 +618,7 @@ class PackageSerializer(
required=False,
scope_content_type=True,
)
package_content = serializers.ReadOnlyField(source="get_package_content_display")
collect_data = serializers.BooleanField(
write_only=True,
required=False,
Expand Down Expand Up @@ -687,6 +688,7 @@ class Meta:
"parties",
"datasource_id",
"file_references",
"package_content",
"external_references",
"created_date",
"last_modified_date",
Expand Down
4 changes: 3 additions & 1 deletion component_catalog/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,7 @@ class Meta:
"version",
"qualifiers",
"subpath",
"package_content",
"collect_data",
]
widgets = {
Expand Down Expand Up @@ -407,7 +408,7 @@ def helper(self):
HTML("<hr>"),
Group("description", "keywords"),
Group("primary_language", "cpe"),
Group("size", "release_date"),
Group("package_content", "size", "release_date"),
Group("dependencies", "notes"),
HTML("<hr>"),
Group("homepage_url", "code_view_url"),
Expand Down Expand Up @@ -1183,6 +1184,7 @@ class Meta:
"version",
"qualifiers",
"subpath",
"package_content",
]


Expand Down
18 changes: 18 additions & 0 deletions component_catalog/migrations/0013_package_package_content.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 5.2.8 on 2025-11-24 12:00

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the nullable `package_content` integer choice field to the `package` model."""

    dependencies = [
        ("component_catalog", "0012_alter_component_children"),
    ]

    operations = [
        migrations.AddField(
            model_name="package",
            name="package_content",
            field=models.IntegerField(
                blank=True,
                choices=[
                    (1, "curation"),
                    (2, "patch"),
                    (3, "source_repo"),
                    (4, "source_archive"),
                    (5, "binary"),
                    (6, "test"),
                    (7, "doc"),
                ],
                help_text=(
                    "Content of this Package as one of: "
                    "curation, patch, source_repo, source_archive, binary, test, doc"
                ),
                null=True,
            ),
        ),
    ]
54 changes: 53 additions & 1 deletion component_catalog/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
from dejacode_toolkit import spdx
from dejacode_toolkit.purldb import PurlDB
from dejacode_toolkit.purldb import pick_purldb_entry
from dejacode_toolkit.purldb import pick_source_package
from dejacode_toolkit.scancodeio import ScanCodeIO
from dje import urn
from dje.copier import post_copy
Expand Down Expand Up @@ -1652,6 +1653,42 @@ def __str__(self):
return self.label


class PackageContentFieldMixin(models.Model):
    """
    Abstract model mixin providing the `package_content` field.

    The field is extracted from the `purldb.packagedb.models.Package` model and
    needs to stay aligned with its upstream PurlDB implementation.
    """

    class PackageContentType(models.IntegerChoices):
        CURATION = 1, "curation"
        PATCH = 2, "patch"
        SOURCE_REPO = 3, "source_repo"
        SOURCE_ARCHIVE = 4, "source_archive"
        BINARY = 5, "binary"
        TEST = 6, "test"
        DOC = 7, "doc"

    package_content = models.IntegerField(
        choices=PackageContentType.choices,
        blank=True,
        null=True,
        help_text=_(
            "Content of this Package as one of: {}".format(", ".join(PackageContentType.labels))
        ),
    )

    class Meta:
        abstract = True

    @classmethod
    def get_package_content_value_from_label(cls, label):
        """
        Return the integer value of the `PackageContentType` member whose name
        matches the provided `label`, compared case-insensitively.
        Return None when no member matches or when `label` has no `upper()`
        method, such as None or an integer.
        """
        try:
            content_type = cls.PackageContentType[label.upper()]
        except (KeyError, AttributeError):
            # KeyError: no member with that name; AttributeError: `label` is not a string.
            return None
        return content_type.value


PACKAGE_URL_FIELDS = ["type", "namespace", "name", "version", "qualifiers", "subpath"]


Expand Down Expand Up @@ -1795,6 +1832,7 @@ class Package(
URLFieldsMixin,
HashFieldsMixin,
PackageURLMixin,
PackageContentFieldMixin,
DataspacedModel,
):
filename = models.CharField(
Expand Down Expand Up @@ -2504,7 +2542,7 @@ def create_from_url(cls, url, user):
package_for_match = cls(download_url=download_url)
package_for_match.set_package_url(package_url)
purldb_entries = package_for_match.get_purldb_entries(user)
# Look for one ith the same exact purl in that case
# Look for one with the same exact purl in that case
if purldb_data := pick_purldb_entry(purldb_entries, purl=url):
# The format from PurlDB is "2019-11-18T00:00:00Z" from DateTimeField
if release_date := purldb_data.get("release_date"):
Expand Down Expand Up @@ -2597,6 +2635,8 @@ def update_from_purldb(self, user):

- Retrieves matching entries from PurlDB using the given user.
- If exactly one match is found, its data is used directly.
- If multiple entries are found, the package_content value is used, when
available, to select a "source" package.
- If multiple entries are found and no "source" package can be selected, only
values that are non-empty and common across all entries are merged and used
to update the Package.
"""
Expand All @@ -2607,6 +2647,8 @@ def update_from_purldb(self, user):
purldb_entries_count = len(purldb_entries)
if purldb_entries_count == 1:
package_data = purldb_entries[0]
elif source_package := pick_source_package(purldb_entries):
package_data = source_package
else:
package_data = merge_common_non_empty_values(purldb_entries)

Expand All @@ -2615,6 +2657,10 @@ def update_from_purldb(self, user):
package_data["release_date"] = release_date.split("T")[0]
package_data["license_expression"] = package_data.get("declared_license_expression")

if package_content := package_data.get("package_content"):
package_content_value = Package.get_package_content_value_from_label(package_content)
package_data["package_content"] = package_content_value

# Avoid raising an IntegrityError when the values in `package_data` for the
# identifier fields already exist on another Package instance.
#
Expand Down Expand Up @@ -2647,6 +2693,12 @@ def update_from_purldb(self, user):
override=False,
override_unknown=True,
)

if updated_fields:
msg = f"Automatically updated {', '.join(updated_fields)} from PurlDB."
logger.debug(f"PurlDB: {msg}")
History.log_change(user, self, message=msg)

return updated_fields

def update_from_scan(self, user, update_products=False):
Expand Down
51 changes: 50 additions & 1 deletion component_catalog/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1366,6 +1366,7 @@ def test_component_catalog_models_get_exclude_candidates_fields(self):
"file_references",
"other_license_expression",
"parties",
"package_content",
],
),
)
Expand Down Expand Up @@ -2381,6 +2382,16 @@ def test_package_model_github_repo_url(self):
p.download_url = url
self.assertEqual(expected, p.github_repo_url)

def test_package_model_get_package_content_value_from_label(self):
    """Check the label-to-value conversion for invalid labels and mixed-case "patch"."""
    to_value = Package.get_package_content_value_from_label

    # Neither non-string inputs nor unknown labels resolve to a value.
    for invalid_label in (None, 100, "wrong"):
        self.assertIsNone(to_value(invalid_label))

    # The lookup ignores the case of the label.
    for patch_label in ("patch", "Patch", "PATCH"):
        self.assertEqual(2, to_value(patch_label))

@mock.patch("requests.get")
def test_collect_package_data(self, mock_get):
expected_message = (
Expand Down Expand Up @@ -2635,6 +2646,7 @@ def test_package_model_update_from_purldb(self, mock_get_purldb_entries):
"sha256": "0a1efde1b685a6c30999ba00902f23613cf5db864c5a1532d2edf3eda7896a37",
"copyright": "(c) Copyright",
"declared_license_expression": "(bsd-simplified AND bsd-new)",
"package_content": "source_archive",
}

mock_get_purldb_entries.return_value = [purldb_entry]
Expand All @@ -2656,12 +2668,13 @@ def test_package_model_update_from_purldb(self, mock_get_purldb_entries):
"sha256",
"copyright",
"declared_license_expression",
"package_content",
"license_expression",
]
self.assertEqual(expected, updated_fields)

package1.refresh_from_db()
# Handle release_date separatly
# Handle release_date and package_content separately
updated_fields.remove("release_date")
self.assertEqual(purldb_entry["release_date"], str(package1.release_date))

Expand Down Expand Up @@ -2700,6 +2713,42 @@ def test_package_model_update_from_purldb_multiple_entries(self, mock_get_purldb
self.assertEqual(["Keyword1", "Keyword2"], package1.keywords)
self.assertEqual("Python", package1.primary_language)

@mock.patch("component_catalog.models.Package.get_purldb_entries")
def test_package_model_update_from_purldb_multiple_entries_package_content(
    self, mock_get_entries
):
    """With a binary and a source_archive PurlDB entry, the source_archive data is applied."""
    purldb_entry_binary = {
        "uuid": "e133e70b-8dd3-4cf1-9711-72b1f57523a0",
        "purl": "pkg:pypi/boto3@1.37.26?file_name=boto3-1.37.26-py3-none-any.whl",
        "type": "pypi",
        "name": "boto3",
        "version": "1.37.26",
        "filename": "boto3-1.37.26-py3-none-any.whl",
        "download_url": "https://files.pythonhosted.org/packages/boto3-1.37.26-py3-none-any.whl",
        "package_content": "binary",
    }
    purldb_entry_source = {
        "uuid": "326aa7a8-4f28-406d-89f9-c1404916925b",
        "purl": "pkg:pypi/boto3@1.37.26?file_name=boto3-1.37.26.tar.gz",
        "type": "pypi",
        "name": "boto3",
        "version": "1.37.26",
        "filename": "boto3-1.37.26.tar.gz",
        "download_url": "https://files.pythonhosted.org/packages/boto3-1.37.26.tar.gz",
        "package_content": "source_archive",
    }

    # The binary entry comes first so that list order alone cannot explain the result.
    mock_get_entries.return_value = [purldb_entry_binary, purldb_entry_source]
    package1 = make_package(self.dataspace, package_url="pkg:pypi/boto3@1.37.26")
    updated_fields = package1.update_from_purldb(self.user)
    expected = ["download_url", "filename", "package_content"]
    self.assertEqual(expected, sorted(updated_fields))

    package1.refresh_from_db()
    self.assertEqual(purldb_entry_source["download_url"], package1.download_url)
    self.assertEqual(purldb_entry_source["filename"], package1.filename)
    self.assertEqual("source_archive", package1.get_package_content_display())

@mock.patch("component_catalog.models.Package.get_purldb_entries")
def test_package_model_update_from_purldb_duplicate_exception(self, mock_get_purldb_entries):
package_url = "pkg:pypi/[email protected]"
Expand Down
3 changes: 3 additions & 0 deletions component_catalog/tests/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -1243,6 +1243,7 @@ def test_package_details_view_num_queries(self):
# Create a Package Set
package_url = "pkg:pypi/[email protected]"
self.package1.set_package_url(package_url)
self.package1.package_content = Package.PackageContentType.SOURCE_ARCHIVE
self.package1.save()
license_expression = "{} AND {}".format(self.license1.key, self.license2.key)
make_package(self.dataspace, package_url=package_url, license_expression=license_expression)
Expand Down Expand Up @@ -3389,6 +3390,7 @@ def test_component_catalog_package_add_view_initial_data(
"description": "Abbot Java GUI Test Library",
"declared_license_expression": "bsd-new OR eps-1.0 OR apache-2.0 OR mit",
"keywords": ["keyword1", "keyword2"],
"package_content": "binary",
}
mock_request_get.return_value = {
"count": 1,
Expand All @@ -3411,6 +3413,7 @@ def test_component_catalog_package_add_view_initial_data(
"description": "Abbot Java GUI Test Library",
"license_expression": "bsd-new OR eps-1.0 OR apache-2.0 OR mit",
"declared_license_expression": "bsd-new OR eps-1.0 OR apache-2.0 OR mit",
"package_content": Package.PackageContentType.BINARY,
}
self.assertEqual(expected, response.context["form"].initial)

Expand Down
8 changes: 8 additions & 0 deletions component_catalog/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -1140,6 +1140,7 @@ class PackageDetailsView(
"parties",
"datasource_id",
"file_references",
"package_content",
],
},
"components": {
Expand Down Expand Up @@ -1293,6 +1294,7 @@ def tab_others(self):
TabField("parties"),
TabField("datasource_id"),
TabField("file_references"),
TabField("package_content", source="get_package_content_display"),
]

fields = self.get_tab_fields(tab_fields)
Expand Down Expand Up @@ -1930,6 +1932,12 @@ def get_initial(self):
if purldb_entry := self.get_entry_from_purldb():
# Duplicate the declared_license_expression as the "concluded" license_expression
purldb_entry["license_expression"] = purldb_entry.get("declared_license_expression")

# Convert package_content string label to integer value
if content_label := purldb_entry.pop("package_content", None):
if content_value := Package.get_package_content_value_from_label(content_label):
purldb_entry["package_content"] = content_value

model_fields = [field.name for field in Package._meta.get_fields()]
initial_from_purldb_entry = {
field_name: value
Expand Down
16 changes: 16 additions & 0 deletions dejacode_toolkit/purldb.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ def get_package_by_purl(self, package_url):

def find_packages(self, payload, timeout=None):
    """
    Get Packages details using provided `payload` filters on the PurlDB package list.

    The request is always sorted by `package_content`. The sort key is added to
    a copy of `payload`, so the mapping provided by the caller is not modified.
    The `results` of the response are returned when it reports at least one match.
    """
    params = {**payload, "sort": "package_content"}

    response = self.request_get(self.package_api_url, params=params, timeout=timeout)
    # A missing or null "count" is treated as zero matches instead of raising a TypeError.
    if response and (response.get("count") or 0) > 0:
        return response.get("results")
Expand Down Expand Up @@ -88,3 +90,17 @@ def pick_purldb_entry(purldb_entries, purl=None):
matches = [entry for entry in purldb_entries if entry.get("purl") == purl]
if len(matches) == 1:
return matches[0]


def pick_source_package(purldb_entries):
    """
    Pick a source package from a list of PurlDB entries.

    - Return None when `purldb_entries` is empty or None.
    - Return the only entry when there is exactly one, whatever its content.
    - Otherwise return the first entry whose `package_content` label equals
      "source_archive", compared case-insensitively, or None when there is none.

    Entries whose `package_content` is missing or is not a string, such as an
    integer choice value, are skipped rather than raising an AttributeError.
    """
    if not purldb_entries:
        return None

    if len(purldb_entries) == 1:
        return purldb_entries[0]

    for entry in purldb_entries:
        package_content = entry.get("package_content")
        if isinstance(package_content, str) and package_content.lower() == "source_archive":
            return entry

    return None
5 changes: 3 additions & 2 deletions dje/tests/testfiles/test_dataset_cc_only.json
Original file line number Diff line number Diff line change
Expand Up @@ -292,12 +292,13 @@
"vcs_url": "",
"code_view_url": "",
"bug_tracking_url": "",
"md5": "",
"sha1": "",
"sha256": "",
"sha512": "",
"package_content": null,
"filename": "systemu-2.5.2.gem",
"download_url": "https://s3.amazonaws.com/production.s3.rubygems.org/gems/systemu-2.5.2.gem",
"sha1": "",
"md5": "",
"size": null,
"release_date": null,
"primary_language": "",
Expand Down
1 change: 1 addition & 0 deletions dje/tests/testfiles/test_dataset_pp_only.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
"sha1": "",
"sha256": "",
"sha512": "",
"package_content": null,
"filename": "systemu-2.5.2.gem",
"download_url": "https://s3.amazonaws.com/production.s3.rubygems.org/gems/systemu-2.5.2.gem",
"size": null,
Expand Down
Loading