Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
136 changes: 136 additions & 0 deletions src/licensedcode/required_phrases.py
Original file line number Diff line number Diff line change
Expand Up @@ -593,6 +593,123 @@ def update_rules_using_license_attributes(
dry_run=dry_run,
)

def update_composite_rules_using_license_attributes(
    license_expression=None,
    write_phrase_source=False,
    verbose=False,
    dry_run=False,
):
    """
    Add required phrases to composite (multi license) rules using license attributes.

    For each updatable rule whose license expression has two or more license keys, collect
    candidate phrases for each key: the license ``name``, ``short_name`` and
    ``spdx_license_key``, plus the texts of the ``is_required_phrase`` rules whose license
    expression is exactly that key. Candidates are tried longest first.

    A rule is updated only if ALL the keys of its expression have at least one candidate
    phrase found in the rule text. In that case the first matching candidate of each key is
    passed to add_required_phrase_to_rule(), longest phrase first.

    ``license_expression`` is forwarded to get_updatable_rules_by_expression() to select the
    rules to process (presumably None means all the rules: confirm against that function).
    If ``write_phrase_source`` is True, append " <key>.LICENSE : composite" to the source
    recorded for each added phrase.
    If ``verbose`` is True, print progress messages.
    ``dry_run`` is forwarded unchanged to add_required_phrase_to_rule().
    """
    licenses_by_key = get_licenses_db()
    licensing = Licensing()

    # Texts of the is_required_phrase rules grouped by rule license expression. This mapping
    # is looked up by single license key below, therefore only the entries whose expression
    # is a single key are ever used.
    is_required_phrases_by_key = {}
    for expression, rules in get_base_rules_by_expression().items():
        for rule in rules:
            if rule.is_required_phrase:
                is_required_phrases_by_key.setdefault(expression, []).append(rule.text.strip())

    rules_by_expression = get_updatable_rules_by_expression(
        license_expression, simple_expression=False,
    )

    for expression, rules in rules_by_expression.items():
        try:
            keys = licensing.license_keys(expression, unique=True)
        except Exception:
            # Best effort: an expression that cannot be parsed is reported and skipped
            # rather than aborting the whole update run.
            if verbose:
                click.echo(f' Skipping unparseable expression: {expression}')
            continue

        # single key expressions are handled by --from-license-attributes
        if len(keys) < 2:
            continue

        # Collect the candidate phrases of each key, longest first so that the most specific
        # phrase is tried first.
        phrases_by_key = {}
        for key in keys:
            lic = licenses_by_key.get(key)
            if not lic:
                # Unknown license key: we cannot require a phrase for every key, so the
                # whole expression is skipped.
                phrases_by_key = None
                break

            possible_phrases = [
                lic.name,
                lic.short_name,
                lic.spdx_license_key,
                *is_required_phrases_by_key.get(key, []),
            ]
            candidates = []
            for phrase in possible_phrases:
                # Skip missing or empty values and duplicates: an empty phrase cannot be a
                # meaningful required phrase.
                if phrase and phrase not in candidates:
                    candidates.append(phrase)
            candidates.sort(key=len, reverse=True)
            phrases_by_key[key] = candidates

        if not phrases_by_key:
            continue

        if verbose:
            click.echo(f'Processing composite expression: {expression}')

        for rule in rules:
            # Find the first (longest) candidate phrase of each key present in the rule text.
            matched_phrases = {}
            for key in keys:
                phrase = next(
                    (
                        candidate
                        for candidate in phrases_by_key[key]
                        if find_phrase_spans_in_text(text=rule.text, phrase_text=candidate)
                    ),
                    None,
                )
                if phrase is None:
                    # No need to look at the remaining keys: the rule will not be updated.
                    break
                matched_phrases[key] = phrase

            # Only update a rule when every key of the expression has a phrase in its text.
            if any(key not in matched_phrases for key in keys):
                continue

            # Inject the longest phrases first: when the phrases of two keys overlap in the
            # text (say "MIT" and a longer phrase containing it), the more specific phrase
            # must get marked rather than being blocked by the shorter one. The sort is
            # stable, so phrases of equal length keep the order of the expression keys.
            injection_order = sorted(
                matched_phrases.items(),
                key=lambda key_and_phrase: len(key_and_phrase[1]),
                reverse=True,
            )

            marked_phrases = []
            for key, phrase in injection_order:
                source = rule.source or ""
                if write_phrase_source:
                    source += f" {key}.LICENSE : composite"

                was_added = add_required_phrase_to_rule(
                    rule=rule,
                    required_phrase=phrase,
                    source=source,
                    dry_run=dry_run,
                )
                # add_required_phrase_to_rule() returns a falsy value when nothing was added
                # (for instance when the phrase is already marked): report only the phrases
                # that were effectively marked.
                if was_added:
                    marked_phrases.append(phrase)

            if verbose and marked_phrases:
                click.echo(
                    f' {rule.identifier}: marked phrases: '
                    f'{marked_phrases}'
                )


####################################################################################################
#
# Inject new required phrase in rules
Expand Down Expand Up @@ -629,6 +746,15 @@ def delete_required_phrase_rules_source_debug(rules_data_dir):
"Mutually exclusive with --from-other-rule.",
cls=PluggableCommandLineOption,
)
@click.option(
"-c",
"--composite-rules",
is_flag=True,
default=False,
help="Add required phrases to composite (multi license) rules using license attributes. "
"Only marks a rule if all license keys in the expression have a matching phrase in the text",
cls=PluggableCommandLineOption,
)
@click.option(
"-l",
"--license-expression",
Expand Down Expand Up @@ -691,6 +817,7 @@ def delete_required_phrase_rules_source_debug(rules_data_dir):
def add_required_phrases(
from_other_rules,
from_license_attributes,
composite_rules,
license_expression,
validate,
reindex,
Expand Down Expand Up @@ -726,6 +853,15 @@ def add_required_phrases(
verbose=verbose,
)

elif composite_rules:
click.echo('Updating composite rules from license attributes.')
update_composite_rules_using_license_attributes(
license_expression=license_expression,
write_phrase_source=write_phrase_source,
dry_run=dry_run,
verbose=verbose,
)

validate_and_reindex(validate, reindex, verbose)


Expand Down
94 changes: 94 additions & 0 deletions tests/licensedcode/test_required_phrases.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,3 +180,97 @@ def test_update_rules_using_is_required_phrases_rules(self):
@pytest.mark.scanslow
def test_update_rules_using_license_attributes(self):
    """
    Smoke test: run the update of rules from license attributes in verbose dry-run mode.
    There is no assertion: the test only checks that the call completes without raising.
    """
    update_rules_using_license_attributes(
        dry_run=True,
        verbose=True,
    )

@pytest.mark.scanslow
def test_update_composite_rules_using_license_attributes(self):
    """
    Smoke test: run the update of composite rules from license attributes in verbose
    dry-run mode. There is no assertion: the test only checks that the call completes
    without raising.
    """
    from licensedcode.required_phrases import (
        update_composite_rules_using_license_attributes as update_composite_rules,
    )

    update_composite_rules(
        dry_run=True,
        verbose=True,
    )


class TestCompositeRulesAnnotation(TestCaseClass):
    """
    Check how add_required_phrase_to_rule() marks required phrases in the text of rules
    built with a composite (multi license) expression.
    """

    def test_composite_rule_marks_both_phrases(self):
        """
        Two distinct phrases added one after the other are both wrapped in {{ }} markers.
        """
        from licensedcode.required_phrases import add_required_phrase_to_rule

        composite_rule = Rule(
            license_expression="mit AND apache-2.0",
            identifier="mit_and_apache-2.0_test.RULE",
            text="Licensed under the MIT License or the Apache License.",
            is_license_notice=True,
        )

        mit_was_added = add_required_phrase_to_rule(
            rule=composite_rule,
            required_phrase="MIT License",
            source="composite",
            dry_run=True,
        )
        assert mit_was_added
        assert "{{MIT License}}" in composite_rule.text

        apache_was_added = add_required_phrase_to_rule(
            rule=composite_rule,
            required_phrase="Apache License",
            source="composite",
            dry_run=True,
        )
        assert apache_was_added
        assert "{{Apache License}}" in composite_rule.text

    def test_composite_rule_no_double_marking(self):
        """
        A phrase that is already wrapped in {{ }} markers in the rule text is not added again.
        """
        from licensedcode.required_phrases import add_required_phrase_to_rule

        premarked_rule = Rule(
            license_expression="mit AND apache-2.0",
            identifier="mit_and_apache-2.0_test.RULE",
            text="Licensed under the {{MIT License}} or the Apache License.",
            is_license_notice=True,
        )

        was_added = add_required_phrase_to_rule(
            rule=premarked_rule,
            required_phrase="MIT License",
            source="composite",
            dry_run=True,
        )
        assert not was_added

    def test_composite_rule_three_keys_all_marked(self):
        """
        Three phrases added in sequence to a three-key rule all end up marked in the text.
        """
        from licensedcode.required_phrases import add_required_phrase_to_rule

        triple_rule = Rule(
            license_expression="mit AND apache-2.0 AND bsd-new",
            identifier="triple_test.RULE",
            text="Dual licensed: MIT License, Apache License, and BSD License.",
            is_license_notice=True,
        )

        # Add every phrase first, then check the markers, as the checks apply to the final text.
        for phrase in ("MIT License", "Apache License", "BSD License"):
            add_required_phrase_to_rule(
                rule=triple_rule,
                required_phrase=phrase,
                source="",
                dry_run=True,
            )

        final_text = triple_rule.text
        assert "{{MIT License}}" in final_text
        assert "{{Apache License}}" in final_text
        assert "{{BSD License}}" in final_text

    def test_composite_rule_overlapping_spans_handled(self):
        """
        A shorter phrase that overlaps an already marked span is not added.
        """
        from licensedcode.required_phrases import add_required_phrase_to_rule

        overlap_rule = Rule(
            license_expression="mit AND mit-0",
            identifier="overlap_test.RULE",
            text="Released under the MIT License terms.",
            is_license_notice=True,
        )

        long_phrase_added = add_required_phrase_to_rule(
            rule=overlap_rule,
            required_phrase="MIT License",
            source="",
            dry_run=True,
        )
        assert long_phrase_added
        assert "{{MIT License}}" in overlap_rule.text

        # "MIT" overlaps with already marked span, should not double mark
        short_phrase_added = add_required_phrase_to_rule(
            rule=overlap_rule,
            required_phrase="MIT",
            source="",
            dry_run=True,
        )
        assert not short_phrase_added
Loading