def _get_required_phrase_texts_by_expression():
    """
    Return a mapping of {license expression: [texts]} built from the stripped,
    non-empty text of every base rule flagged with ``is_required_phrase``.
    """
    texts_by_expression = {}
    for expression, rules in get_base_rules_by_expression().items():
        for rule in rules:
            if not rule.is_required_phrase:
                continue
            # an empty phrase can never be a meaningful required phrase
            text = (rule.text or "").strip()
            if text:
                texts_by_expression.setdefault(expression, []).append(text)
    return texts_by_expression


def _get_candidate_phrases(lic, required_phrase_texts):
    """
    Return the list of unique, non-empty candidate phrases for the ``lic``
    license object: its name, short name and SPDX license key followed by the
    ``required_phrase_texts``. The list is sorted longest first so that the
    most specific phrase is tried first.
    """
    candidates = []
    for phrase in (lic.name, lic.short_name, lic.spdx_license_key, *required_phrase_texts):
        if phrase and phrase not in candidates:
            candidates.append(phrase)
    # the sort is stable: phrases of equal length keep the order above
    candidates.sort(key=len, reverse=True)
    return candidates


def _find_phrase_for_each_key(rule_text, keys, phrases_by_key):
    """
    Return a mapping of {license key: phrase} with the first candidate phrase
    of each key of ``keys`` that is found in ``rule_text``.
    Return None as soon as one key has no candidate phrase found in the text.
    """
    matched_phrases = {}
    for key in keys:
        for phrase in phrases_by_key[key]:
            if find_phrase_spans_in_text(text=rule_text, phrase_text=phrase):
                matched_phrases[key] = phrase
                break
        else:
            return None
    return matched_phrases


def update_composite_rules_using_license_attributes(
    license_expression=None,
    write_phrase_source=False,
    verbose=False,
    dry_run=False,
):
    """
    Add required phrases to composite (multi license) rules using license attributes.

    For each composite rule, parse its expression into individual license keys, then collect
    candidate phrases for each key from the license database (name, short name and SPDX license
    key) and from the text of ``is_required_phrase`` rules that have this key as their expression.
    A rule is only updated if ALL the keys of its expression have at least one candidate phrase
    found in the rule text.

    ``license_expression`` is passed to get_updatable_rules_by_expression() to select rules.
    If ``write_phrase_source`` is True, append a "<key>.LICENSE : composite" note to the source
    passed to add_required_phrase_to_rule(). ``dry_run`` is passed as-is to that function.
    If ``verbose`` is True, echo the processed expressions and the phrases that were
    effectively added to each rule.
    """
    licenses_by_key = get_licenses_db()
    licensing = Licensing()
    required_phrase_texts = _get_required_phrase_texts_by_expression()

    rules_by_expression = get_updatable_rules_by_expression(
        license_expression, simple_expression=False,
    )

    for expression, rules in rules_by_expression.items():
        try:
            keys = licensing.license_keys(expression, unique=True)
        except Exception:
            # best effort: one invalid expression must not stop the whole update
            if verbose:
                click.echo(f' Skipping unparseable expression: {expression}')
            continue

        # single key expressions are handled by --from-license-attributes
        if len(keys) < 2:
            continue

        # collect the candidate phrases of each key: if one key is unknown or has
        # no candidate phrase, no rule with this expression can have all its keys
        # matched, so skip the expression entirely
        phrases_by_key = {}
        for key in keys:
            lic = licenses_by_key.get(key)
            if not lic:
                break
            candidates = _get_candidate_phrases(
                lic=lic,
                required_phrase_texts=required_phrase_texts.get(key, []),
            )
            if not candidates:
                break
            phrases_by_key[key] = candidates

        if len(phrases_by_key) != len(keys):
            continue

        if verbose:
            click.echo(f'Processing composite expression: {expression}')

        for rule in rules:
            matched_phrases = _find_phrase_for_each_key(
                rule_text=rule.text,
                keys=keys,
                phrases_by_key=phrases_by_key,
            )
            if not matched_phrases:
                continue

            # all keys matched, inject markers for each phrase and track which
            # phrases were effectively added: add_required_phrase_to_rule returns
            # a falsy value when it did not mark the phrase
            added_phrases = []
            for key, phrase in matched_phrases.items():
                source = rule.source or ""
                if write_phrase_source:
                    source += f" {key}.LICENSE : composite"

                added = add_required_phrase_to_rule(
                    rule=rule,
                    required_phrase=phrase,
                    source=source,
                    dry_run=dry_run,
                )
                if added:
                    added_phrases.append(phrase)

            if verbose and added_phrases:
                click.echo(
                    f' {rule.identifier}: marked phrases: '
                    f'{added_phrases}'
                )
"--composite-rules", + is_flag=True, + default=False, + help="Add required phrases to composite (multi license) rules using license attributes. " + "Only marks a rule if all license keys in the expression have a matching phrase in the text", + cls=PluggableCommandLineOption, +) @click.option( "-l", "--license-expression", @@ -691,6 +817,7 @@ def delete_required_phrase_rules_source_debug(rules_data_dir): def add_required_phrases( from_other_rules, from_license_attributes, + composite_rules, license_expression, validate, reindex, @@ -726,6 +853,15 @@ def add_required_phrases( verbose=verbose, ) + elif composite_rules: + click.echo('Updating composite rules from license attributes.') + update_composite_rules_using_license_attributes( + license_expression=license_expression, + write_phrase_source=write_phrase_source, + dry_run=dry_run, + verbose=verbose, + ) + validate_and_reindex(validate, reindex, verbose) diff --git a/tests/licensedcode/test_required_phrases.py b/tests/licensedcode/test_required_phrases.py index 860ccc70f5..0cf50dd4a5 100644 --- a/tests/licensedcode/test_required_phrases.py +++ b/tests/licensedcode/test_required_phrases.py @@ -180,3 +180,97 @@ def test_update_rules_using_is_required_phrases_rules(self): @pytest.mark.scanslow def test_update_rules_using_license_attributes(self): update_rules_using_license_attributes(verbose=True, dry_run=True) + + @pytest.mark.scanslow + def test_update_composite_rules_using_license_attributes(self): + from licensedcode.required_phrases import update_composite_rules_using_license_attributes + update_composite_rules_using_license_attributes(verbose=True, dry_run=True) + + +class TestCompositeRulesAnnotation(TestCaseClass): + + def test_composite_rule_marks_both_phrases(self): + from licensedcode.required_phrases import add_required_phrase_to_rule + + rule = Rule( + license_expression="mit AND apache-2.0", + identifier="mit_and_apache-2.0_test.RULE", + text="Licensed under the MIT License or the Apache License.", + 
is_license_notice=True, + ) + + added_mit = add_required_phrase_to_rule( + rule=rule, + required_phrase="MIT License", + source="composite", + dry_run=True, + ) + assert added_mit + assert "{{MIT License}}" in rule.text + + added_apache = add_required_phrase_to_rule( + rule=rule, + required_phrase="Apache License", + source="composite", + dry_run=True, + ) + assert added_apache + assert "{{Apache License}}" in rule.text + + def test_composite_rule_no_double_marking(self): + from licensedcode.required_phrases import add_required_phrase_to_rule + + rule = Rule( + license_expression="mit AND apache-2.0", + identifier="mit_and_apache-2.0_test.RULE", + text="Licensed under the {{MIT License}} or the Apache License.", + is_license_notice=True, + ) + + added = add_required_phrase_to_rule( + rule=rule, + required_phrase="MIT License", + source="composite", + dry_run=True, + ) + assert not added + + def test_composite_rule_three_keys_all_marked(self): + from licensedcode.required_phrases import add_required_phrase_to_rule + + rule = Rule( + license_expression="mit AND apache-2.0 AND bsd-new", + identifier="triple_test.RULE", + text="Dual licensed: MIT License, Apache License, and BSD License.", + is_license_notice=True, + ) + + add_required_phrase_to_rule(rule=rule, required_phrase="MIT License", source="", dry_run=True) + add_required_phrase_to_rule(rule=rule, required_phrase="Apache License", source="", dry_run=True) + add_required_phrase_to_rule(rule=rule, required_phrase="BSD License", source="", dry_run=True) + + assert "{{MIT License}}" in rule.text + assert "{{Apache License}}" in rule.text + assert "{{BSD License}}" in rule.text + + def test_composite_rule_overlapping_spans_handled(self): + from licensedcode.required_phrases import add_required_phrase_to_rule + + rule = Rule( + license_expression="mit AND mit-0", + identifier="overlap_test.RULE", + text="Released under the MIT License terms.", + is_license_notice=True, + ) + + added = add_required_phrase_to_rule( + 
rule=rule, required_phrase="MIT License", source="", dry_run=True, + ) + assert added + assert "{{MIT License}}" in rule.text + + # "MIT" overlaps with already marked span, should not double mark + added2 = add_required_phrase_to_rule( + rule=rule, required_phrase="MIT", source="", dry_run=True, + ) + assert not added2