diff --git a/.github/workflows/commoncode-release.yml b/.github/workflows/commoncode-release.yml index 467e6ac234b..ca127d29656 100644 --- a/.github/workflows/commoncode-release.yml +++ b/.github/workflows/commoncode-release.yml @@ -1,4 +1,4 @@ -name: Create library release archives, create a GH release and publish PyPI wheel and sdist on tag in main branch +name: Create and release commoncode wheels on GitHub and Pypi # This is executed automatically on a tag in the main branch diff --git a/.github/workflows/licensedcode-data-index-release.yml b/.github/workflows/licensedcode-data-index-release.yml index ba267f89f5f..353829d1905 100644 --- a/.github/workflows/licensedcode-data-index-release.yml +++ b/.github/workflows/licensedcode-data-index-release.yml @@ -1,4 +1,4 @@ -name: Create library release archives, create a GH release and publish PyPI wheel and sdist on tag in main branch +name: Create and release licensedcode index & data wheels on GitHub and Pypi # This is executed automatically on a tag in the main branch diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 3ca4f7e915e..b0abbc0ec64 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -183,6 +183,20 @@ jobs: venv/bin/scancode -i --verbose samples/ -n3 --json foo.json; done +################################################################################ +# Tests with released commoncode instead of local editable commoncode +################################################################################ + + - template: etc/ci/azure-posix.yml + parameters: + job_name: ubuntu_test_released_commoncode + image_name: ubuntu-22.04 + python_versions: ['3.14'] + python_architecture: x64 + test_suites: + all: + venv/bin/pip uninstall -y commoncode && venv/bin/pip install commoncode && venv/bin/pytest -n 2 -vvs tests/scancode/test_cli.py --reruns 2 + ################################################################################ # Tests using a plain pip install to get the latest of all wheels 
diff --git a/commoncode-CHANGELOG.rst b/commoncode-CHANGELOG.rst index dc63866360b..2d56e748149 100644 --- a/commoncode-CHANGELOG.rst +++ b/commoncode-CHANGELOG.rst @@ -1,6 +1,29 @@ Release notes ============= +Version 32.5.2 - (2026-06-11) +----------------------------- + +- Bump version properly. + +Version 32.5.1 - (2026-06-11) +----------------------------- + +- Minor fix in pyproject.toml to release wheels + to PyPI properly. + +Version 32.5.0 - (2026-06-11) +----------------------------- + +- Merge commoncode back into scancode-toolkit + https://github.com/aboutcode-org/scancode-toolkit/pull/5116 + +- Add support for creating a codebase from multiple input paths by + starting the codebase walk from these inputs and then ignoring paths + based on path patterns. This improves codebase and resource + collection and creation performance for multi-path scan inputs + https://github.com/aboutcode-org/scancode-toolkit/pull/5055 + Version 32.4.2 - (2025-01-08) ----------------------------- diff --git a/configure b/configure index 5262999ce74..d278901062e 100755 --- a/configure +++ b/configure @@ -256,6 +256,15 @@ install_packages() { $1 } +install_packages_with_local() { + # commoncode is also a dependency of some of our dependencies, so + # we build and install commoncode from the local source first: this way + # the local code is tested and not the released commoncode + "$CFG_BIN_DIR/flot" --pyproject pyproject-commoncode.toml + "$CFG_BIN_DIR/pip" install ./dist/commoncode*.whl + install_packages "$CFG_REQUIREMENTS" +} + ################################ cli_help() { @@ -313,7 +322,7 @@ PIP_EXTRA_ARGS="$PIP_EXTRA_ARGS" find_python create_virtualenv "$VIRTUALENV_DIR" install_packages "$FLOT_REQUIREMENTS" -install_packages "$CFG_REQUIREMENTS" +install_packages_with_local . 
"$CFG_BIN_DIR/activate" "$CFG_BIN_DIR/scancode-train-gibberish-model" diff --git a/configure.bat b/configure.bat index b4f61216441..0bf0b8615bb 100644 --- a/configure.bat +++ b/configure.bat @@ -162,6 +162,9 @@ if %ERRORLEVEL% neq 0 ( %PIP_EXTRA_ARGS% ^ %FLOT_REQUIREMENTS% +"%CFG_BIN_DIR%\flot" --pyproject pyproject-commoncode.toml +"%CFG_BIN_DIR%\pip" install ./dist/commoncode*.whl + "%CFG_BIN_DIR%\pip" install ^ --upgrade ^ %CFG_QUIET% ^ diff --git a/docs/source/reference/scancode-cli/cli-core-options.rst b/docs/source/reference/scancode-cli/cli-core-options.rst index e945d245da0..e89374f1a4f 100644 --- a/docs/source/reference/scancode-cli/cli-core-options.rst +++ b/docs/source/reference/scancode-cli/cli-core-options.rst @@ -145,3 +145,98 @@ Comparing progress message options This would scan the file ``samples/levelone/leveltwo/file`` but ignore ``samples/levelone/leveltwo/levelthree/file`` + +---- + +.. _cli-ignore-option: + +``--ignore <pattern>`` +---------------------- + + In a scan, all files inside the directory specified as an input argument are scanned. If + there are some files which you don't want to scan, the ``--ignore`` option can be used to + skip them. + + **Example** + + .. code-block:: shell + + scancode --ignore "*.java" samples samples.json + + Here, ScanCode ignores files ending with ``.java``, and continues with other files as usual. + + See :ref:`glob-pattern-matching` for more information. + +---- + +.. _cli-config-option: + +``--config-file `` +------------------------ + + Path patterns which should be ignored in the scan can also be provided + through a configuration file. + + **Example** + + .. code-block:: shell + + scancode --config-file scancode-config.yaml samples samples.json + + .. code-block:: yaml + + ignored_patterns: + - '*.java' + - '*/licenses/*' + + Here, ScanCode ignores files ending with ``.java`` and the ``licenses`` directory, + and continues with other files as usual. 
+ + This is also compatible with the `scancode.io configuration file <https://scancodeio.readthedocs.io/en/latest/project-configuration.html>`_. + +---- + +.. _glob-pattern-matching: + +Glob Pattern Matching +--------------------- + + All the pre-scan options use pattern matching, so the basics of Glob Pattern Matching are + discussed briefly below. + + Glob pattern matching is useful for matching a group of files, by using patterns in their + names. Then using these patterns, files are grouped and treated differently as required. + + Here are some rules from the `Linux Manual <https://man7.org/linux/man-pages/man7/glob.7.html>`_ + on glob patterns. Refer to it for more detailed information. + + A string is a wildcard pattern if it contains one of the characters '?', '*' or '['. Globbing + is the operation that expands a wildcard pattern into the list of pathnames matching the + pattern. Matching is defined by: + + - A '?' (not between brackets) matches any single character. + + - A '*' (not between brackets) matches any string, including the empty string. + + - An expression "[...]" where the first character after the leading '[' is not an '!' matches a + single character, namely any of the characters enclosed by the brackets. + + - There is one special convention: two characters separated by '-' denote a range. + + - An expression "[!...]" matches a single character, namely any character that is not matched + by the expression obtained by removing the first '!' from it. + + - A '/' in a pathname cannot be matched by a '?' or '*' wildcard, or by a range like "[.-0]". + + Note that wildcard patterns are not regular expressions, although they are a bit similar. + + For more information on glob pattern matching refer to these resources: + + - `Linux Manual <https://man7.org/linux/man-pages/man7/glob.7.html>`_ + - `Wildcard Match Documentation <https://facelessuser.github.io/wcmatch/glob/>`_. 
+ + You can also import these Python Libraries to practice UNIX style pattern matching: + + - `fnmatch `_ for File Name matching + - `glob `_ for File Path matching + diff --git a/docs/source/reference/scancode-cli/cli-help-text-options.rst b/docs/source/reference/scancode-cli/cli-help-text-options.rst index a3bfb44f776..bd857273874 100644 --- a/docs/source/reference/scancode-cli/cli-help-text-options.rst +++ b/docs/source/reference/scancode-cli/cli-help-text-options.rst @@ -125,8 +125,6 @@ The following help text is displayed for ScanCode version 32.0.0: such that all paths have a common root directory. pre-scan: - --ignore Ignore files matching . - --include Include files matching . --classify Classify files with flags indicating whether the file is a legal, readme, test or similar file. --facet = Add the to files with a path matching @@ -169,11 +167,13 @@ The following help text is displayed for ScanCode version 32.0.0: at the file and directory level. core: + --ignore Ignore files matching . --timeout Stop an unfinished file scan after a timeout in seconds. [default: 120 seconds] -n, --processes INT Set the number of parallel processes to use. Disable parallel processing if 0. Also disable threading if -1. [default: (number of CPUs)-1] + -c, --config-file FILENAME Path to the configuration file. -q, --quiet Do not print summary or progress. -v, --verbose Print progress as file-by-file path instead of a progress bar. Print verbose scan counters. @@ -512,7 +512,7 @@ for ScanCode Version 32.0.0. -------------------------------------------- Plugin: scancode_post_scan:classify class: summarycode.classify_plugin:FileClassifier codebase_attributes: - resource_attributes: is_legal, is_manifest, is_readme, is_top_level, is_key_file + resource_attributes: is_legal, is_manifest, is_readme, is_top_level, is_key_file, is_community sort_order: 4 required_plugins: options: @@ -690,6 +690,19 @@ for ScanCode Version 32.0.0. 
- packages + -------------------------------------------- + Plugin: scancode_post_scan:todo class: summarycode.todo:AmbiguousDetectionsToDoPlugin + codebase_attributes: todo + resource_attributes: for_todo + sort_order: 3 + required_plugins: + options: + help_group: post-scan, name: todo: --todo + help: Summarize scans by providing all ambiguous detections which are todo items and needs manual review. + doc: + Summarize a scan by compiling review items of ambiguous detections. + + -------------------------------------------- Plugin: scancode_pre_scan:facet class: summarycode.facet:AddFacet codebase_attributes: @@ -705,21 +718,6 @@ for ScanCode Version 32.0.0. test vs. data, etc. - -------------------------------------------- - Plugin: scancode_pre_scan:ignore class: scancode.plugin_ignore:ProcessIgnore - codebase_attributes: - resource_attributes: - sort_order: 100 - required_plugins: - options: - help_group: pre-scan, name: ignore: --ignore - help: Ignore files matching . - help_group: pre-scan, name: include: --include - help: Include files matching . - doc: - Include or ignore files matching patterns. - - -------------------------------------------- Plugin: scancode_scan:copyrights class: cluecode.plugin_copyright:CopyrightScanner codebase_attributes: @@ -761,10 +759,23 @@ for ScanCode Version 32.0.0. Tag a file as generated. + -------------------------------------------- + Plugin: scancode_scan:go_symbol class: go_inspector.plugin:GoSymbolScannerPlugin + codebase_attributes: + resource_attributes: go_symbols + sort_order: 100 + required_plugins: + options: + help_group: primary scans, name: go_symbol: --go-symbol + help: Collect Go symbols. + doc: + Scan a Go binary for symbols using GoReSym. 
+ + -------------------------------------------- Plugin: scancode_scan:info class: scancode.plugin_info:InfoScanner codebase_attributes: - resource_attributes: date, sha1, md5, sha256, mime_type, file_type, programming_language, is_binary, is_text, is_archive, is_media, is_source, is_script + resource_attributes: date, sha1, md5, sha256, sha1_git, mime_type, file_type, programming_language, is_binary, is_text, is_archive, is_media, is_source, is_script sort_order: 0 required_plugins: options: @@ -779,7 +790,7 @@ for ScanCode Version 32.0.0. Plugin: scancode_scan:licenses class: licensedcode.plugin_license:LicenseScanner codebase_attributes: license_detections resource_attributes: detected_license_expression, detected_license_expression_spdx, license_detections, license_clues, percentage_of_license_text - sort_order: 4 + sort_order: 5 required_plugins: options: help_group: primary scans, name: license: -l, --license @@ -804,13 +815,15 @@ for ScanCode Version 32.0.0. Plugin: scancode_scan:packages class: packagedcode.plugin_package:PackageScanner codebase_attributes: packages, dependencies resource_attributes: package_data, for_packages - sort_order: 3 + sort_order: 4 required_plugins: scan:licenses options: help_group: primary scans, name: package: -p, --package help: Scan for application package and dependency manifests, lockfiles and related data. help_group: primary scans, name: system_package: --system-package help: Scan for installed system package databases. + help_group: primary scans, name: package_in_compiled: --package-in-compiled + help: Scan for package and dependency related data in compiled binaries. Currently supported compiled binaries: Go, Rust. help_group: primary scans, name: package_only: --package-only help: Scan for system and application package data and skip license/copyright detection and top-level package creation. help_group: documentation, name: list_packages: --list-packages @@ -821,6 +834,19 @@ for ScanCode Version 32.0.0. level. 
+ -------------------------------------------- + Plugin: scancode_scan:rust_symbol class: rust_inspector.plugin:RustSymbolScannerPlugin + codebase_attributes: + resource_attributes: rust_symbols + sort_order: 100 + required_plugins: + options: + help_group: primary scans, name: rust_symbol: --rust-symbol + help: Collect Rust symbols from rust binaries. + doc: + Scan a Rust binary for symbols using blint, lief and symbolic. + + -------------------------------------------- Plugin: scancode_scan:urls class: cluecode.plugin_url:UrlScanner codebase_attributes: diff --git a/docs/source/reference/scancode-cli/cli-post-scan-options.rst b/docs/source/reference/scancode-cli/cli-post-scan-options.rst index 690b4d27a22..e3a9a628a64 100644 --- a/docs/source/reference/scancode-cli/cli-post-scan-options.rst +++ b/docs/source/reference/scancode-cli/cli-post-scan-options.rst @@ -17,6 +17,56 @@ To see all plugins available via command line help, use ``--plugins``. ---- +.. _cli-classify-option: + +``--classify`` +-------------- + + .. admonition:: Sub-option + + The options ``--license-clarity-score`` and ``--tallies-key-files`` are sub-options of + ``--classify``. ``--license-clarity-score`` and ``--tallies-key-files`` are Post-Scan + Options. + + **Example** + + .. code-block:: shell + + scancode -clpieu --json-pp sample_facet.json samples --classify + + This option makes ScanCode further classify scanned files/directories, to determine whether they + fall in these following categories + + - legal + - readme + - top-level + - manifest + + A manifest file in computing is a file containing metadata for a group of accompanying + files that are part of a set or coherent unit. + + - key-file + + A KEY file serves as a keystone element, containing essential + information about a software package — such as its dependencies, + versioning, licensing, and more. 
It often contains the + ``primary-license`` or the overall license of the package, among + other package metadata which are general or ecosystem specific. + + As in, to the JSON object of each file scanned, these extra attributes are added. + + .. code-block:: json + + { + "is_legal": false, + "is_manifest": false, + "is_readme": true, + "is_top_level": true, + "is_key_file": true + } + +---- + .. _cli-mark-source-option: ``--mark-source`` diff --git a/docs/source/reference/scancode-cli/cli-pre-scan-options.rst b/docs/source/reference/scancode-cli/cli-pre-scan-options.rst index 45379bf94ac..ad19570b211 100644 --- a/docs/source/reference/scancode-cli/cli-pre-scan-options.rst +++ b/docs/source/reference/scancode-cli/cli-pre-scan-options.rst @@ -11,99 +11,6 @@ Quick reference ---- -.. _cli-ignore-option: - -``--ignore `` ----------------------- - - In a scan, all files inside the directory specified as an input argument is scanned. But if - there are some files which you don't want to scan, the ``--ignore`` option can be used to do - the same. - - **Example** - - .. code-block:: shell - - scancode --ignore "*.java" samples samples.json - - Here, ScanCode ignores files ending with `.java`, and continues with other files as usual. - - More information on :ref:`glob-pattern-matching`. - ----- - -.. _cli-include-option: - -``--include `` ------------------------ - - In a normal scan, all files inside the directory specified as an input argument is scanned. But - if you want to run the scan on only some selective files, then ``--include`` option can be used - to do the same. - - **Example** - - .. code-block:: shell - - scancode --include "*.java" samples samples.json - - Here, ScanCode selectively scans files that has names ending with `.java`, and ignores all other files. This - is basically complementary in behavior to the ``--ignore`` option. - - See also :ref:`glob-pattern-matching`. - ----- - -.. _cli-classify-option: - -``--classify`` --------------- - - .. 
admonition:: Sub-option - - The options ``--license-clarity-score`` and ``--tallies-key-files`` are sub-options of - ``--classify``. ``--license-clarity-score`` and ``--tallies-key-files`` are Post-Scan - Options. - - **Example** - - .. code-block:: shell - - scancode -clpieu --json-pp sample_facet.json samples --classify - - This option makes ScanCode further classify scanned files/directories, to determine whether they - fall in these following categories - - - legal - - readme - - top-level - - manifest - - A manifest file in computing is a file containing metadata for a group of accompanying - files that are part of a set or coherent unit. - - - key-file - - A KEY file serves as a keystone element, containing essential - information about a software package — such as its dependencies, - versioning, licensing, and more. It often contains the - ``primary-license`` or the overall license of the package, among - other package metadata which are general or ecosystem specific. - - As in, to the JSON object of each file scanned, these extra attributes are added. - - .. code-block:: json - - { - "is_legal": false, - "is_manifest": false, - "is_readme": true, - "is_top_level": true, - "is_key_file": true - } - ----- - .. _cli-facet-option: ``--facet =`` @@ -154,50 +61,3 @@ Quick reference multiple facets, this whole part is repeated, including the ``--facet`` option. See :ref:`facets` to learn more about what a facet is. - ----- - -.. _glob-pattern-matching: - -Glob Pattern Matching ---------------------- - - All the pre-scan options use pattern matching, so the basics of Glob Pattern Matching is - discussed briefly below. - - Glob pattern matching is useful for matching a group of files, by using patterns in their - names. Then using these patterns, files are grouped and treated differently as required. - - Here are some rules from the `Linux Manual `_ - on glob patterns. Refer the same for more detailed information. 
- - A string is a wildcard pattern if it contains one of the characters '?', '*' or '['. Globbing - is the operation that expands a wildcard pattern into the list of pathnames matching the - pattern. Matching is defined by: - - - A '?' (not between brackets) matches any single character. - - - A '*' (not between brackets) matches any string, including the empty string. - - - An expression "[...]" where the first character after the leading '[' is not an '!' matches a - single character, namely any of the characters enclosed by the brackets. - - - There is one special convention: two characters separated by '-' denote a range. - - - An expression "[!...]" matches a single character, namely any character that is not matched - by the expression obtained by removing the first '!' from it. - - - A '/' in a pathname cannot be matched by a '?' or '*' wildcard, or by a range like "[.-0]". - - Note that wildcard patterns are not regular expressions, although they are a bit similar. - - For more information on glob pattern matching refer these resources: - - - `Linux Manual `_ - - `Wildcard Match Documentation `_. - - You can also import these Python Libraries to practice UNIX style pattern matching: - - - `fnmatch `_ for File Name matching - - `glob `_ for File Path matching - diff --git a/docs/source/rst-snippets/cli-core-options.rst b/docs/source/rst-snippets/cli-core-options.rst index 8bf86b167af..0f589b2c6c0 100644 --- a/docs/source/rst-snippets/cli-core-options.rst +++ b/docs/source/rst-snippets/cli-core-options.rst @@ -1,37 +1,40 @@ **Core options** ---------------- --n, --processes INTEGER Scan ```` using n parallel processes. +--ignore Ignore files matching ````. - Default: ``(number of CPUs)-1`` +-n, --processes INTEGER Scan ```` using n parallel processes. --v, --verbose Print verbose file-by-file progress messages. + Default: ``(number of CPUs)-1`` --q, --quiet Do not print summary or progress messages. +-c, --config-file FILENAME Path to the configuration file. 
+-v, --verbose Print verbose file-by-file progress messages. ---timeout FLOAT Stop scanning a file if scanning takes longer - than a timeout in seconds. +-q, --quiet Do not print summary or progress messages. - Default: ``120`` +--timeout FLOAT Stop scanning a file if scanning takes longer + than a timeout in seconds. ---from-json Load codebase from one or more existing JSON scans to: + Default: ``120`` - - apply post-scan options to do additional processing - of scan results - - merge multiple JSON scans into one. +--from-json Load codebase from one or more existing JSON scans to: ---max-in-memory INTEGER Maximum number of files and directories scan - details kept in memory during a scan. - Additional files and directories scan details - above this number are cached on-disk rather - than in memory. Use 0 to use unlimited memory - and disable on-disk caching. Use -1 to use - only on-disk caching. + - apply post-scan options to do additional processing + of scan results + - merge multiple JSON scans into one. - Default: ``10000`` +--max-in-memory INTEGER Maximum number of files and directories scan + details kept in memory during a scan. + Additional files and directories scan details + above this number are cached on-disk rather + than in memory. Use 0 to use unlimited memory + and disable on-disk caching. Use -1 to use + only on-disk caching. ---max-depth INTEGER Descend at most INTEGER levels of directories - including and below the starting point. INTEGER - must be positive or zero for no limit. + Default: ``10000`` - Default: ``0`` +--max-depth INTEGER Descend at most INTEGER levels of directories + including and below the starting point. INTEGER + must be positive or zero for no limit. 
+ + Default: ``0`` diff --git a/docs/source/rst-snippets/cli-pre-scan-options.rst b/docs/source/rst-snippets/cli-pre-scan-options.rst index b880e92abc3..f7361bb1b62 100644 --- a/docs/source/rst-snippets/cli-pre-scan-options.rst +++ b/docs/source/rst-snippets/cli-pre-scan-options.rst @@ -1,10 +1,6 @@ **Pre-scan options** -------------------- ---ignore Ignore files matching ````. - ---include Include files matching ````. - --facet Here ```` represents ``=``. Add the ```` to files with a path matching ````. diff --git a/pyproject-commoncode.toml b/pyproject-commoncode.toml index 6c69ab439b3..3ea4920d29c 100644 --- a/pyproject-commoncode.toml +++ b/pyproject-commoncode.toml @@ -4,7 +4,7 @@ build-backend = "flot.buildapi" [project] name = "commoncode" -version = "32.4.2" +version = "32.5.2" authors = [ { name = "nexB. Inc. and others", email = "info@aboutcode.org" }, ] @@ -42,9 +42,6 @@ metadata_files = [ requires-python = ">=3.10" -[project.urls] -Homepage = "https://github.com/nexB/scancode-toolkit" - dependencies = [ "attrs >= 18.1,!=20.1.0;python_version<'3.11'", "attrs >= 22.1.0;python_version>='3.11'", @@ -55,6 +52,8 @@ dependencies = [ "text_unidecode >= 1.0" ] +[project.urls] +Homepage = "https://github.com/nexB/scancode-toolkit" [project.optional-dependencies] dev = [ diff --git a/pyproject-scancode-toolkit-mini.toml b/pyproject-scancode-toolkit-mini.toml index a816bb2de74..b40f4d07fe5 100644 --- a/pyproject-scancode-toolkit-mini.toml +++ b/pyproject-scancode-toolkit-mini.toml @@ -256,7 +256,6 @@ scancode-train-gibberish-model = "textcode.train_gibberish_model:train_gibberish # scancode_pre_scan is the entry point for pre_scan plugins executed before the # scans. See also plugincode.pre_scan module for details and doc. 
[project.entry-points.scancode_pre_scan] -ignore = "scancode.plugin_ignore:ProcessIgnore" facet = "summarycode.facet:AddFacet" diff --git a/pyproject-scancode-toolkit.toml b/pyproject-scancode-toolkit.toml index 407d65b9c43..29f8157b8dc 100644 --- a/pyproject-scancode-toolkit.toml +++ b/pyproject-scancode-toolkit.toml @@ -257,7 +257,6 @@ scancode-train-gibberish-model = "textcode.train_gibberish_model:train_gibberish # scancode_pre_scan is the entry point for pre_scan plugins executed before the # scans. See also plugincode.pre_scan module for details and doc. [project.entry-points.scancode_pre_scan] -ignore = "scancode.plugin_ignore:ProcessIgnore" facet = "summarycode.facet:AddFacet" diff --git a/pyproject.toml b/pyproject.toml index cbd405f2bd5..f2371c6bb2b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -262,7 +262,6 @@ scancode-train-gibberish-model = "textcode.train_gibberish_model:train_gibberish # scancode_pre_scan is the entry point for pre_scan plugins executed before the # scans. See also plugincode.pre_scan module for details and doc. 
[project.entry-points.scancode_pre_scan] -ignore = "scancode.plugin_ignore:ProcessIgnore" facet = "summarycode.facet:AddFacet" diff --git a/src/commoncode/resource.py b/src/commoncode/resource.py index a635a0a860a..70e549cc924 100644 --- a/src/commoncode/resource.py +++ b/src/commoncode/resource.py @@ -40,6 +40,7 @@ from commoncode.datautils import List from commoncode.datautils import Mapping from commoncode.datautils import String +from commoncode.fileset import is_included from commoncode.filetype import is_file as filetype_is_file from commoncode.filetype import is_special from commoncode.fileutils import as_posixpath @@ -48,6 +49,7 @@ from commoncode.fileutils import file_name from commoncode.fileutils import parent_directory from commoncode.fileutils import splitext_name +from commoncode.system import to_os_native_path """ This module provides Codebase and Resource objects as an abstraction for files @@ -63,7 +65,7 @@ # Tracing flags TRACE = False TRACE_DEEP = False def logger_debug(*args): @@ -99,7 +101,7 @@ def skip_ignored(location): if TRACE_DEEP: logger_debug() logger_debug( - "Codebase.populate: walk: ignored loc:", + "Codebase.populate: walk: skip_ignored:", location, "ignored:", ignored(location), @@ -110,6 +112,42 @@ def skip_ignored(location): return is_special(location) or ignored(location) +def is_ignored(location, includes=tuple(), excludes=tuple()): + + excludes = { + pattern: 'User ignore: Supplied by --ignore' for pattern in excludes + } + + includes = { + pattern: 'User include: Supplied by --include' for pattern in includes + } + + included_from_options = is_included( + path=location, + includes=includes, + excludes=excludes, + ) + + if TRACE_DEEP: + logger_debug( + "Codebase.populate: walk: is_ignored:", + "is_ignored: location:", + location, + "included_from_options:", + included_from_options, + "skip_ignored", + skip_ignored(location) + ) + + if skip_ignored(location) or not included_from_options: + if TRACE_DEEP: + 
logger_debug("is_ignored: location:", location, "is_skipped",) + + return True + + return False + + def depth_walk( root_location, max_depth, @@ -203,6 +241,8 @@ class Codebase: __slots__ = ( "max_depth", "location", + "includes", + "ignores", "has_single_resource", "resource_attributes", "resource_class", @@ -237,6 +277,8 @@ def __init__( max_in_memory=10000, max_depth=0, paths=tuple(), + ignores=tuple(), + includes=tuple(), *args, **kwargs, ): @@ -299,6 +341,8 @@ def __init__( # finally populate self.paths = self._prepare_clean_paths(paths) + self.includes = self._prepare_clean_paths(includes) + self.ignores = ignores self._populate() def _prepare_clean_paths(self, paths=tuple()): @@ -462,11 +506,17 @@ def _populate(self): return if self.paths: - return self._create_resources_from_paths(root=root, paths=self.paths) + # In case of a list of full paths, we create resources without walking + return self._create_resources_from_full_paths(root=root, paths=self.paths) + # Otherwise, we walk from the root or from each included input location else: - return self._create_resources_from_root(root=root) + return self._create_resources_from_root( + root=root, + includes=self.includes, + ignores=self.ignores, + ) - def _create_resources_from_paths(self, root, paths): + def _create_resources_from_full_paths(self, root, paths): # without paths we iterate the provided paths. We report an error # if a path is missing on disk. @@ -484,22 +534,21 @@ def _create_resources_from_paths(self, root, paths): msg = f"ERROR: cannot populate codebase: path: {path!r} not found in {res_loc!r}" self.errors.append(msg) raise Exception(path, join(base_location, path)) - continue # create all parents. 
The last parent is the one we want to use parent = root if TRACE: - logger_debug("Codebase._create_resources_from_paths: parent", parent) + logger_debug("Codebase._create_resources_from_full_paths: parent", parent) for parent_path in get_ancestor_paths(path, include_self=False): if TRACE: logger_debug( - f" Codebase._create_resources_from_paths: parent_path: {parent_path!r}" + f" Codebase._create_resources_from_full_paths: parent_path: {parent_path!r}" ) if not parent_path: continue newpar = parents_by_path.get(parent_path) if TRACE: - logger_debug(" Codebase._create_resources_from_paths: newpar", repr(newpar)) + logger_debug(" Codebase._create_resources_from_full_paths: newpar", repr(newpar)) if not newpar: newpar = self._get_or_create_resource( @@ -510,7 +559,7 @@ def _create_resources_from_paths(self, root, paths): ) if not newpar: raise Exception( - "ERROR: Codebase._create_resources_from_paths:" + "ERROR: Codebase._create_resources_from_full_paths:" f" cannot create parent for: {parent_path!r}" ) parent = newpar @@ -519,7 +568,7 @@ def _create_resources_from_paths(self, root, paths): if TRACE: logger_debug( - f" Codebase._create_resources_from_paths:", + f" Codebase._create_resources_from_full_paths:", f"created newpar: {newpar!r}", ) @@ -530,15 +579,15 @@ def _create_resources_from_paths(self, root, paths): is_file=isfile(res_loc), ) if TRACE: - logger_debug("Codebase._create_resources_from_paths: resource", res) + logger_debug("Codebase._create_resources_from_full_paths: resource", res) - def _create_resources_from_root(self, root): - # without paths we walks the root location top-down + def _create_resources_from_root(self, root, includes, ignores): + # without paths we walk the root location top-down # track resources parents by location during construction. # NOTE: this cannot exhaust memory on a large codebase, because we do # not keep parents already walked and we walk topdown. 
- parents_by_loc = {root.location: root} + parents_by_loc = {to_os_native_path(root.location): root} def err(_error): """os.walk error handler""" @@ -546,22 +595,55 @@ def err(_error): f"ERROR: cannot populate codebase: {_error}\n{traceback.format_exc()}" ) - # Walk over the directory and build the resource tree - for top, dirs, files in depth_walk( - root_location=root.location, - max_depth=self.max_depth, - error_handler=err, - ): - parent = parents_by_loc.pop(top) - for created in self._create_resources( - parent=parent, - top=top, - dirs=dirs, - files=files, + # ignore creating resources based on path patterns + skip_ignored = partial(is_ignored, excludes=ignores) + + if TRACE_DEEP: + logger_debug(f"parents_by_loc: {parents_by_loc}, ignores: {ignores}, includes: {includes}") + + # in the case of a single input location, walking starts from + # the root and only the root location + if not includes: + includes = [root.location] + else: + # create the directory resources between the common + # prefix and the included locations so that they are + # connected to the root + for created in self._create_resources_common_prefix_to_inputs( + root=root, + includes=includes, ): - # on the plain, bare FS, files cannot be parents if not created.is_file: - parents_by_loc[created.location] = created + parents_by_loc[to_os_native_path(created.location)] = created + + if TRACE_DEEP: + logger_debug(f"parents_by_loc: {parents_by_loc}") + + # we start walking through all the input locations + for included_location in includes: + # Walk over the directory and build the resource tree + for top, dirs, files in depth_walk( + root_location=included_location, + skip_ignored=skip_ignored, + max_depth=self.max_depth, + error_handler=err, + ): + if TRACE_DEEP: + logger_debug(f"parents_by_loc: {parents_by_loc}") + try: + parent = parents_by_loc.pop(top) + except KeyError: + raise Exception(parents_by_loc, includes, root.location, ) + for created in self._create_resources( + parent=parent, + 
top=top, + dirs=dirs, + files=files, + skip_ignored=skip_ignored, + ): + # on the plain, bare FS, files cannot be parents + if not created.is_file: + parents_by_loc[to_os_native_path(created.location)] = created def _create_resources(self, parent, top, dirs, files, skip_ignored=skip_ignored): """ @@ -575,6 +657,8 @@ def _create_resources(self, parent, top, dirs, files, skip_ignored=skip_ignored) for name in names: location = join(top, name) if skip_ignored(location): + if TRACE_DEEP: + logger_debug(f"_create_resources, depth_walk loop: ignored location: {location}") continue res = self._get_or_create_resource( name=name, @@ -585,6 +669,28 @@ def _create_resources(self, parent, top, dirs, files, skip_ignored=skip_ignored) logger_debug("Codebase.create_resources:", res) yield res + def _create_resources_common_prefix_to_inputs(self, root, includes): + + if TRACE_DEEP: + logger_debug(f"_create_resources_common_prefix_to_inputs: root:{root.location}, includes: {includes}") + + for included_path in includes: + _, _, extra_dir_path = included_path.rpartition(root.location) + extra_dirs = extra_dir_path.strip("/").split("/") + if TRACE_DEEP: + logger_debug(f"_create_resources_common_prefix_to_inputs: root:{root.location}, includes: {includes}") + + dir_resource = root + for dir_segment in extra_dirs: + dir_resource = self._get_or_create_resource( + name=dir_segment, + parent=dir_resource, + is_file=False, + ) + if TRACE: + logger_debug("Codebase.create_resources:", dir_resource) + yield dir_resource + def _create_root_resource(self): """ Create and return the root Resource of this codebase. @@ -1550,8 +1656,8 @@ def clean_path(path): Return a cleaned and normalized POSIX ``path``. 
""" path = path or "" - # convert to posix and ensure we have no slash at both ends - path = posixpath_normpath(path.replace("\\", "/").strip("/")) + # convert to posix and ensure we have no slash at the end + path = posixpath_normpath(path.replace("\\", "/").rstrip("/")) if path == ".": path = "" return path @@ -1570,8 +1676,8 @@ def strip_first_path_segment(path): '' >>> strip_first_path_segment('foo/bar/baz') 'bar/baz' - >>> strip_first_path_segment('/foo/bar/baz/') - 'bar/baz' + >>> strip_first_path_segment('/foo/bar/baz') + 'foo/bar/baz' >>> strip_first_path_segment('foo/') '' """ diff --git a/src/commoncode/system.py b/src/commoncode/system.py index 0e82a70417f..6bbc8535aef 100644 --- a/src/commoncode/system.py +++ b/src/commoncode/system.py @@ -13,6 +13,15 @@ from commoncode.distro import parse_os_release +def to_os_native_path(path): + """ + Normalize a path to use the native OS path separator. + """ + OS_PATH_SEP = "\\" if on_windows else "/" + + return path.replace("/", OS_PATH_SEP).replace("\\", OS_PATH_SEP).rstrip(OS_PATH_SEP) + + def os_arch(): """ Return a tuple for the current the OS and architecture. diff --git a/src/commoncode/testcase.py b/src/commoncode/testcase.py index af81fc7f5e3..9db033ceb80 100644 --- a/src/commoncode/testcase.py +++ b/src/commoncode/testcase.py @@ -29,7 +29,7 @@ from commoncode.archive import extract_zip_raw from commoncode.archive import tar_can_extract # NOQA from commoncode.system import on_posix -from commoncode.system import on_windows +from commoncode.system import to_os_native_path # a base test dir specific to a given test run # to ensure that multiple tests run can be launched in parallel @@ -39,15 +39,6 @@ timing_threshold = sys.maxsize -def to_os_native_path(path): - """ - Normalize a path to use the native OS path separator. 
- """ - OS_PATH_SEP = "\\" if on_windows else "/" - - return path.replace("/", OS_PATH_SEP).replace("\\", OS_PATH_SEP).rstrip(OS_PATH_SEP) - - def get_test_loc( test_path, test_data_dir, @@ -93,7 +84,7 @@ class FileDrivenTesting(object): test_data_dir = None - def get_test_loc(self, test_path, copy=False, debug=False, must_exist=True): + def get_test_loc(self, test_path, copy=False, debug=False, must_exist=True, relative=False): """ Given a `test_path` relative to the self.test_data_dir directory, return the location to a test file or directory for this path. Copy to a temp @@ -129,6 +120,11 @@ def get_test_loc(self, test_path, copy=False, debug=False, must_exist=True): # cleanup of VCS that could be left over from checkouts self.remove_vcs(target_dir) test_loc = target_dir + + if relative: + _, _, rel_test_loc = test_loc.rpartition(os.getcwd()) + return rel_test_loc.strip("/").strip("\\") + return test_loc def get_temp_file(self, extension=None, dir_name="td", file_name="tf"): diff --git a/src/scancode/cli.py b/src/scancode/cli.py index 1376c6cfee9..f7fe221c214 100644 --- a/src/scancode/cli.py +++ b/src/scancode/cli.py @@ -17,12 +17,14 @@ import logging import os import platform +import saneyaml import sys import traceback from collections import defaultdict from functools import partial from multiprocessing import TimeoutError +from pathlib import Path from time import sleep from time import time @@ -221,6 +223,25 @@ def default_processes(): callback=validate_input_path, type=click.Path(exists=True, readable=True, path_type=str)) +@click.option('--ignore', + multiple=True, + default=None, + metavar='', + help='Ignore files matching .', + sort_order=10, + help_group=cliutils.CORE_GROUP, + cls=PluggableCommandLineOption, +) + +@click.option('-c', '--config-file', + type=click.File('r'), + required=False, + help='Path to the configuration file.', + sort_order=11, + help_group=cliutils.CORE_GROUP, + cls=PluggableCommandLineOption, +) + @click.option('--strip-root', 
is_flag=True, default=False, @@ -395,6 +416,8 @@ def default_processes(): def scancode( ctx, input, # NOQA + config_file, + ignore, strip_root, full_root, processes, @@ -505,6 +528,8 @@ def scancode( # run proper success, _results = run_scan( input=input, + ignore=ignore, + config_file=config_file, from_json=from_json, strip_root=strip_root, full_root=full_root, @@ -545,7 +570,9 @@ def scancode( def run_scan( - input, # NOQA + input, # + config_file=None, + ignore=[], from_json=False, strip_root=False, full_root=False, @@ -597,6 +624,9 @@ def echo_func(*_args, **_kwargs): msg = 'At least one input path is required.' raise ScancodeError(msg) + # To support multiple path inputs + include = [] + if not isinstance(input, (list, tuple)): if not isinstance(input, str): msg = 'Unknown format: "{}".'.format(repr(input)) @@ -611,8 +641,6 @@ def echo_func(*_args, **_kwargs): # VirtualCodebase; otherwise we have to process `input` to make it a single # root with excludes. elif not from_json: - # FIXME: support the multiple root better. This is quirky at best - # This is the case where we have a list of input path and the # `from_json` option is not selected: we can handle this IFF they share # a common root directory and none is an absolute path @@ -624,34 +652,39 @@ def echo_func(*_args, **_kwargs): ) raise ScancodeError(msg) + abs_input = [os.path.abspath(i) for i in input] + # find the common prefix directory (note that this is a pre string # operation hence it may return non-existing paths - common_prefix = os.path.commonprefix(input) + common_prefix = os.path.commonprefix(abs_input) if not common_prefix: # we have no common prefix, but all relative. 
therefore the - # parent/root is the current ddirectory + # parent/root is the current directory common_prefix = str('.') + elif not common_prefix.endswith("/"): + # common prefix has trailing incomplete dirname + # for example the common prefix of "/temp/scancode" + # and "/temp/scans" is "/temp/scan" + common_prefix, _, _ = common_prefix.rpartition("/") elif not os.path.isdir(common_prefix): msg = ( 'Invalid inputs: all input paths must share a ' - 'common single parent directory.' + f'common single parent directory. common part: {common_prefix}' ) raise ScancodeError(msg) - # and we craft a list of synthetic --include path pattern options from - # the input list of paths - included_paths = [as_posixpath(path).rstrip('/') for path in input] - # FIXME: this is a hack as this "include" is from an external plugin!!! - include = list(requested_options.get('include', []) or []) - include.extend(included_paths) - requested_options['include'] = include - - # ... and use the common prefix as our new input + # and we craft a list of include paths where the codebase walks + # will start from, even though the root is the common prefix + include = [as_posixpath(path).rstrip('/') for path in abs_input] input = common_prefix # NOQA + config_ignores = load_configuration_file(config_file) + if config_ignores: + ignore = ignore + tuple(config_ignores) + # build mappings of all options to pass down to plugins standard_options = dict( input=input, @@ -894,6 +927,8 @@ def echo_func(*_args, **_kwargs): try: codebase = codebase_class( location=input, + includes=include, + ignores=ignore, resource_attributes=resource_attributes, codebase_attributes=codebase_attributes, full_root=full_root, @@ -1091,6 +1126,34 @@ def echo_func(*_args, **_kwargs): return success, results +def load_configuration_file(path): + """ + Load scancode configuration values from a file at `path`. + + Currently only supports ignore path patterns specified with + "ignored_patterns". 
This should be compatible with scancode.io + configuration values whenever possible: + https://scancodeio.readthedocs.io/en/latest/project-configuration.html + """ + ignores = [] + if not path: + return ignores + + click.echo(f"Loading env from {path}") + try: + + config_values = saneyaml.load(path.read()) + ignores = config_values.get("ignored_patterns", []) + except (saneyaml.YAMLError, Exception): + msg = ( + f'Failed to load configuration from "{path}". ' + f"The file format is invalid." + ) + raise ScancodeError(msg + '\n' + traceback.format_exc()) + + return ignores + + def run_codebase_plugins( stage, plugins, diff --git a/src/scancode/outdated.py b/src/scancode/outdated.py index 4be850d8470..2c68dc39e2e 100644 --- a/src/scancode/outdated.py +++ b/src/scancode/outdated.py @@ -83,7 +83,11 @@ def total_seconds(td): class VersionCheckState: - def __init__(self): + def __init__(self, is_test=False): + if is_test: + self.state={} + return + self.statefile_path = os.path.join( scancode_cache_dir, 'scancode-version-check.json') self.lockfile_path = self.statefile_path + '.lockfile' @@ -135,6 +139,7 @@ def check_scancode_version( release_date=scancode_release_date, new_version_url='https://pypi.org/pypi/scancode-toolkit/json', force=False, + is_test=False, ): """ Check for an updated version of scancode-toolkit. 
Return a message to @@ -146,6 +151,7 @@ def check_scancode_version( installed_version=installed_version, new_version_url=new_version_url, force=force, + is_test=is_test, ) if newer_version: return build_outdated_message( @@ -159,6 +165,7 @@ def fetch_newer_version( installed_version=scancode_version, new_version_url='https://pypi.org/pypi/scancode-toolkit/json', force=False, + is_test=False, ): """ Return a version string if there is an updated version of scancode-toolkit @@ -175,9 +182,10 @@ def fetch_newer_version( try: installed_version = packaging_version.parse(installed_version) - state = VersionCheckState() + state = VersionCheckState(is_test=is_test) current_time = datetime.datetime.utcnow() + latest_version = None # Determine if we need to refresh the state if ('last_check' in state.state and 'latest_version' in state.state): last_check = datetime.datetime.strptime( diff --git a/src/scancode/plugin_ignore.py b/src/scancode/plugin_ignore.py index 70b0e30b10b..3b1b3a06ed0 100644 --- a/src/scancode/plugin_ignore.py +++ b/src/scancode/plugin_ignore.py @@ -37,87 +37,63 @@ def logger_debug(*args): return logger.debug(' '.join(isinstance(a, str) and a or repr(a) for a in args)) -@pre_scan_impl -class ProcessIgnore(PreScanPlugin): +def process_codebase(codebase, ignore=(), include=(), **kwargs): """ - Include or ignore files matching patterns. + WARNING: DEPRECATED, ignore/include moved to codebase import + step in core plugins. + Keep only included and non-ignored Resources in the codebase. 
""" - options = [ - PluggableCommandLineOption(('--ignore',), - multiple=True, - default=None, - metavar='', - help='Ignore files matching .', - sort_order=10, - help_group=PRE_SCAN_GROUP), - PluggableCommandLineOption(('--include',), - multiple=True, - default=None, - metavar='', - help='Include files matching .', - sort_order=11, - help_group=PRE_SCAN_GROUP) - ] - - def is_enabled(self, ignore, include, **kwargs): - return ignore or include - - def process_codebase(self, codebase, ignore=(), include=(), **kwargs): - """ - Keep only included and non-ignored Resources in the codebase. - """ - - if not (ignore or include): - return - - excludes = { - pattern: 'User ignore: Supplied by --ignore' for pattern in ignore - } - - includes = { - pattern: 'User include: Supplied by --include' for pattern in include - } - - included = partial(is_included, includes=includes, excludes=excludes) - - paths_to_remove = set() - paths_to_remove_add = paths_to_remove.add - paths_to_remove_discard = paths_to_remove.discard - - # Walk codebase top-down to collect the paths of Resources to remove. - for resource in codebase.walk(topdown=True): - if resource.is_root: - continue - - resource_path = resource.path - - if not included(resource_path): - for child in resource.children(codebase): - paths_to_remove_add(child.path) - paths_to_remove_add(resource_path) - else: - # we may have been selected for removal based on a parent dir - # but may be explicitly included. Honor that - paths_to_remove_discard(resource_path) - - if TRACE: - logger_debug('process_codebase: paths_to_remove') - logger_debug(paths_to_remove) - for path in sorted(paths_to_remove): - logger_debug(codebase.get_resource(path)) - - remove_resource = codebase.remove_resource - - # Then, walk bottom-up and remove the non-included Resources from the - # Codebase if the Resource path is in our list of paths to remove. 
- for resource in codebase.walk(topdown=False): - resource_path = resource.path - if resource.is_root: - continue - # removing dirs will also remove its files - if resource.is_dir: - continue - if resource_path in paths_to_remove: - paths_to_remove_discard(resource_path) - remove_resource(resource) + if not (ignore or include): + return + + excludes = { + pattern: 'User ignore: Supplied by --ignore' for pattern in ignore + } + + includes = { + pattern: 'User include: Supplied by --include' for pattern in include + } + + included = partial(is_included, includes=includes, excludes=excludes) + + paths_to_remove = set() + paths_to_remove_add = paths_to_remove.add + paths_to_remove_discard = paths_to_remove.discard + + # Walk codebase top-down to collect the paths of Resources to remove. + for resource in codebase.walk(topdown=True): + if resource.is_root: + continue + + resource_path = resource.path + + if not included(resource_path): + for child in resource.children(codebase): + paths_to_remove_add(child.path) + paths_to_remove_add(resource_path) + else: + # we may have been selected for removal based on a parent dir + # but may be explicitly included. Honor that + paths_to_remove_discard(resource_path) + + if TRACE: + logger_debug('process_codebase: paths_to_remove') + logger_debug(paths_to_remove) + for path in sorted(paths_to_remove): + logger_debug(codebase.get_resource(path)) + + remove_resource = codebase.remove_resource + + # Then, walk bottom-up and remove the non-included Resources from the + # Codebase if the Resource path is in our list of paths to remove. 
+ for resource in codebase.walk(topdown=False): + resource_path = resource.path + if resource.is_root: + continue + # removing dirs will also remove its files + if resource.is_dir: + continue + if resource_path in paths_to_remove: + paths_to_remove_discard(resource_path) + remove_resource(resource) diff --git a/src/scancode_config.py b/src/scancode_config.py index 20c57a19bef..6e9f634b083 100644 --- a/src/scancode_config.py +++ b/src/scancode_config.py @@ -95,7 +95,7 @@ def _create_dir(location): from subprocess import CalledProcessError # this may fail with exceptions - cmd = 'git', 'describe', '--tags', + cmd = 'git', 'describe', '--tags', '--match="v*"' try: output = check_output(cmd, stderr=STDOUT) __version__ = output.decode('utf-8').strip() diff --git a/tests/commoncode/test_fileset.py b/tests/commoncode/test_fileset.py index ccbfe9df99e..25632ae1a51 100644 --- a/tests/commoncode/test_fileset.py +++ b/tests/commoncode/test_fileset.py @@ -56,6 +56,11 @@ def test_is_included_is_included_exclusions_2(self): assert fileset.is_included("/some/src/this/that", incs, excs) assert not fileset.is_included("/src/dist/build/mylib.so", incs, excs) + def test_is_included_is_included_inside_exclusions(self): + incs = {"/src/*.so": ".scanignore"} + excs = {"/src/*": ".scanignore"} + assert not fileset.is_included("/src/dist/build/mylib.so", incs, excs) + def test_is_included_empty_exclusions(self): incs = {"/src/*": ".scanignore"} excs = {"": ".scanignore"} diff --git a/tests/commoncode/test_resource.py b/tests/commoncode/test_resource.py index b85470eb9d7..07f87dd55e2 100644 --- a/tests/commoncode/test_resource.py +++ b/tests/commoncode/test_resource.py @@ -354,7 +354,7 @@ def test_get_resource_for_multiple_resource_codebase(self): codebase = Codebase(test_codebase) assert codebase.get_resource("resource/a").path == "resource/a" - assert codebase.get_resource("/resource/c").path == "resource/c" + assert codebase.get_resource("resource/c").path == "resource/c" assert 
codebase.get_resource("resource/dsasda/../b/").path == "resource/b" def test_Resource_build_path(self): diff --git a/tests/licensedcode/test_detect.py b/tests/licensedcode/test_detect.py index 5dbf7b369bf..7459553f2cc 100644 --- a/tests/licensedcode/test_detect.py +++ b/tests/licensedcode/test_detect.py @@ -1075,8 +1075,8 @@ def test_match_has_correct_line_positions_in_automake_perl_file(self): expected = [ # detected, match.lines(), match.qspan, ('gpl-2.0-plus', (12, 25), Span(51, 160)), - ('fsf-unlimited-no-warranty', (231, 238), Span(986, 1049)), - ('warranty-disclaimer', (306, 307), Span(1359, 1381)), + ('fsf-unlimited-no-warranty', (231, 238), Span(998, 1061) ), + ('warranty-disclaimer', (306, 307), Span(1371, 1393)), ] self.check_position('positions/automake.pl', expected) diff --git a/tests/scancode/data/help/help.txt b/tests/scancode/data/help/help.txt index e725888ead4..52bc2e0ce73 100644 --- a/tests/scancode/data/help/help.txt +++ b/tests/scancode/data/help/help.txt @@ -92,8 +92,6 @@ Options: such that all paths have a common root directory. pre-scan: - --ignore Ignore files matching . - --include Include files matching . --facet = Add the to files with a path matching . @@ -138,26 +136,29 @@ Options: which are todo items and needs manual review. core: - --timeout Stop an unfinished file scan after a timeout in - seconds. [default: 120 seconds] - -n, --processes INT Set the number of parallel processes to use. Disable - parallel processing if 0. Also disable threading if - -1. [default: (number of CPUs)-1] - -q, --quiet Do not print summary or progress. - -v, --verbose Print progress as file-by-file path instead of a - progress bar. Print verbose scan counters. - --from-json Load codebase from one or more JSON scan - file(s). - --max-in-memory INTEGER Maximum number of files and directories scan details - kept in memory during a scan. Additional files and - directories scan details above this number are cached - on-disk rather than in memory. 
Use 0 to use unlimited - memory and disable on-disk caching. Use -1 to use - only on-disk caching. [default: 10000] - --max-depth INTEGER Maximum nesting depth of subdirectories to scan. - Descend at most INTEGER levels of directories below - and including the starting directory. Use 0 for no - scan depth limit. + --ignore Ignore files matching . + --timeout Stop an unfinished file scan after a timeout in + seconds. [default: 120 seconds] + -n, --processes INT Set the number of parallel processes to use. + Disable parallel processing if 0. Also disable + threading if -1. [default: (number of CPUs)-1] + -c, --config-file FILENAME Path to the configuration file. + -q, --quiet Do not print summary or progress. + -v, --verbose Print progress as file-by-file path instead of a + progress bar. Print verbose scan counters. + --from-json Load codebase from one or more JSON scan + file(s). + --max-in-memory INTEGER Maximum number of files and directories scan + details kept in memory during a scan. Additional + files and directories scan details above this + number are cached on-disk rather than in memory. + Use 0 to use unlimited memory and disable on-disk + caching. Use -1 to use only on-disk caching. + [default: 10000] + --max-depth INTEGER Maximum nesting depth of subdirectories to scan. + Descend at most INTEGER levels of directories + below and including the starting directory. Use 0 + for no scan depth limit. documentation: -h, --help Show this message and exit. diff --git a/tests/scancode/data/help/help_linux.txt b/tests/scancode/data/help/help_linux.txt index 6794b19d602..9630f39fb01 100644 --- a/tests/scancode/data/help/help_linux.txt +++ b/tests/scancode/data/help/help_linux.txt @@ -94,8 +94,6 @@ Options: such that all paths have a common root directory. pre-scan: - --ignore Ignore files matching . - --include Include files matching . --facet = Add the to files with a path matching . @@ -140,26 +138,29 @@ Options: which are todo items and needs manual review. 
core: - --timeout Stop an unfinished file scan after a timeout in - seconds. [default: 120 seconds] - -n, --processes INT Set the number of parallel processes to use. Disable - parallel processing if 0. Also disable threading if - -1. [default: (number of CPUs)-1] - -q, --quiet Do not print summary or progress. - -v, --verbose Print progress as file-by-file path instead of a - progress bar. Print verbose scan counters. - --from-json Load codebase from one or more JSON scan - file(s). - --max-in-memory INTEGER Maximum number of files and directories scan details - kept in memory during a scan. Additional files and - directories scan details above this number are cached - on-disk rather than in memory. Use 0 to use unlimited - memory and disable on-disk caching. Use -1 to use - only on-disk caching. [default: 10000] - --max-depth INTEGER Maximum nesting depth of subdirectories to scan. - Descend at most INTEGER levels of directories below - and including the starting directory. Use 0 for no - scan depth limit. + --ignore Ignore files matching . + --timeout Stop an unfinished file scan after a timeout in + seconds. [default: 120 seconds] + -n, --processes INT Set the number of parallel processes to use. + Disable parallel processing if 0. Also disable + threading if -1. [default: (number of CPUs)-1] + -c, --config-file FILENAME Path to the configuration file. + -q, --quiet Do not print summary or progress. + -v, --verbose Print progress as file-by-file path instead of a + progress bar. Print verbose scan counters. + --from-json Load codebase from one or more JSON scan + file(s). + --max-in-memory INTEGER Maximum number of files and directories scan + details kept in memory during a scan. Additional + files and directories scan details above this + number are cached on-disk rather than in memory. + Use 0 to use unlimited memory and disable on-disk + caching. Use -1 to use only on-disk caching. 
+ [default: 10000] + --max-depth INTEGER Maximum nesting depth of subdirectories to scan. + Descend at most INTEGER levels of directories + below and including the starting directory. Use 0 + for no scan depth limit. documentation: -h, --help Show this message and exit. diff --git a/tests/scancode/data/plugin_ignore/ignore.yaml b/tests/scancode/data/plugin_ignore/ignore.yaml new file mode 100644 index 00000000000..fc52a109624 --- /dev/null +++ b/tests/scancode/data/plugin_ignore/ignore.yaml @@ -0,0 +1,3 @@ +ignored_patterns: + - '*.doc' + - '*/test*' diff --git a/tests/scancode/data/summaries/multiple-input-expected.json b/tests/scancode/data/summaries/multiple-input-expected.json new file mode 100644 index 00000000000..0c0fe6169fb --- /dev/null +++ b/tests/scancode/data/summaries/multiple-input-expected.json @@ -0,0 +1,1278 @@ +{ + "files": [ + { + "path": "summaries", + "type": "directory", + "name": "summaries", + "base_name": "summaries", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha1_git": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 35, + "dirs_count": 13, + "size_count": 1161085, + "scan_errors": [] + }, + { + "path": "summaries/client", + "type": "directory", + "name": "client", + "base_name": "client", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha1_git": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 2, + "dirs_count": 1, + "size_count": 2, + "scan_errors": [] + }, + { + "path": "summaries/client/Images", + "type": "directory", + "name": "Images", + "base_name": "Images", + "extension": 
"", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha1_git": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 2, + "dirs_count": 0, + "size_count": 2, + "scan_errors": [] + }, + { + "path": "summaries/client/Images/applicationCache.png", + "type": "file", + "name": "applicationCache.png", + "base_name": "applicationCache", + "extension": ".png", + "size": 1, + "date": "2026-05-22", + "sha1": "adc83b19e793491b1c6ea0fd8b46cd9f32e592fc", + "md5": "68b329da9893e34099c7d8ad5cb9c940", + "sha256": "01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b", + "sha1_git": "8b137891791fe96927ad78e64b0aad7bded08bdc", + "mime_type": "application/octet-stream", + "file_type": "very short file (no magic)", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "summaries/client/Images/spinner.gif", + "type": "file", + "name": "spinner.gif", + "base_name": "spinner", + "extension": ".gif", + "size": 1, + "date": "2026-05-22", + "sha1": "adc83b19e793491b1c6ea0fd8b46cd9f32e592fc", + "md5": "68b329da9893e34099c7d8ad5cb9c940", + "sha256": "01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b", + "sha1_git": "8b137891791fe96927ad78e64b0aad7bded08bdc", + "mime_type": "application/octet-stream", + "file_type": "very short file (no magic)", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "summaries/counts", + "type": "directory", + "name": "counts", + 
"base_name": "counts", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha1_git": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 33, + "dirs_count": 10, + "size_count": 1161083, + "scan_errors": [] + }, + { + "path": "summaries/counts/JGroups", + "type": "directory", + "name": "JGroups", + "base_name": "JGroups", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha1_git": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 14, + "dirs_count": 2, + "size_count": 241228, + "scan_errors": [] + }, + { + "path": "summaries/counts/JGroups/EULA", + "type": "file", + "name": "EULA", + "base_name": "EULA", + "extension": "", + "size": 8156, + "date": "2026-05-22", + "sha1": "eb232aa0424eca9c4136904e6143b72aaa9cf4de", + "md5": "0be0aceb8296727efff0ac0bf8e6bdb3", + "sha256": "6ef829995515206ba682183a68f971f00ee91b6bd1b4427f76a6bf364969c1ae", + "sha1_git": "0dcb788ede5b2c0b1659c5c2f2bb0cb40e245fe1", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": "verilog", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "summaries/counts/JGroups/LICENSE", + "type": "file", + "name": "LICENSE", + "base_name": "LICENSE", + "extension": "", + "size": 26430, + "date": "2026-05-22", + "sha1": "e60c2e780886f95df9c9ee36992b8edabec00bcc", + "md5": "7fbc338309ac38fefcd64b04bb903e34", + "sha256": 
"a190dc9c8043755d90f8b0a75fa66b9e42d4af4c980bf5ddc633f0124db3cee7", + "sha1_git": "b1e3f5a2638797271cbc9b91b856c05ed6942c8f", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "summaries/counts/JGroups/licenses", + "type": "directory", + "name": "licenses", + "base_name": "licenses", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha1_git": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 5, + "dirs_count": 0, + "size_count": 54552, + "scan_errors": [] + }, + { + "path": "summaries/counts/JGroups/licenses/apache-1.1.txt", + "type": "file", + "name": "apache-1.1.txt", + "base_name": "apache-1.1", + "extension": ".txt", + "size": 2885, + "date": "2026-05-22", + "sha1": "6b5608d35c3e304532af43db8bbfc5947bef46a6", + "md5": "276982197c941f4cbf3d218546e17ae2", + "sha256": "b03079c80bc3657f4b9d838f02f036e4611693a0e42b043d5d71b45ac6c5040d", + "sha1_git": "dae2270c2c0118eef91e8a6c841299983b71e771", + "mime_type": "text/plain", + "file_type": "ASCII text, with CRLF line terminators", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "summaries/counts/JGroups/licenses/apache-2.0.txt", + "type": "file", + "name": "apache-2.0.txt", + "base_name": "apache-2.0", + "extension": ".txt", + "size": 11560, + "date": "2026-05-22", + "sha1": "47b573e3824cd5e02a1a3ae99e2735b49e0256e4", + "md5": 
"d273d63619c9aeaf15cdaf76422c4f87", + "sha256": "3ddf9be5c28fe27dad143a5dc76eea25222ad1dd68934a047064e56ed2fa40c5", + "sha1_git": "75b52484ea471f882c29e02693b4f02dba175b5e", + "mime_type": "text/plain", + "file_type": "ASCII text, with CRLF line terminators", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "summaries/counts/JGroups/licenses/bouncycastle.txt", + "type": "file", + "name": "bouncycastle.txt", + "base_name": "bouncycastle", + "extension": ".txt", + "size": 1186, + "date": "2026-05-22", + "sha1": "74facb0e9a734479f9cd893b5be3fe1bf651b760", + "md5": "9fffd8de865a5705969f62b128381f85", + "sha256": "3d469c451a2a0e97380b90143d979281fadd39be55432b903e6bd18b1b9915d4", + "sha1_git": "3cf73c2f03238a23b56389c301deece6ab625b20", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "summaries/counts/JGroups/licenses/cpl-1.0.txt", + "type": "file", + "name": "cpl-1.0.txt", + "base_name": "cpl-1.0", + "extension": ".txt", + "size": 11987, + "date": "2026-05-22", + "sha1": "681cf776bcd79752543d42490ec7ed22a29fd888", + "md5": "9a6d2c9ae73d59eb3dd38e3909750d14", + "sha256": "d9a768a23056b25ab4b0b48381003ce55f0d32514da5a4e017fa0765b3a887aa", + "sha1_git": "2243be15b296d7f00716bfb6e909d7325dbca0a8", + "mime_type": "text/plain", + "file_type": "ASCII text, with CRLF line terminators", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + 
"scan_errors": [] + }, + { + "path": "summaries/counts/JGroups/licenses/lgpl.txt", + "type": "file", + "name": "lgpl.txt", + "base_name": "lgpl", + "extension": ".txt", + "size": 26934, + "date": "2026-05-22", + "sha1": "8f1a637d2e2ed1bdb9eb01a7dccb5c12cc0557e1", + "md5": "f14599a2f089f6ff8c97e2baa4e3d575", + "sha256": "885a03f54b157961236f46843e79972abfcd6890b6cbb368bc7eca328ff95a12", + "sha1_git": "cbee875ba6ddb0dadab286daf7ccec2f6f64191f", + "mime_type": "text/plain", + "file_type": "ASCII text, with CRLF line terminators", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "summaries/counts/JGroups/src", + "type": "directory", + "name": "src", + "base_name": "src", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha1_git": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 7, + "dirs_count": 0, + "size_count": 152090, + "scan_errors": [] + }, + { + "path": "summaries/counts/JGroups/src/FixedMembershipToken.java", + "type": "file", + "name": "FixedMembershipToken.java", + "base_name": "FixedMembershipToken", + "extension": ".java", + "size": 5144, + "date": "2026-05-22", + "sha1": "5901f73dcc78155a1a2c7b5663a3a11fba400b19", + "md5": "aca9640ec8beee21b098bcf8ecc91442", + "sha256": "aac525060867f5004c7343690f1c197c9a678b334d402e0e9fd117c8b2df73f2", + "sha1_git": "46cf578d6de505d076c7ed49cc791f6597b6f4a9", + "mime_type": "text/x-java", + "file_type": "Java source, ASCII text", + "programming_language": "Java", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + 
"files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "summaries/counts/JGroups/src/GuardedBy.java", + "type": "file", + "name": "GuardedBy.java", + "base_name": "GuardedBy", + "extension": ".java", + "size": 813, + "date": "2026-05-22", + "sha1": "981d67087e65e9a44957c026d4b10817cf77d966", + "md5": "c5064400f759d3e81771005051d17dc1", + "sha256": "7c3e384429f27692534184e1511f70416c04c3f0b30be632710101840996695a", + "sha1_git": "6d9a9ec4a3f12a5619dd42cd560f36fd271fea43", + "mime_type": "text/x-java", + "file_type": "Java source, ASCII text", + "programming_language": "Java", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "summaries/counts/JGroups/src/ImmutableReference.java", + "type": "file", + "name": "ImmutableReference.java", + "base_name": "ImmutableReference", + "extension": ".java", + "size": 1838, + "date": "2026-05-22", + "sha1": "30f56b876d5576d9869e2c5c509b08db57110592", + "md5": "48ca3c72fb9a65c771a321222f118b88", + "sha256": "8a3fb390d4932a92c56e7b999b63b8e5ab55cbe81f65b27439296f279d160bd1", + "sha1_git": "50c720e0bf04f3b06fc8ef4bf7d176c41d6839bc", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": "Java", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "summaries/counts/JGroups/src/RATE_LIMITER.java", + "type": "file", + "name": "RATE_LIMITER.java", + "base_name": "RATE_LIMITER", + "extension": ".java", + "size": 3692, + "date": "2026-05-22", + "sha1": "a8087e5d50da3273536ebda9b87b77aa4ff55deb", + "md5": "4626bdbc48871b55513e1a12991c61a8", + "sha256": "80709043c6c1f4fbd6e7a43c9381da034ab9b67e2e6fee80973a0d4fd33664e0", + "sha1_git": 
"d0765aa5f296c5f9711b279014331f62ea6f43f4", + "mime_type": "text/x-java", + "file_type": "Java source, ASCII text", + "programming_language": "Java", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "summaries/counts/JGroups/src/RouterStub.java", + "type": "file", + "name": "RouterStub.java", + "base_name": "RouterStub", + "extension": ".java", + "size": 9913, + "date": "2026-05-22", + "sha1": "c1f6818f8ee7bddcc9f444bc94c099729d716d52", + "md5": "eecfe23494acbcd8088c93bc1e83c7f2", + "sha256": "f212de138e8cb0b7eb13521d8ed2620bc41af55093b857da753d7753b1d3438d", + "sha1_git": "1e0b9f9ef4c063cb7e62e9ddd9abf6a596ef7faa", + "mime_type": "text/x-java", + "file_type": "Java source, ASCII text", + "programming_language": "Java", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "summaries/counts/JGroups/src/RouterStubManager.java", + "type": "file", + "name": "RouterStubManager.java", + "base_name": "RouterStubManager", + "extension": ".java", + "size": 8162, + "date": "2026-05-22", + "sha1": "eb419dc94cfe11ca318a3e743a7f9f080e70c751", + "md5": "20bee9631b7c82a45c250e095352aec7", + "sha256": "c39a40d4057256a8fe70f2b69e5f940edcaf8b377b546d537e799ecff3f58b81", + "sha1_git": "47153252434d56c35406e63207e4a6a393fa508f", + "mime_type": "text/x-java", + "file_type": "Java source, ASCII text", + "programming_language": "Java", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "summaries/counts/JGroups/src/S3_PING.java", + "type": "file", + "name": "S3_PING.java", + 
"base_name": "S3_PING", + "extension": ".java", + "size": 122528, + "date": "2026-05-22", + "sha1": "08dba9986f69719970ead3592dc565465164df0d", + "md5": "83d8324f37d0e3f120bc89865cf0bd39", + "sha256": "c4d59a8837c6320788c74496201e3ecc0ff2100525ebb727bcae6d855b34c548", + "sha1_git": "2f93ec6cc9cb3cf384268b2bce073a9c4fc152f5", + "mime_type": "text/x-java", + "file_type": "Java source, ASCII text", + "programming_language": "Java", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "summaries/counts/README", + "type": "file", + "name": "README", + "base_name": "README", + "extension": "", + "size": 236, + "date": "2026-05-22", + "sha1": "2e07e32c52d607204fad196052d70e3d18fb8636", + "md5": "effc6856ef85a9250fb1a470792b3f38", + "sha256": "165da86bfdf296cd5a0a3e20c1d1ee86d70ecb8a1fa579d6f8cadad8eee85878", + "sha1_git": "1d61df81ffb14fd19f1ac10344a51755e8719282", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "summaries/counts/arch", + "type": "directory", + "name": "arch", + "base_name": "arch", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha1_git": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 1, + "dirs_count": 0, + "size_count": 28103, + "scan_errors": [] + }, + { + "path": "summaries/counts/arch/zlib.tar.gz", + "type": "file", + "name": "zlib.tar.gz", + "base_name": "zlib", + "extension": ".tar.gz", 
+ "size": 28103, + "date": "2026-05-22", + "sha1": "576f0ccfe534d7f5ff5d6400078d3c6586de3abd", + "md5": "20b2370751abfc08bb3556c1d8114b5a", + "sha256": "e6bb199f3b59fffac4092542a516a46b7f922e607d754c21ef5b27334b1f3ba6", + "sha1_git": "b57920bb555f6881693d57da741cd1cce9cf2847", + "mime_type": "application/gzip", + "file_type": "gzip compressed data, last modified: Wed Jul 15 09:08:19 2015, from Unix, original size modulo 2^32 103424", + "programming_language": null, + "is_binary": true, + "is_text": false, + "is_archive": true, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "summaries/counts/screenshot.png", + "type": "file", + "name": "screenshot.png", + "base_name": "screenshot", + "extension": ".png", + "size": 622754, + "date": "2026-05-22", + "sha1": "01ff4b1de0bc6c75c9cca6e46c80c1802d6976d4", + "md5": "b6ef5a90777147423c98b42a6a25e57a", + "sha256": "a1c9905b77a8ff7e72c93abc85d32d9e43353996710b83c5bfa581c5f2af60ad", + "sha1_git": "97155e4a9b903a58abf29d62925d8db01c748a2e", + "mime_type": "image/png", + "file_type": "PNG image data, 2880 x 1666, 8-bit/color RGB, non-interlaced", + "programming_language": null, + "is_binary": true, + "is_text": false, + "is_archive": false, + "is_media": true, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "summaries/counts/zlib", + "type": "directory", + "name": "zlib", + "base_name": "zlib", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha1_git": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 16, + "dirs_count": 5, + "size_count": 268762, + "scan_errors": [] + }, + { + "path": 
"summaries/counts/zlib/ada", + "type": "directory", + "name": "ada", + "base_name": "ada", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha1_git": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 1, + "dirs_count": 0, + "size_count": 13594, + "scan_errors": [] + }, + { + "path": "summaries/counts/zlib/ada/zlib.ads", + "type": "file", + "name": "zlib.ads", + "base_name": "zlib", + "extension": ".ads", + "size": 13594, + "date": "2026-05-22", + "sha1": "0245a91806d804bf9f0907a3a001a141e9adb61b", + "md5": "71de2670f2e588b51c62e7f6a9046399", + "sha256": "02634bec0d5e4c69d8d2859124380074a57de8d8bd928398379bfacc514236d2", + "sha1_git": "79ffc4095cf46f90a30334466637b4df61dfaa5b", + "mime_type": "text/plain", + "file_type": "ASCII text", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "summaries/counts/zlib/adler32.c", + "type": "file", + "name": "adler32.c", + "base_name": "adler32", + "extension": ".c", + "size": 4968, + "date": "2026-05-22", + "sha1": "0cff4808476ce0b5f6f0ebbc69ee2ab2a0eebe43", + "md5": "ae3bbb54820e1d49fb90cbba222e973f", + "sha256": "341d49ae2703037d2d10c8486f1a1ca3b65e0f10cc9e5fead6bfbbc0b34564ba", + "sha1_git": "a868f073d8a0e35dcb3ec812b41b1d3f0acdd84d", + "mime_type": "text/x-c", + "file_type": "C source, ASCII text", + "programming_language": "C", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "summaries/counts/zlib/deflate.c", + "type": 
"file", + "name": "deflate.c", + "base_name": "deflate", + "extension": ".c", + "size": 71476, + "date": "2026-05-22", + "sha1": "7b4ace6d698c5dbbfb9a8f047f63228ca54d2e77", + "md5": "cd7826278ce9d9d9ed5abdefef50c3e2", + "sha256": "565e68ddfff5af8efd55f71e122b860ad11527a7d9de40a76af2b16afef24cc0", + "sha1_git": "696957705b756b1457a18c7a23a91affafa17d91", + "mime_type": "text/x-c", + "file_type": "C source, ASCII text", + "programming_language": "C", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "summaries/counts/zlib/deflate.h", + "type": "file", + "name": "deflate.h", + "base_name": "deflate", + "extension": ".h", + "size": 12774, + "date": "2026-05-22", + "sha1": "29ed3b8ca3927576e5889dea5880ca0052942c7d", + "md5": "7ceae74a13201f14c91623116af169c3", + "sha256": "80570c8052491bdc7583600da28a8f1cb32c27ab1cec107ec12c83255d426cf7", + "sha1_git": "ce0299edd19168b97e38667479bd1b5e769a63d0", + "mime_type": "text/x-c", + "file_type": "C source, ASCII text", + "programming_language": "C", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "summaries/counts/zlib/dotzlib", + "type": "directory", + "name": "dotzlib", + "base_name": "dotzlib", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha1_git": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 4, + "dirs_count": 0, + "size_count": 14257, + "scan_errors": [] + }, + { + "path": "summaries/counts/zlib/dotzlib/AssemblyInfo.cs", + "type": "file", + 
"name": "AssemblyInfo.cs", + "base_name": "AssemblyInfo", + "extension": ".cs", + "size": 2500, + "date": "2026-05-22", + "sha1": "9f1db1177b2e9a014f72bb3cd80be17133e06d16", + "md5": "23d0d7c18846fc31655b6aa89b7c8038", + "sha256": "314afcfb339ea95f5431047b7ab24631b11c3532c7ce5dc2094ed0cf80a7c16d", + "sha1_git": "0491bfc2b036f179f9d3a2f37fd61d9b3b8dd779", + "mime_type": "text/plain", + "file_type": "ASCII text, with CRLF line terminators", + "programming_language": "C#", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "summaries/counts/zlib/dotzlib/ChecksumImpl.cs", + "type": "file", + "name": "ChecksumImpl.cs", + "base_name": "ChecksumImpl", + "extension": ".cs", + "size": 8040, + "date": "2026-05-22", + "sha1": "3807a0e24a57b92ea301559cab7307b8eab52c51", + "md5": "d01b3cb2e75da9b15f05b92b42f6bd33", + "sha256": "e7c047a2c3bcf88d3d002ee3d2d05af414acf53cb4451efacc0f2e95a474ea0f", + "sha1_git": "788b2fcecedb07801588b0e7f6be89b66e4e1e72", + "mime_type": "text/x-c++", + "file_type": "C++ source, ISO-8859 text, with CRLF line terminators", + "programming_language": "C#", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "summaries/counts/zlib/dotzlib/LICENSE_1_0.txt", + "type": "file", + "name": "LICENSE_1_0.txt", + "base_name": "LICENSE_1_0", + "extension": ".txt", + "size": 1359, + "date": "2026-05-22", + "sha1": "892b34f7865d90a6f949f50d95e49625a10bc7f0", + "md5": "81543b22c36f10d20ac9712f8d80ef8d", + "sha256": "36266a8fd073568394cb81cdb2b124f7fdae2c64c1a7ed09db34b4d22efa2951", + "sha1_git": "30aac2cf4793f3aad92ef0a3c88731198c39566e", + "mime_type": "text/plain", + "file_type": "ASCII text, with CRLF line terminators", + 
"programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "summaries/counts/zlib/dotzlib/readme.txt", + "type": "file", + "name": "readme.txt", + "base_name": "readme", + "extension": ".txt", + "size": 2358, + "date": "2026-05-22", + "sha1": "b1229b826f0096808628474538cea8fec2922a9b", + "md5": "1f20f3168ee63d90de033edac2ce383c", + "sha256": "d04972a91b1563fb4b7acab4b9ff2b84e57368953cc0596d5f5ea17d97315fd0", + "sha1_git": "b2395720d4c5693213001c449ed09869be9bd944", + "mime_type": "text/plain", + "file_type": "ASCII text, with CRLF line terminators", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "summaries/counts/zlib/gcc_gvmat64", + "type": "directory", + "name": "gcc_gvmat64", + "base_name": "gcc_gvmat64", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha1_git": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 1, + "dirs_count": 0, + "size_count": 16413, + "scan_errors": [] + }, + { + "path": "summaries/counts/zlib/gcc_gvmat64/gvmat64.S", + "type": "file", + "name": "gvmat64.S", + "base_name": "gvmat64", + "extension": ".S", + "size": 16413, + "date": "2026-05-22", + "sha1": "742603cba1af98a1432cc02efb019b1a5760adf2", + "md5": "5e772d7302475e5473d0c4c57b9861e8", + "sha256": "22ff411b8b1d1b04aeaa8418b68245400267dc43c6f44104f6ccd37f0daee89f", + "sha1_git": "dd858ddbd16b031aa8aed0794ab120a647b97818", + "mime_type": "text/x-c", + 
"file_type": "C source, ASCII text, with CRLF line terminators", + "programming_language": "GAS", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "summaries/counts/zlib/infback9", + "type": "directory", + "name": "infback9", + "base_name": "infback9", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha1_git": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 2, + "dirs_count": 0, + "size_count": 23223, + "scan_errors": [] + }, + { + "path": "summaries/counts/zlib/infback9/infback9.c", + "type": "file", + "name": "infback9.c", + "base_name": "infback9", + "extension": ".c", + "size": 21629, + "date": "2026-05-22", + "sha1": "17fb362c03755b12f2dda5b12a68cf38162674bd", + "md5": "23ff5edec0817da303cb1294c1e4205c", + "sha256": "0a715c85a1ce3bb8b5a18d60941ffabc0186a886bcc66ba2ee0c4115a8e274e9", + "sha1_git": "05fb3e338070d67054858cd2fe469e3bbb2044a3", + "mime_type": "text/x-c", + "file_type": "C source, ASCII text", + "programming_language": "C", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "summaries/counts/zlib/infback9/infback9.h", + "type": "file", + "name": "infback9.h", + "base_name": "infback9", + "extension": ".h", + "size": 1594, + "date": "2026-05-22", + "sha1": "d0486a32b558dcaceded5f0746fad62e680a4734", + "md5": "52b1ed99960d3ed7ed60cd20295e64a8", + "sha256": "dda2302f28157fe43a6143f84802af1740393572c2766559593996fd7a5a3245", + "sha1_git": "1073c0a38e6c2c7f51d7638135a08f1471d7320c", + 
"mime_type": "text/x-c", + "file_type": "C source, ASCII text", + "programming_language": "C", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "summaries/counts/zlib/iostream2", + "type": "directory", + "name": "iostream2", + "base_name": "iostream2", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha1_git": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "files_count": 2, + "dirs_count": 0, + "size_count": 9994, + "scan_errors": [] + }, + { + "path": "summaries/counts/zlib/iostream2/zstream.h", + "type": "file", + "name": "zstream.h", + "base_name": "zstream", + "extension": ".h", + "size": 9283, + "date": "2026-05-22", + "sha1": "fca4540d490fff36bb90fd801cf9cd8fc695bb17", + "md5": "a980b61c1e8be68d5cdb1236ba6b43e7", + "sha256": "d0343e0c57ff58008b6f29643d289c72713aa2d653fe3dcd2e939fc77e7e20b6", + "sha1_git": "43d2332b79b70bb8ead6d84838e6841e349ec818", + "mime_type": "text/x-c++", + "file_type": "C++ source, ASCII text", + "programming_language": "C", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "summaries/counts/zlib/iostream2/zstream_test.cpp", + "type": "file", + "name": "zstream_test.cpp", + "base_name": "zstream_test", + "extension": ".cpp", + "size": 711, + "date": "2026-05-22", + "sha1": "e18a6d55cbbd8b832f8d795530553467e5c74fcf", + "md5": "d32476bde4e6d5f889092fdff6f8cdb0", + "sha256": "f789df183cc58b78751985466380c656308490a9036eb48a7ef79704c3d3f229", + "sha1_git": 
"6273f62d62a8fa280edcfb798a013e0a0ae84534", + "mime_type": "text/x-c", + "file_type": "C source, ASCII text", + "programming_language": "C++", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "summaries/counts/zlib/zlib.h", + "type": "file", + "name": "zlib.h", + "base_name": "zlib", + "extension": ".h", + "size": 87883, + "date": "2026-05-22", + "sha1": "400d35465f179a4acacb5fe749e6ce20a0bbdb84", + "md5": "64d8a5180bd54ff5452886e4cbb21e14", + "sha256": "726b0569915917b967f87f3f08a1eec039101bf9dcc29d61c0b2b0b8f271b58d", + "sha1_git": "3e0c7672ac51d93782f020bba32eb1207617e70a", + "mime_type": "text/x-c", + "file_type": "C source, ASCII text", + "programming_language": "C", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "summaries/counts/zlib/zutil.c", + "type": "file", + "name": "zutil.c", + "base_name": "zutil", + "extension": ".c", + "size": 7414, + "date": "2026-05-22", + "sha1": "e1af709bff21ae0d4331119a7fc4c19f82932043", + "md5": "fff257bc1656eb60fc585a7dc35f963d", + "sha256": "c5e9927d5a1a1dec514ccdcedfa1e0f01664c58bb33166b4997b50b8001f1d6c", + "sha1_git": "23d2ebef008fdcc00833eba0d9abcd7b9c665531", + "mime_type": "text/x-c", + "file_type": "C source, ASCII text", + "programming_language": "C", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "summaries/counts/zlib/zutil.h", + "type": "file", + "name": "zutil.h", + "base_name": "zutil", + "extension": ".h", + "size": 6766, + "date": "2026-05-22", + "sha1": 
"b909d27ef9ce51639f76b7ea6b62721e7d1b6bf7", + "md5": "04fcfbb961591c9452c4d0fd1525ffdf", + "sha256": "91cce8e78e83bcdb8c6acb98d4f0686dbdc81ca97d4a36a60c0b48f7ef78f1af", + "sha1_git": "24ab06b1cf60aeba4ade9ab36ff7ad5f73541960", + "mime_type": "text/x-c", + "file_type": "C source, ASCII text", + "programming_language": "C", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + } + ] +} \ No newline at end of file diff --git a/tests/scancode/test_cli.py b/tests/scancode/test_cli.py index 9d038f71e62..6e576186ded 100644 --- a/tests/scancode/test_cli.py +++ b/tests/scancode/test_cli.py @@ -168,7 +168,7 @@ def test_scan_info_returns_full_root(): file_paths = [f['path'] for f in result_data['files']] assert len(file_paths) == 12 # note that we strip paths from leading and trailing slashes - root = fileutils.as_posixpath(test_dir).strip('/') + root = fileutils.as_posixpath(test_dir) assert all(p.startswith(root) for p in file_paths) @@ -184,7 +184,7 @@ def test_scan_info_returns_correct_full_root_with_single_file(): scanned_file = files[0] # and we check that the path is the full path without repeating the file name # note that the path never contain leading and trailing slashes - assert scanned_file['path'] == fileutils.as_posixpath(test_file).strip('/') + assert scanned_file['path'] == fileutils.as_posixpath(test_file) def test_scan_info_returns_does_not_strip_root_with_single_file(): @@ -837,6 +837,17 @@ def test_scan_should_not_fail_with_low_max_in_memory_setting_when_ignoring_files run_scan_click(args, expected_rc=0) +def test_scan_supports_multiple_input_paths(): + test_file_1 = test_env.get_test_loc('summaries/client', relative=True).strip("\\") + test_file_2 = test_env.get_test_loc('summaries/counts', relative=True).strip("\\") + result_file = test_env.get_temp_file('json') + args = ['--info', '-n', '1', 
test_file_1, test_file_2, '--json', result_file] + run_scan_click(args, expected_rc=0) + expected = test_env.get_test_loc('summaries/multiple-input-expected.json') + check_json_scan(expected_file=expected, result_file=result_file, regen=REGEN_TEST_FIXTURES, remove_file_date=True) + + + def test_get_displayable_summary(): from scancode.cli import get_displayable_summary from commoncode.resource import Codebase diff --git a/tests/scancode/test_outdated.py b/tests/scancode/test_outdated.py index cdac7853b2b..0509c6ea360 100644 --- a/tests/scancode/test_outdated.py +++ b/tests/scancode/test_outdated.py @@ -152,8 +152,8 @@ def jget(*args, **kwargs): json=jget, status_code=200 ) - assert not outdated.fetch_newer_version(force=True) - assert not outdated.check_scancode_version(force=True) + assert not outdated.fetch_newer_version(force=True, is_test=True) + assert not outdated.check_scancode_version(force=True, is_test=True) def test_fetch_newer_version_local_git_version(): diff --git a/tests/scancode/test_plugin_ignore.py b/tests/scancode/test_plugin_ignore.py index 78f2954d76b..2a6bf1ef14c 100644 --- a/tests/scancode/test_plugin_ignore.py +++ b/tests/scancode/test_plugin_ignore.py @@ -14,7 +14,6 @@ from commoncode.fileset import is_included from scancode.cli_test_utils import run_scan_click from scancode.cli_test_utils import load_json_result -from scancode.plugin_ignore import ProcessIgnore from commoncode.resource import Codebase @@ -48,15 +47,13 @@ def test_is_included_glob_file(self): assert not is_included(location, excludes=excludes) def check_ProcessIgnore(self, test_dir, expected, ignore, include=()): - codebase = Codebase(test_dir) - test_plugin = ProcessIgnore() - test_plugin.process_codebase(codebase, ignore=ignore, include=include) + codebase = Codebase(location=test_dir, ignores=ignore, includes=include) resources = [res.strip_root_path for res in codebase.walk(skip_root=True)] assert sorted(resources) == expected def 
test_ProcessIgnore_with_single_file(self): test_dir = self.extract_test_tar('plugin_ignore/user.tgz') - ignore = ('sample.doc',) + ignore = ('*sample.doc',) expected = [ 'user', 'user/ignore.doc', @@ -69,7 +66,7 @@ def test_ProcessIgnore_with_single_file(self): def test_ProcessIgnore_with_multiple_files(self): test_dir = self.extract_test_tar('plugin_ignore/user.tgz') - ignore = ('ignore.doc', 'sample.doc',) + ignore = ('*ignore.doc', '*sample.doc',) expected = [ 'user', 'user/src', @@ -111,25 +108,10 @@ def test_ProcessIgnore_with_multiple_ignores(self): ] self.check_ProcessIgnore(test_dir, expected, ignore) - def test_ProcessIgnore_include_with_glob_for_extension(self): - test_dir = self.extract_test_tar('plugin_ignore/user.tgz') - include = ('*.doc',) - expected = [ - 'user', - 'user/ignore.doc', - 'user/src', - 'user/src/ignore.doc', - 'user/src/test', - 'user/src/test/sample.doc', - ] - self.check_ProcessIgnore(test_dir, expected, ignore=(), include=include) - def test_ProcessIgnore_process_codebase_does_not_fail_to_access_an_ignored_resourced_cached_to_disk(self): test_dir = self.extract_test_tar('plugin_ignore/user.tgz') - codebase = Codebase(test_dir, max_in_memory=1) - test_plugin = ProcessIgnore() ignore = ['test'] - test_plugin.process_codebase(codebase, ignore=ignore) + Codebase(location=test_dir, max_in_memory=1, ignores=ignore) class TestScanPluginIgnoreFiles(FileDrivenTesting): @@ -238,10 +220,25 @@ def test_scancode_multiple_ignores(self): scan_locs = [x['path'] for x in scan_result['files']] assert scan_locs == [u'user', u'user/src', u'user/src/test'] + def test_scancode_ignore_files_from_config(self): + test_dir = self.extract_test_tar('plugin_ignore/user.tgz') + config_file = self.get_test_loc('plugin_ignore/ignore.yaml') + result_file = self.get_temp_file('json') + args = ['--copyright', '--strip-root', '--config-file', config_file, test_dir, '--json', result_file] + run_scan_click(args) + scan_result = load_json_result(result_file) + assert 
scan_result['headers'][0]['extra_data']['files_count'] == 0 + scan_locs = [x['path'] for x in scan_result['files']] + expected = [ + u'user', + u'user/src', + ] + assert scan_locs == expected + def test_scancode_codebase_attempt_to_access_an_ignored_resourced_cached_to_disk(self): test_dir = self.extract_test_tar('plugin_ignore/user.tgz') result_file = self.get_temp_file('json') - args = ['--copyright', '--strip-root', '--ignore', 'test', test_dir, '--max-in-memory', '1', '--json', result_file] + args = ['--copyright', '--strip-root', '--ignore', '*test', test_dir, '--max-in-memory', '1', '--json', result_file] run_scan_click(args) scan_result = load_json_result(result_file) assert scan_result['headers'][0]['extra_data']['files_count'] == 2 @@ -251,6 +248,5 @@ def test_scancode_codebase_attempt_to_access_an_ignored_resourced_cached_to_disk u'user/ignore.doc', u'user/src', u'user/src/ignore.doc', - u'user/src/test', ] assert scan_locs == expected