Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 19 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ The Node Scraper CLI can be used to run Node Scraper plugins on a target system.
options are available:

```sh
usage: node-scraper [-h] [--sys-name STRING] [--sys-location {LOCAL,REMOTE}]
[--sys-interaction-level {PASSIVE,INTERACTIVE,DISRUPTIVE}] [--sys-sku STRING] [--sys-platform STRING]
[--plugin-configs [STRING ...]] [--system-config STRING] [--connection-config STRING] [--log-path STRING]
[--log-level {CRITICAL,FATAL,ERROR,WARN,WARNING,INFO,DEBUG,NOTSET}] [--gen-reference-config]
usage: node-scraper [-h] [--sys-name STRING] [--sys-location {LOCAL,REMOTE}] [--sys-interaction-level {PASSIVE,INTERACTIVE,DISRUPTIVE}]
[--sys-sku STRING] [--sys-platform STRING] [--plugin-configs [STRING ...]] [--system-config STRING]
[--connection-config STRING] [--log-path STRING] [--log-level {CRITICAL,FATAL,ERROR,WARN,WARNING,INFO,DEBUG,NOTSET}]
[--gen-reference-config]
{run-plugins,describe,gen-plugin-config} ...

node scraper CLI
Expand All @@ -38,14 +38,12 @@ options:
--sys-location {LOCAL,REMOTE}
Location of target system (default: LOCAL)
--sys-interaction-level {PASSIVE,INTERACTIVE,DISRUPTIVE}
Specify system interaction level, used to determine the type of actions that plugins can perform (default:
INTERACTIVE)
Specify system interaction level, used to determine the type of actions that plugins can perform (default: INTERACTIVE)
--sys-sku STRING Manually specify SKU of system (default: None)
--sys-platform STRING
Specify system platform (default: None)
--plugin-configs [STRING ...]
built-in config names or paths to plugin config JSONs. Available built-in configs: NodeStatus (default:
None)
built-in config names or paths to plugin config JSONs. Available built-in configs: NodeStatus (default: None)
--system-config STRING
Path to system config json (default: None)
--connection-config STRING
Expand All @@ -54,7 +52,8 @@ options:
--log-level {CRITICAL,FATAL,ERROR,WARN,WARNING,INFO,DEBUG,NOTSET}
Change python log level (default: INFO)
--gen-reference-config
Generate reference config. File will be written to ./reference_config.json. (default: False)
Generate reference config from system. Writes to ./reference_config.json. (default: False)


```

Expand Down Expand Up @@ -254,8 +253,8 @@ Here is an example of a comprehensive plugin config that specifies analyzer args
```

2. **'gen-reference-config' command**
This command can be used generate a reference config that is populated with current system
configurations. The plugins that use analyzer args, where applied, will be populated with system
This command can be used to generate a reference config that is populated with current system
configurations. Plugins that use analyzer args (where applicable) will be populated with system
data.
Sample command:
```sh
Expand Down Expand Up @@ -286,8 +285,16 @@ This will generate the following config:
},
"result_collators": {}
```
This can be later used on a different platform for comparison, using the steps at #2:
This config can later be used on a different platform for comparison, using the steps at #2:
```sh
node-scraper --plugin-configs reference_config.json

```

An alternate way to generate a reference config is by using log files from a previous run. The
example below uses log files from 'scraper_logs_<path>/':
```sh
node-scraper gen-plugin-config --gen-reference-config-from-logs scraper_logs_<path>/ --output-path custom_output_dir
```
This will generate a reference config that includes plugins with logged results in
'scraper_logs_<path>/' and save the new config to 'custom_output_dir/reference_config.json'.
31 changes: 30 additions & 1 deletion nodescraper/cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
from nodescraper.cli.dynamicparserbuilder import DynamicParserBuilder
from nodescraper.cli.helper import (
generate_reference_config,
generate_reference_config_from_logs,
get_plugin_configs,
get_system_info,
log_system_info,
Expand Down Expand Up @@ -148,7 +149,7 @@ def build_parser(
"--gen-reference-config",
dest="reference_config",
action="store_true",
help="Generate reference config. File will be written to ./reference_config.json.",
help="Generate reference config from system. Writes to ./reference_config.json.",
)

subparsers = parser.add_subparsers(dest="subcmd", help="Subcommands")
Expand Down Expand Up @@ -180,6 +181,13 @@ def build_parser(
help="Generate a config for a plugin or list of plugins",
)

config_builder_parser.add_argument(
"--gen-reference-config-from-logs",
dest="reference_config_from_logs",
type=log_path_arg,
help="Generate reference config from previous run logfiles. Writes to --output-path/reference_config.json if provided, otherwise ./reference_config.json.",
)

config_builder_parser.add_argument(
"--plugins",
nargs="*",
Expand Down Expand Up @@ -338,6 +346,27 @@ def main(arg_input: Optional[list[str]] = None):
parse_describe(parsed_args, plugin_reg, config_reg, logger)

if parsed_args.subcmd == "gen-plugin-config":

if parsed_args.reference_config_from_logs:
ref_config = generate_reference_config_from_logs(
parsed_args.reference_config_from_logs, plugin_reg, logger
)
output_path = os.getcwd()
if parsed_args.output_path:
output_path = parsed_args.output_path
path = os.path.join(output_path, "reference_config.json")
try:
with open(path, "w") as f:
json.dump(
ref_config.model_dump(mode="json", exclude_none=True),
f,
indent=2,
)
logger.info("Reference config written to: %s", path)
except Exception as exp:
logger.error(exp)
sys.exit(0)

parse_gen_plugin_config(parsed_args, plugin_reg, config_reg, logger)

parsed_plugin_args = {}
Expand Down
117 changes: 104 additions & 13 deletions nodescraper/cli/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,16 @@
import logging
import os
import sys
from typing import Optional
from pathlib import Path
from typing import Optional, Tuple

from pydantic import BaseModel

from nodescraper.cli.inputargtypes import ModelArgHandler
from nodescraper.configbuilder import ConfigBuilder
from nodescraper.configregistry import ConfigRegistry
from nodescraper.enums import ExecutionStatus, SystemInteractionLevel, SystemLocation
from nodescraper.models import PluginConfig, PluginResult, SystemInfo
from nodescraper.models import PluginConfig, PluginResult, SystemInfo, TaskResult
from nodescraper.pluginexecutor import PluginExecutor
from nodescraper.pluginregistry import PluginRegistry
from nodescraper.resultcollators.tablesummary import TableSummary
Expand Down Expand Up @@ -283,6 +286,33 @@ def log_system_info(log_path: str | None, system_info: SystemInfo, logger: loggi
logger.error(exp)


def extract_analyzer_args_from_model(
    plugin_cls: type, data_model: BaseModel, logger: logging.Logger
) -> Optional[BaseModel]:
    """Build an analyzer-args instance for a plugin from collected system data.

    Args:
        plugin_cls (type): The plugin class from registry.
        data_model (BaseModel): System data model.
        logger (logging.Logger): logger.

    Returns:
        Optional[BaseModel]: Instance of analyzer args model, or None when the
        plugin defines no analyzer args or does not implement
        ``build_from_model``.
    """
    analyzer_args = getattr(plugin_cls, "ANALYZER_ARGS", None)
    if not analyzer_args:
        # Plugins without analyzer args cannot contribute to a reference config.
        logger.warning(
            "Plugin: %s does not support reference config creation. No analyzer args defined.",
            getattr(plugin_cls, "__name__", str(plugin_cls)),
        )
        return None

    try:
        return analyzer_args.build_from_model(data_model)
    except NotImplementedError as err:
        # build_from_model is optional; log and skip rather than fail.
        logger.info("%s: %s", plugin_cls.__name__, str(err))
        return None


def generate_reference_config(
results: list[PluginResult], plugin_reg: PluginRegistry, logger: logging.Logger
) -> PluginConfig:
Expand Down Expand Up @@ -313,21 +343,82 @@ def generate_reference_config(
continue

plugin = plugin_reg.plugins.get(obj.source)
if not plugin.ANALYZER_ARGS:
logger.warning(
"Plugin: %s does not support reference config creation. No analyzer args defined, skipping.",
obj.source,
)
continue

args = None
try:
args = plugin.ANALYZER_ARGS.build_from_model(data_model)
except NotImplementedError as nperr:
logger.info(nperr)
args = extract_analyzer_args_from_model(plugin, data_model, logger)
if not args:
continue
plugins[obj.source] = {"analysis_args": {}}
plugins[obj.source]["analysis_args"] = args.model_dump(exclude_none=True)
plugin_config.plugins = plugins

return plugin_config


def generate_reference_config_from_logs(
    path: str, plugin_reg: PluginRegistry, logger: logging.Logger
) -> PluginConfig:
    """Parse previous log files and generate plugin config with populated analyzer args.

    Args:
        path (str): path to log files
        plugin_reg (PluginRegistry): plugin registry instance
        logger (logging.Logger): logger instance

    Returns:
        PluginConfig: instance of plugin config; plugins without logged
        analyzer args are omitted.
    """
    found = find_datamodel_and_result(path)
    plugin_config = PluginConfig()
    plugins = {}
    for dm, res in found:
        result_path = Path(res)
        res_payload = json.loads(result_path.read_text(encoding="utf-8"))
        task_res = TaskResult(**res_payload)
        dm_path = Path(dm)
        dm_payload = json.loads(dm_path.read_text(encoding="utf-8"))
        # task_res.parent names the owning plugin (see result.json fixtures).
        plugin = plugin_reg.plugins.get(task_res.parent)
        if not plugin:
            # BUG FIX: the format string had two %s placeholders but only one
            # argument, which made logging raise a formatting error instead of
            # emitting the warning.
            logger.warning(
                "Plugin %s not found in the plugin registry.",
                task_res.parent,
            )
            continue

        data_model = plugin.DATA_MODEL.model_validate(dm_payload)

        args = extract_analyzer_args_from_model(plugin, data_model, logger)
        if not args:
            continue

        plugins[task_res.parent] = {"analysis_args": args.model_dump(exclude_none=True)}

    plugin_config.plugins = plugins
    return plugin_config


def find_datamodel_and_result(base_path: str) -> list[Tuple[str, str]]:
"""Get datamodel and result files

Args:
base_path (str): location of previous run logs

Returns:
list[Tuple[str, str]]: tuple of datamodel and result json files
"""
tuple_list: list[Tuple[str, str, str]] = []
for root, _, files in os.walk(base_path):
if "collector" in os.path.basename(root).lower():
datamodel_path = None
result_path = None

for fname in files:
low = fname.lower()
if low.endswith("datamodel.json"):
datamodel_path = os.path.join(root, fname)
elif low == "result.json":
result_path = os.path.join(root, fname)

if datamodel_path and result_path:
tuple_list.append((datamodel_path, result_path))

return tuple_list
27 changes: 25 additions & 2 deletions nodescraper/models/taskresult.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@
###############################################################################
import datetime
import logging
from typing import Optional
from typing import Any, Optional

from pydantic import BaseModel, Field, field_serializer
from pydantic import BaseModel, Field, field_serializer, field_validator

from nodescraper.enums import EventPriority, ExecutionStatus

Expand Down Expand Up @@ -65,6 +65,29 @@ def serialize_status(self, status: ExecutionStatus, _info) -> str:
"""
return status.name

@field_validator("status", mode="before")
@classmethod
def validate_status(cls, v: Any):
    """Coerce an incoming status value into an ExecutionStatus enum.

    Args:
        v (Any): The input value to validate (can be str or ExecutionStatus).

    Returns:
        ExecutionStatus: The validated enum value.

    Raises:
        ValueError: If the string is not a valid enum name.
    """
    # Already an enum member: nothing to convert.
    if isinstance(v, ExecutionStatus):
        return v
    # Non-strings fall through untouched; pydantic's normal field
    # validation handles anything else.
    if not isinstance(v, str):
        return v
    # Strings are looked up by enum *name* (matching serialize_status,
    # which emits status.name).
    try:
        return ExecutionStatus[v]
    except KeyError as err:
        raise ValueError(f"Unknown status name: {v!r}") from err

@property
def duration(self) -> str | None:
"""return duration of time as a string
Expand Down
18 changes: 16 additions & 2 deletions nodescraper/plugins/inband/storage/storagedata.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@
# SOFTWARE.
#
###############################################################################
from pydantic import BaseModel, field_serializer
from pydantic import BaseModel, field_serializer, field_validator

from nodescraper.models import DataModel
from nodescraper.utils import bytes_to_human_readable
from nodescraper.utils import bytes_to_human_readable, convert_to_bytes


class DeviceStorageData(BaseModel):
Expand All @@ -51,6 +51,20 @@ def serialize_used(self, used: int, _info) -> str:
def serialize_percent(self, percent: float, _info) -> str:
return f"{percent}%"

@field_validator("total", "free", "used", mode="before")
@classmethod
def parse_bytes_fields(cls, v):
    """Convert string size values to byte counts via convert_to_bytes.

    Non-string values pass through for pydantic's normal validation.
    """
    return convert_to_bytes(v) if isinstance(v, str) else v

@field_validator("percent", mode="before")
@classmethod
def parse_percent_field(cls, v):
    """Accept percent strings (e.g. the "42.5%" form emitted by
    serialize_percent) by stripping the trailing sign and parsing a float.

    Other values pass through for pydantic's normal validation.
    """
    if not (isinstance(v, str) and v.endswith("%")):
        return v
    return float(v.rstrip("%"))


class StorageDataModel(DataModel):
storage_data: dict[str, DeviceStorageData]
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"bios_version": "M17"
}
8 changes: 8 additions & 0 deletions test/unit/framework/fixtures/log_dir/collector/result.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"status": "OK",
"message": "BIOS: M17",
"task": "BiosCollector",
"parent": "BiosPlugin",
"start_time": "2025-07-07T11:11:08.186472",
"end_time": "2025-07-07T11:11:08.329110"
}
Loading