diff --git a/cli/decompose/decompose.py b/cli/decompose/decompose.py index 0b193d351..6441488d9 100644 --- a/cli/decompose/decompose.py +++ b/cli/decompose/decompose.py @@ -1,6 +1,7 @@ import json import keyword import re +import shutil from enum import Enum from graphlib import TopologicalSorter from pathlib import Path @@ -16,7 +17,8 @@ class DecompVersion(str, Enum): latest = "latest" v1 = "v1" - # v2 = "v2" + v2 = "v2" + # v3 = "v3" this_file_dir = Path(__file__).resolve().parent @@ -307,27 +309,33 @@ def run( backend_api_key=backend_api_key, ) - # Verify that all user variables are properly defined before use - # This may reorder subtasks if dependencies are out of order - decomp_data = verify_user_variables(decomp_data, input_var) + decomp_dir = out_dir / out_name + val_fn_dir = decomp_dir / "validations" + val_fn_dir.mkdir(parents=True) - with open(out_dir / f"{out_name}.json", "w") as f: + (val_fn_dir / "__init__.py").touch() + + for constraint in decomp_data["identified_constraints"]: + if constraint["val_fn"] is not None: + with open(val_fn_dir / f"{constraint['val_fn_name']}.py", "w") as f: + f.write(constraint["val_fn"] + "\n") + + with open(decomp_dir / f"{out_name}.json", "w") as f: json.dump(decomp_data, f, indent=2) - with open(out_dir / f"{out_name}.py", "w") as f: + with open(decomp_dir / f"{out_name}.py", "w") as f: f.write( m_template.render( - subtasks=decomp_data["subtasks"], user_inputs=input_var + subtasks=decomp_data["subtasks"], + user_inputs=input_var, + identified_constraints=decomp_data["identified_constraints"], ) + "\n" ) except Exception: - created_json = Path(out_dir / f"{out_name}.json") - created_py = Path(out_dir / f"{out_name}.py") - if created_json.exists() and created_json.is_file(): - created_json.unlink() - if created_py.exists() and created_py.is_file(): - created_py.unlink() + decomp_dir = out_dir / out_name + if decomp_dir.exists() and decomp_dir.is_dir(): + shutil.rmtree(decomp_dir) raise Exception diff --git a/cli/decompose/m_decomp_result_v1.py.jinja2 b/cli/decompose/m_decomp_result_v1.py.jinja2 index 7aa1d54f4..1f1e3646e 100644 --- a/cli/decompose/m_decomp_result_v1.py.jinja2 +++ b/cli/decompose/m_decomp_result_v1.py.jinja2 @@ -4,6 +4,14 @@ import os import textwrap import mellea +{%- if "code" in identified_constraints | map(attribute="val_strategy") %} +from mellea.stdlib.requirement import req +{% for c in identified_constraints %} +{%- if c.val_fn %} +from validations.{{ c.val_fn_name }} import validate_input as {{ c.val_fn_name }} +{%- endif %} +{%- endfor %} +{%- endif %} m = mellea.start_session() {%- if user_inputs %} @@ -30,7 +38,14 @@ except KeyError as e: {%- if item.constraints %} requirements=[ {%- for c in item.constraints %} + {%- if c.val_fn %} + req( + {{ c.constraint | tojson}}, + validation_fn={{ c.val_fn_name }}, + ), + {%- else %} {{ c.constraint | tojson}}, + {%- endif %} {%- endfor %} ], {%- else %} diff --git a/cli/decompose/m_decomp_result_v2.py.jinja2 b/cli/decompose/m_decomp_result_v2.py.jinja2 new file mode 100644 index 000000000..9b1bb13c6 --- /dev/null +++ b/cli/decompose/m_decomp_result_v2.py.jinja2 @@ -0,0 +1,91 @@ +{% if user_inputs -%} +import os +{% endif -%} +import textwrap + +import mellea +{%- if "code" in identified_constraints | map(attribute="val_strategy") %} +from mellea.stdlib.requirement import req +{% for c in identified_constraints %} +{%- if c.val_fn %} +from validations.{{ c.val_fn_name }} import validate_input as {{ c.val_fn_name }} +{%- endif %} +{%- endfor %} +{%- endif %} + +m = mellea.start_session() +{%- if user_inputs %} + + +# User Input Variables +try: + {%- for var in user_inputs %} + {{ var | lower }} = os.environ["{{ var | upper }}"] + {%- endfor %} +except KeyError as e: + print(f"ERROR: One or more required environment variables are not set; {e}") + exit(1) +{%- endif %} +{%- for item in subtasks %} + + +{{ item.tag | lower }}_gnrl = textwrap.dedent( + R""" + {{ item.general_instructions | trim | indent(width=4, first=False) }} + """.strip() +) +{{ item.tag | lower }} = m.instruct( + {%- if not item.input_vars_required %} + {{ item.subtask[3:] | trim | tojson }}, + {%- else %} + textwrap.dedent( + R""" + {{ item.subtask[3:] | trim }} + + Here are the input variables and their content: + {%- for var in item.input_vars_required %} + + - {{ var | upper }} = {{ "{{" }}{{ var | upper }}{{ "}}" }} + {%- endfor %} + """.strip() + ), + {%- endif %} + {%- if item.constraints %} + requirements=[ + {%- for c in item.constraints %} + {%- if c.val_fn %} + req( + {{ c.constraint | tojson}}, + validation_fn={{ c.val_fn_name }}, + ), + {%- else %} + {{ c.constraint | tojson}}, + {%- endif %} + {%- endfor %} + ], + {%- else %} + requirements=None, + {%- endif %} + {%- if item.input_vars_required %} + user_variables={ + {%- for var in item.input_vars_required %} + {{ var | upper | tojson }}: {{ var | lower }}, + {%- endfor %} + }, + {%- endif %} + grounding_context={ + "GENERAL_INSTRUCTIONS": {{ item.tag | lower }}_gnrl, + {%- for var in item.depends_on %} + {{ var | upper | tojson }}: {{ var | lower }}.value, + {%- endfor %} + }, +) +assert {{ item.tag | lower }}.value is not None, 'ERROR: task "{{ item.tag | lower }}" execution failed' +{%- if loop.last %} + + +final_answer = {{ item.tag | lower }}.value + +print(final_answer) +{%- endif -%} +{%- endfor -%} diff --git a/cli/decompose/pipeline.py b/cli/decompose/pipeline.py index a574d35d6..f2bc79291 100644 --- a/cli/decompose/pipeline.py +++ b/cli/decompose/pipeline.py @@ -11,10 +11,11 @@ from .prompt_modules import ( constraint_extractor, - # general_instructions, + general_instructions, subtask_constraint_assign, subtask_list, subtask_prompt_generator, + validation_code_generator, validation_decision, ) from .prompt_modules.subtask_constraint_assign import SubtaskPromptConstraintsItem @@ -22,9 +23,16 @@ from .prompt_modules.subtask_prompt_generator import SubtaskPromptItem +class ConstraintValData(TypedDict): + val_strategy: Literal["code", "llm"] + val_fn: str | None + + class ConstraintResult(TypedDict): constraint: str - validation_strategy: str + val_strategy: Literal["code", "llm"] + val_fn: str | None + val_fn_name: str class DecompSubtasksResult(TypedDict): @@ -32,7 +40,7 @@ class DecompSubtasksResult(TypedDict): tag: str constraints: list[ConstraintResult] prompt_template: str - # general_instructions: str + general_instructions: str input_vars_required: list[str] depends_on: list[str] generated_response: NotRequired[str] @@ -72,7 +80,9 @@ def decompose( case DecompBackend.ollama: m_session = MelleaSession( OllamaModelBackend( - model_id=model_id, model_options={ModelOption.CONTEXT_WINDOW: 16384} + model_id=model_id, + base_url=backend_endpoint, + model_options={ModelOption.CONTEXT_WINDOW: 16384}, ) ) case DecompBackend.openai: @@ -115,11 +125,27 @@ def decompose( m_session, task_prompt, enforce_same_words=False ).parse() - constraint_validation_strategies: dict[str, Literal["code", "llm"]] = { - cons_key: validation_decision.generate(m_session, cons_key).parse() + constraint_val_strategy: dict[ + str, dict[Literal["val_strategy"], Literal["code", "llm"]] + ] = { + cons_key: { + "val_strategy": validation_decision.generate(m_session, cons_key).parse() + } for cons_key in task_prompt_constraints } + constraint_val_data: dict[str, ConstraintValData] = {} + + for cons_key in constraint_val_strategy: + constraint_val_data[cons_key] = { + "val_strategy": constraint_val_strategy[cons_key]["val_strategy"], + "val_fn": None, + } + if constraint_val_data[cons_key]["val_strategy"] == "code": + constraint_val_data[cons_key]["val_fn"] = ( + validation_code_generator.generate(m_session, cons_key).parse() + ) + subtask_prompts: list[SubtaskPromptItem] = subtask_prompt_generator.generate( m_session, task_prompt, @@ -142,14 +168,21 @@ def decompose( constraints=[ { "constraint": cons_str, - "validation_strategy": constraint_validation_strategies[cons_str], + "val_strategy": constraint_val_data[cons_str]["val_strategy"], + "val_fn_name": f"val_fn_{task_prompt_constraints.index(cons_str) + 1}", + # >> Always include generated "val_fn" code (experimental) + "val_fn": constraint_val_data[cons_str]["val_fn"], + # >> Include generated "val_fn" code only for the last subtask (experimental) + # "val_fn": constraint_val_data[cons_str]["val_fn"] + # if subtask_i + 1 == len(subtask_prompts_with_constraints) + # else None, } for cons_str in subtask_data.constraints ], prompt_template=subtask_data.prompt_template, - # general_instructions=general_instructions.generate( - # m_session, input_str=subtask_data.prompt_template - # ).parse(), + general_instructions=general_instructions.generate( + m_session, input_str=subtask_data.prompt_template + ).parse(), input_vars_required=list( dict.fromkeys( # Remove duplicates while preserving the original order. [ @@ -173,7 +206,7 @@ def decompose( ) ), ) - for subtask_data in subtask_prompts_with_constraints + for subtask_i, subtask_data in enumerate(subtask_prompts_with_constraints) ] return DecompPipelineResult( @@ -182,9 +215,11 @@ def decompose( identified_constraints=[ { "constraint": cons_str, - "validation_strategy": constraint_validation_strategies[cons_str], + "val_strategy": constraint_val_data[cons_str]["val_strategy"], + "val_fn": constraint_val_data[cons_str]["val_fn"], + "val_fn_name": f"val_fn_{cons_i + 1}", } - for cons_str in task_prompt_constraints + for cons_i, cons_str in enumerate(task_prompt_constraints) ], subtasks=decomp_subtask_result, ) diff --git a/cli/decompose/prompt_modules/__init__.py b/cli/decompose/prompt_modules/__init__.py index 19b7079e3..922bdcbe8 100644 --- a/cli/decompose/prompt_modules/__init__.py +++ b/cli/decompose/prompt_modules/__init__.py @@ -7,4 +7,7 @@ from .subtask_prompt_generator import ( subtask_prompt_generator as subtask_prompt_generator, ) +from .validation_code_generator import ( + validation_code_generator as validation_code_generator, +) from .validation_decision import validation_decision as validation_decision diff --git a/cli/decompose/prompt_modules/subtask_constraint_assign/_prompt/system_template.jinja2 b/cli/decompose/prompt_modules/subtask_constraint_assign/_prompt/system_template.jinja2 index 30baaf93a..e5cad42e7 100644 --- a/cli/decompose/prompt_modules/subtask_constraint_assign/_prompt/system_template.jinja2 +++ b/cli/decompose/prompt_modules/subtask_constraint_assign/_prompt/system_template.jinja2 @@ -8,7 +8,7 @@ You will be provided with the following 4 parameters inside their respective tag 4. : A list of candidate (possible) constraints that can be assigned to the target task. -The list contain the constraints of all tasks on the , your job is to filter and select only the constraints belonging to your target task. +The is a list of constraints identified for the entire , your job is to filter and select only the constraints belonging to your target task. It is possible that none of the constraints in the are relevant or related to your target task. Below, enclosed in tags, are instructions to guide you on how to complete your assignment: diff --git a/cli/decompose/prompt_modules/validation_code_generator/__init__.py b/cli/decompose/prompt_modules/validation_code_generator/__init__.py new file mode 100644 index 000000000..dfb4bd0ce --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/__init__.py @@ -0,0 +1,7 @@ +from ._exceptions import ( + BackendGenerationError as BackendGenerationError, + TagExtractionError as TagExtractionError, +) +from ._validation_code_generator import ( + validation_code_generator as validation_code_generator, +) diff --git a/cli/decompose/prompt_modules/validation_code_generator/_exceptions.py b/cli/decompose/prompt_modules/validation_code_generator/_exceptions.py new file mode 100644 index 000000000..d808b613d --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_exceptions.py @@ -0,0 +1,24 @@ +from typing import Any + + +class ValidationCodeGeneratorError(Exception): + def __init__(self, error_message: str, **kwargs: dict[str, Any]): + self.error_message = error_message + self.__dict__.update(kwargs) + super().__init__( + f'Module Error "validation_code_generator"; {self.error_message}' + ) + + +class BackendGenerationError(ValidationCodeGeneratorError): + """Raised when LLM generation fails in the "validation_code_generator" prompt module.""" + + def __init__(self, error_message: str, **kwargs: dict[str, Any]): + super().__init__(error_message, **kwargs) + + +class TagExtractionError(ValidationCodeGeneratorError): + """Raised when tag extraction fails in the "validation_code_generator" prompt module.""" + + def __init__(self, error_message: str, **kwargs: dict[str, Any]): + super().__init__(error_message, **kwargs) diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/__init__.py b/cli/decompose/prompt_modules/validation_code_generator/_prompt/__init__.py new file mode 100644 index 000000000..0b985cbe6 --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/__init__.py @@ -0,0 +1,5 @@ +from ._icl_examples import icl_examples as default_icl_examples +from ._prompt import ( + get_system_prompt as get_system_prompt, + get_user_prompt as get_user_prompt, +) diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/__init__.py b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/__init__.py new file mode 100644 index 000000000..052fe7c99 --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/__init__.py @@ -0,0 +1,2 @@ +from ._icl_examples import icl_examples as icl_examples +from ._types import ICLExample as ICLExample diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_1/__init__.py b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_1/__init__.py new file mode 100644 index 000000000..1f9f32ea5 --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_1/__init__.py @@ -0,0 +1 @@ +from ._example import example as example diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_1/_example.py b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_1/_example.py new file mode 100644 index 000000000..9bb4e23da --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_1/_example.py @@ -0,0 +1,24 @@ +# ruff: noqa: W293 +from .._types import ICLExample + +constraint_requirement = """You must not use any uppercase letters""" + +validation_function = """def validate_input(input: str) -> bool: + \""" + Validates that the input contains only lowercase letters. + + Args: + input (str): The input to validate + + Returns: + bool: True if all characters are lowercase, False otherwise + \""" + try: + return answer.islower() + except Exception: + return False""" + +example: ICLExample = { + "constraint_requirement": constraint_requirement, + "validation_function": validation_function, +} diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_2/__init__.py b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_2/__init__.py new file mode 100644 index 000000000..1f9f32ea5 --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_2/__init__.py @@ -0,0 +1 @@ +from ._example import example as example diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_2/_example.py b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_2/_example.py new file mode 100644 index 000000000..6e2d98fe0 --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_2/_example.py @@ -0,0 +1,31 @@ +# ruff: noqa: W293 +from .._types import ICLExample + +constraint_requirement = """The answer must be a JSON with the following keys: +1. "subject" +2. "content\"""" + +validation_function = """import json + +def validate_input(input: str) -> bool: + \""" + Validates that the input is a JSON with required keys: subject and content. + + Args: + input (str): The input to validate + + Returns: + bool: True if JSON has required keys, False otherwise + \""" + try: + data = json.loads(response) + return isinstance(data, dict) and "subject" in data and "content" in data + except json.JSONDecodeError: + return False + except Exception: + return False""" + +example: ICLExample = { + "constraint_requirement": constraint_requirement, + "validation_function": validation_function, +} diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_3/__init__.py b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_3/__init__.py new file mode 100644 index 000000000..1f9f32ea5 --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_3/__init__.py @@ -0,0 +1 @@ +from ._example import example as example diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_3/_example.py b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_3/_example.py new file mode 100644 index 000000000..65070a6ed --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_3/_example.py @@ -0,0 +1,58 @@ +# ruff: noqa: W293 +from .._types import ICLExample + +constraint_requirement = "Return a list of requirements, using dash bullets (-), where each item begins with the relevant entity" + +validation_function = """def validate_input(input: str) -> bool: + \""" + Validates that the input is a list of requirements using dash bullets, + where each item begins with the relevant entity. + + Args: + input (str): The input to validate + + Returns: + bool: True if input follows the required format, False otherwise + \""" + try: + if not input or not isinstance(input, str): + return False + + lines = input.strip().split('\n') + + # Check if all lines are empty + if not any(line.strip() for line in lines): + return False + + for line in lines: + line = line.strip() + + # Skip empty lines + if not line: + continue + + # Check if line starts with a dash bullet + if not line.startswith('- '): + return False + + # Check if there's content after the dash bullet + content = line[2:].strip() # Remove '- ' prefix + if not content: + return False + + # Check if content has an entity (word) at the beginning + words = content.split() + if not words: + return False + + # Entity should be the first word - just check it exists + # We're not validating what constitutes a valid entity here + + return True + except Exception: + return False""" + +example: ICLExample = { + "constraint_requirement": constraint_requirement, + "validation_function": validation_function, +} diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_4/__init__.py b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_4/__init__.py new file mode 100644 index 000000000..1f9f32ea5 --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_4/__init__.py @@ -0,0 +1 @@ +from ._example import example as example diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_4/_example.py b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_4/_example.py new file mode 100644 index 000000000..f1af01ab1 --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_4/_example.py @@ -0,0 +1,31 @@ +# ruff: noqa: W293 +from .._types import ICLExample + +constraint_requirement = 'Avoid the words "daughter-in-law" and "grandson"' + +validation_function = """def validate_input(input: str) -> bool: + \""" + Validates that the input does not contain the words "daughter-in-law" and "grandson". + + Args: + input (str): The input to validate + + Returns: + bool: True if neither word is found, False otherwise + \""" + try: + if not input: + return False + + # Convert to lowercase for case-insensitive comparison + input_lower = input.lower() + + # Check if either forbidden word is present + return "daughter-in-law" not in input_lower and "grandson" not in input_lower + except Exception: + return False""" + +example: ICLExample = { + "constraint_requirement": constraint_requirement, + "validation_function": validation_function, +} diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_icl_examples.py b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_icl_examples.py new file mode 100644 index 000000000..c018d2e41 --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_icl_examples.py @@ -0,0 +1,5 @@ +from ._example_1 import example as example_1 +from ._example_2 import example as example_2 +from ._types import ICLExample + +icl_examples: list[ICLExample] = [example_1, example_2] diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_types.py b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_types.py new file mode 100644 index 000000000..bdd1f2372 --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_types.py @@ -0,0 +1,6 @@ +from typing import TypedDict + + +class ICLExample(TypedDict): + constraint_requirement: str + validation_function: str diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/_prompt.py b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_prompt.py new file mode 100644 index 000000000..b324180fc --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_prompt.py @@ -0,0 +1,19 @@ +from pathlib import Path + +from jinja2 import Environment, FileSystemLoader + +from ._icl_examples import ICLExample, icl_examples as default_icl_examples + +this_file_dir = Path(__file__).resolve().parent + +environment = Environment(loader=FileSystemLoader(this_file_dir), autoescape=False) +system_template = environment.get_template("system_template.jinja2") +user_template = environment.get_template("user_template.jinja2") + + +def get_system_prompt(icl_examples: list[ICLExample] = default_icl_examples) -> str: + return system_template.render(icl_examples=icl_examples).strip() + + +def get_user_prompt(constraint_requirement: str) -> str: + return user_template.render(constraint_requirement=constraint_requirement).strip() diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/system_template.jinja2 b/cli/decompose/prompt_modules/validation_code_generator/_prompt/system_template.jinja2 new file mode 100644 index 000000000..7b414d0d5 --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/system_template.jinja2 @@ -0,0 +1,77 @@ +You are a Python developer specialized in writing validation functions based on natural language constraints or requirements. + +## Function Requirements + +You will be provided with a constraint/requirement inside the tags. +Your task is to write a Python function capable of validating the against a text input to your function. + +Your code must: +- Be a single Python function. +- Take exactly one string parameter (the text input to be validated). +- Return a boolean value (True if valid or False if invalid). +- Use only standard Python libraries. No third-party dependencies. +- Be deterministic and self-contained. +- If the constraint/requirement mentions data that was not provided, just return `False` (don't need to implement code). + +## Output Format + +Your response must be structured as follows: +- Your Python function must be inside the tags. +- The function signature must be: `def validate_input(input: str) -> bool:`. +- Always enclose your code on a "try..except Exception:" clause and return `False` in case of exceptions. + +## Examples + +Here are some complete examples showing constraints/requirements and their corresponding validation functions: + +{% for item in icl_examples -%} + + +{{ item["constraint_requirement"] }} + + +{{ item["validation_function"] }} + + +All tags are closed and my assignment is finished. + + +{% endfor -%} +That concludes the complete examples of your assignment. + +## Additional Instructions + +When writing your answer, follow these additional instructions below to be successful: +1. The function signature must be: `def validate_input(input: str) -> bool:` +2. The function must handle `None` and empty string inputs by returning `False` +3. Use appropriate Python standard library modules (re, json, etc.) as needed +4. Ensure the function is simple and doesn't have unnecessary complexity +5. The validation logic should directly correspond to the provided constraint/requirement + +## Common Validation Patterns + +Here are some typical validation scenarios you might encounter: + +1. Character limit validation: + - Check if the answer has a specific number of characters or words + - Example: "The answer must be less than 100 characters" + +2. Format validation: + - Validate JSON structure, XML format, or other structured data + - Example: "The answer must be valid JSON with 'name' and 'age' fields" + +3. Content validation: + - Check for specific content patterns like uppercase letters, numbers, etc. + - Example: "The answer must contain at least one uppercase letter" + +4. Pattern matching: + - Use regex to validate specific patterns + - Example: "The answer must be in the format 'Name: [text], Age: [number]'" + +Important: Use only standard Python libraries that don't require additional installation. +Important: Your function must be deterministic and produce consistent results. +Important: You must always close the tags that were opened by using their corresponding close tag. You will be penalized if you don't close all tags. + +Very Important: After closing all tags, finish your assignment by writing (without the double quotes): "All tags are closed and my assignment is finished.". +Very Important: Always enclose your code on a "try..except Exception:" clause and return `False` in case of exceptions. +Very Important: If the constraint/requirement is not clear, or missing information, just return `False`. diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/user_template.jinja2 b/cli/decompose/prompt_modules/validation_code_generator/_prompt/user_template.jinja2 new file mode 100644 index 000000000..867af52e4 --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/user_template.jinja2 @@ -0,0 +1,9 @@ +## Requirements: +- The function name must be: "validate_input" +- The function signature must be: `def validate_input(input: str) -> bool:` + +Now, here is the constraint/requirement for you to write a Python validation function: + + +{{ constraint_requirement }} + diff --git a/cli/decompose/prompt_modules/validation_code_generator/_validation_code_generator.py b/cli/decompose/prompt_modules/validation_code_generator/_validation_code_generator.py new file mode 100644 index 000000000..55949c963 --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_validation_code_generator.py @@ -0,0 +1,113 @@ +import re +from collections.abc import Callable +from typing import Any, TypeVar, final + +from mellea import MelleaSession +from mellea.backends import ModelOption +from mellea.stdlib.components.chat import Message + +from .._prompt_modules import PromptModule, PromptModuleString +from ._exceptions import BackendGenerationError, TagExtractionError +from ._prompt import get_system_prompt, get_user_prompt + +T = TypeVar("T") + +RE_VALIDATION_FUNCTION = re.compile( + r"(.+?)", flags=re.IGNORECASE | re.DOTALL +) + + +@final +class _ValidationCodeGenerator(PromptModule): + @staticmethod + def _default_parser(generated_str: str) -> str: + r"""Default parser of the `validation_code_generator` module. + + _**Disclaimer**: This is a LLM-prompting module, so the results will vary depending + on the size and capabilities of the LLM used. The results are also not guaranteed, so + take a look at this module's Exceptions and plan for unreliable results._ + + Args: + generated_str (`str`): The LLM's answer to be parsed. + + Returns: + str: The extracted Python validation function code. + + Raises: + TagExtractionError: An error occurred trying to extract content from the + generated output. The LLM probably failed to open and close + the \ tags. + """ + validation_function_match = re.search(RE_VALIDATION_FUNCTION, generated_str) + + validation_function_str: str | None = ( + validation_function_match.group(1).strip() + if validation_function_match + else None + ) + + if validation_function_str is None: + raise TagExtractionError( + 'LLM failed to generate correct tags for extraction: ""' + ) + + return validation_function_str + + def generate( + self, + mellea_session: MelleaSession, + input_str: str | None, + max_new_tokens: int = 4096, + parser: Callable[[str], T] = _default_parser, # type: ignore[assignment] + # About the mypy ignore above: https://github.com/python/mypy/issues/3737 + **kwargs: dict[str, Any], + ) -> PromptModuleString[T]: + """Generates a Python validation function based on a provided constraint/requirement. + + Args: + mellea_session (`MelleaSession`): A mellea session with a backend. + input_str (`str`): Natural language constraint/requirement to generate validation code for. + prompt (`str`, optional): The original task prompt for context. Defaults to None. + max_new_tokens (`int`, optional): Maximum tokens to generate. + Defaults to `4096`. + parser (`Callable[[str], Any]`, optional): A string parsing function. + Defaults to `_ValidationCodeGenerator._default_parser`. + + Returns: + PromptModuleString: A `PromptModuleString` class containing the generated output. + + The `PromptModuleString` class behaves like a `str`, but with an additional `parse()` method + to execute the parsing function passed in the `parser` argument of + this method (the `parser` argument defaults to `_ValidationCodeGenerator._default_parser`). + + Raises: + BackendGenerationError: Some error occurred during the LLM generation call. + """ + assert input_str is not None, 'This module requires the "input_str" argument' + + system_prompt = get_system_prompt() + user_prompt = get_user_prompt(constraint_requirement=input_str) + + action = Message("user", user_prompt) + + try: + gen_result = mellea_session.act( + action=action, + model_options={ + ModelOption.SYSTEM_PROMPT: system_prompt, + ModelOption.TEMPERATURE: 0, + ModelOption.MAX_NEW_TOKENS: max_new_tokens, + }, + ).value + except Exception as e: + raise BackendGenerationError(f"LLM generation failed: {e}") + + if gen_result is None: + raise BackendGenerationError( + "LLM generation failed: value attribute is None" + ) + + return PromptModuleString(gen_result, parser) + + +validation_code_generator = _ValidationCodeGenerator() diff --git a/cli/decompose/prompt_modules/validation_decision/_prompt/system_template.jinja2 b/cli/decompose/prompt_modules/validation_decision/_prompt/system_template.jinja2 index 8e5cb00fb..fc0be317c 100644 --- a/cli/decompose/prompt_modules/validation_decision/_prompt/system_template.jinja2 +++ b/cli/decompose/prompt_modules/validation_decision/_prompt/system_template.jinja2 @@ -1,14 +1,14 @@ -You are a Validation Decision Expert specialized in determining whether prompt requirements can be validated deterministically by writing Python code or if they're best suited for LLM validation. +You are a Validation Decision Expert specialized in determining whether prompt requirements can be validated deterministically, by writing Python code, or if they're best suited for LLM validation. ## Decision Criteria ### Code Validation A requirement should be classified as "code" if it: -- Can be checked with deterministic algorithms +- Can be validated deterministically - Involves structured data validation (e.g., JSON schema, regex patterns) -- Requires mathematical computations or logical operations - Can be validated with simple string operations - Has clearly defined success/failure criteria that can be programmatically determined +- Is a straightforward requirement to validate the task output ### LLM Validation A requirement should be classified as "llm" if it: @@ -59,6 +59,7 @@ When writing your answer, follow these additional instructions below to be succe 3. After closing all tags, finish your assignment by writing (without the double quotes): "All tags are closed and my assignment is finished." Important: You must always close the tags that were opened by using their corresponding close tag. You will be penalized if you don't close all tags. +Important: The "code" classification is usually for validating the task output format or other deterministic requirements. Your response must contain exactly one of these two words inside tags: - code diff --git a/test/decompose/test_decompose.py b/test/decompose/test_decompose.py index a04f92b25..c51683fee 100644 --- a/test/decompose/test_decompose.py +++ b/test/decompose/test_decompose.py @@ -23,6 +23,7 @@ def test_no_dependencies(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -31,6 +32,7 @@ def test_no_dependencies(self): { "subtask": "Task B", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -39,6 +41,7 @@ def test_no_dependencies(self): { "subtask": "Task C", "tag": "TASK_C", + "general_instructions": "", "constraints": [], "prompt_template": "Do C", "input_vars_required": [], @@ -60,6 +63,7 @@ def test_simple_linear_dependency(self): { "subtask": "Task C", "tag": "TASK_C", + "general_instructions": "", "constraints": [], "prompt_template": "Do C", "input_vars_required": [], @@ -68,6 +72,7 @@ def test_simple_linear_dependency(self): { "subtask": "Task B", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -76,6 +81,7 @@ def test_simple_linear_dependency(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -97,6 +103,7 @@ def test_diamond_dependency(self): { "subtask": "Task D", "tag": "TASK_D", + "general_instructions": "", "constraints": [], "prompt_template": "Do D", "input_vars_required": [], @@ -105,6 +112,7 @@ def test_diamond_dependency(self): { "subtask": "Task C", "tag": "TASK_C", + "general_instructions": "", "constraints": [], "prompt_template": "Do C", "input_vars_required": [], @@ -113,6 +121,7 @@ def test_diamond_dependency(self): { "subtask": "Task B", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -121,6 +130,7 @@ def test_diamond_dependency(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -142,6 +152,7 @@ def test_case_insensitive_dependencies(self): { "subtask": "Task B", "tag": "task_b", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -150,6 +161,7 @@ def test_case_insensitive_dependencies(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -170,6 +182,7 @@ def test_multiple_independent_chains(self): { "subtask": "Task B", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -178,6 +191,7 @@ def test_multiple_independent_chains(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -187,6 +201,7 @@ def test_multiple_independent_chains(self): { "subtask": "Task D", "tag": "TASK_D", + "general_instructions": "", "constraints": [], "prompt_template": "Do D", "input_vars_required": [], @@ -195,6 +210,7 @@ def test_multiple_independent_chains(self): { "subtask": "Task C", "tag": "TASK_C", + "general_instructions": "", "constraints": [], "prompt_template": "Do C", "input_vars_required": [], @@ -219,6 +235,7 @@ def test_nonexistent_dependency_ignored(self): { "subtask": "Task B", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -230,6 +247,7 @@ def test_nonexistent_dependency_ignored(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -250,6 +268,7 @@ def test_renumbers_subtask_descriptions(self): { "subtask": "3. Do task C", "tag": "TASK_C", + "general_instructions": "", "constraints": [], "prompt_template": "Do C", "input_vars_required": [], @@ -258,6 +277,7 @@ def test_renumbers_subtask_descriptions(self): { "subtask": "2. Do task B", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -266,6 +286,7 @@ def test_renumbers_subtask_descriptions(self): { "subtask": "1. Do task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -287,6 +308,7 @@ def test_renumbers_only_numbered_subtasks(self): { "subtask": "2. Numbered task B", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -295,6 +317,7 @@ def test_renumbers_only_numbered_subtasks(self): { "subtask": "Unnumbered task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -315,6 +338,7 @@ def test_renumbers_with_complex_reordering(self): { "subtask": "4. Final task", "tag": "TASK_D", + "general_instructions": "", "constraints": [], "prompt_template": "Do D", "input_vars_required": [], @@ -323,6 +347,7 @@ def test_renumbers_with_complex_reordering(self): { "subtask": "3. Third task", "tag": "TASK_C", + "general_instructions": "", "constraints": [], "prompt_template": "Do C", "input_vars_required": [], @@ -331,6 +356,7 @@ def test_renumbers_with_complex_reordering(self): { "subtask": "2. Second task", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -339,6 +365,7 @@ def test_renumbers_with_complex_reordering(self): { "subtask": "1. First task", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -366,6 +393,7 @@ def test_circular_dependency_two_nodes(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -374,6 +402,7 @@ def test_circular_dependency_two_nodes(self): { "subtask": "Task B", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -390,6 +419,7 @@ def test_circular_dependency_three_nodes(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -398,6 +428,7 @@ def test_circular_dependency_three_nodes(self): { "subtask": "Task B", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -406,6 +437,7 @@ def test_circular_dependency_three_nodes(self): { "subtask": "Task C", "tag": "TASK_C", + "general_instructions": "", "constraints": [], "prompt_template": "Do C", "input_vars_required": [], @@ -422,6 +454,7 @@ def test_self_dependency(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -459,6 +492,7 @@ def test_no_input_vars_no_dependencies(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -482,6 +516,7 @@ def test_valid_input_vars(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A with {{ USER_INPUT }}", "input_vars_required": ["USER_INPUT"], @@ -504,6 +539,7 @@ def test_case_insensitive_input_vars(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": ["user_input"], # lowercase @@ -527,6 +563,7 @@ def test_valid_dependencies_in_order(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -535,6 +572,7 @@ def test_valid_dependencies_in_order(self): { "subtask": "Task B", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -559,6 +597,7 @@ def test_dependencies_out_of_order_triggers_reorder(self): { "subtask": "Task B", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -567,6 +606,7 @@ def test_dependencies_out_of_order_triggers_reorder(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -591,6 +631,7 @@ def test_complex_reordering(self): { "subtask": "Task D", "tag": "TASK_D", + "general_instructions": "", "constraints": [], "prompt_template": "Do D", "input_vars_required": [], @@ -599,6 +640,7 @@ def test_complex_reordering(self): { "subtask": "Task C", "tag": "TASK_C", + "general_instructions": "", "constraints": [], "prompt_template": "Do C", "input_vars_required": [], @@ -607,6 +649,7 @@ def test_complex_reordering(self): { "subtask": "Task B", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -615,6 +658,7 @@ def test_complex_reordering(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -643,6 +687,7 @@ def test_missing_required_input_var(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": ["MISSING_VAR"], @@ -667,6 +712,7 @@ def test_missing_required_input_var_with_some_provided(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": ["VAR1", "VAR2"], @@ -691,6 +737,7 @@ def test_dependency_on_nonexistent_subtask(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -715,6 +762,7 @@ def test_circular_dependency_detected(self): { "subtask": "Task B", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -723,6 +771,7 @@ def test_circular_dependency_detected(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -744,6 +793,7 @@ def test_empty_input_var_list_treated_as_none(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": ["REQUIRED_VAR"],