From 8234f325d825d02d42c99b09491a59f475be3eb1 Mon Sep 17 00:00:00 2001 From: csbobby Date: Tue, 10 Feb 2026 15:20:52 +0000 Subject: [PATCH 01/13] Keep origin workflows; sync main from m_M --- .github/workflows/cd.yml | 12 +----------- .github/workflows/ci.yml | 1 - .github/workflows/quality.yml | 19 +++++++++++-------- 3 files changed, 12 insertions(+), 20 deletions(-) diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index a4389b0d3..a2d45ad1c 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -2,12 +2,6 @@ name: "Run CD" on: workflow_dispatch: - inputs: - force_release: - description: 'Force release even if previous checks fail.' - type: boolean - required: false - default: false env: UV_FROZEN: "1" @@ -42,11 +36,7 @@ jobs: run: uv run --no-sync semantic-release changelog --unreleased release: needs: [code-checks, pre-release-check] - # Run this job only if the `TARGET_TAG_V` is set AND (the previous jobs - # were successful OR we are forcing the release). 
- if: >- - ${{ needs.pre-release-check.outputs.TARGET_TAG_V != '' && - ( success() || inputs.force_release ) }} + if: needs.pre-release-check.outputs.TARGET_TAG_V != '' environment: auto-release runs-on: ubuntu-latest concurrency: release diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4fb08f319..825ebbd0f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -3,7 +3,6 @@ name: "Run CI" on: pull_request: types: [opened, reopened, synchronize] - merge_group: jobs: code-checks: diff --git a/.github/workflows/quality.yml b/.github/workflows/quality.yml index ad82d084f..757772600 100644 --- a/.github/workflows/quality.yml +++ b/.github/workflows/quality.yml @@ -3,9 +3,10 @@ name: Verify Code Quality on: workflow_call: + concurrency: - group: ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.event.pull_request.number || github.ref_name }} - cancel-in-progress: true + group: ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.event.pull_request.number || github.ref_name }} + cancel-in-progress: true env: CICD: 1 @@ -14,10 +15,10 @@ env: jobs: quality: runs-on: ubuntu-latest - timeout-minutes: 90 # TODO: need to reduce this after we figure out our testing strategy. + timeout-minutes: 90 # TODO: need to reduce this after we figure out our testing strategy. strategy: matrix: - python-version: ["3.10", "3.11", "3.12"] # Need to add 3.13 once we resolve outlines issues. + python-version: ['3.10', '3.11', '3.12'] # Need to add 3.13 once we resolve outlines issues. steps: - uses: actions/checkout@v4 - name: Free disk space @@ -38,16 +39,18 @@ jobs: - name: Check style and run tests run: pre-commit run --all-files - name: Send failure message pre-commit - if: failure() # This step will only run if a previous step failed + if: failure() # This step will only run if a previous step failed run: echo "The quality verification failed. 
Please run precommit " - name: Install Ollama run: curl -fsSL https://ollama.com/install.sh | sh - name: Start serving ollama run: nohup ollama serve & - - name: Pull model granite4:micro - run: ollama pull granite4:micro + - name: Pull Llama 3.2:1b model + run: ollama pull llama3.2:1b + - name: Run Tests run: uv run -m pytest -v test - name: Send failure message tests - if: failure() # This step will only run if a previous step failed + if: failure() # This step will only run if a previous step failed run: echo "Tests failed. Please verify that tests are working locally." + From fd7404b1c0832cf283900db0499033cf4c1f275a Mon Sep 17 00:00:00 2001 From: csbobby Date: Tue, 10 Feb 2026 15:26:23 +0000 Subject: [PATCH 02/13] Keep origin workflows; sync main from m_M --- .github/workflows/cd.yml | 12 +----------- .github/workflows/ci.yml | 1 - .github/workflows/quality.yml | 19 +++++++++++-------- 3 files changed, 12 insertions(+), 20 deletions(-) diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index a4389b0d3..a2d45ad1c 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -2,12 +2,6 @@ name: "Run CD" on: workflow_dispatch: - inputs: - force_release: - description: 'Force release even if previous checks fail.' - type: boolean - required: false - default: false env: UV_FROZEN: "1" @@ -42,11 +36,7 @@ jobs: run: uv run --no-sync semantic-release changelog --unreleased release: needs: [code-checks, pre-release-check] - # Run this job only if the `TARGET_TAG_V` is set AND (the previous jobs - # were successful OR we are forcing the release). 
- if: >- - ${{ needs.pre-release-check.outputs.TARGET_TAG_V != '' && - ( success() || inputs.force_release ) }} + if: needs.pre-release-check.outputs.TARGET_TAG_V != '' environment: auto-release runs-on: ubuntu-latest concurrency: release diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4fb08f319..825ebbd0f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -3,7 +3,6 @@ name: "Run CI" on: pull_request: types: [opened, reopened, synchronize] - merge_group: jobs: code-checks: diff --git a/.github/workflows/quality.yml b/.github/workflows/quality.yml index ad82d084f..757772600 100644 --- a/.github/workflows/quality.yml +++ b/.github/workflows/quality.yml @@ -3,9 +3,10 @@ name: Verify Code Quality on: workflow_call: + concurrency: - group: ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.event.pull_request.number || github.ref_name }} - cancel-in-progress: true + group: ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.event.pull_request.number || github.ref_name }} + cancel-in-progress: true env: CICD: 1 @@ -14,10 +15,10 @@ env: jobs: quality: runs-on: ubuntu-latest - timeout-minutes: 90 # TODO: need to reduce this after we figure out our testing strategy. + timeout-minutes: 90 # TODO: need to reduce this after we figure out our testing strategy. strategy: matrix: - python-version: ["3.10", "3.11", "3.12"] # Need to add 3.13 once we resolve outlines issues. + python-version: ['3.10', '3.11', '3.12'] # Need to add 3.13 once we resolve outlines issues. steps: - uses: actions/checkout@v4 - name: Free disk space @@ -38,16 +39,18 @@ jobs: - name: Check style and run tests run: pre-commit run --all-files - name: Send failure message pre-commit - if: failure() # This step will only run if a previous step failed + if: failure() # This step will only run if a previous step failed run: echo "The quality verification failed. 
Please run precommit " - name: Install Ollama run: curl -fsSL https://ollama.com/install.sh | sh - name: Start serving ollama run: nohup ollama serve & - - name: Pull model granite4:micro - run: ollama pull granite4:micro + - name: Pull Llama 3.2:1b model + run: ollama pull llama3.2:1b + - name: Run Tests run: uv run -m pytest -v test - name: Send failure message tests - if: failure() # This step will only run if a previous step failed + if: failure() # This step will only run if a previous step failed run: echo "Tests failed. Please verify that tests are working locally." + From ff6eedbc267df143aac5a89362e467f4e7a55100 Mon Sep 17 00:00:00 2001 From: csbobby Date: Tue, 10 Feb 2026 15:32:15 +0000 Subject: [PATCH 03/13] Remove pr-update workflow to avoid workflow scope requirement --- .github/workflows/pr-update.yml | 73 --------------------------------- 1 file changed, 73 deletions(-) delete mode 100644 .github/workflows/pr-update.yml diff --git a/.github/workflows/pr-update.yml b/.github/workflows/pr-update.yml deleted file mode 100644 index 33505a0f4..000000000 --- a/.github/workflows/pr-update.yml +++ /dev/null @@ -1,73 +0,0 @@ -name: PR Bot - -on: - pull_request_target: - types: [opened, edited] - -jobs: - update-pr-body: - runs-on: ubuntu-latest - if: ${{ !contains(github.event.pull_request.body, 'mellea-pr-edited-marker') }} - permissions: - pull-requests: write - contents: read - steps: - - name: Checkout code # Checks out the base branch, not PR branch. 
- uses: actions/checkout@v4 - - - name: Detect PR type from checkboxes - id: detect-type - env: - PR_BODY: ${{ github.event.pull_request.body }} - run: | - PR_TYPE="" - - # Check for checked boxes (supports [x] and [X]) - if echo "$PR_BODY" | grep -qi '\[x\] Component'; then - PR_TYPE="component" - elif echo "$PR_BODY" | grep -qi '\[x\] Requirement'; then - PR_TYPE="requirement" - elif echo "$PR_BODY" | grep -qi '\[x\] Sampling Strategy'; then - PR_TYPE="sampling" - elif echo "$PR_BODY" | grep -qi '\[x\] Tool'; then - PR_TYPE="tool" - elif echo "$PR_BODY" | grep -qi '\[x\] Misc'; then - PR_TYPE="misc" - fi - - if [ -z "$PR_TYPE" ]; then - echo "::error::No PR type selected. Please check one of of the boxes from the original pr template." - exit 1 - fi - - echo "pr_type=$PR_TYPE" >> "$GITHUB_OUTPUT" - echo "Detected PR type: $PR_TYPE" - - - name: Update PR body with checklist - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - PR_BODY: ${{ github.event.pull_request.body }} - PR_TYPE: ${{ steps.detect-type.outputs.pr_type }} - run: | - TEMPLATE_FILE=".github/PULL_REQUEST_TEMPLATE/${PR_TYPE}.md" - - if [ -f "$TEMPLATE_FILE" ]; then - MARKER="" - TEMPLATE_CONTENT=$(cat "$TEMPLATE_FILE") - - NEW_BODY="${MARKER} - ${TEMPLATE_CONTENT}" - - gh pr edit ${{ github.event.pull_request.number }} --body "$NEW_BODY" - echo "Updated PR body with ${PR_TYPE} checklist" - else - echo "::error::Template file not found: $TEMPLATE_FILE" - echo "Something as gone wrong. Contact a maintainer." - exit 1 - fi - - - name: Comment on PR - uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc - with: - message: | - The PR description has been updated. Please fill out the template for your PR to be reviewed. 
From fdb9894c3ab656b1e1de4a82494e99e2fcdabe22 Mon Sep 17 00:00:00 2001 From: csbobby Date: Tue, 10 Feb 2026 15:37:39 +0000 Subject: [PATCH 04/13] add: robust constraint_extractor parser --- .../_constraint_extractor.py | 33 +++++++++++-------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/cli/decompose/prompt_modules/constraint_extractor/_constraint_extractor.py b/cli/decompose/prompt_modules/constraint_extractor/_constraint_extractor.py index 43558cce4..a4c07b02f 100644 --- a/cli/decompose/prompt_modules/constraint_extractor/_constraint_extractor.py +++ b/cli/decompose/prompt_modules/constraint_extractor/_constraint_extractor.py @@ -53,20 +53,27 @@ def _default_parser(generated_str: str) -> list[str]: 'LLM failed to generate correct tags for extraction: ""' ) - # TODO: Maybe replace this logic with a RegEx? - constraint_extractor_str_upper = constraint_extractor_str.upper() - if ( - "N/A" in constraint_extractor_str_upper - or "N / A" in constraint_extractor_str_upper - or "N/ A" in constraint_extractor_str_upper - or "N /A" in constraint_extractor_str_upper - ): + s = constraint_extractor_str.strip() + s_upper = s.upper() + if s_upper in {"N/A", "N / A", "N/ A", "N /A"}: return [] - return [ - line.strip()[2:] if line.strip()[:2] == "- " else line.strip() - for line in constraint_extractor_str.splitlines() - ] + results: list[str] = [] + + for line in s.splitlines(): + line = line.strip() + if not line: + continue + + # remove bullet / numbering + line = re.sub(r"^\s*(?:[-*•]|\d+[\.\)])\s+", "", line) + + # split inline multi-constraints + parts = re.split(r"\s*(?:;|\s-\s|\s—\s|\s–\s)\s*", line) + + results.extend(p.strip() for p in parts if p.strip()) + + return results def generate( # type: ignore[override] # About the mypy ignore above: @@ -133,4 +140,4 @@ def generate( # type: ignore[override] return PromptModuleString(gen_result, parser) -constraint_extractor = _ConstraintExtractor() +constraint_extractor = _ConstraintExtractor() \ No 
newline at end of file From 39a2f923c5711e78ff15071dcd3667efc9e12649 Mon Sep 17 00:00:00 2001 From: csbobby Date: Tue, 10 Feb 2026 16:07:10 +0000 Subject: [PATCH 05/13] upd: sync the workflows with the mellea main --- .github/workflows/cd.yml | 14 ++++++- .github/workflows/ci.yml | 3 +- .github/workflows/pr-update.yml | 73 +++++++++++++++++++++++++++++++++ .github/workflows/quality.yml | 21 ++++------ 4 files changed, 96 insertions(+), 15 deletions(-) create mode 100644 .github/workflows/pr-update.yml diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index a2d45ad1c..5b1973f7f 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -2,6 +2,12 @@ name: "Run CD" on: workflow_dispatch: + inputs: + force_release: + description: 'Force release even if previous checks fail.' + type: boolean + required: false + default: false env: UV_FROZEN: "1" @@ -36,7 +42,11 @@ jobs: run: uv run --no-sync semantic-release changelog --unreleased release: needs: [code-checks, pre-release-check] - if: needs.pre-release-check.outputs.TARGET_TAG_V != '' + # Run this job only if the `TARGET_TAG_V` is set AND (the previous jobs + # were successful OR we are forcing the release). 
+ if: >- + ${{ needs.pre-release-check.outputs.TARGET_TAG_V != '' && + ( success() || inputs.force_release ) }} environment: auto-release runs-on: ubuntu-latest concurrency: release @@ -63,4 +73,4 @@ jobs: CHGLOG_FILE: CHANGELOG.md GITHUB_REPOSITORY: ${{ github.repository }} run: ./.github/scripts/release.sh - shell: bash + shell: bash \ No newline at end of file diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 825ebbd0f..ad19aa03e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -3,7 +3,8 @@ name: "Run CI" on: pull_request: types: [opened, reopened, synchronize] + merge_group: jobs: code-checks: - uses: ./.github/workflows/quality.yml + uses: ./.github/workflows/quality.yml \ No newline at end of file diff --git a/.github/workflows/pr-update.yml b/.github/workflows/pr-update.yml new file mode 100644 index 000000000..f99964e24 --- /dev/null +++ b/.github/workflows/pr-update.yml @@ -0,0 +1,73 @@ +name: PR Bot + +on: + pull_request_target: + types: [opened, edited] + +jobs: + update-pr-body: + runs-on: ubuntu-latest + if: ${{ !contains(github.event.pull_request.body, 'mellea-pr-edited-marker') }} + permissions: + pull-requests: write + contents: read + steps: + - name: Checkout code # Checks out the base branch, not PR branch. + uses: actions/checkout@v4 + + - name: Detect PR type from checkboxes + id: detect-type + env: + PR_BODY: ${{ github.event.pull_request.body }} + run: | + PR_TYPE="" + + # Check for checked boxes (supports [x] and [X]) + if echo "$PR_BODY" | grep -qi '\[x\] Component'; then + PR_TYPE="component" + elif echo "$PR_BODY" | grep -qi '\[x\] Requirement'; then + PR_TYPE="requirement" + elif echo "$PR_BODY" | grep -qi '\[x\] Sampling Strategy'; then + PR_TYPE="sampling" + elif echo "$PR_BODY" | grep -qi '\[x\] Tool'; then + PR_TYPE="tool" + elif echo "$PR_BODY" | grep -qi '\[x\] Misc'; then + PR_TYPE="misc" + fi + + if [ -z "$PR_TYPE" ]; then + echo "::error::No PR type selected. 
Please check one of of the boxes from the original pr template." + exit 1 + fi + + echo "pr_type=$PR_TYPE" >> "$GITHUB_OUTPUT" + echo "Detected PR type: $PR_TYPE" + + - name: Update PR body with checklist + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + PR_BODY: ${{ github.event.pull_request.body }} + PR_TYPE: ${{ steps.detect-type.outputs.pr_type }} + run: | + TEMPLATE_FILE=".github/PULL_REQUEST_TEMPLATE/${PR_TYPE}.md" + + if [ -f "$TEMPLATE_FILE" ]; then + MARKER="" + TEMPLATE_CONTENT=$(cat "$TEMPLATE_FILE") + + NEW_BODY="${MARKER} + ${TEMPLATE_CONTENT}" + + gh pr edit ${{ github.event.pull_request.number }} --body "$NEW_BODY" + echo "Updated PR body with ${PR_TYPE} checklist" + else + echo "::error::Template file not found: $TEMPLATE_FILE" + echo "Something as gone wrong. Contact a maintainer." + exit 1 + fi + + - name: Comment on PR + uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc + with: + message: | + The PR description has been updated. Please fill out the template for your PR to be reviewed. \ No newline at end of file diff --git a/.github/workflows/quality.yml b/.github/workflows/quality.yml index 757772600..4bb927b0a 100644 --- a/.github/workflows/quality.yml +++ b/.github/workflows/quality.yml @@ -3,10 +3,9 @@ name: Verify Code Quality on: workflow_call: - concurrency: - group: ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.event.pull_request.number || github.ref_name }} - cancel-in-progress: true + group: ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.event.pull_request.number || github.ref_name }} + cancel-in-progress: true env: CICD: 1 @@ -15,10 +14,10 @@ env: jobs: quality: runs-on: ubuntu-latest - timeout-minutes: 90 # TODO: need to reduce this after we figure out our testing strategy. + timeout-minutes: 90 # TODO: need to reduce this after we figure out our testing strategy. 
strategy: matrix: - python-version: ['3.10', '3.11', '3.12'] # Need to add 3.13 once we resolve outlines issues. + python-version: ["3.10", "3.11", "3.12"] # Need to add 3.13 once we resolve outlines issues. steps: - uses: actions/checkout@v4 - name: Free disk space @@ -39,18 +38,16 @@ jobs: - name: Check style and run tests run: pre-commit run --all-files - name: Send failure message pre-commit - if: failure() # This step will only run if a previous step failed + if: failure() # This step will only run if a previous step failed run: echo "The quality verification failed. Please run precommit " - name: Install Ollama run: curl -fsSL https://ollama.com/install.sh | sh - name: Start serving ollama run: nohup ollama serve & - - name: Pull Llama 3.2:1b model - run: ollama pull llama3.2:1b - + - name: Pull model granite4:micro + run: ollama pull granite4:micro - name: Run Tests run: uv run -m pytest -v test - name: Send failure message tests - if: failure() # This step will only run if a previous step failed - run: echo "Tests failed. Please verify that tests are working locally." - + if: failure() # This step will only run if a previous step failed + run: echo "Tests failed. Please verify that tests are working locally." 
\ No newline at end of file From 72e547023b86787727a72945897c660193aa674c Mon Sep 17 00:00:00 2001 From: csbobby Date: Tue, 10 Feb 2026 20:16:57 +0000 Subject: [PATCH 06/13] upd: add the newlines in the workflows files --- .github/workflows/cd.yml | 2 +- .github/workflows/ci.yml | 2 +- .github/workflows/pr-update.yml | 2 +- .github/workflows/quality.yml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index 5b1973f7f..a4389b0d3 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -73,4 +73,4 @@ jobs: CHGLOG_FILE: CHANGELOG.md GITHUB_REPOSITORY: ${{ github.repository }} run: ./.github/scripts/release.sh - shell: bash \ No newline at end of file + shell: bash diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ad19aa03e..4fb08f319 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,4 +7,4 @@ on: jobs: code-checks: - uses: ./.github/workflows/quality.yml \ No newline at end of file + uses: ./.github/workflows/quality.yml diff --git a/.github/workflows/pr-update.yml b/.github/workflows/pr-update.yml index f99964e24..33505a0f4 100644 --- a/.github/workflows/pr-update.yml +++ b/.github/workflows/pr-update.yml @@ -70,4 +70,4 @@ jobs: uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc with: message: | - The PR description has been updated. Please fill out the template for your PR to be reviewed. \ No newline at end of file + The PR description has been updated. Please fill out the template for your PR to be reviewed. diff --git a/.github/workflows/quality.yml b/.github/workflows/quality.yml index 4bb927b0a..ad82d084f 100644 --- a/.github/workflows/quality.yml +++ b/.github/workflows/quality.yml @@ -50,4 +50,4 @@ jobs: run: uv run -m pytest -v test - name: Send failure message tests if: failure() # This step will only run if a previous step failed - run: echo "Tests failed. Please verify that tests are working locally." 
\ No newline at end of file + run: echo "Tests failed. Please verify that tests are working locally." From c6862403f63728f35d4cc2aa08e4ead07f95022e Mon Sep 17 00:00:00 2001 From: csbobby Date: Tue, 10 Feb 2026 20:17:58 +0000 Subject: [PATCH 07/13] upd: normalizing whitespace and separators for the constraint NA situations --- .../constraint_extractor/_constraint_extractor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cli/decompose/prompt_modules/constraint_extractor/_constraint_extractor.py b/cli/decompose/prompt_modules/constraint_extractor/_constraint_extractor.py index a4c07b02f..c86a569e3 100644 --- a/cli/decompose/prompt_modules/constraint_extractor/_constraint_extractor.py +++ b/cli/decompose/prompt_modules/constraint_extractor/_constraint_extractor.py @@ -54,8 +54,8 @@ def _default_parser(generated_str: str) -> list[str]: ) s = constraint_extractor_str.strip() - s_upper = s.upper() - if s_upper in {"N/A", "N / A", "N/ A", "N /A"}: + s_norm = s.strip().upper().replace(" ", "") + if s_norm == "N/A": return [] results: list[str] = [] From d2e07d57f3ab2749baac61f40d0a6638c9538cab Mon Sep 17 00:00:00 2001 From: csbobby Date: Thu, 12 Feb 2026 16:34:47 +0000 Subject: [PATCH 08/13] upd: format with a newline --- .../constraint_extractor/_constraint_extractor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/decompose/prompt_modules/constraint_extractor/_constraint_extractor.py b/cli/decompose/prompt_modules/constraint_extractor/_constraint_extractor.py index c86a569e3..8e7d9bd2f 100644 --- a/cli/decompose/prompt_modules/constraint_extractor/_constraint_extractor.py +++ b/cli/decompose/prompt_modules/constraint_extractor/_constraint_extractor.py @@ -140,4 +140,4 @@ def generate( # type: ignore[override] return PromptModuleString(gen_result, parser) -constraint_extractor = _ConstraintExtractor() \ No newline at end of file +constraint_extractor = _ConstraintExtractor() From ff25006a2b4bae5fcca515ed4cdf0626cd7e9295 Mon 
Sep 17 00:00:00 2001 From: Tulio Coppola Date: Mon, 3 Nov 2025 12:18:31 -0300 Subject: [PATCH 09/13] feat: req validation generator & template v2 Signed-off-by: Tulio Coppola --- cli/decompose/decompose.py | 41 ++++--- cli/decompose/m_decomp_result_v1.py.jinja2 | 15 +++ cli/decompose/m_decomp_result_v2.py.jinja2 | 91 ++++++++++++++ cli/decompose/pipeline.py | 61 ++++++++-- cli/decompose/prompt_modules/__init__.py | 3 + .../_prompt/system_template.jinja2 | 2 +- .../validation_code_generator/__init__.py | 7 ++ .../validation_code_generator/_exceptions.py | 24 ++++ .../_prompt/__init__.py | 5 + .../_prompt/_icl_examples/__init__.py | 2 + .../_icl_examples/_example_1/__init__.py | 1 + .../_icl_examples/_example_1/_example.py | 24 ++++ .../_icl_examples/_example_2/__init__.py | 1 + .../_icl_examples/_example_2/_example.py | 31 +++++ .../_icl_examples/_example_3/__init__.py | 1 + .../_icl_examples/_example_3/_example.py | 58 +++++++++ .../_icl_examples/_example_4/__init__.py | 1 + .../_icl_examples/_example_4/_example.py | 31 +++++ .../_prompt/_icl_examples/_icl_examples.py | 5 + .../_prompt/_icl_examples/_types.py | 6 + .../_prompt/_prompt.py | 19 +++ .../_prompt/system_template.jinja2 | 77 ++++++++++++ .../_prompt/user_template.jinja2 | 9 ++ .../_validation_code_generator.py | 113 ++++++++++++++++++ .../_prompt/system_template.jinja2 | 7 +- 25 files changed, 604 insertions(+), 31 deletions(-) create mode 100644 cli/decompose/m_decomp_result_v2.py.jinja2 create mode 100644 cli/decompose/prompt_modules/validation_code_generator/__init__.py create mode 100644 cli/decompose/prompt_modules/validation_code_generator/_exceptions.py create mode 100644 cli/decompose/prompt_modules/validation_code_generator/_prompt/__init__.py create mode 100644 cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/__init__.py create mode 100644 cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_1/__init__.py create mode 100644 
cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_1/_example.py create mode 100644 cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_2/__init__.py create mode 100644 cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_2/_example.py create mode 100644 cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_3/__init__.py create mode 100644 cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_3/_example.py create mode 100644 cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_4/__init__.py create mode 100644 cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_4/_example.py create mode 100644 cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_icl_examples.py create mode 100644 cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_types.py create mode 100644 cli/decompose/prompt_modules/validation_code_generator/_prompt/_prompt.py create mode 100644 cli/decompose/prompt_modules/validation_code_generator/_prompt/system_template.jinja2 create mode 100644 cli/decompose/prompt_modules/validation_code_generator/_prompt/user_template.jinja2 create mode 100644 cli/decompose/prompt_modules/validation_code_generator/_validation_code_generator.py diff --git a/cli/decompose/decompose.py b/cli/decompose/decompose.py index 0b193d351..ae22f5433 100644 --- a/cli/decompose/decompose.py +++ b/cli/decompose/decompose.py @@ -1,6 +1,6 @@ import json import keyword -import re +import shutil from enum import Enum from graphlib import TopologicalSorter from pathlib import Path @@ -16,7 +16,7 @@ class DecompVersion(str, Enum): latest = "latest" v1 = "v1" - # v2 = "v2" + v2 = "v2" this_file_dir = Path(__file__).resolve().parent @@ -307,27 +307,40 @@ def run( backend_api_key=backend_api_key, ) - # Verify that all 
user variables are properly defined before use - # This may reorder subtasks if dependencies are out of order - decomp_data = verify_user_variables(decomp_data, input_var) + decomp_dir = out_dir / out_name + val_fn_dir = decomp_dir / "validations" + val_fn_dir.mkdir(parents=True) - with open(out_dir / f"{out_name}.json", "w") as f: + (val_fn_dir / "__init__.py").touch() + + for constraint in decomp_data["identified_constraints"]: + if constraint["val_fn"] is not None: + with open(val_fn_dir / f"{constraint['val_fn_name']}.py", "w") as f: + f.write(constraint["val_fn"] + "\n") + + with open(decomp_dir / f"{out_name}.json", "w") as f: json.dump(decomp_data, f, indent=2) - with open(out_dir / f"{out_name}.py", "w") as f: + with open(decomp_dir / f"{out_name}.py", "w") as f: f.write( m_template.render( - subtasks=decomp_data["subtasks"], user_inputs=input_var + subtasks=decomp_data["subtasks"], + user_inputs=input_var, + identified_constraints=decomp_data["identified_constraints"], ) + "\n" ) except Exception: - created_json = Path(out_dir / f"{out_name}.json") - created_py = Path(out_dir / f"{out_name}.py") + # created_json = Path(out_dir / f"{out_name}.json") + # created_py = Path(out_dir / f"{out_name}.py") + + # if created_json.exists() and created_json.is_file(): + # created_json.unlink() + # if created_py.exists() and created_py.is_file(): + # created_py.unlink() - if created_json.exists() and created_json.is_file(): - created_json.unlink() - if created_py.exists() and created_py.is_file(): - created_py.unlink() + decomp_dir = out_dir / out_name + if decomp_dir.exists() and decomp_dir.is_dir(): + shutil.rmtree(decomp_dir) raise Exception diff --git a/cli/decompose/m_decomp_result_v1.py.jinja2 b/cli/decompose/m_decomp_result_v1.py.jinja2 index 7aa1d54f4..1f1e3646e 100644 --- a/cli/decompose/m_decomp_result_v1.py.jinja2 +++ b/cli/decompose/m_decomp_result_v1.py.jinja2 @@ -4,6 +4,14 @@ import os import textwrap import mellea +{%- if "code" in identified_constraints 
| map(attribute="val_strategy") %} +from mellea.stdlib.requirement import req +{% for c in identified_constraints %} +{%- if c.val_fn %} +from validations.{{ c.val_fn_name }} import validate_input as {{ c.val_fn_name }} +{%- endif %} +{%- endfor %} +{%- endif %} m = mellea.start_session() {%- if user_inputs %} @@ -30,7 +38,14 @@ except KeyError as e: {%- if item.constraints %} requirements=[ {%- for c in item.constraints %} + {%- if c.val_fn %} + req( + {{ c.constraint | tojson}}, + validation_fn={{ c.val_fn_name }}, + ), + {%- else %} {{ c.constraint | tojson}}, + {%- endif %} {%- endfor %} ], {%- else %} diff --git a/cli/decompose/m_decomp_result_v2.py.jinja2 b/cli/decompose/m_decomp_result_v2.py.jinja2 new file mode 100644 index 000000000..9b1bb13c6 --- /dev/null +++ b/cli/decompose/m_decomp_result_v2.py.jinja2 @@ -0,0 +1,91 @@ +{% if user_inputs -%} +import os +{% endif -%} +import textwrap + +import mellea +{%- if "code" in identified_constraints | map(attribute="val_strategy") %} +from mellea.stdlib.requirement import req +{% for c in identified_constraints %} +{%- if c.val_fn %} +from validations.{{ c.val_fn_name }} import validate_input as {{ c.val_fn_name }} +{%- endif %} +{%- endfor %} +{%- endif %} + +m = mellea.start_session() +{%- if user_inputs %} + + +# User Input Variables +try: + {%- for var in user_inputs %} + {{ var | lower }} = os.environ["{{ var | upper }}"] + {%- endfor %} +except KeyError as e: + print(f"ERROR: One or more required environment variables are not set; {e}") + exit(1) +{%- endif %} +{%- for item in subtasks %} + + +{{ item.tag | lower }}_gnrl = textwrap.dedent( + R""" + {{ item.general_instructions | trim | indent(width=4, first=False) }} + """.strip() +) +{{ item.tag | lower }} = m.instruct( + {%- if not item.input_vars_required %} + {{ item.subtask[3:] | trim | tojson }}, + {%- else %} + textwrap.dedent( + R""" + {{ item.subtask[3:] | trim }} + + Here are the input variables and their content: + {%- for var in 
item.input_vars_required %} + + - {{ var | upper }} = {{ "{{" }}{{ var | upper }}{{ "}}" }} + {%- endfor %} + """.strip() + ), + {%- endif %} + {%- if item.constraints %} + requirements=[ + {%- for c in item.constraints %} + {%- if c.val_fn %} + req( + {{ c.constraint | tojson}}, + validation_fn={{ c.val_fn_name }}, + ), + {%- else %} + {{ c.constraint | tojson}}, + {%- endif %} + {%- endfor %} + ], + {%- else %} + requirements=None, + {%- endif %} + {%- if item.input_vars_required %} + user_variables={ + {%- for var in item.input_vars_required %} + {{ var | upper | tojson }}: {{ var | lower }}, + {%- endfor %} + }, + {%- endif %} + grounding_context={ + "GENERAL_INSTRUCTIONS": {{ item.tag | lower }}_gnrl, + {%- for var in item.depends_on %} + {{ var | upper | tojson }}: {{ var | lower }}.value, + {%- endfor %} + }, +) +assert {{ item.tag | lower }}.value is not None, 'ERROR: task "{{ item.tag | lower }}" execution failed' +{%- if loop.last %} + + +final_answer = {{ item.tag | lower }}.value + +print(final_answer) +{%- endif -%} +{%- endfor -%} diff --git a/cli/decompose/pipeline.py b/cli/decompose/pipeline.py index a574d35d6..f2bc79291 100644 --- a/cli/decompose/pipeline.py +++ b/cli/decompose/pipeline.py @@ -11,10 +11,11 @@ from .prompt_modules import ( constraint_extractor, - # general_instructions, + general_instructions, subtask_constraint_assign, subtask_list, subtask_prompt_generator, + validation_code_generator, validation_decision, ) from .prompt_modules.subtask_constraint_assign import SubtaskPromptConstraintsItem @@ -22,9 +23,16 @@ from .prompt_modules.subtask_prompt_generator import SubtaskPromptItem +class ConstraintValData(TypedDict): + val_strategy: Literal["code", "llm"] + val_fn: str | None + + class ConstraintResult(TypedDict): constraint: str - validation_strategy: str + val_strategy: Literal["code", "llm"] + val_fn: str | None + val_fn_name: str class DecompSubtasksResult(TypedDict): @@ -32,7 +40,7 @@ class DecompSubtasksResult(TypedDict): tag: 
str constraints: list[ConstraintResult] prompt_template: str - # general_instructions: str + general_instructions: str input_vars_required: list[str] depends_on: list[str] generated_response: NotRequired[str] @@ -72,7 +80,9 @@ def decompose( case DecompBackend.ollama: m_session = MelleaSession( OllamaModelBackend( - model_id=model_id, model_options={ModelOption.CONTEXT_WINDOW: 16384} + model_id=model_id, + base_url=backend_endpoint, + model_options={ModelOption.CONTEXT_WINDOW: 16384}, ) ) case DecompBackend.openai: @@ -115,11 +125,27 @@ def decompose( m_session, task_prompt, enforce_same_words=False ).parse() - constraint_validation_strategies: dict[str, Literal["code", "llm"]] = { - cons_key: validation_decision.generate(m_session, cons_key).parse() + constraint_val_strategy: dict[ + str, dict[Literal["val_strategy"], Literal["code", "llm"]] + ] = { + cons_key: { + "val_strategy": validation_decision.generate(m_session, cons_key).parse() + } for cons_key in task_prompt_constraints } + constraint_val_data: dict[str, ConstraintValData] = {} + + for cons_key in constraint_val_strategy: + constraint_val_data[cons_key] = { + "val_strategy": constraint_val_strategy[cons_key]["val_strategy"], + "val_fn": None, + } + if constraint_val_data[cons_key]["val_strategy"] == "code": + constraint_val_data[cons_key]["val_fn"] = ( + validation_code_generator.generate(m_session, cons_key).parse() + ) + subtask_prompts: list[SubtaskPromptItem] = subtask_prompt_generator.generate( m_session, task_prompt, @@ -142,14 +168,21 @@ def decompose( constraints=[ { "constraint": cons_str, - "validation_strategy": constraint_validation_strategies[cons_str], + "val_strategy": constraint_val_data[cons_str]["val_strategy"], + "val_fn_name": f"val_fn_{task_prompt_constraints.index(cons_str) + 1}", + # >> Always include generated "val_fn" code (experimental) + "val_fn": constraint_val_data[cons_str]["val_fn"], + # >> Include generated "val_fn" code only for the last subtask (experimental) + # 
"val_fn": constraint_val_data[cons_str]["val_fn"] + # if subtask_i + 1 == len(subtask_prompts_with_constraints) + # else None, } for cons_str in subtask_data.constraints ], prompt_template=subtask_data.prompt_template, - # general_instructions=general_instructions.generate( - # m_session, input_str=subtask_data.prompt_template - # ).parse(), + general_instructions=general_instructions.generate( + m_session, input_str=subtask_data.prompt_template + ).parse(), input_vars_required=list( dict.fromkeys( # Remove duplicates while preserving the original order. [ @@ -173,7 +206,7 @@ def decompose( ) ), ) - for subtask_data in subtask_prompts_with_constraints + for subtask_i, subtask_data in enumerate(subtask_prompts_with_constraints) ] return DecompPipelineResult( @@ -182,9 +215,11 @@ def decompose( identified_constraints=[ { "constraint": cons_str, - "validation_strategy": constraint_validation_strategies[cons_str], + "val_strategy": constraint_val_data[cons_str]["val_strategy"], + "val_fn": constraint_val_data[cons_str]["val_fn"], + "val_fn_name": f"val_fn_{cons_i + 1}", } - for cons_str in task_prompt_constraints + for cons_i, cons_str in enumerate(task_prompt_constraints) ], subtasks=decomp_subtask_result, ) diff --git a/cli/decompose/prompt_modules/__init__.py b/cli/decompose/prompt_modules/__init__.py index 19b7079e3..922bdcbe8 100644 --- a/cli/decompose/prompt_modules/__init__.py +++ b/cli/decompose/prompt_modules/__init__.py @@ -7,4 +7,7 @@ from .subtask_prompt_generator import ( subtask_prompt_generator as subtask_prompt_generator, ) +from .validation_code_generator import ( + validation_code_generator as validation_code_generator, +) from .validation_decision import validation_decision as validation_decision diff --git a/cli/decompose/prompt_modules/subtask_constraint_assign/_prompt/system_template.jinja2 b/cli/decompose/prompt_modules/subtask_constraint_assign/_prompt/system_template.jinja2 index 30baaf93a..e5cad42e7 100644 --- 
a/cli/decompose/prompt_modules/subtask_constraint_assign/_prompt/system_template.jinja2 +++ b/cli/decompose/prompt_modules/subtask_constraint_assign/_prompt/system_template.jinja2 @@ -8,7 +8,7 @@ You will be provided with the following 4 parameters inside their respective tag 4. : A list of candidate (possible) constraints that can be assigned to the target task. -The list contain the constraints of all tasks on the , your job is to filter and select only the constraints belonging to your target task. +The is a list of constraints identified for the entire , your job is to filter and select only the constraints belonging to your target task. It is possible that none of the constraints in the are relevant or related to your target task. Below, enclosed in tags, are instructions to guide you on how to complete your assignment: diff --git a/cli/decompose/prompt_modules/validation_code_generator/__init__.py b/cli/decompose/prompt_modules/validation_code_generator/__init__.py new file mode 100644 index 000000000..dfb4bd0ce --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/__init__.py @@ -0,0 +1,7 @@ +from ._exceptions import ( + BackendGenerationError as BackendGenerationError, + TagExtractionError as TagExtractionError, +) +from ._validation_code_generator import ( + validation_code_generator as validation_code_generator, +) diff --git a/cli/decompose/prompt_modules/validation_code_generator/_exceptions.py b/cli/decompose/prompt_modules/validation_code_generator/_exceptions.py new file mode 100644 index 000000000..d808b613d --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_exceptions.py @@ -0,0 +1,24 @@ +from typing import Any + + +class ValidationCodeGeneratorError(Exception): + def __init__(self, error_message: str, **kwargs: dict[str, Any]): + self.error_message = error_message + self.__dict__.update(kwargs) + super().__init__( + f'Module Error "validation_code_generator"; {self.error_message}' + ) + + +class 
BackendGenerationError(ValidationCodeGeneratorError): + """Raised when LLM generation fails in the "validation_code_generator" prompt module.""" + + def __init__(self, error_message: str, **kwargs: dict[str, Any]): + super().__init__(error_message, **kwargs) + + +class TagExtractionError(ValidationCodeGeneratorError): + """Raised when tag extraction fails in the "validation_code_generator" prompt module.""" + + def __init__(self, error_message: str, **kwargs: dict[str, Any]): + super().__init__(error_message, **kwargs) diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/__init__.py b/cli/decompose/prompt_modules/validation_code_generator/_prompt/__init__.py new file mode 100644 index 000000000..0b985cbe6 --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/__init__.py @@ -0,0 +1,5 @@ +from ._icl_examples import icl_examples as default_icl_examples +from ._prompt import ( + get_system_prompt as get_system_prompt, + get_user_prompt as get_user_prompt, +) diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/__init__.py b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/__init__.py new file mode 100644 index 000000000..052fe7c99 --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/__init__.py @@ -0,0 +1,2 @@ +from ._icl_examples import icl_examples as icl_examples +from ._types import ICLExample as ICLExample diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_1/__init__.py b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_1/__init__.py new file mode 100644 index 000000000..1f9f32ea5 --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_1/__init__.py @@ -0,0 +1 @@ +from ._example import example as example diff --git 
a/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_1/_example.py b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_1/_example.py new file mode 100644 index 000000000..9bb4e23da --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_1/_example.py @@ -0,0 +1,24 @@ +# ruff: noqa: W293 +from .._types import ICLExample + +constraint_requirement = """You must not use any uppercase letters""" + +validation_function = """def validate_input(input: str) -> bool: + \""" + Validates that the input contains only lowercase letters. + + Args: + input (str): The input to validate + + Returns: + bool: True if all characters are lowercase, False otherwise + \""" + try: + return input.islower() + except Exception: + return False""" + +example: ICLExample = { + "constraint_requirement": constraint_requirement, + "validation_function": validation_function, +} diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_2/__init__.py b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_2/__init__.py new file mode 100644 index 000000000..1f9f32ea5 --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_2/__init__.py @@ -0,0 +1 @@ +from ._example import example as example diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_2/_example.py b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_2/_example.py new file mode 100644 index 000000000..6e2d98fe0 --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_2/_example.py @@ -0,0 +1,31 @@ +# ruff: noqa: W293 +from .._types import ICLExample + +constraint_requirement = """The answer must be a JSON with the following keys: +1. "subject" +2.
"content\"""" + +validation_function = """import json + +def validate_input(input: str) -> bool: + \""" + Validates that the input is a JSON with required keys: subject and content. + + Args: + input (str): The input to validate + + Returns: + bool: True if JSON has required keys, False otherwise + \""" + try: + data = json.loads(input) + return isinstance(data, dict) and "subject" in data and "content" in data + except json.JSONDecodeError: + return False + except Exception: + return False""" + +example: ICLExample = { + "constraint_requirement": constraint_requirement, + "validation_function": validation_function, +} diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_3/__init__.py b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_3/__init__.py new file mode 100644 index 000000000..1f9f32ea5 --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_3/__init__.py @@ -0,0 +1 @@ +from ._example import example as example diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_3/_example.py b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_3/_example.py new file mode 100644 index 000000000..65070a6ed --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_3/_example.py @@ -0,0 +1,58 @@ +# ruff: noqa: W293 +from .._types import ICLExample + +constraint_requirement = "Return a list of requirements, using dash bullets (-), where each item begins with the relevant entity" + +validation_function = """def validate_input(input: str) -> bool: + \""" + Validates that the input is a list of requirements using dash bullets, + where each item begins with the relevant entity.
+ + Args: + input (str): The input to validate + + Returns: + bool: True if input follows the required format, False otherwise + \""" + try: + if not input or not isinstance(input, str): + return False + + lines = input.strip().split('\n') + + # Check if all lines are empty + if not any(line.strip() for line in lines): + return False + + for line in lines: + line = line.strip() + + # Skip empty lines + if not line: + continue + + # Check if line starts with a dash bullet + if not line.startswith('- '): + return False + + # Check if there's content after the dash bullet + content = line[2:].strip() # Remove '- ' prefix + if not content: + return False + + # Check if content has an entity (word) at the beginning + words = content.split() + if not words: + return False + + # Entity should be the first word - just check it exists + # We're not validating what constitutes a valid entity here + + return True + except Exception: + return False""" + +example: ICLExample = { + "constraint_requirement": constraint_requirement, + "validation_function": validation_function, +} diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_4/__init__.py b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_4/__init__.py new file mode 100644 index 000000000..1f9f32ea5 --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_4/__init__.py @@ -0,0 +1 @@ +from ._example import example as example diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_4/_example.py b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_4/_example.py new file mode 100644 index 000000000..f1af01ab1 --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_4/_example.py @@ -0,0 +1,31 @@ +# ruff: noqa: W293 +from .._types import ICLExample + +constraint_requirement 
= 'Avoid the words "daughter-in-law" and "grandson"' + +validation_function = """def validate_input(input: str) -> bool: + \""" + Validates that the input does not contain the words "daughter-in-law" and "grandson". + + Args: + input (str): The input to validate + + Returns: + bool: True if neither word is found, False otherwise + \""" + try: + if not input: + return False + + # Convert to lowercase for case-insensitive comparison + input_lower = input.lower() + + # Check if either forbidden word is present + return "daughter-in-law" not in input_lower and "grandson" not in input_lower + except Exception: + return False""" + +example: ICLExample = { + "constraint_requirement": constraint_requirement, + "validation_function": validation_function, +} diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_icl_examples.py b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_icl_examples.py new file mode 100644 index 000000000..c018d2e41 --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_icl_examples.py @@ -0,0 +1,5 @@ +from ._example_1 import example as example_1 +from ._example_2 import example as example_2 +from ._types import ICLExample + +icl_examples: list[ICLExample] = [example_1, example_2] diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_types.py b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_types.py new file mode 100644 index 000000000..bdd1f2372 --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_types.py @@ -0,0 +1,6 @@ +from typing import TypedDict + + +class ICLExample(TypedDict): + constraint_requirement: str + validation_function: str diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/_prompt.py b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_prompt.py new file mode 100644 index 
000000000..b324180fc --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_prompt.py @@ -0,0 +1,19 @@ +from pathlib import Path + +from jinja2 import Environment, FileSystemLoader + +from ._icl_examples import ICLExample, icl_examples as default_icl_examples + +this_file_dir = Path(__file__).resolve().parent + +environment = Environment(loader=FileSystemLoader(this_file_dir), autoescape=False) +system_template = environment.get_template("system_template.jinja2") +user_template = environment.get_template("user_template.jinja2") + + +def get_system_prompt(icl_examples: list[ICLExample] = default_icl_examples) -> str: + return system_template.render(icl_examples=icl_examples).strip() + + +def get_user_prompt(constraint_requirement: str) -> str: + return user_template.render(constraint_requirement=constraint_requirement).strip() diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/system_template.jinja2 b/cli/decompose/prompt_modules/validation_code_generator/_prompt/system_template.jinja2 new file mode 100644 index 000000000..7b414d0d5 --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/system_template.jinja2 @@ -0,0 +1,77 @@ +You are a Python developer specialized in writing validation functions based on natural language constraints or requirements. + +## Function Requirements + +You will be provided with a constraint/requirement inside the tags. +Your task is to write a Python function capable of validating the against a text input to your function. + +Your code must: +- Be a single Python function. +- Take exactly one string parameter (the text input to be validated). +- Return a boolean value (True if valid or False if invalid). +- Use only standard Python libraries. No third-party dependencies. +- Be deterministic and self-contained. +- If the constraint/requirement mentions data that was not provided, just return `False` (don't need to implement code). 
+ +## Output Format + +Your response must be structured as follows: +- Your Python function must be inside the tags. +- The function signature must be: `def validate_input(input: str) -> bool:`. +- Always enclose your code on a "try..except Exception:" clause and return `False` in case of exceptions. + +## Examples + +Here are some complete examples showing constraints/requirements and their corresponding validation functions: + +{% for item in icl_examples -%} + + +{{ item["constraint_requirement"] }} + + +{{ item["validation_function"] }} + + +All tags are closed and my assignment is finished. + + +{% endfor -%} +That concludes the complete examples of your assignment. + +## Additional Instructions + +When writing your answer, follow these additional instructions below to be successful: +1. The function signature must be: `def validate_input(input: str) -> bool:` +2. The function must handle `None` and empty string inputs by returning `False` +3. Use appropriate Python standard library modules (re, json, etc.) as needed +4. Ensure the function is simple and doesn't have unnecessary complexity +5. The validation logic should directly correspond to the provided constraint/requirement + +## Common Validation Patterns + +Here are some typical validation scenarios you might encounter: + +1. Character limit validation: + - Check if the answer has a specific number of characters or words + - Example: "The answer must be less than 100 characters" + +2. Format validation: + - Validate JSON structure, XML format, or other structured data + - Example: "The answer must be valid JSON with 'name' and 'age' fields" + +3. Content validation: + - Check for specific content patterns like uppercase letters, numbers, etc. + - Example: "The answer must contain at least one uppercase letter" + +4. 
Pattern matching: + - Use regex to validate specific patterns + - Example: "The answer must be in the format 'Name: [text], Age: [number]'" + +Important: Use only standard Python libraries that don't require additional installation. +Important: Your function must be deterministic and produce consistent results. +Important: You must always close the tags that were opened by using their corresponding close tag. You will be penalized if you don't close all tags. + +Very Important: After closing all tags, finish your assignment by writing (without the double quotes): "All tags are closed and my assignment is finished.". +Very Important: Always enclose your code on a "try..except Exception:" clause and return `False` in case of exceptions. +Very Important: If the constraint/requirement is not clear, or missing information, just return `False`. diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/user_template.jinja2 b/cli/decompose/prompt_modules/validation_code_generator/_prompt/user_template.jinja2 new file mode 100644 index 000000000..867af52e4 --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/user_template.jinja2 @@ -0,0 +1,9 @@ +## Requirements: +- The function name must be: "validate_input" +- The function signature must be: `def validate_input(input: str) -> bool:` + +Now, here is the constraint/requirement for you to write a Python validation function: + + +{{ constraint_requirement }} + diff --git a/cli/decompose/prompt_modules/validation_code_generator/_validation_code_generator.py b/cli/decompose/prompt_modules/validation_code_generator/_validation_code_generator.py new file mode 100644 index 000000000..9ebe35a6e --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_validation_code_generator.py @@ -0,0 +1,113 @@ +import re +from collections.abc import Callable +from typing import Any, TypeVar, final + +from mellea import MelleaSession +from mellea.backends.types import ModelOption 
+from mellea.stdlib.chat import Message + +from .._prompt_modules import PromptModule, PromptModuleString +from ._exceptions import BackendGenerationError, TagExtractionError +from ._prompt import get_system_prompt, get_user_prompt + +T = TypeVar("T") + +RE_VALIDATION_FUNCTION = re.compile( + r"(.+?)", flags=re.IGNORECASE | re.DOTALL +) + + +@final +class _ValidationCodeGenerator(PromptModule): + @staticmethod + def _default_parser(generated_str: str) -> str: + r"""Default parser of the `validation_code_generator` module. + + _**Disclaimer**: This is a LLM-prompting module, so the results will vary depending + on the size and capabilities of the LLM used. The results are also not guaranteed, so + take a look at this module's Exceptions and plan for unreliable results._ + + Args: + generated_str (`str`): The LLM's answer to be parsed. + + Returns: + str: The extracted Python validation function code. + + Raises: + TagExtractionError: An error occurred trying to extract content from the + generated output. The LLM probably failed to open and close + the \ tags. + """ + validation_function_match = re.search(RE_VALIDATION_FUNCTION, generated_str) + + validation_function_str: str | None = ( + validation_function_match.group(1).strip() + if validation_function_match + else None + ) + + if validation_function_str is None: + raise TagExtractionError( + 'LLM failed to generate correct tags for extraction: ""' + ) + + return validation_function_str + + def generate( + self, + mellea_session: MelleaSession, + input_str: str | None, + max_new_tokens: int = 4096, + parser: Callable[[str], T] = _default_parser, # type: ignore[assignment] + # About the mypy ignore above: https://github.com/python/mypy/issues/3737 + **kwargs: dict[str, Any], + ) -> PromptModuleString[T]: + """Generates a Python validation function based on a provided constraint/requirement. + + Args: + mellea_session (`MelleaSession`): A mellea session with a backend. 
+ input_str (`str`): Natural language constraint/requirement to generate validation code for. + prompt (`str`, optional): The original task prompt for context. Defaults to None. + max_new_tokens (`int`, optional): Maximum tokens to generate. + Defaults to `4096`. + parser (`Callable[[str], Any]`, optional): A string parsing function. + Defaults to `_ValidationCodeGenerator._default_parser`. + + Returns: + PromptModuleString: A `PromptModuleString` class containing the generated output. + + The `PromptModuleString` class behaves like a `str`, but with an additional `parse()` method + to execute the parsing function passed in the `parser` argument of + this method (the `parser` argument defaults to `_ValidationCodeGenerator._default_parser`). + + Raises: + BackendGenerationError: Some error occurred during the LLM generation call. + """ + assert input_str is not None, 'This module requires the "input_str" argument' + + system_prompt = get_system_prompt() + user_prompt = get_user_prompt(constraint_requirement=input_str) + + action = Message("user", user_prompt) + + try: + gen_result = mellea_session.act( + action=action, + model_options={ + ModelOption.SYSTEM_PROMPT: system_prompt, + ModelOption.TEMPERATURE: 0, + ModelOption.MAX_NEW_TOKENS: max_new_tokens, + }, + ).value + except Exception as e: + raise BackendGenerationError(f"LLM generation failed: {e}") + + if gen_result is None: + raise BackendGenerationError( + "LLM generation failed: value attribute is None" + ) + + return PromptModuleString(gen_result, parser) + + +validation_code_generator = _ValidationCodeGenerator() diff --git a/cli/decompose/prompt_modules/validation_decision/_prompt/system_template.jinja2 b/cli/decompose/prompt_modules/validation_decision/_prompt/system_template.jinja2 index 8e5cb00fb..fc0be317c 100644 --- a/cli/decompose/prompt_modules/validation_decision/_prompt/system_template.jinja2 +++ b/cli/decompose/prompt_modules/validation_decision/_prompt/system_template.jinja2 @@ -1,14 +1,14 @@ 
-You are a Validation Decision Expert specialized in determining whether prompt requirements can be validated deterministically by writing Python code or if they're best suited for LLM validation. +You are a Validation Decision Expert specialized in determining whether prompt requirements can be validated deterministically, by writing Python code, or if they're best suited for LLM validation. ## Decision Criteria ### Code Validation A requirement should be classified as "code" if it: -- Can be checked with deterministic algorithms +- Can be validated deterministically - Involves structured data validation (e.g., JSON schema, regex patterns) -- Requires mathematical computations or logical operations - Can be validated with simple string operations - Has clearly defined success/failure criteria that can be programmatically determined +- Is a straightforward requirement to validate the task output ### LLM Validation A requirement should be classified as "llm" if it: @@ -59,6 +59,7 @@ When writing your answer, follow these additional instructions below to be succe 3. After closing all tags, finish your assignment by writing (without the double quotes): "All tags are closed and my assignment is finished." Important: You must always close the tags that were opened by using their corresponding close tag. You will be penalized if you don't close all tags. +Important: The "code" classification is usually for validating the task output format or other deterministic requirements. 
Your response must contain exactly one of these two words inside tags: - code From 6b7f53c45c92a2c834664dd30079769753c4a2b6 Mon Sep 17 00:00:00 2001 From: Bobby Date: Mon, 12 Jan 2026 16:27:39 +0000 Subject: [PATCH 10/13] fix: constraint extractor --- .../constraint_extractor/_constraint_extractor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/decompose/prompt_modules/constraint_extractor/_constraint_extractor.py b/cli/decompose/prompt_modules/constraint_extractor/_constraint_extractor.py index 8e7d9bd2f..c86a569e3 100644 --- a/cli/decompose/prompt_modules/constraint_extractor/_constraint_extractor.py +++ b/cli/decompose/prompt_modules/constraint_extractor/_constraint_extractor.py @@ -140,4 +140,4 @@ def generate( # type: ignore[override] return PromptModuleString(gen_result, parser) -constraint_extractor = _ConstraintExtractor() +constraint_extractor = _ConstraintExtractor() \ No newline at end of file From afc4ca95c968ade1f3f841e1a678dc475adffb9e Mon Sep 17 00:00:00 2001 From: csbobby Date: Thu, 26 Feb 2026 16:00:27 +0000 Subject: [PATCH 11/13] clean: pre-commit clean --- cli/decompose/decompose.py | 9 ++------- .../constraint_extractor/_constraint_extractor.py | 2 +- .../_validation_code_generator.py | 4 ++-- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/cli/decompose/decompose.py b/cli/decompose/decompose.py index ae22f5433..6441488d9 100644 --- a/cli/decompose/decompose.py +++ b/cli/decompose/decompose.py @@ -1,5 +1,6 @@ import json import keyword +import re import shutil from enum import Enum from graphlib import TopologicalSorter @@ -17,6 +18,7 @@ class DecompVersion(str, Enum): latest = "latest" v1 = "v1" v2 = "v2" + # v3 = "v3" this_file_dir = Path(__file__).resolve().parent @@ -331,13 +333,6 @@ def run( + "\n" ) except Exception: - # created_json = Path(out_dir / f"{out_name}.json") - # created_py = Path(out_dir / f"{out_name}.py") - - # if created_json.exists() and created_json.is_file(): - # 
created_json.unlink() - # if created_py.exists() and created_py.is_file(): - # created_py.unlink() decomp_dir = out_dir / out_name if decomp_dir.exists() and decomp_dir.is_dir(): diff --git a/cli/decompose/prompt_modules/constraint_extractor/_constraint_extractor.py b/cli/decompose/prompt_modules/constraint_extractor/_constraint_extractor.py index c86a569e3..8e7d9bd2f 100644 --- a/cli/decompose/prompt_modules/constraint_extractor/_constraint_extractor.py +++ b/cli/decompose/prompt_modules/constraint_extractor/_constraint_extractor.py @@ -140,4 +140,4 @@ def generate( # type: ignore[override] return PromptModuleString(gen_result, parser) -constraint_extractor = _ConstraintExtractor() \ No newline at end of file +constraint_extractor = _ConstraintExtractor() diff --git a/cli/decompose/prompt_modules/validation_code_generator/_validation_code_generator.py b/cli/decompose/prompt_modules/validation_code_generator/_validation_code_generator.py index 9ebe35a6e..55949c963 100644 --- a/cli/decompose/prompt_modules/validation_code_generator/_validation_code_generator.py +++ b/cli/decompose/prompt_modules/validation_code_generator/_validation_code_generator.py @@ -3,8 +3,8 @@ from typing import Any, TypeVar, final from mellea import MelleaSession -from mellea.backends.types import ModelOption -from mellea.stdlib.chat import Message +from mellea.backends import ModelOption +from mellea.stdlib.components.chat import Message from .._prompt_modules import PromptModule, PromptModuleString from ._exceptions import BackendGenerationError, TagExtractionError From 048663abf7012e83e954ce76837e68d145a5b27e Mon Sep 17 00:00:00 2001 From: csbobby Date: Thu, 26 Feb 2026 16:00:56 +0000 Subject: [PATCH 12/13] fix: test --- test/decompose/test_decompose.py | 50 ++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/test/decompose/test_decompose.py b/test/decompose/test_decompose.py index a04f92b25..c51683fee 100644 --- a/test/decompose/test_decompose.py +++ 
b/test/decompose/test_decompose.py @@ -23,6 +23,7 @@ def test_no_dependencies(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -31,6 +32,7 @@ def test_no_dependencies(self): { "subtask": "Task B", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -39,6 +41,7 @@ def test_no_dependencies(self): { "subtask": "Task C", "tag": "TASK_C", + "general_instructions": "", "constraints": [], "prompt_template": "Do C", "input_vars_required": [], @@ -60,6 +63,7 @@ def test_simple_linear_dependency(self): { "subtask": "Task C", "tag": "TASK_C", + "general_instructions": "", "constraints": [], "prompt_template": "Do C", "input_vars_required": [], @@ -68,6 +72,7 @@ def test_simple_linear_dependency(self): { "subtask": "Task B", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -76,6 +81,7 @@ def test_simple_linear_dependency(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -97,6 +103,7 @@ def test_diamond_dependency(self): { "subtask": "Task D", "tag": "TASK_D", + "general_instructions": "", "constraints": [], "prompt_template": "Do D", "input_vars_required": [], @@ -105,6 +112,7 @@ def test_diamond_dependency(self): { "subtask": "Task C", "tag": "TASK_C", + "general_instructions": "", "constraints": [], "prompt_template": "Do C", "input_vars_required": [], @@ -113,6 +121,7 @@ def test_diamond_dependency(self): { "subtask": "Task B", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -121,6 +130,7 @@ def test_diamond_dependency(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", 
"input_vars_required": [], @@ -142,6 +152,7 @@ def test_case_insensitive_dependencies(self): { "subtask": "Task B", "tag": "task_b", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -150,6 +161,7 @@ def test_case_insensitive_dependencies(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -170,6 +182,7 @@ def test_multiple_independent_chains(self): { "subtask": "Task B", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -178,6 +191,7 @@ def test_multiple_independent_chains(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -187,6 +201,7 @@ def test_multiple_independent_chains(self): { "subtask": "Task D", "tag": "TASK_D", + "general_instructions": "", "constraints": [], "prompt_template": "Do D", "input_vars_required": [], @@ -195,6 +210,7 @@ def test_multiple_independent_chains(self): { "subtask": "Task C", "tag": "TASK_C", + "general_instructions": "", "constraints": [], "prompt_template": "Do C", "input_vars_required": [], @@ -219,6 +235,7 @@ def test_nonexistent_dependency_ignored(self): { "subtask": "Task B", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -230,6 +247,7 @@ def test_nonexistent_dependency_ignored(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -250,6 +268,7 @@ def test_renumbers_subtask_descriptions(self): { "subtask": "3. Do task C", "tag": "TASK_C", + "general_instructions": "", "constraints": [], "prompt_template": "Do C", "input_vars_required": [], @@ -258,6 +277,7 @@ def test_renumbers_subtask_descriptions(self): { "subtask": "2. 
Do task B", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -266,6 +286,7 @@ def test_renumbers_subtask_descriptions(self): { "subtask": "1. Do task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -287,6 +308,7 @@ def test_renumbers_only_numbered_subtasks(self): { "subtask": "2. Numbered task B", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -295,6 +317,7 @@ def test_renumbers_only_numbered_subtasks(self): { "subtask": "Unnumbered task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -315,6 +338,7 @@ def test_renumbers_with_complex_reordering(self): { "subtask": "4. Final task", "tag": "TASK_D", + "general_instructions": "", "constraints": [], "prompt_template": "Do D", "input_vars_required": [], @@ -323,6 +347,7 @@ def test_renumbers_with_complex_reordering(self): { "subtask": "3. Third task", "tag": "TASK_C", + "general_instructions": "", "constraints": [], "prompt_template": "Do C", "input_vars_required": [], @@ -331,6 +356,7 @@ def test_renumbers_with_complex_reordering(self): { "subtask": "2. Second task", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -339,6 +365,7 @@ def test_renumbers_with_complex_reordering(self): { "subtask": "1. 
First task", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -366,6 +393,7 @@ def test_circular_dependency_two_nodes(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -374,6 +402,7 @@ def test_circular_dependency_two_nodes(self): { "subtask": "Task B", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -390,6 +419,7 @@ def test_circular_dependency_three_nodes(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -398,6 +428,7 @@ def test_circular_dependency_three_nodes(self): { "subtask": "Task B", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -406,6 +437,7 @@ def test_circular_dependency_three_nodes(self): { "subtask": "Task C", "tag": "TASK_C", + "general_instructions": "", "constraints": [], "prompt_template": "Do C", "input_vars_required": [], @@ -422,6 +454,7 @@ def test_self_dependency(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -459,6 +492,7 @@ def test_no_input_vars_no_dependencies(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -482,6 +516,7 @@ def test_valid_input_vars(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A with {{ USER_INPUT }}", "input_vars_required": ["USER_INPUT"], @@ -504,6 +539,7 @@ def test_case_insensitive_input_vars(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", 
"input_vars_required": ["user_input"], # lowercase @@ -527,6 +563,7 @@ def test_valid_dependencies_in_order(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -535,6 +572,7 @@ def test_valid_dependencies_in_order(self): { "subtask": "Task B", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -559,6 +597,7 @@ def test_dependencies_out_of_order_triggers_reorder(self): { "subtask": "Task B", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -567,6 +606,7 @@ def test_dependencies_out_of_order_triggers_reorder(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -591,6 +631,7 @@ def test_complex_reordering(self): { "subtask": "Task D", "tag": "TASK_D", + "general_instructions": "", "constraints": [], "prompt_template": "Do D", "input_vars_required": [], @@ -599,6 +640,7 @@ def test_complex_reordering(self): { "subtask": "Task C", "tag": "TASK_C", + "general_instructions": "", "constraints": [], "prompt_template": "Do C", "input_vars_required": [], @@ -607,6 +649,7 @@ def test_complex_reordering(self): { "subtask": "Task B", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -615,6 +658,7 @@ def test_complex_reordering(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -643,6 +687,7 @@ def test_missing_required_input_var(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": ["MISSING_VAR"], @@ -667,6 +712,7 @@ def 
test_missing_required_input_var_with_some_provided(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": ["VAR1", "VAR2"], @@ -691,6 +737,7 @@ def test_dependency_on_nonexistent_subtask(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -715,6 +762,7 @@ def test_circular_dependency_detected(self): { "subtask": "Task B", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -723,6 +771,7 @@ def test_circular_dependency_detected(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -744,6 +793,7 @@ def test_empty_input_var_list_treated_as_none(self): { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": ["REQUIRED_VAR"], From 464005b9dfef2ba6b033798fc2ef9dedd0a8bfe5 Mon Sep 17 00:00:00 2001 From: csbobby Date: Thu, 26 Feb 2026 17:05:09 +0000 Subject: [PATCH 13/13] revert: ci workflow --- .github/workflows/cd.yml | 12 +++++++++++- .github/workflows/ci.yml | 1 + .github/workflows/quality.yml | 14 ++++++-------- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index a2d45ad1c..a4389b0d3 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -2,6 +2,12 @@ name: "Run CD" on: workflow_dispatch: + inputs: + force_release: + description: 'Force release even if previous checks fail.' 
+ type: boolean + required: false + default: false env: UV_FROZEN: "1" @@ -36,7 +42,11 @@ jobs: run: uv run --no-sync semantic-release changelog --unreleased release: needs: [code-checks, pre-release-check] - if: needs.pre-release-check.outputs.TARGET_TAG_V != '' + # Run this job only if the `TARGET_TAG_V` is set AND (the previous jobs + # were successful OR we are forcing the release). + if: >- + ${{ needs.pre-release-check.outputs.TARGET_TAG_V != '' && + ( success() || inputs.force_release ) }} environment: auto-release runs-on: ubuntu-latest concurrency: release diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 825ebbd0f..4fb08f319 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -3,6 +3,7 @@ name: "Run CI" on: pull_request: types: [opened, reopened, synchronize] + merge_group: jobs: code-checks: diff --git a/.github/workflows/quality.yml b/.github/workflows/quality.yml index caffd2f4a..2e343d4f2 100644 --- a/.github/workflows/quality.yml +++ b/.github/workflows/quality.yml @@ -3,10 +3,9 @@ name: Verify Code Quality on: workflow_call: - concurrency: - group: ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.event.pull_request.number || github.ref_name }} - cancel-in-progress: true + group: ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.event.pull_request.number || github.ref_name }} + cancel-in-progress: true env: CICD: 1 @@ -15,10 +14,10 @@ env: jobs: quality: runs-on: ubuntu-latest - timeout-minutes: 90 # TODO: need to reduce this after we figure out our testing strategy. + timeout-minutes: 90 # TODO: need to reduce this after we figure out our testing strategy. strategy: matrix: - python-version: ['3.10', '3.11', '3.12'] # Need to add 3.13 once we resolve outlines issues. + python-version: ["3.10", "3.11", "3.12"] # Need to add 3.13 once we resolve outlines issues. 
steps: - uses: actions/checkout@v4 - name: Free disk space @@ -39,7 +38,7 @@ jobs: - name: Check style and run tests run: pre-commit run --all-files - name: Send failure message pre-commit - if: failure() # This step will only run if a previous step failed + if: failure() # This step will only run if a previous step failed run: echo "The quality verification failed. Please run precommit " - name: Install Ollama run: curl -fsSL https://ollama.com/install.sh | sh @@ -52,6 +51,5 @@ jobs: - name: Run Tests run: uv run -m pytest -v test - name: Send failure message tests - if: failure() # This step will only run if a previous step failed + if: failure() # This step will only run if a previous step failed run: echo "Tests failed. Please verify that tests are working locally." -