# audit.py
import asyncio
import json
import os

from libkernelbot.leaderboard_db import LeaderboardDB
from libkernelbot.utils import setup_logging

logger = setup_logging(__name__)

OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
AUDIT_MODEL = "openai/gpt-4o-mini"
AUDIT_TIMEOUT_SECONDS = int(os.getenv("AUDIT_TIMEOUT_SECONDS", "20"))
AUDIT_MAX_REFERENCE_CHARS = int(os.getenv("AUDIT_MAX_REFERENCE_CHARS", "120000"))
AUDIT_MAX_SUBMISSION_CHARS = int(os.getenv("AUDIT_MAX_SUBMISSION_CHARS", "60000"))

AUDIT_SYSTEM = """You are a code auditor for a GPU kernel programming competition.
You will be given reference/evaluation code and a user's submitted solution.
Your job is to determine if the submission is cheating. Common cheating patterns:
- Hardcoding expected outputs instead of computing them
- Bypassing the evaluation/correctness checks
- Exploiting the test harness rather than solving the problem
- Returning precomputed values without doing real computation
- Patching or monkey-patching evaluation functions
Respond with ONLY a JSON object (no markdown, no extra text):
{"is_cheating": true/false, "explanation": "brief explanation"}"""

def _truncate(content: str, max_chars: int) -> str:
    if len(content) <= max_chars:
        return content
    omitted = len(content) - max_chars
    return f"{content[:max_chars]}\n\n... [TRUNCATED {omitted} CHARS] ..."


def _parse_audit_result(raw_content: str) -> dict:
    content = raw_content.strip()
    if content.startswith("```"):
        lines = content.splitlines()
        if lines and lines[0].startswith("```"):
            lines = lines[1:]
        if lines and lines[-1].startswith("```"):
            lines = lines[:-1]
        content = "\n".join(lines).strip()
    if content.startswith("json"):
        content = content[4:].strip()
    parsed = json.loads(content)
    if not isinstance(parsed, dict):
        raise ValueError("Audit response is not a JSON object")
    return parsed
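
# Illustrative behaviour of _parse_audit_result (comment only, not executed):
#
#   _parse_audit_result('```json\n{"is_cheating": false, "explanation": "ok"}\n```')
#   -> {"is_cheating": False, "explanation": "ok"}
#
# The fence-stripping branch tolerates replies wrapped in Markdown code fences;
# anything that is not valid JSON makes json.loads raise, which the caller
# (audit_submission) catches and logs.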


async def audit_submission(submission_id: int, db: LeaderboardDB) -> dict | None:
    api_key = os.environ.get("OPENROUTER_API_KEY")
    if not api_key:
        logger.debug("OPENROUTER_API_KEY not set, skipping audit for submission %s", submission_id)
        return None
    try:
        import openai

        with db:
            submission = db.get_submission_by_id(submission_id)
            if submission is None:
                logger.warning("Submission %s not found for audit", submission_id)
                return None
            task_json = db.get_leaderboard_task_by_id(submission["leaderboard_id"])
            if not task_json:
                logger.warning("No task found for leaderboard %s", submission["leaderboard_id"])
                return None

        # Extract reference code from the task files
        reference_code = ""
        if isinstance(task_json, dict) and "files" in task_json:
            for filename, content in task_json["files"].items():
                reference_code += f"--- {filename} ---\n{content}\n\n"
        if not reference_code:
            reference_code = json.dumps(task_json, indent=2)

        submission_code = submission["code"]
        reference_code = _truncate(reference_code, AUDIT_MAX_REFERENCE_CHARS)
        submission_code = _truncate(submission_code, AUDIT_MAX_SUBMISSION_CHARS)

        user_msg = (
            "Reference/evaluation code:\n```\n"
            + reference_code
            + "\n```\n\nSubmitted code:\n```\n"
            + submission_code
            + "\n```"
        )

        client = openai.AsyncOpenAI(api_key=api_key, base_url=OPENROUTER_BASE_URL)
        async with asyncio.timeout(AUDIT_TIMEOUT_SECONDS):
            response = await client.chat.completions.create(
                model=AUDIT_MODEL,
                messages=[
                    {"role": "system", "content": AUDIT_SYSTEM},
                    {"role": "user", "content": user_msg},
                ],
                temperature=0,
                max_tokens=512,
            )

        result_text = response.choices[0].message.content
        if not result_text:
            logger.warning("Empty audit response for submission %s", submission_id)
            return None

        result = _parse_audit_result(result_text)
        is_cheating = bool(result.get("is_cheating", False))
        explanation = str(result.get("explanation", ""))

        with db:
            db.create_submission_audit(submission_id, is_cheating, explanation, AUDIT_MODEL)

        logger.info("Audit for submission %s: is_cheating=%s", submission_id, is_cheating)
        return {"is_cheating": is_cheating, "explanation": explanation, "model": AUDIT_MODEL}
    except Exception:
        logger.exception("Failed to audit submission %s", submission_id)
        return None
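

# Minimal usage sketch (illustrative only; the real bot wires this into its
# submission pipeline). How a LeaderboardDB is constructed depends on the
# deployment, so it is elided here:
#
#   db = LeaderboardDB(...)  # connection/config details are deployment-specific
#   result = asyncio.run(audit_submission(1234, db))
#   if result is not None and result["is_cheating"]:
#       print("flagged:", result["explanation"])
#
# audit_submission returns None rather than raising when the API key is missing,
# the submission or task cannot be found, or the audit request fails.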