Skip to content

Commit f9fb1aa

Browse files
committed
feat: add ability to define additional analysis prompts
Adds the `analysisPrompts` field to the environment config, which allows users to define their own prompts for analyzing the eval results. Example:
```
{
  // Usual config fields...
  analysisPrompts: [{name: 'Custom analysis', path: './custom-analysis.md'}]
}
```
1 parent e63c69d commit f9fb1aa

File tree

6 files changed

+106
-5
lines changed

6 files changed

+106
-5
lines changed

report-app/src/app/pages/report-viewer/report-viewer.html

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,18 @@ <h4>Repair System Prompt</h4>
225225
</expansion-panel>
226226
}
227227

228+
@if (report.details.summary.additionalAiAnalysis !== undefined) {
229+
@for (item of report.details.summary.additionalAiAnalysis; track item) {
230+
<expansion-panel size="large" class="root-section">
231+
<expansion-panel-header>
232+
<img src="gemini.webp" alt="Gemini Logo" height="30" width="30" />
233+
{{item.name}}
234+
</expansion-panel-header>
235+
<div [innerHTML]="item.summary"></div>
236+
</expansion-panel>
237+
}
238+
}
239+
228240
@if (missingDeps().length > 0) {
229241
<expansion-panel size="large" class="root-section">
230242
<expansion-panel-header>

runner/configuration/environment-config.ts

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import {
88
LocalExecutorConfig,
99
localExecutorConfigSchema,
1010
} from '../orchestration/executors/local-executor-config.js';
11+
import {RatingContextFilter, ReportContextFilter} from '../shared-interfaces.js';
1112

1213
export const environmentConfigSchema = z.object({
1314
/** Display name for the environment. */
@@ -98,6 +99,24 @@ export const environmentConfigSchema = z.object({
9899
* It's useful to ensure that the set of ratings hasn't changed between two runs.
99100
*/
100101
expectedRatingHash: z.string().optional(),
102+
103+
/**
104+
* Prompts to use for additional analysis of the eval results.
105+
*/
106+
analysisPrompts: z
107+
.array(
108+
z.object({
109+
name: z.string(),
110+
path: z.string(),
111+
reportsFilter: z
112+
.enum([ReportContextFilter.AllReports, ReportContextFilter.NonPerfectReports])
113+
.optional(),
114+
ratingsFilter: z
115+
.enum([RatingContextFilter.AllRatings, RatingContextFilter.NonPerfectRatings])
116+
.optional(),
117+
}),
118+
)
119+
.optional(),
101120
});
102121

103122
/**

runner/configuration/environment.ts

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ import {
77
FrameworkInfo,
88
MultiStepPromptDefinition,
99
PromptDefinition,
10+
RatingContextFilter,
11+
ReportContextFilter,
1012
RootPromptDefinition,
1113
} from '../shared-interfaces.js';
1214
import {UserFacingError} from '../utils/errors.js';
@@ -22,6 +24,13 @@ interface CategoryConfig {
2224
maxPoints: number;
2325
}
2426

27+
interface AnalysisPrompt {
28+
name: string;
29+
prompt: string;
30+
reportsFilter: ReportContextFilter;
31+
ratingsFilter: RatingContextFilter;
32+
}
33+
2534
/** Represents a single prompt evaluation environment. */
2635
export class Environment {
2736
/** Path at which the environment is defined. */
@@ -56,6 +65,9 @@ export class Environment {
5665
*/
5766
readonly ratingHash: string;
5867

68+
/** Additional analysis prompts defined by the user. */
69+
readonly analysisPrompts: AnalysisPrompt[];
70+
5971
/** Ratings configured at the environment level. */
6072
private readonly ratings: Rating[];
6173

@@ -88,6 +100,7 @@ export class Environment {
88100
this.ratingCategories = this.getRatingCategories(config);
89101
this.ratings = this.resolveRatings(config);
90102
this.ratingHash = this.getRatingHash(this.ratings, this.ratingCategories);
103+
this.analysisPrompts = this.resolveAnalysisPrompts(config);
91104
this.validateRatingHash(this.ratingHash, config);
92105
}
93106

@@ -262,7 +275,7 @@ export class Environment {
262275
isEditing: boolean,
263276
metadata: Metadata,
264277
): Promise<PromptDefinition<Metadata>> {
265-
const {result, contextFiles} = await this.renderEnvironmentPrompt(relativePath);
278+
const {result, contextFiles} = this.renderEnvironmentPrompt(relativePath);
266279

267280
return {
268281
name: name,
@@ -360,13 +373,13 @@ export class Environment {
360373
}
361374

362375
/** Renders a prompt from a path relative to the environment config. */
363-
private async renderEnvironmentPrompt(relativePath: string) {
376+
private renderEnvironmentPrompt(relativePath: string) {
364377
const path = resolve(this.rootPath, relativePath);
365378
return this.renderPrompt(readFileSync(path, 'utf8'), path);
366379
}
367380

368381
private async renderSystemPrompt(relativePath: string) {
369-
const result = await this.renderEnvironmentPrompt(relativePath);
382+
const result = this.renderEnvironmentPrompt(relativePath);
370383

371384
// Optional hooks for post processing environment system prompts. Useful for e.g.
372385
// supporting `@` references from Gemini CLI or inside g3.
@@ -446,4 +459,21 @@ export class Environment {
446459
);
447460
}
448461
}
462+
463+
private resolveAnalysisPrompts(config: EnvironmentConfig): AnalysisPrompt[] {
464+
const result: AnalysisPrompt[] = [];
465+
466+
config.analysisPrompts?.forEach(({name, path, reportsFilter, ratingsFilter}) => {
467+
const prompt = this.renderEnvironmentPrompt(path).result;
468+
469+
result.push({
470+
name,
471+
prompt,
472+
reportsFilter: reportsFilter ?? ReportContextFilter.NonPerfectReports,
473+
ratingsFilter: ratingsFilter ?? RatingContextFilter.NonPerfectRatings,
474+
});
475+
});
476+
477+
return result;
478+
}
449479
}

runner/orchestration/generate-summary.ts

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import {GenkitRunner} from '../codegen/genkit/genkit-runner.js';
22
import {Environment} from '../configuration/environment.js';
33
import {redX} from '../reporting/format.js';
4+
import {chatWithReportAI} from '../reporting/report-ai-chat.js';
45
import {summarizeReportWithAI} from '../reporting/report-ai-summary.js';
56
import {AssessmentResult, CompletionStats, RunSummary} from '../shared-interfaces.js';
67

@@ -43,7 +44,7 @@ export async function prepareSummary(
4344

4445
let aiSummary: string | undefined = undefined;
4546
if (generateAiSummaryLlm) {
46-
console.log(`✨ Generating AI summary for evaluation run..`);
47+
console.log(`✨ Generating AI summary for evaluation run...`);
4748
try {
4849
const result = await summarizeReportWithAI(generateAiSummaryLlm, abortSignal, assessments);
4950
inputTokens += result.usage.inputTokens;
@@ -61,6 +62,42 @@ export async function prepareSummary(
6162
}
6263
}
6364

65+
const additionalAiAnalysis: {name: string; summary: string}[] = [];
66+
if (generateAiSummaryLlm && env.analysisPrompts.length > 0) {
67+
console.log(`✨ Generating additional AI analysis...`);
68+
69+
await Promise.all(
70+
env.analysisPrompts.map(async config => {
71+
try {
72+
const result = await chatWithReportAI(
73+
generateAiSummaryLlm,
74+
config.prompt,
75+
abortSignal,
76+
assessments,
77+
[],
78+
model,
79+
{
80+
reportContextFilter: config.reportsFilter,
81+
ratingContextFilter: config.ratingsFilter,
82+
},
83+
undefined,
84+
);
85+
inputTokens += result.usage.inputTokens;
86+
outputTokens += result.usage.outputTokens;
87+
thinkingTokens += result.usage.thinkingTokens;
88+
totalTokens += result.usage.totalTokens;
89+
additionalAiAnalysis.push({name: config.name, summary: result.responseHtml});
90+
} catch (e) {
91+
console.log(`${redX()} Failed custom analysis called "${config.name}".`);
92+
93+
if (process.env.DEBUG === '1' && (e as Partial<Error>).stack) {
94+
console.error((e as Error).stack);
95+
}
96+
}
97+
}),
98+
);
99+
}
100+
64101
const executorInfo = await env.executor.getExecutorInfo?.();
65102

66103
return {
@@ -78,6 +115,7 @@ export async function prepareSummary(
78115
},
79116
},
80117
aiSummary,
118+
additionalAiAnalysis,
81119
completionStats: completionStats,
82120
usage: {
83121
inputTokens,

runner/reporting/report-ai-chat.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ ${serializeReportForPrompt(assessmentsToProcess, contextFilters)}
8787
includeThoughts: false,
8888
},
8989
timeout: {
90-
description: `Generating summary for report`,
90+
description: `Chatting with AI`,
9191
durationInMins: 3,
9292
},
9393
abortSignal,

runner/shared-interfaces.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -437,6 +437,8 @@ export interface RunSummary {
437437
completionStats?: CompletionStats;
438438
/** AI summary (as HTML code) of all assessments in this run/report. */
439439
aiSummary?: string;
440+
/** Additional user-defined AI analysis. */
441+
additionalAiAnalysis?: {name: string; summary: string}[];
440442
/**
441443
* Information about the runner that was used for the eval.
442444
* Optional since some older reports might not have it.

0 commit comments

Comments
 (0)