apify
diff --git a/eslint.config.mjs
Lines changed: 3 additions & 1 deletion b/eslint.config.mjs
Lines changed: 3 additions & 1 deletion
diff --git a/evals/config.ts
Lines changed: 14 additions & 11 deletions b/evals/config.ts
Lines changed: 14 additions & 11 deletions
diff --git a/evals/create-dataset.ts
Lines changed: 2 additions & 2 deletions b/evals/create-dataset.ts
Lines changed: 2 additions & 2 deletions
diff --git a/evals/evaluation-utils.ts
Lines changed: 15 additions & 56 deletions b/evals/evaluation-utils.ts
Lines changed: 15 additions & 56 deletions
diff --git a/evals/run-evaluation.ts
Lines changed: 2 additions & 2 deletions b/evals/run-evaluation.ts
Lines changed: 2 additions & 2 deletions
diff --git a/evals/shared/config.ts
Lines changed: 50 additions & 0 deletions b/evals/shared/config.ts
Lines changed: 50 additions & 0 deletions
diff --git a/evals/shared/openai-tools.ts
Lines changed: 39 additions & 0 deletions b/evals/shared/openai-tools.ts
Lines changed: 39 additions & 0 deletions
diff --git a/evals/shared/test-case-loader.ts
Lines changed: 86 additions & 0 deletions b/evals/shared/test-case-loader.ts
Lines changed: 86 additions & 0 deletions
@@ -24,7 +24,9 @@ export default [
         ignores: [
             '**/dist', // Build output directory
             '**/.venv', // Python virtual environment (if present)
-            'evals/**', // Evaluation scripts directory
+            'evals/*.ts', // Top-level evaluation scripts
+            'evals/*.md', // Documentation files
+            'evals/*.json', // Test case data files
         ],
     },
     // Apply the shared Apify TypeScript ESLint configuration
 
@@ -6,6 +6,9 @@ import { readFileSync } from 'node:fs';
 import { dirname, join } from 'node:path';
 import { fileURLToPath } from 'node:url';
 
+// Re-export shared config
+export { OPENROUTER_CONFIG, sanitizeHeaderValue, validateEnvVars, getRequiredEnvVars } from './shared/config.js';
+
 // Read version from test-cases.json
 function getTestCasesVersion(): string {
     const currentFilename = fileURLToPath(import.meta.url);
@@ -156,24 +159,24 @@ The response must be exactly:
 Decision: either "correct" or "incorrect".
 Explanation: brief explanation of the decision.
 `
-export function getRequiredEnvVars(): Record<string, string | undefined> {
+/**
+ * Get required environment variables for Phoenix-based evaluations
+ * Extends shared config with Phoenix-specific variables
+ * Note: OPENROUTER_BASE_URL is optional (defaults to https://openrouter.ai/api/v1)
+ */
+export function getPhoenixEnvVars(): Record<string, string | undefined> {
     return {
         PHOENIX_BASE_URL: process.env.PHOENIX_BASE_URL,
         PHOENIX_API_KEY: process.env.PHOENIX_API_KEY,
         OPENROUTER_API_KEY: process.env.OPENROUTER_API_KEY,
-        OPENROUTER_BASE_URL: process.env.OPENROUTER_BASE_URL,
     };
 }
 
-// Removes newlines and trims whitespace. Useful for Authorization header values
-// because CI secrets sometimes include trailing newlines or quotes.
-export function sanitizeHeaderValue(value?: string): string | undefined {
-    if (value == null) return value;
-    return value.replace(/[\r\n]/g, '').trim().replace(/^"|"$/g, '');
-}
-
-export function validateEnvVars(): boolean {
-    const envVars = getRequiredEnvVars();
+/**
+ * Validate Phoenix-specific environment variables
+ */
+export function validatePhoenixEnvVars(): boolean {
+    const envVars = getPhoenixEnvVars();
     const missing = Object.entries(envVars)
         .filter(([, value]) => !value)
         .map(([key]) => key);
 
@@ -14,7 +14,7 @@ import { hideBin } from 'yargs/helpers';
 
 import log from '@apify/log';
 
-import { sanitizeHeaderValue, validateEnvVars } from './config.js';
+import { sanitizeHeaderValue, validatePhoenixEnvVars } from './config.js';
 import { loadTestCases, filterByCategory, filterById, type TestCase } from './evaluation-utils.js';
 
 // Set log level to debug
@@ -81,7 +81,7 @@ async function createDatasetFromTestCases(
     log.info('Creating Phoenix dataset from test cases...');
 
     // Validate environment variables
-    if (!validateEnvVars()) {
+    if (!validatePhoenixEnvVars()) {
         process.exit(1);
     }
 
 
@@ -2,10 +2,6 @@
  * Shared evaluation utilities extracted from run-evaluation.ts
  */
 
-import { readFileSync } from 'node:fs';
-import { dirname as pathDirname, join } from 'node:path';
-import { fileURLToPath } from 'node:url';
-
 import OpenAI from 'openai';
 import { createOpenAI } from '@ai-sdk/openai';
 import { asEvaluator } from '@arizeai/phoenix-client/experiments';
@@ -24,50 +20,24 @@ import {
     TEMPERATURE,
     sanitizeHeaderValue
 } from './config.js';
+import { loadTestCases as loadTestCasesShared, filterByCategory, filterById } from './shared/test-case-loader.js';
+import { transformToolsToOpenAIFormat } from './shared/openai-tools.js';
+import type { ToolSelectionTestCase, TestData } from './shared/types.js';
 
-type ExampleInputOnly = { input: Record<string, unknown>, metadata?: Record<string, unknown>, output?: never };
+// Re-export types for backwards compatibility
+export type TestCase = ToolSelectionTestCase;
+export type { TestData } from './shared/types.js';
 
-export type TestCase = {
-    id: string;
-    category: string;
-    query: string;
-    context?: string | string[];
-    expectedTools?: string[];
-    reference?: string;
-};
-
-export type TestData = {
-    version: string;
-    testCases: TestCase[];
-};
-
-// eslint-disable-next-line consistent-return
-export function loadTestCases(filePath: string): TestData {
-    const filename = fileURLToPath(import.meta.url);
-    const dirname = pathDirname(filename);
-    const testCasesPath = join(dirname, filePath);
-
-    try {
-        const fileContent = readFileSync(testCasesPath, 'utf-8');
-        return JSON.parse(fileContent) as TestData;
-    } catch {
-        log.error(`Error: Test cases file not found at ${testCasesPath}`);
-        process.exit(1);
-    }
-}
-
-export function filterByCategory(testCases: TestCase[], category: string): TestCase[] {
-    // Convert wildcard pattern to regex
-    const pattern = category.replace(/\*/g, '.*');
-    const regex = new RegExp(`^${pattern}$`);
-
-    return testCases.filter((testCase) => regex.test(testCase.category));
-}
+// Re-export shared functions for backwards compatibility
+export { filterByCategory, filterById } from './shared/test-case-loader.js';
 
-export function filterById(testCases: TestCase[], idPattern: string): TestCase[] {
-    const regex = new RegExp(idPattern);
+type ExampleInputOnly = { input: Record<string, unknown>, metadata?: Record<string, unknown>, output?: never };
 
-    return testCases.filter((testCase) => regex.test(testCase.id));
+/**
+ * Load test cases from a JSON file (wrapper around shared function)
+ */
+export function loadTestCases(filePath: string): TestData {
+    return loadTestCasesShared(filePath);
 }
 
 export async function loadTools(): Promise<ToolBase[]> {
@@ -76,22 +46,11 @@ export async function loadTools(): Promise<ToolBase[]> {
     return urlTools.map((t: ToolEntry) => getToolPublicFieldOnly(t)) as ToolBase[];
 }
 
-export function transformToolsToOpenAIFormat(tools: ToolBase[]): OpenAI.Chat.Completions.ChatCompletionTool[] {
-    return tools.map((tool) => ({
-        type: 'function',
-        function: {
-            name: tool.name,
-            description: tool.description,
-            parameters: tool.inputSchema as OpenAI.Chat.ChatCompletionTool['function']['parameters'],
-        },
-    }));
-}
-
 export function createOpenRouterTask(modelName: string, tools: ToolBase[]) {
     const toolsOpenAI = transformToolsToOpenAIFormat(tools);
 
     return async (example: ExampleInputOnly): Promise<{
-        tool_calls: Array<{ function?: { name?: string } }>;
+        tool_calls: OpenAI.Chat.Completions.ChatCompletionMessageToolCall[];
         llm_response: string;
         query: string;
         context: string;
 
@@ -28,7 +28,7 @@ import {
     EVALUATOR_NAMES,
     type EvaluatorName,
     sanitizeHeaderValue,
-    validateEnvVars
+    validatePhoenixEnvVars
 } from './config.js';
 
 type EvaluatorResult = {
@@ -202,7 +202,7 @@ function printResults(results: EvaluatorResult[]): void {
 async function main(datasetName: string): Promise<number> {
     log.info('Starting MCP tool calling evaluation');
 
-    if (!validateEnvVars()) {
+    if (!validatePhoenixEnvVars()) {
         return 1;
     }
 
 
@@ -0,0 +1,50 @@
+/**
+ * Shared configuration for evaluation systems
+ * Contains OpenRouter config, environment validation, and common utilities
+ */
+
+/**
+ * OpenRouter API configuration
+ * OPENROUTER_BASE_URL is optional and defaults to the standard OpenRouter API URL
+ */
+export const OPENROUTER_CONFIG = {
+    baseURL: process.env.OPENROUTER_BASE_URL || 'https://openrouter.ai/api/v1',
+    apiKey: process.env.OPENROUTER_API_KEY || '',
+};
+
+/**
+ * Get required environment variables
+ * Note: OPENROUTER_BASE_URL is optional (defaults to https://openrouter.ai/api/v1)
+ */
+export function getRequiredEnvVars(): Record<string, string | undefined> {
+    return {
+        OPENROUTER_API_KEY: process.env.OPENROUTER_API_KEY,
+    };
+}
+
+/**
+ * Removes newlines, trims whitespace, and strips a leading/trailing double quote. Useful for
+ * Authorization header values because CI secrets sometimes include trailing newlines or quotes.
+ */
+export function sanitizeHeaderValue(value?: string): string | undefined {
+    if (value == null) return value;
+    return value.replace(/[\r\n]/g, '').trim().replace(/^"|"$/g, '');
+}
+
+/**
+ * Validate that all required environment variables are present
+ */
+export function validateEnvVars(): boolean {
+    const envVars = getRequiredEnvVars();
+    const missing = Object.entries(envVars)
+        .filter(([, value]) => !value)
+        .map(([key]) => key);
+
+    if (missing.length > 0) {
+        // eslint-disable-next-line no-console
+        console.error(`Missing required environment variables: ${missing.join(', ')}`);
+        return false;
+    }
+
+    return true;
+}
@@ -0,0 +1,39 @@
+/**
+ * Convert tool definitions to OpenAI format
+ * Unified function for both MCP tools and internal ToolBase types
+ */
+
+import type OpenAI from 'openai';
+
+import type { McpTool } from './types.js';
+
+/**
+ * Generic tool interface that matches both ToolBase and McpTool
+ */
+type GenericTool = {
+    name: string;
+    description?: string;
+    inputSchema: Record<string, unknown>;
+}
+
+/**
+ * Convert tools to OpenAI Chat Completion format
+ * Works with both MCP tools and ToolBase from the server
+ */
+export function transformToolsToOpenAIFormat(tools: GenericTool[]): OpenAI.Chat.Completions.ChatCompletionTool[] {
+    return tools.map((tool) => ({
+        type: 'function' as const,
+        function: {
+            name: tool.name,
+            description: tool.description || '',
+            parameters: tool.inputSchema,
+        },
+    }));
+}
+
+/**
+ * Alias for MCP-specific usage (keeps backwards compatibility)
+ */
+export function mcpToolsToOpenAiTools(mcpTools: McpTool[]): OpenAI.Chat.Completions.ChatCompletionTool[] {
+    return transformToolsToOpenAIFormat(mcpTools);
+}
@@ -0,0 +1,86 @@
+/**
+ * Shared test case loading and filtering utilities
+ */
+
+import { readFileSync } from 'node:fs';
+import { dirname as pathDirname, isAbsolute, join } from 'node:path';
+import { fileURLToPath } from 'node:url';
+
+import type { BaseTestCase, TestData } from './types.js';
+
+/**
+ * Load test cases from a JSON file; throws if it is unreadable or not valid JSON.
+ * Relative paths are resolved against the evals/ directory, not the caller or cwd.
+ *
+ * @param filePath - Path to test cases JSON file (relative to evals/ or absolute)
+ * @returns Parsed file content, cast to TestData without runtime validation
+ */
+export function loadTestCases(filePath: string): TestData {
+    const filename = fileURLToPath(import.meta.url);
+    const dirname = pathDirname(filename);
+
+    // isAbsolute also recognizes Windows drive/UNC paths, unlike a leading-'/' check
+    let testCasesPath: string;
+    if (isAbsolute(filePath)) {
+        testCasesPath = filePath;
+    } else {
+        // This module lives in evals/shared/, so evals/ is one level up
+        testCasesPath = join(dirname, '..', filePath);
+    }
+
+    const fileContent = readFileSync(testCasesPath, 'utf-8');
+    return JSON.parse(fileContent) as TestData;
+}
+
+/**
+ * Filter test cases by category
+ * Supports wildcard patterns (e.g., "search-actors*" matches "search-actors-1", "search-actors-2", etc.)
+ *
+ * @param testCases - Array of test cases to filter
+ * @param category - Category pattern (* is the only wildcard; other characters match literally)
+ * @returns Filtered test cases
+ */
+export function filterByCategory<T extends BaseTestCase>(testCases: T[], category: string): T[] {
+    // Escape regex metacharacters first so only * acts as a wildcard
+    const pattern = category.replace(/[.+?^${}()|[\]\\]/g, '\\$&').replace(/\*/g, '.*');
+    const regex = new RegExp(`^${pattern}$`);
+
+    return testCases.filter((testCase) => regex.test(testCase.category));
+}
+
+/**
+ * Filter test cases by ID using regex pattern
+ *
+ * @param testCases - Array of test cases to filter
+ * @param idPattern - Regex pattern to match against test case IDs
+ * @returns Filtered test cases
+ */
+export function filterById<T extends BaseTestCase>(testCases: T[], idPattern: string): T[] {
+    const regex = new RegExp(idPattern);
+    return testCases.filter((testCase) => regex.test(testCase.id));
+}
+
+/**
+ * Filter test cases by ID or category
+ * Generic filter function for workflow evaluations
+ *
+ * @param testCases - Array of test cases to filter
+ * @param options - Filter options (id and/or category)
+ * @returns Filtered test cases
+ */
+export function filterTestCases<T extends BaseTestCase>(
+    testCases: T[],
+    options: { id?: string; category?: string },
+): T[] {
+    let filtered = testCases;
+
+    if (options.id) {
+        filtered = filterById(filtered, options.id);
+    }
+
+    if (options.category) {
+        filtered = filterByCategory(filtered, options.category);
+    }
+
+    return filtered;
+}