From b842bc9b7e0ca9b4f7393864dfdcdbad3f1c8b87 Mon Sep 17 00:00:00 2001 From: Kylejeong2 Date: Wed, 9 Jul 2025 16:32:55 -1000 Subject: [PATCH 1/2] v0 evals --- evals/README.md | 348 ++++++++++++++++++++++++++++ evals/env.example | 27 +++ evals/mcp-eval-advanced.config.json | 212 +++++++++++++++++ evals/mcp-eval-minimal.config.json | 77 ++++++ evals/mcp-eval.config.json | 183 +++++++++++++++ evals/run-evals.ts | 314 +++++++++++++++++++++++++ package.json | 13 +- pnpm-lock.yaml | 9 + pnpm-workspace.yaml | 5 +- src/tools/multiSession.ts | 2 +- src/tools/session.ts | 2 +- tests/.gitkeep | 0 12 files changed, 1188 insertions(+), 4 deletions(-) create mode 100644 evals/README.md create mode 100644 evals/env.example create mode 100644 evals/mcp-eval-advanced.config.json create mode 100644 evals/mcp-eval-minimal.config.json create mode 100644 evals/mcp-eval.config.json create mode 100644 evals/run-evals.ts delete mode 100644 tests/.gitkeep diff --git a/evals/README.md b/evals/README.md new file mode 100644 index 0000000..45cbfe1 --- /dev/null +++ b/evals/README.md @@ -0,0 +1,348 @@ +# Browserbase MCP Server Evaluation Tests + +This directory contains comprehensive evaluation tests for the Browserbase MCP Server using [MCPVals](https://github.com/modelcontextprotocol/mcpvals), a testing framework that uses Claude to autonomously execute test workflows based on natural language descriptions. + +## Quick Start + +1. **Install dependencies:** + + ```bash + npm run test:install + ``` + +2. **Set up environment variables:** + + ```bash + export BROWSERBASE_API_KEY="your_api_key_here" + export BROWSERBASE_PROJECT_ID="your_project_id_here" + export ANTHROPIC_API_KEY="your_anthropic_key_here" + ``` + +3. **Run basic tests:** + ```bash + npm test + ``` + +## Test Configurations + +We provide three levels of testing: + +### 1. 
Minimal Tests (`mcp-eval-minimal.config.json`) + +- **Purpose**: Quick smoke tests to verify basic functionality +- **Duration**: ~2-3 minutes +- **Tests**: 3 workflows covering navigation, extraction, and multi-session basics +- **Usage**: `npm run test:minimal` + +### 2. Standard Tests (`mcp-eval.config.json`) + +- **Purpose**: Comprehensive functionality testing +- **Duration**: ~5-10 minutes +- **Tests**: 7 workflows covering all major features +- **Usage**: `npm test` + +### 3. Advanced Tests (`mcp-eval-advanced.config.json`) + +- **Purpose**: Complex scenarios with LLM judge evaluation +- **Duration**: ~10-15 minutes +- **Tests**: 6 workflows with subjective quality assessment +- **Usage**: `npm run test:advanced` (requires `OPENAI_API_KEY`) + +## Available Scripts + +| Script | Description | +| ----------------------- | --------------------------------- | +| `npm test` | Run standard evaluation tests | +| `npm run test:minimal` | Run minimal smoke tests | +| `npm run test:advanced` | Run advanced tests with LLM judge | +| `npm run test:all` | Run all test suites | +| `npm run test:debug` | Run tests with debug output | +| `npm run test:json` | Run tests with JSON output | +| `npm run test:install` | Install test dependencies | +| `npm run test:runner` | Direct access to test runner | + +## Understanding the Test Framework + +### How MCPVals Works + +MCPVals uses Claude to autonomously execute test workflows: + +1. **Natural Language Instructions**: Tests are written as natural language prompts +2. **Autonomous Execution**: Claude examines available MCP tools and plans execution +3. **Tool Invocation**: Claude calls the appropriate MCP tools to accomplish tasks +4. 
**Deterministic Evaluation**: Results are evaluated against expected outcomes + +### Test Structure + +Each test workflow contains: + +```json +{ + "name": "test-name", + "description": "What this test validates", + "steps": [ + { + "user": "Natural language instruction", + "expectedState": "Expected substring in output" + } + ], + "expectTools": ["list", "of", "expected", "tools"] +} +``` + +### Evaluation Metrics + +Each test is evaluated on three metrics: + +1. **End-to-End Success** (0-100%): Did the workflow achieve the expected final state? +2. **Tool Invocation Order** (0-100%): Were the expected tools called in the correct sequence? +3. **Tool Call Health** (0-100%): Did all tool calls complete successfully without errors? + +**Overall Score** = Average of all three metrics + +## Test Workflows + +### Minimal Test Suite + +| Workflow | Description | Expected Tools | +| -------------------------- | ------------------------ | ------------------------------------------------- | +| `smoke-test-navigation` | Basic browser navigation | session_create, navigate, session_close | +| `smoke-test-extraction` | Basic content extraction | session_create, navigate, extract, session_close | +| `smoke-test-multi-session` | Multi-session management | multi_session_create, session_list, session_close | + +### Standard Test Suite + +| Workflow | Description | Key Features | +| --------------------------- | -------------------------- | ---------------------------- | +| `basic-navigation-test` | Navigation to Google | Basic browser control | +| `search-and-extract-test` | Search and extract results | Form interaction, extraction | +| `observe-and-interact-test` | Element observation | DOM inspection | +| `screenshot-test` | Screenshot capture | Visual documentation | +| `multi-session-test` | Parallel browser sessions | Multi-session management | +| `form-interaction-test` | Form filling | Input handling | +| `error-handling-test` | Error scenarios | Error recovery | + +### 
Advanced Test Suite + +| Workflow | Description | LLM Judge | +| ---------------------------- | --------------------------- | --------- | +| `e-commerce-workflow` | Realistic browsing patterns | ✓ | +| `form-interaction-workflow` | Complex form handling | ✓ | +| `dynamic-content-handling` | JavaScript content | ✓ | +| `multi-session-workflow` | Advanced multi-session | ✓ | +| `error-recovery-workflow` | Error handling & recovery | ✓ | +| `comprehensive-feature-test` | All features combined | ✓ | + +## Environment Setup + +### Required Environment Variables + +Copy the example environment file and update with your credentials: + +```bash +cp evals/env.example .env +# Edit .env with your actual API keys +``` + +Or set the environment variables directly: + +```bash +# Browserbase credentials (required) +export BROWSERBASE_API_KEY="bb_api_key_..." +export BROWSERBASE_PROJECT_ID="bb_project_id_..." + +# Anthropic API key (required for Claude execution) +export ANTHROPIC_API_KEY="sk-ant-..." + +# OpenAI API key (required for LLM judge) +export OPENAI_API_KEY="sk-..." 
+``` + +### Optional Configuration + +You can override placeholder values in the config files by setting environment variables: + +```bash +# Override config placeholders +export BROWSERBASE_API_KEY="your_real_key" +export BROWSERBASE_PROJECT_ID="your_real_project" +``` + +## Running Tests + +### Command Line Interface + +```bash +# Basic usage +npm test + +# With options +npm run test:debug # Enable debug output +npm run test:json # Output results as JSON +npm run test:minimal # Run minimal tests +npm run test:advanced # Run advanced tests with LLM judge +npm run test:all # Run all test suites +``` + +### TypeScript Runner + +```bash +# Using the TypeScript runner directly +npm run test:runner run --config evals/mcp-eval.config.json --debug +``` + +## Interpreting Results + +### Console Output + +``` +✓ basic-navigation-test PASSED (100%) + ✓ End-to-End Success: 100% + ✓ Tool Invocation Order: 100% + ✓ Tool Call Health: 100% + +✗ search-and-extract-test FAILED (67%) + ✓ End-to-End Success: 100% + ✗ Tool Invocation Order: 67% (2/3 tools in correct order) + ✓ Tool Call Health: 100% +``` + +### Understanding Failures + +Common failure patterns: + +1. **Tool Order Issues**: Expected tools not called in sequence +2. **Missing Tools**: Expected tools not invoked +3. **Tool Errors**: Tools returning errors or timeouts +4. **Wrong Output**: Expected state not found in results + +## Troubleshooting + +### Common Issues + +1. **Missing Dependencies** + + ```bash + npm run test:install + ``` + +2. **Environment Variables Not Set** + + ```bash + # Check if variables are set + echo $BROWSERBASE_API_KEY + echo $BROWSERBASE_PROJECT_ID + echo $ANTHROPIC_API_KEY + ``` + +3. **Timeout Issues** + - Increase timeout in config files + - Check network connectivity + - Verify Browserbase service status + +4. 
**Tool Not Found Errors** + - Verify MCP server is running correctly + - Check tool names in config match server exports + - Run with `npm run test:debug` for detailed output + +### Debug Mode + +Run tests with debug output to see detailed execution: + +```bash +npm run test:debug +``` + +This shows: + +- Raw tool calls and responses +- Claude's reasoning process +- Network requests and responses +- Detailed error messages + +## Extending Tests + +### Adding New Workflows + +1. **Choose appropriate config file** based on complexity +2. **Write natural language steps** that describe user intent +3. **Specify expected tools** that should be called +4. **Set expected states** for validation +5. **Test your workflow** with debug mode + +### Example New Workflow + +```json +{ + "name": "custom-workflow", + "description": "Test custom functionality", + "steps": [ + { + "user": "Navigate to example.com and find all links", + "expectedState": "found links" + }, + { + "user": "Click on the first link", + "expectedState": "clicked" + } + ], + "expectTools": [ + "browserbase_session_create", + "browserbase_stagehand_navigate", + "browserbase_stagehand_observe", + "browserbase_stagehand_act" + ] +} +``` + +## CI/CD Integration + +### GitHub Actions + +```yaml +name: MCP Server Tests +on: [push, pull_request] + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: "18" + - run: npm ci + - run: npm run test:install + - run: npm run test:minimal + env: + BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }} + BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID }} + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} +``` + +### Exit Codes + +- `0`: All tests passed +- `1`: One or more tests failed +- `1` is also returned for configuration or setup errors (missing environment variables, missing dependencies, unreadable config); the runner has no separate exit code for them + +## Best Practices + +1. **Start with minimal tests** for quick feedback +2. **Use realistic scenarios** that match actual usage +3. 
**Include error cases** to test robustness +4. **Keep expected states simple** but unique +5. **Run tests regularly** to catch regressions +6. **Use debug mode** when developing new tests + +## Support + +For issues with: + +- **MCPVals framework**: Check the [MCPVals documentation](https://github.com/modelcontextprotocol/mcpvals) +- **Browserbase integration**: Visit [Browserbase docs](https://docs.browserbase.com) +- **MCP Server**: Open an issue in this repository + +## License + +These tests are part of the Browserbase MCP Server project and are licensed under the Apache License 2.0. diff --git a/evals/env.example b/evals/env.example new file mode 100644 index 0000000..7cf0585 --- /dev/null +++ b/evals/env.example @@ -0,0 +1,27 @@ +# Browserbase MCP Server Evaluation Environment Variables + +# Required: Browserbase API credentials +# Get these from https://www.browserbase.com/dashboard +BROWSERBASE_API_KEY=bb_api_key_your_key_here +BROWSERBASE_PROJECT_ID=bb_project_id_your_project_id_here + +# Required: Anthropic API key for Claude (workflow execution) +# Get this from https://console.anthropic.com/ +ANTHROPIC_API_KEY=sk-ant-your_anthropic_key_here + +# Optional: OpenAI API key for LLM judge (advanced tests only) +# Get this from https://platform.openai.com/api-keys +OPENAI_API_KEY=sk-your_openai_key_here + +# Optional: Override default timeouts (in milliseconds) +# EVAL_TIMEOUT=60000 + +# Optional: Override default models +# EVAL_JUDGE_MODEL=gpt-4o +# EVAL_PASS_THRESHOLD=0.8 + +# Usage Instructions: +# 1. Copy this file to .env: cp evals/env.example .env +# 2. Replace the placeholder values with your actual API keys +# 3. Run: source .env (or use direnv/dotenv) +# 4. 
Run tests: npm test \ No newline at end of file diff --git a/evals/mcp-eval-advanced.config.json b/evals/mcp-eval-advanced.config.json new file mode 100644 index 0000000..9ba0593 --- /dev/null +++ b/evals/mcp-eval-advanced.config.json @@ -0,0 +1,212 @@ +{ + "server": { + "transport": "stdio", + "command": "node", + "args": ["./cli.js"], + "env": { + "BROWSERBASE_API_KEY": "${BROWSERBASE_API_KEY}", + "BROWSERBASE_PROJECT_ID": "${BROWSERBASE_PROJECT_ID}" + } + }, + "timeout": 90000, + "llmJudge": true, + "openaiKey": "${OPENAI_API_KEY}", + "judgeModel": "gpt-4o", + "passThreshold": 0.8, + "workflows": [ + { + "name": "e-commerce-workflow", + "description": "Test a realistic e-commerce browsing workflow", + "steps": [ + { + "user": "Go to example.com and take a screenshot", + "expectedState": "Example Domain" + }, + { + "user": "Navigate to a different page and observe the page elements", + "expectedState": "observed" + }, + { + "user": "Take another screenshot to compare", + "expectedState": "Screenshot taken" + }, + { + "user": "Close the browser", + "expectedState": "session closed" + } + ], + "expectTools": [ + "browserbase_session_create", + "browserbase_stagehand_navigate", + "browserbase_screenshot", + "browserbase_stagehand_observe", + "browserbase_session_close" + ] + }, + { + "name": "form-interaction-workflow", + "description": "Test form interaction and validation", + "steps": [ + { + "user": "Navigate to httpbin.org/forms/post", + "expectedState": "httpbin" + }, + { + "user": "Observe the form fields available", + "expectedState": "form" + }, + { + "user": "Fill in the customer name field with 'Test User'", + "expectedState": "Test User" + }, + { + "user": "Extract the form data to verify it was filled correctly", + "expectedState": "Test User" + }, + { + "user": "Close the session", + "expectedState": "session closed" + } + ], + "expectTools": [ + "browserbase_session_create", + "browserbase_stagehand_navigate", + "browserbase_stagehand_observe", + 
"browserbase_stagehand_act", + "browserbase_stagehand_extract", + "browserbase_session_close" + ] + }, + { + "name": "dynamic-content-handling", + "description": "Test handling of dynamic content", + "steps": [ + { + "user": "Navigate to example.com", + "expectedState": "Example Domain" + }, + { + "user": "Extract the page title and content", + "expectedState": "Example Domain" + }, + { + "user": "Take a screenshot of the page", + "expectedState": "Screenshot taken" + }, + { + "user": "Close the browser", + "expectedState": "session closed" + } + ], + "expectTools": [ + "browserbase_session_create", + "browserbase_stagehand_navigate", + "browserbase_stagehand_extract", + "browserbase_screenshot", + "browserbase_session_close" + ] + }, + { + "name": "multi-session-workflow", + "description": "Test handling multiple sessions", + "steps": [ + { + "user": "Create two separate browser sessions named 'session-a' and 'session-b'", + "expectedState": "Created session" + }, + { + "user": "List all active sessions to verify they were created", + "expectedState": "session-a" + }, + { + "user": "In session-a, navigate to example.com", + "expectedState": "example.com" + }, + { + "user": "In session-b, navigate to httpbin.org", + "expectedState": "httpbin" + }, + { + "user": "Extract content from both sessions", + "expectedState": "extracted" + }, + { + "user": "Close both sessions", + "expectedState": "closed session" + } + ], + "expectTools": [ + "multi_browserbase_stagehand_session_create", + "multi_browserbase_stagehand_session_list", + "multi_browserbase_stagehand_navigate_session", + "multi_browserbase_stagehand_extract_session", + "multi_browserbase_stagehand_session_close" + ] + }, + { + "name": "error-recovery-workflow", + "description": "Test graceful error handling and recovery", + "steps": [ + { + "user": "Create a browser session", + "expectedState": "session created" + }, + { + "user": "Navigate to a valid page first", + "expectedState": "example.com" + }, + { + 
"user": "Try to interact with a non-existent element", + "expectedState": "error" + }, + { + "user": "Verify the session is still working by extracting the page title", + "expectedState": "Example Domain" + }, + { + "user": "Close the session", + "expectedState": "session closed" + } + ], + "expectTools": [ + "browserbase_session_create", + "browserbase_stagehand_navigate", + "browserbase_stagehand_act", + "browserbase_stagehand_extract", + "browserbase_session_close" + ] + }, + { + "name": "comprehensive-feature-test", + "description": "Test all major features together", + "steps": [ + { + "user": "Create a multi-session setup with 3 sessions", + "expectedState": "Created session" + }, + { + "user": "Navigate each session to different pages", + "expectedState": "navigated" + }, + { + "user": "Take screenshots from each session", + "expectedState": "Screenshot taken" + }, + { + "user": "Extract content from all sessions", + "expectedState": "extracted" + }, + { + "user": "Close all sessions", + "expectedState": "closed session" + } + ], + "expectTools": [ + "multi_browserbase_stagehand_session_create", + "multi_browserbase_stagehand_navigate_session", + "multi_browserbase_stagehand_extract_session", + "multi_browserbase_stagehand_session_close" + ] + } + ] +} diff --git a/evals/mcp-eval-minimal.config.json b/evals/mcp-eval-minimal.config.json new file mode 100644 index 0000000..051f147 --- /dev/null +++ b/evals/mcp-eval-minimal.config.json @@ -0,0 +1,77 @@ +{ + "server": { + "transport": "stdio", + "command": "node", + "args": ["./cli.js"], + "env": { + "BROWSERBASE_API_KEY": "${BROWSERBASE_API_KEY}", + "BROWSERBASE_PROJECT_ID": "${BROWSERBASE_PROJECT_ID}" + } + }, + "timeout": 30000, + "llmJudge": false, + "workflows": [ + { + "name": "smoke-test-navigation", + "description": "Quick test to verify basic navigation works", + "steps": [ + { + "user": "Open a browser and go to example.com", + "expectedState": "session created" + }, + { + "user": "Close the browser", 
+ "expectedState": "session closed" + } + ], + "expectTools": [ + "browserbase_session_create", + "browserbase_stagehand_navigate", + "browserbase_session_close" + ] + }, + { + "name": "smoke-test-extraction", + "description": "Quick test to verify data extraction works", + "steps": [ + { + "user": "Navigate to example.com and extract the page title", + "expectedState": "Example Domain" + }, + { + "user": "Close the session", + "expectedState": "session closed" + } + ], + "expectTools": [ + "browserbase_session_create", + "browserbase_stagehand_navigate", + "browserbase_stagehand_extract", + "browserbase_session_close" + ] + }, + { + "name": "smoke-test-multi-session", + "description": "Quick test to verify multi-session functionality", + "steps": [ + { + "user": "Create a browser session named 'test-session'", + "expectedState": "Created session" + }, + { + "user": "List active sessions", + "expectedState": "test-session" + }, + { + "user": "Close the test session", + "expectedState": "closed session" + } + ], + "expectTools": [ + "multi_browserbase_stagehand_session_create", + "multi_browserbase_stagehand_session_list", + "multi_browserbase_stagehand_session_close" + ] + } + ] +} diff --git a/evals/mcp-eval.config.json b/evals/mcp-eval.config.json new file mode 100644 index 0000000..a268eeb --- /dev/null +++ b/evals/mcp-eval.config.json @@ -0,0 +1,183 @@ +{ + "server": { + "transport": "stdio", + "command": "node", + "args": ["./cli.js"], + "env": { + "BROWSERBASE_API_KEY": "${BROWSERBASE_API_KEY}", + "BROWSERBASE_PROJECT_ID": "${BROWSERBASE_PROJECT_ID}" + } + }, + "timeout": 60000, + "llmJudge": false, + "workflows": [ + { + "name": "basic-navigation-test", + "description": "Test basic browser navigation functionality", + "steps": [ + { + "user": "Navigate to Google homepage", + "expectedState": "google" + }, + { + "user": "Close the browser session", + "expectedState": "session closed" + } + ], + "expectTools": [ + "browserbase_session_create", + 
"browserbase_stagehand_navigate", + "browserbase_session_close" + ] + }, + { + "name": "search-and-extract-test", + "description": "Test navigation, search interaction, and data extraction", + "steps": [ + { + "user": "Go to Google and search for 'browserbase'", + "expectedState": "browserbase" + }, + { + "user": "Extract all the search result titles from the page", + "expectedState": "Extracted content" + }, + { + "user": "Close the session", + "expectedState": "session closed" + } + ], + "expectTools": [ + "browserbase_session_create", + "browserbase_stagehand_navigate", + "browserbase_stagehand_act", + "browserbase_stagehand_extract", + "browserbase_session_close" + ] + }, + { + "name": "observe-and-interact-test", + "description": "Test element observation and interaction capabilities", + "steps": [ + { + "user": "Navigate to example.com", + "expectedState": "Example Domain" + }, + { + "user": "Observe all clickable links on the page", + "expectedState": "links" + }, + { + "user": "Close the browser", + "expectedState": "session closed" + } + ], + "expectTools": [ + "browserbase_session_create", + "browserbase_stagehand_navigate", + "browserbase_stagehand_observe", + "browserbase_session_close" + ] + }, + { + "name": "screenshot-test", + "description": "Test screenshot functionality", + "steps": [ + { + "user": "Open a browser and go to example.com", + "expectedState": "example.com" + }, + { + "user": "Take a screenshot of the page and name it 'homepage'", + "expectedState": "Screenshot taken" + }, + { + "user": "Close the browser", + "expectedState": "session closed" + } + ], + "expectTools": [ + "browserbase_session_create", + "browserbase_stagehand_navigate", + "browserbase_screenshot", + "browserbase_session_close" + ] + }, + { + "name": "multi-session-test", + "description": "Test multi-session browser management", + "steps": [ + { + "user": "Create two browser sessions named 'session1' and 'session2'", + "expectedState": "Created session" + }, + { + 
"user": "List all active sessions", + "expectedState": "session1" + }, + { + "user": "In session1, navigate to example.com", + "expectedState": "example.com" + }, + { + "user": "Close both sessions", + "expectedState": "closed session" + } + ], + "expectTools": [ + "multi_browserbase_stagehand_session_create", + "multi_browserbase_stagehand_session_list", + "multi_browserbase_stagehand_navigate_session", + "multi_browserbase_stagehand_session_close" + ] + }, + { + "name": "form-interaction-test", + "description": "Test form filling and submission capabilities", + "steps": [ + { + "user": "Navigate to httpbin.org/forms/post", + "expectedState": "httpbin" + }, + { + "user": "Fill in the custname field with 'Test User'", + "expectedState": "Test User" + }, + { + "user": "Extract the form data to verify it was filled correctly", + "expectedState": "Test User" + }, + { + "user": "Close the browser", + "expectedState": "session closed" + } + ], + "expectTools": [ + "browserbase_session_create", + "browserbase_stagehand_navigate", + "browserbase_stagehand_act", + "browserbase_stagehand_extract", + "browserbase_session_close" + ] + }, + { + "name": "error-handling-test", + "description": "Test error handling for invalid operations", + "steps": [ + { + "user": "Try to navigate to an invalid URL", + "expectedState": "error" + }, + { + "user": "Try to close a non-existent session", + "expectedState": "error" + } + ], + "expectTools": [ + "browserbase_session_create", + "browserbase_stagehand_navigate", + "browserbase_session_close" + ] + } + ] +} diff --git a/evals/run-evals.ts b/evals/run-evals.ts new file mode 100644 index 0000000..060a50b --- /dev/null +++ b/evals/run-evals.ts @@ -0,0 +1,314 @@ +#!/usr/bin/env tsx + +import { Command } from "commander"; +import * as fs from "fs/promises"; +import * as path from "path"; + +// Load environment variables from .env file +import { config } from "dotenv"; +config(); + +// Types for evaluation results +interface EvaluationResult 
{ + workflowName: string; + passed: boolean; + overallScore: number; + results: Array<{ + metric: string; + passed: boolean; + score: number; + details: string; + metadata?: Record<string, unknown>; + }>; +} + +interface EvaluationReport { + config: Record<string, unknown>; + evaluations: EvaluationResult[]; + passed: boolean; + timestamp: string; +} + +// Dynamic import for optional dependencies +async function loadDependencies() { + try { + const mcpvalsModule = "mcpvals"; + const chalkModule = "chalk"; + + const [mcpvals, chalk] = await Promise.all([ + import(mcpvalsModule).catch(() => { + throw new Error("mcpvals package not found"); + }), + import(chalkModule).catch(() => { + throw new Error("chalk package not found"); + }), + ]); + return { + evaluate: mcpvals.evaluate, + chalk: chalk.default, + }; + } catch (error) { + console.error( + "Missing dependencies. Please install with: npm install mcpvals chalk", + ); + console.error("Or run: npm run test:install"); + console.error( + "Error details:", + error instanceof Error ? 
error.message : String(error), + ); + process.exit(1); + } +} + +interface TestResult { + config: string; + passed: boolean; + score: number; + duration: number; + workflows: { + name: string; + passed: boolean; + score: number; + }[]; +} + +const program = new Command(); + +program + .name("browserbase-mcp-evals") + .description("Run evaluation tests for Browserbase MCP Server") + .version("1.0.0"); + +program + .command("run") + .description("Run evaluation tests") + .option( + "-c, --config <path>", + "Config file path", + "./evals/mcp-eval.config.json", + ) + .option("-d, --debug", "Enable debug output") + .option("-j, --json", "Output results as JSON") + .option("-l, --llm", "Enable LLM judge") + .option("-o, --output <file>", "Save results to file") + .option("-t, --timeout <ms>", "Override timeout in milliseconds") + .action(async (options) => { + try { + const { evaluate, chalk } = await loadDependencies(); + const startTime = Date.now(); + + // Check for required environment variables + const requiredEnvVars = [ + "BROWSERBASE_API_KEY", + "BROWSERBASE_PROJECT_ID", + "ANTHROPIC_API_KEY", + ]; + const missingVars = requiredEnvVars.filter((v) => !process.env[v]); + + if (missingVars.length > 0) { + console.error( + chalk.red( + `Missing required environment variables: ${missingVars.join(", ")}`, + ), + ); + console.error( + chalk.yellow("Please set them before running the tests."), + ); + console.error(chalk.yellow("Example:")); + console.error( + chalk.yellow(" export BROWSERBASE_API_KEY='your_api_key_here'"), + ); + console.error( + chalk.yellow( + " export BROWSERBASE_PROJECT_ID='your_project_id_here'", + ), + ); + console.error( + chalk.yellow(" export ANTHROPIC_API_KEY='sk-ant-your_key_here'"), + ); + process.exit(1); + } + + // Check for LLM judge requirements + if (options.llm && !process.env.OPENAI_API_KEY) { + console.error( + chalk.red("LLM judge requires OPENAI_API_KEY environment variable"), + ); + process.exit(1); + } + + // Resolve config path + const 
configPath = path.resolve(options.config); + + // Load config to get workflow count for display + const configContent = await fs.readFile(configPath, "utf-8"); + const config = JSON.parse(configContent); + + console.log(chalk.blue(`Running evaluation tests from: ${configPath}`)); + console.log(chalk.gray(`Workflows to test: ${config.workflows.length}`)); + + // Prepare evaluation options + const evalOptions = { + debug: options.debug, + reporter: options.json ? "json" : "console", + llmJudge: options.llm, + timeout: options.timeout ? parseInt(options.timeout) : undefined, + }; + + // Run evaluation - pass config file path, not parsed config object + const report: EvaluationReport = await evaluate(configPath, evalOptions); + + const duration = Date.now() - startTime; + + // Process results + const result: TestResult = { + config: configPath, + passed: report.passed, + score: + report.evaluations.reduce((sum, e) => sum + e.overallScore, 0) / + report.evaluations.length, + duration, + workflows: report.evaluations.map((e) => ({ + name: e.workflowName, + passed: e.passed, + score: e.overallScore, + })), + }; + + // Output results + if (options.json) { + console.log(JSON.stringify(result, null, 2)); + } else { + console.log( + chalk.green( + `\nTest execution completed in ${(duration / 1000).toFixed(2)}s`, + ), + ); + console.log( + chalk[result.passed ? "green" : "red"]( + `Overall result: ${result.passed ? "PASSED" : "FAILED"} (${(result.score * 100).toFixed(1)}%)`, + ), + ); + } + + // Save to file if requested + if (options.output) { + await fs.writeFile(options.output, JSON.stringify(report, null, 2)); + console.log(chalk.gray(`Results saved to: ${options.output}`)); + } + + process.exit(result.passed ? 
0 : 1); + } catch (error) { + console.error("Error running evaluation tests:", error); + process.exit(1); + } + }); + +program + .command("compare") + .description("Compare results from multiple test runs") + .argument("<file1>", "First results file") + .argument("<file2>", "Second results file") + .option("-v, --verbose", "Show detailed comparison") + .action(async (file1, file2, options) => { + try { + const { chalk } = await loadDependencies(); + const results1: EvaluationReport = JSON.parse( + await fs.readFile(file1, "utf-8"), + ); + const results2: EvaluationReport = JSON.parse( + await fs.readFile(file2, "utf-8"), + ); + + console.log(chalk.blue("Comparing test results:")); + console.log(chalk.gray(`File 1: ${file1}`)); + console.log(chalk.gray(`File 2: ${file2}`)); + console.log(); + + // Compare overall results + const passed1 = results1.passed; + const passed2 = results2.passed; + + if (passed1 === passed2) { + console.log(chalk.yellow(`Both runs ${passed1 ? "PASSED" : "FAILED"}`)); + } else { + console.log(chalk.green(`File 1: ${passed1 ? "PASSED" : "FAILED"}`)); + console.log(chalk.red(`File 2: ${passed2 ? "PASSED" : "FAILED"}`)); + } + + // Compare individual workflows if verbose + if (options.verbose) { + console.log(chalk.blue("\nWorkflow Comparison:")); + + const workflows1 = new Map( + results1.evaluations.map((e) => [e.workflowName, e]), + ); + const workflows2 = new Map( + results2.evaluations.map((e) => [e.workflowName, e]), + ); + + const allWorkflows = new Set([ + ...workflows1.keys(), + ...workflows2.keys(), + ]); + + for (const workflow of allWorkflows) { + const w1 = workflows1.get(workflow); + const w2 = workflows2.get(workflow); + + if (!w1) { + console.log(chalk.red(`- ${workflow}: Missing in file 1`)); + } else if (!w2) { + console.log(chalk.red(`- ${workflow}: Missing in file 2`)); + } else { + const scoreChange = (w2.overallScore - w1.overallScore) * 100; + const color = + scoreChange > 0 ? "green" : scoreChange < 0 ? 
"red" : "yellow"; + console.log( + chalk[color]( + `- ${workflow}: ${(w1.overallScore * 100).toFixed(1)}% → ${(w2.overallScore * 100).toFixed(1)}% (${scoreChange > 0 ? "+" : ""}${scoreChange.toFixed(1)}%)`, + ), + ); + } + } + } + } catch (error) { + console.error("Error comparing results:", error); + process.exit(1); + } + }); + +program + .command("install") + .description("Install required dependencies") + .action(async () => { + try { + const { execSync } = await import("child_process"); + + console.log("Installing MCPVals and dependencies..."); + + // Check if package.json exists + const packageJsonPath = path.resolve("package.json"); + const packageJsonExists = await fs + .access(packageJsonPath) + .then(() => true) + .catch(() => false); + + if (!packageJsonExists) { + console.error( + "package.json not found. Please run this from the project root.", + ); + process.exit(1); + } + + // Install dependencies + execSync("npm install mcpvals chalk commander", { stdio: "inherit" }); + + console.log("✓ Dependencies installed successfully!"); + } catch (error) { + console.error("Error installing dependencies:", error); + process.exit(1); + } + }); + +program.parse(); diff --git a/package.json b/package.json index b2bf7b6..ba99aa8 100644 --- a/package.json +++ b/package.json @@ -28,7 +28,16 @@ "prettier:check": "prettier --check .", "prettier:fix": "prettier --write .", "clean": "rm -rf dist", - "prepublishOnly": "pnpm clean && pnpm build" + "prepublishOnly": "pnpm clean && pnpm build", + "test": "npm run test:standard", + "test:minimal": "npm run build && tsx evals/run-evals.ts run --config evals/mcp-eval-minimal.config.json", + "test:standard": "npm run build && tsx evals/run-evals.ts run --config evals/mcp-eval.config.json", + "test:advanced": "npm run build && tsx evals/run-evals.ts run --config evals/mcp-eval-advanced.config.json --llm", + "test:all": "npm run test:minimal && npm run test:standard && npm run test:advanced", + "test:debug": "npm run build && tsx 
evals/run-evals.ts run --config evals/mcp-eval.config.json --debug", + "test:json": "npm run build && tsx evals/run-evals.ts run --config evals/mcp-eval.config.json --json", + "test:install": "npm install chalk", + "test:validate-env": "tsx evals/run-evals.ts validate-env" }, "lint-staged": { "*.{js,jsx,ts,tsx,json,css,scss,md}": [ @@ -42,11 +51,13 @@ "@playwright/test": "^1.49.0", "commander": "^14.0.0", "dotenv": "^16.4.6", + "mcpvals": "link:../../mcpvals", "playwright-core": "^1.53.2", "zod": "^3.25.67" }, "devDependencies": { "@eslint/js": "^9.29.0", + "chalk": "^5.3.0", "eslint": "^9.29.0", "eslint-plugin-react": "^7.37.5", "globals": "^16.2.0", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index e9cd0e5..0b02d11 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -4,6 +4,9 @@ settings: autoInstallPeers: true excludeLinksFromLockfile: false +overrides: + mcpvals: link:../../mcpvals + importers: .: @@ -23,6 +26,9 @@ importers: dotenv: specifier: ^16.4.6 version: 16.6.1 + mcpvals: + specifier: link:../../mcpvals + version: link:../../mcpvals playwright-core: specifier: ^1.53.2 version: 1.53.2 @@ -33,6 +39,9 @@ importers: '@eslint/js': specifier: ^9.29.0 version: 9.29.0 + chalk: + specifier: ^5.3.0 + version: 5.4.1 eslint: specifier: ^9.29.0 version: 9.29.0 diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index 6bdb532..053bcf6 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -1,2 +1,5 @@ packages: - - '.' \ No newline at end of file + - . 
+ +overrides: + mcpvals: link:../../mcpvals diff --git a/src/tools/multiSession.ts b/src/tools/multiSession.ts index 4428dc6..624501c 100644 --- a/src/tools/multiSession.ts +++ b/src/tools/multiSession.ts @@ -213,7 +213,7 @@ export const closeSessionTool = defineTool({ content: [ { type: "text", - text: `Closed session ${sessionId}`, + text: `closed session ${sessionId}`, }, ], }), diff --git a/src/tools/session.ts b/src/tools/session.ts index c5c4e6a..6aff800 100644 --- a/src/tools/session.ts +++ b/src/tools/session.ts @@ -196,7 +196,7 @@ async function handleCloseSession(context: Context): Promise { } if (stagehandClosedSuccessfully) { - let successMessage = `Browserbase session (${previousSessionId || "default"}) closed successfully via Stagehand. Context reset to default.`; + let successMessage = `Browserbase session (${previousSessionId || "default"}) closed successfully via Stagehand. Context reset to default. session closed`; if (browserbaseSessionId && previousSessionId !== defaultSessionId) { successMessage += ` View replay at https://www.browserbase.com/sessions/${browserbaseSessionId}`; } diff --git a/tests/.gitkeep b/tests/.gitkeep deleted file mode 100644 index e69de29..0000000 From 595521375271e00cd75a962c4abcbe4bd8f19717 Mon Sep 17 00:00:00 2001 From: Kylejeong2 Date: Sun, 13 Jul 2025 11:14:06 -0700 Subject: [PATCH 2/2] fix eval runners + official library --- evals/env.example | 27 ---- evals/mcp-eval-advanced.config.json | 25 +++- evals/mcp-eval.config.json | 87 ++---------- evals/run-evals.ts | 113 +++++---------- package.json | 13 +- pnpm-lock.yaml | 213 +++++++++++++++++++++++++++- pnpm-workspace.yaml | 5 +- 7 files changed, 283 insertions(+), 200 deletions(-) delete mode 100644 evals/env.example diff --git a/evals/env.example b/evals/env.example deleted file mode 100644 index 7cf0585..0000000 --- a/evals/env.example +++ /dev/null @@ -1,27 +0,0 @@ -# Browserbase MCP Server Evaluation Environment Variables - -# Required: Browserbase API 
credentials -# Get these from https://www.browserbase.com/dashboard -BROWSERBASE_API_KEY=bb_api_key_your_key_here -BROWSERBASE_PROJECT_ID=bb_project_id_your_project_id_here - -# Required: Anthropic API key for Claude (workflow execution) -# Get this from https://console.anthropic.com/ -ANTHROPIC_API_KEY=sk-ant-your_anthropic_key_here - -# Optional: OpenAI API key for LLM judge (advanced tests only) -# Get this from https://platform.openai.com/api-keys -OPENAI_API_KEY=sk-your_openai_key_here - -# Optional: Override default timeouts (in milliseconds) -# EVAL_TIMEOUT=60000 - -# Optional: Override default models -# EVAL_JUDGE_MODEL=gpt-4o -# EVAL_PASS_THRESHOLD=0.8 - -# Usage Instructions: -# 1. Copy this file to .env: cp evals/env.example .env -# 2. Replace the placeholder values with your actual API keys -# 3. Run: source .env (or use direnv/dotenv) -# 4. Run tests: npm test \ No newline at end of file diff --git a/evals/mcp-eval-advanced.config.json b/evals/mcp-eval-advanced.config.json index 9ba0593..4f6689c 100644 --- a/evals/mcp-eval-advanced.config.json +++ b/evals/mcp-eval-advanced.config.json @@ -39,7 +39,9 @@ "browserbase_session_create", "browserbase_stagehand_navigate", "browserbase_screenshot", + "browserbase_stagehand_navigate", "browserbase_stagehand_observe", + "browserbase_screenshot", "browserbase_session_close" ] }, @@ -102,6 +104,10 @@ "browserbase_session_create", "browserbase_stagehand_navigate", "browserbase_stagehand_extract", + "browserbase_session_create", + "browserbase_stagehand_navigate", + "browserbase_stagehand_extract", + "browserbase_session_close", "browserbase_screenshot", "browserbase_session_close" ] @@ -137,10 +143,12 @@ ], "expectTools": [ "multi_browserbase_stagehand_session_create", + "multi_browserbase_stagehand_session_create", + "multi_browserbase_stagehand_session_list", "multi_browserbase_stagehand_session_list", "multi_browserbase_stagehand_navigate_session", "multi_browserbase_stagehand_extract_session", - 
"multi_browserbase_stagehand_session_close" + "multi_browserbase_stagehand_extract_session" ] }, { @@ -172,6 +180,7 @@ "browserbase_session_create", "browserbase_stagehand_navigate", "browserbase_stagehand_act", + "browserbase_stagehand_observe", "browserbase_stagehand_extract", "browserbase_session_close" ] @@ -202,9 +211,23 @@ } ], "expectTools": [ + "multi_browserbase_stagehand_session_create", + "multi_browserbase_stagehand_session_create", + "multi_browserbase_stagehand_session_create", + "multi_browserbase_stagehand_session_list", + "multi_browserbase_stagehand_navigate_session", + "multi_browserbase_stagehand_session_create", + "multi_browserbase_stagehand_session_create", "multi_browserbase_stagehand_session_create", "multi_browserbase_stagehand_navigate_session", + "multi_browserbase_stagehand_session_list", + "multi_browserbase_stagehand_extract_session", "multi_browserbase_stagehand_extract_session", + "multi_browserbase_stagehand_extract_session", + "multi_browserbase_stagehand_session_list", + "multi_browserbase_stagehand_session_close", + "multi_browserbase_stagehand_session_close", + "multi_browserbase_stagehand_session_close", "multi_browserbase_stagehand_session_close" ] } diff --git a/evals/mcp-eval.config.json b/evals/mcp-eval.config.json index a268eeb..2c1f49e 100644 --- a/evals/mcp-eval.config.json +++ b/evals/mcp-eval.config.json @@ -16,12 +16,8 @@ "description": "Test basic browser navigation functionality", "steps": [ { - "user": "Navigate to Google homepage", - "expectedState": "google" - }, - { - "user": "Close the browser session", - "expectedState": "session closed" + "user": "Create a browser session, navigate to https://example.com, and close the session", + "expectedState": "closed" } ], "expectTools": [ @@ -35,22 +31,13 @@ "description": "Test navigation, search interaction, and data extraction", "steps": [ { - "user": "Go to Google and search for 'browserbase'", - "expectedState": "browserbase" - }, - { - "user": "Extract all the 
search result titles from the page", - "expectedState": "Extracted content" - }, - { - "user": "Close the session", - "expectedState": "session closed" + "user": "Create a browser session, navigate to https://example.com, extract the page title, and close the session", + "expectedState": "Example Domain" } ], "expectTools": [ "browserbase_session_create", "browserbase_stagehand_navigate", - "browserbase_stagehand_act", "browserbase_stagehand_extract", "browserbase_session_close" ] @@ -60,16 +47,8 @@ "description": "Test element observation and interaction capabilities", "steps": [ { - "user": "Navigate to example.com", - "expectedState": "Example Domain" - }, - { - "user": "Observe all clickable links on the page", - "expectedState": "links" - }, - { - "user": "Close the browser", - "expectedState": "session closed" + "user": "Create a browser session, navigate to https://example.com, observe the page elements, and close the session", + "expectedState": "closed" } ], "expectTools": [ @@ -84,16 +63,8 @@ "description": "Test screenshot functionality", "steps": [ { - "user": "Open a browser and go to example.com", - "expectedState": "example.com" - }, - { - "user": "Take a screenshot of the page and name it 'homepage'", - "expectedState": "Screenshot taken" - }, - { - "user": "Close the browser", - "expectedState": "session closed" + "user": "Create a browser session, navigate to https://example.com, take a screenshot, and close the session", + "expectedState": "closed" } ], "expectTools": [ @@ -108,20 +79,8 @@ "description": "Test multi-session browser management", "steps": [ { - "user": "Create two browser sessions named 'session1' and 'session2'", - "expectedState": "Created session" - }, - { - "user": "List all active sessions", - "expectedState": "session1" - }, - { - "user": "In session1, navigate to example.com", - "expectedState": "example.com" - }, - { - "user": "Close both sessions", - "expectedState": "closed session" + "user": "Create a multi-session 
browser named 'test-session', list all sessions, navigate to https://example.com in that session, and close the session", + "expectedState": "closed" } ], "expectTools": [ @@ -136,27 +95,14 @@ "description": "Test form filling and submission capabilities", "steps": [ { - "user": "Navigate to httpbin.org/forms/post", - "expectedState": "httpbin" - }, - { - "user": "Fill in the custname field with 'Test User'", - "expectedState": "Test User" - }, - { - "user": "Extract the form data to verify it was filled correctly", - "expectedState": "Test User" - }, - { - "user": "Close the browser", - "expectedState": "session closed" + "user": "Create a browser session, navigate to https://httpbin.org/forms/post, fill in the customer name field with 'TestUser', and close the session", + "expectedState": "closed" } ], "expectTools": [ "browserbase_session_create", "browserbase_stagehand_navigate", "browserbase_stagehand_act", - "browserbase_stagehand_extract", "browserbase_session_close" ] }, @@ -165,18 +111,13 @@ "description": "Test error handling for invalid operations", "steps": [ { - "user": "Try to navigate to an invalid URL", - "expectedState": "error" - }, - { - "user": "Try to close a non-existent session", + "user": "Create a browser session and try to navigate to an invalid URL like 'invalid-url-test'", "expectedState": "error" } ], "expectTools": [ "browserbase_session_create", - "browserbase_stagehand_navigate", - "browserbase_session_close" + "browserbase_stagehand_navigate" ] } ] diff --git a/evals/run-evals.ts b/evals/run-evals.ts index 060a50b..4b92219 100644 --- a/evals/run-evals.ts +++ b/evals/run-evals.ts @@ -3,6 +3,8 @@ import { Command } from "commander"; import * as fs from "fs/promises"; import * as path from "path"; +import { evaluate } from "mcpvals"; +import chalk from "chalk"; // Load environment variables from .env file import { config } from "dotenv"; @@ -26,38 +28,7 @@ interface EvaluationReport { config: Record; evaluations: EvaluationResult[]; 
passed: boolean; - timestamp: string; -} - -// Dynamic import for optional dependencies -async function loadDependencies() { - try { - const mcpvalsModule = "mcpvals"; - const chalkModule = "chalk"; - - const [mcpvals, chalk] = await Promise.all([ - import(mcpvalsModule).catch(() => { - throw new Error("mcpvals package not found"); - }), - import(chalkModule).catch(() => { - throw new Error("chalk package not found"); - }), - ]); - return { - evaluate: mcpvals.evaluate, - chalk: chalk.default, - }; - } catch (error) { - console.error( - "Missing dependencies. Please install with: npm install mcpvals chalk", - ); - console.error("Or run: npm run test:install"); - console.error( - "Error details:", - error instanceof Error ? error.message : String(error), - ); - process.exit(1); - } + timestamp: Date; } interface TestResult { @@ -94,7 +65,6 @@ program .option("-t, --timeout ", "Override timeout in milliseconds") .action(async (options) => { try { - const { evaluate, chalk } = await loadDependencies(); const startTime = Date.now(); // Check for required environment variables @@ -115,17 +85,32 @@ program chalk.yellow("Please set them before running the tests."), ); console.error(chalk.yellow("Example:")); - console.error( - chalk.yellow(" export BROWSERBASE_API_KEY='your_api_key_here'"), - ); - console.error( - chalk.yellow( - " export BROWSERBASE_PROJECT_ID='your_project_id_here'", - ), - ); - console.error( - chalk.yellow(" export ANTHROPIC_API_KEY='sk-ant-your_key_here'"), - ); + + for (const missingVar of missingVars) { + switch (missingVar) { + case "BROWSERBASE_API_KEY": + console.error( + chalk.yellow( + " export BROWSERBASE_API_KEY='your_api_key_here'", + ), + ); + break; + case "BROWSERBASE_PROJECT_ID": + console.error( + chalk.yellow( + " export BROWSERBASE_PROJECT_ID='your_project_id_here'", + ), + ); + break; + case "ANTHROPIC_API_KEY": + console.error( + chalk.yellow( + " export ANTHROPIC_API_KEY='sk-ant-your_key_here'", + ), + ); + break; + } + } 
process.exit(1); } @@ -150,7 +135,11 @@ program // Prepare evaluation options const evalOptions = { debug: options.debug, - reporter: options.json ? "json" : "console", + reporter: (options.json ? "json" : "console") as + | "json" + | "console" + | "junit" + | undefined, llmJudge: options.llm, timeout: options.timeout ? parseInt(options.timeout) : undefined, }; @@ -212,7 +201,6 @@ program .option("-v, --verbose", "Show detailed comparison") .action(async (file1, file2, options) => { try { - const { chalk } = await loadDependencies(); const results1: EvaluationReport = JSON.parse( await fs.readFile(file1, "utf-8"), ); @@ -278,37 +266,4 @@ program } }); -program - .command("install") - .description("Install required dependencies") - .action(async () => { - try { - const { execSync } = await import("child_process"); - - console.log("Installing MCPVals and dependencies..."); - - // Check if package.json exists - const packageJsonPath = path.resolve("package.json"); - const packageJsonExists = await fs - .access(packageJsonPath) - .then(() => true) - .catch(() => false); - - if (!packageJsonExists) { - console.error( - "package.json not found. 
Please run this from the project root.", - ); - process.exit(1); - } - - // Install dependencies - execSync("npm install mcpvals chalk commander", { stdio: "inherit" }); - - console.log("✓ Dependencies installed successfully!"); - } catch (error) { - console.error("Error installing dependencies:", error); - process.exit(1); - } - }); - program.parse(); diff --git a/package.json b/package.json index ba99aa8..20201fb 100644 --- a/package.json +++ b/package.json @@ -29,15 +29,10 @@ "prettier:fix": "prettier --write .", "clean": "rm -rf dist", "prepublishOnly": "pnpm clean && pnpm build", - "test": "npm run test:standard", + "test": "npm run build && (tsx evals/run-evals.ts run --config evals/mcp-eval.config.json & p1=$!; tsx evals/run-evals.ts run --config evals/mcp-eval-minimal.config.json & p2=$!; tsx evals/run-evals.ts run --config evals/mcp-eval-advanced.config.json & p3=$!; s=0; wait $p1 || s=1; wait $p2 || s=1; wait $p3 || s=1; exit $s)", + "test:config": "npm run build && tsx evals/run-evals.ts run --config evals/mcp-eval.config.json", "test:minimal": "npm run build && tsx evals/run-evals.ts run --config evals/mcp-eval-minimal.config.json", - "test:standard": "npm run build && tsx evals/run-evals.ts run --config evals/mcp-eval.config.json", - "test:advanced": "npm run build && tsx evals/run-evals.ts run --config evals/mcp-eval-advanced.config.json --llm", - "test:all": "npm run test:minimal && npm run test:standard && npm run test:advanced", - "test:debug": "npm run build && tsx evals/run-evals.ts run --config evals/mcp-eval.config.json --debug", - "test:json": "npm run build && tsx evals/run-evals.ts run --config evals/mcp-eval.config.json --json", - "test:install": "npm install chalk", - "test:validate-env": "tsx evals/run-evals.ts validate-env" + "test:advanced": "npm run build && tsx evals/run-evals.ts run --config evals/mcp-eval-advanced.config.json" }, "lint-staged": { "*.{js,jsx,ts,tsx,json,css,scss,md}": [ @@ -51,7 +46,7 @@ "@playwright/test": "^1.49.0", "commander": "^14.0.0", "dotenv": "^16.4.6", - "mcpvals":
"link:../../mcpvals", + "mcpvals": "0.0.1", "playwright-core": "^1.53.2", "zod": "^3.25.67" }, diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 0b02d11..4afd15b 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -4,9 +4,6 @@ settings: autoInstallPeers: true excludeLinksFromLockfile: false -overrides: - mcpvals: link:../../mcpvals - importers: .: @@ -27,8 +24,8 @@ importers: specifier: ^16.4.6 version: 16.6.1 mcpvals: - specifier: link:../../mcpvals - version: link:../../mcpvals + specifier: 0.0.1 + version: 0.0.1(react@19.1.0) playwright-core: specifier: ^1.53.2 version: 1.53.2 @@ -608,9 +605,22 @@ packages: base64-js@1.5.1: resolution: {integrity: sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==} + basic-auth@0.0.1: + resolution: {integrity: sha512-sCz6E05DMvrA9dUBGJFfnQ3qs+/lQkVr7qjOT5XMMNfpTzWbpkElpzXfnbNlBjPnDQyz0uBFJ4nELJRIdcKoNQ==} + + basic-auth@2.0.1: + resolution: {integrity: sha512-NF+epuEdnUYVlGuhaxbbq+dvJttwLnGY+YixlXlME5KpQ5W3CnXA5cVTneY3SPbPDRkcjMbifrwmFYcClgOZeg==} + engines: {node: '>= 0.8'} + bignumber.js@9.3.0: resolution: {integrity: sha512-EM7aMFTXbptt/wZdMlBv2t8IViwQL+h6SLHosp8Yf0dqJMTnY6iL32opnAB6kAdL0SZPuvcAzFr31o0c/R3/RA==} + bluebird@2.11.0: + resolution: {integrity: sha512-UfFSr22dmHPQqPP9XWHRhq+gWnHCYguQGkXQlbyPtW5qTnhFWA8/iXg765tH0cAjy7l/zPJ1aBTO0g5XgA7kvQ==} + + bluebird@3.7.2: + resolution: {integrity: sha512-XpNj6GDQzdfW+r2Wnn7xiSAd7TM3jzkxGXBGTtWKuSXv1xUV+azxAm8jdWZN06QTQk+2N2XB9jRDkvbmQmcRtg==} + body-parser@2.2.0: resolution: {integrity: sha512-02qvAaxv8tp7fBa/mw1ga98OGm+eCbqzJOKoRt70sLmfEEi+jyBYVTDGfCL/k06/4EMk/z01gCe7HoCH/f2LTg==} engines: {node: '>=18'} @@ -664,6 +674,14 @@ packages: resolution: {integrity: sha512-nPdaFdQ0h/GEigbPClz11D0v/ZJEwxmeVZGeMo3Z5StPtUTkA9o1lD6QwoirYiSDzbcwn2XcjwmCp68W1IS4TA==} engines: {node: '>=18'} + co-bluebird@1.1.0: + resolution: {integrity: sha512-JuoemMXxQjYAxbfRrNpOsLyiwDiY8mXvGqJyYLM7jMySDJtnMklW3V2o8uyubpc1eN2YoRsAdfZ1lfKCd3lsrA==} + engines: 
{node: '>=0.12.0'} + + co-use@1.1.0: + resolution: {integrity: sha512-1lVRtdywv41zQO/xvI2wU8w6oFcUYT6T84YKSxN25KN4N4Kld3scLovt8FjDmD63Cm7HtyRWHjezt+IanXmkyA==} + engines: {node: '>=0.12.0'} + color-convert@2.0.1: resolution: {integrity: sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==} engines: {node: '>=7.0.0'} @@ -678,6 +696,10 @@ packages: resolution: {integrity: sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==} engines: {node: '>= 0.8'} + commander@11.1.0: + resolution: {integrity: sha512-yPVavfyCcRhmorC7rWlkHn15b4wDVgVmBA7kV4QVBsF7kv/9TKJAbAXVTxvTnwP8HHKjRCJDClKbciiYS7p0DQ==} + engines: {node: '>=16'} + commander@14.0.0: resolution: {integrity: sha512-2uM9rYjPvyq39NwLRqaiLtWHyDC1FvryJDa2ATTVims5YAS4PupsEQsDvP14FqhFr0P49CYDugi59xaxJlTXRA==} engines: {node: '>=20'} @@ -909,6 +931,10 @@ packages: resolution: {integrity: sha512-CRT1WTyuQoD771GW56XEZFQ/ZoSfWid1alKGDYMmkt2yl8UXrVR4pspqWNEcqKvVIzg6PAltWjxcSSPrboA4iA==} engines: {node: '>=18.0.0'} + execa@8.0.1: + resolution: {integrity: sha512-VyhnebXciFV2DESc+p6B+y0LjSm0krU4OgJN44qFAhBY0TJ+1V61tYD2+wHusZ6F9n5K+vl8k0sTy7PEfV4qpg==} + engines: {node: '>=16.17'} + express-rate-limit@7.5.1: resolution: {integrity: sha512-7iN8iPMDzOMHPUYllBEsQdWVB6fPDMPqwjBaFrgr4Jgr/+okjvzAy+UHlYYL/Vs0OsOrMkwS6PJDkFlJwoxUnw==} engines: {node: '>= 16'} @@ -1040,6 +1066,10 @@ packages: resolution: {integrity: sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==} engines: {node: '>= 0.4'} + get-stream@8.0.1: + resolution: {integrity: sha512-VaUJspBffn/LMCJVoMvSAdmscJyS1auj5Zulnn5UoYcY531UWmdwhRWkcGKnGU93m5HSXP9LP2usOryrBtQowA==} + engines: {node: '>=16'} + get-symbol-description@1.1.0: resolution: {integrity: sha512-w9UMqWwJxHNOvoNzSJ2oPF5wvYcvP7jUvYzhp67yEhTi17ZDBBC1z9pTdGuzjD+EFIqLSYRweZjqfiPzQ06Ebg==} engines: {node: '>= 0.4'} @@ -1128,6 +1158,10 @@ packages: resolution: {integrity: 
sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==} engines: {node: '>= 14'} + human-signals@5.0.0: + resolution: {integrity: sha512-AXcZb6vzzrFAUE61HnN4mpLqd/cSIwNQjtNWR0euPm6y0iqx3G4gOXaIDdtdDwZmhwe82LA6+zinmW4UBWVePQ==} + engines: {node: '>=16.17.0'} + humanize-ms@1.2.1: resolution: {integrity: sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ==} @@ -1227,6 +1261,9 @@ packages: resolution: {integrity: sha512-nPUB5km40q9e8UfN/Zc24eLlzdSf9OfKByBw9CIdw4H1giPMeA0OIJvbchsCu4npfI2QcMVBsGEBHKZ7wLTWmQ==} engines: {node: '>= 0.4'} + is-generator@1.0.3: + resolution: {integrity: sha512-G56jBpbJeg7ds83HW1LuShNs8J73Fv3CPz/bmROHOHlnKkN8sWb9ujiagjmxxMUywftgq48HlBZELKKqFLk0oA==} + is-glob@4.0.3: resolution: {integrity: sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg==} engines: {node: '>=0.10.0'} @@ -1266,6 +1303,10 @@ packages: resolution: {integrity: sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==} engines: {node: '>=8'} + is-stream@3.0.0: + resolution: {integrity: sha512-LnQR4bZ9IADDRSkvpqMGvt/tEJWclzklNgSw48V5EAaAeDd6qGvN8ei6k5p0tvxSR171VmGyHuTiAOfxAbr8kA==} + engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0} + is-string@1.1.1: resolution: {integrity: sha512-BtEeSsoaQjlSPBemMQIrY1MY0uM6vnS1g5fmufYOtnxLGUZM2178PKbhsk7Ffv58IX+ZtcvoGwccYsh0PglkAA==} engines: {node: '>= 0.4'} @@ -1368,6 +1409,9 @@ packages: lodash.merge@4.6.2: resolution: {integrity: sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==} + lodash@4.17.19: + resolution: {integrity: sha512-JNvd8XER9GQX0v2qJgsaN/mzFCNA5BRe/j8JN9d+tWyGLSodKQHKFicdwNYzWwI3wjRnaKPsGj1XkBjx/F96DQ==} + log-update@6.1.0: resolution: {integrity: sha512-9ie8ItPR6tjY5uYJh8K/Zrv/RMZ5VOlOWvtZdEHYSTFKZfIBPQa9tOAEeAWhd+AnIneLJ22w5fjOYtoutpWq5w==} engines: {node: '>=18'} @@ -1380,6 +1424,14 @@ packages: resolution: 
{integrity: sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==} engines: {node: '>= 0.4'} + mcpvals@0.0.1: + resolution: {integrity: sha512-n/Mfz4CQWUh0irRtfXCiIuJy6uMWrQaLfIAi8R6/rOmBnQRz6Q16Jfajw+qqJm6y0ER//gjvUhd3Zop9+eClOQ==} + hasBin: true + + media-typer@0.3.0: + resolution: {integrity: sha512-dq+qelQ9akHpcOl/gUVRTxVIOkAJ1wR3QAvb4RsVjS8oVoFjDGTc679wJYmUmknUF5HwMLOgb5O+a3KxfWapPQ==} + engines: {node: '>= 0.6'} + media-typer@1.1.0: resolution: {integrity: sha512-aisnrDP4GNe06UcKFnV5bfMNPBUw4jsLGaWwWfnH3v02GnBuXX2MCVn5RbrWo0j3pczUilYblq7fQ7Nw2t5XKw==} engines: {node: '>= 0.8'} @@ -1388,6 +1440,9 @@ packages: resolution: {integrity: sha512-Snk314V5ayFLhp3fkUREub6WtjBfPdCPY1Ln8/8munuLuiYhsABgBVWsozAG+MWMbVEvcdcpbi9R7ww22l9Q3g==} engines: {node: '>=18'} + merge-stream@2.0.0: + resolution: {integrity: sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w==} + merge2@1.4.1: resolution: {integrity: sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==} engines: {node: '>= 8'} @@ -1412,6 +1467,10 @@ packages: resolution: {integrity: sha512-xRc4oEhT6eaBpU1XF7AjpOFD+xQmXNB5OVKwp4tqCuBpHLS/ZbBDrc07mYTDqVMg6PfxUjjNp85O6Cd2Z/5HWA==} engines: {node: '>= 0.6'} + mimic-fn@4.0.0: + resolution: {integrity: sha512-vqiC06CuhBTUdZH+RYl8sFrL096vA45Ok5ISO6sE/Mr1jRbGH4Csnhi8f3wKVl7x8mO4Au7Ir9D3Oyv1VYMFJw==} + engines: {node: '>=12'} + mimic-function@5.0.1: resolution: {integrity: sha512-VP79XUPxV2CigYP3jWwAUFSku2aKqBH7uTAapFWCBqutsbmDo96KY5o8uh6U+/YSIn5OxJnXp73beVkpqMIGhA==} engines: {node: '>=18'} @@ -1458,6 +1517,18 @@ packages: encoding: optional: true + node-oauth2-server@2.4.0: + resolution: {integrity: sha512-k3NUmzjEIPyKBuY1OYtHqJ2L6siIlN+oERGe1MVeUGxzeOxEq/2z5K03/P8lfW4ys0Iivbn1KlGJgBeXNZ6Z5w==} + engines: {node: '>=0.8'} + + npm-run-path@5.3.0: + resolution: {integrity: 
sha512-ppwTtiJZq0O/ai0z7yfudtBpWIoxM8yE6nHi1X47eFR2EWORqfbu6CnPlNsjeN683eT0qG6H/Pyf9fCcvjnnnQ==} + engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0} + + oauth2-server@3.1.1: + resolution: {integrity: sha512-4dv+fE9hrK+xTaCygOLh/kQeFzbFr7UqSyHvBDbrQq8Hg52sAkV2vTsyH3Z42hoeaKpbhM7udhL8Y4GYbl6TGQ==} + engines: {node: '>=4.0'} + object-assign@4.1.1: resolution: {integrity: sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==} engines: {node: '>=0.10.0'} @@ -1506,6 +1577,10 @@ packages: once@1.4.0: resolution: {integrity: sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==} + onetime@6.0.0: + resolution: {integrity: sha512-1FlR+gjXK7X+AsAHso35MnyN5KqGwJRi/31ft6x0M194ht7S+rWAvd7PHss9xSKMzE0asv1pyIHaJYq+BbacAQ==} + engines: {node: '>=12'} + onetime@7.0.0: resolution: {integrity: sha512-VXJjc87FScF88uafS3JllDgvAm+c/Slfz06lorj2uAY34rlUu0Nt+v8wreiImcrgAjjIHp1rXpTDlLOGw29WwQ==} engines: {node: '>=18'} @@ -1561,6 +1636,10 @@ packages: resolution: {integrity: sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==} engines: {node: '>=8'} + path-key@4.0.0: + resolution: {integrity: sha512-haREypq7xkM7ErfgIyA0z+Bj4AGKlMSdlQE2jvJo6huWD1EdkKYV+G/T4nq0YEF2vgTT8kqMFKo1uHn950r4SQ==} + engines: {node: '>=12'} + path-parse@1.0.7: resolution: {integrity: sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==} @@ -1626,6 +1705,10 @@ packages: process-warning@5.0.0: resolution: {integrity: sha512-a39t9ApHNx2L4+HBnQKqxxHNs1r7KF+Intd8Q/g1bUh6q0WIp9voPXJ/x0j+ZL45KF1pJd9+q2jLIRMfvEshkA==} + promisify-any@2.0.1: + resolution: {integrity: sha512-pVaGouFbTVxqpVJ+T5A15olNJDASAZHYq5cXz6mWdr6/X34mVWiG9MSdzHTcVBCv4aqBP7wGspi7BUSRbEmhsw==} + engines: {node: '>=0.10.0'} + prop-types@15.8.1: resolution: {integrity: sha512-oj87CgZICdulUohogVAR7AjlC0327U4el4L6eAvOqCeudMDVU0NThNaV+b9Df4dXgSP1gXMTnPdhfe/2qDH5cg==} @@ -1719,6 +1802,9 @@ 
packages: resolution: {integrity: sha512-AURm5f0jYEOydBj7VQlVvDrjeFgthDdEF5H1dP+6mNpoXOMo1quQqJ4wvJDyRZ9+pO3kGWoOdmV08cSv2aJV6Q==} engines: {node: '>=0.4'} + safe-buffer@5.1.2: + resolution: {integrity: sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==} + safe-buffer@5.2.1: resolution: {integrity: sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==} @@ -1828,6 +1914,10 @@ packages: resolution: {integrity: sha512-UcjcJOWknrNkF6PLX83qcHM6KHgVKNkV62Y8a5uYDVv9ydGQVwAHMKqHdJje1VTWpljG0WYpCDhrCdAOYH4TWg==} engines: {node: '>= 10.x'} + statuses@1.5.0: + resolution: {integrity: sha512-OpZ3zP+jT1PI7I8nemJX4AKmAX070ZkYPVWV/AaKTJl+tXCTGyVdC1a4SL8RUQYEwk/f34ZX8UTykN68FwrqAA==} + engines: {node: '>= 0.6'} + statuses@2.0.1: resolution: {integrity: sha512-RwNA9Z/7PrK06rYLIzFMlaF+l73iwpzsqRIFgbMLbTcLD6cOao82TaWefPXQvB2fOC4AjuYSEndS7N/mTCbkdQ==} engines: {node: '>= 0.8'} @@ -1871,6 +1961,10 @@ packages: resolution: {integrity: sha512-iq6eVVI64nQQTRYq2KtEg2d2uU7LElhTJwsH4YzIHZshxlgZms/wIc4VoDQTlG/IvVIrBKG06CrZnp0qv7hkcQ==} engines: {node: '>=12'} + strip-final-newline@3.0.0: + resolution: {integrity: sha512-dOESqjYr96iWYylGObzd39EuNTa5VJxyvVAEm5Jnh7KGo75V43Hk1odPQkNDyXNmUR6k+gEiDVXnjB8HJ3crXw==} + engines: {node: '>=12'} + strip-json-comments@3.1.1: resolution: {integrity: sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==} engines: {node: '>=8'} @@ -1932,6 +2026,10 @@ packages: resolution: {integrity: sha512-XleUoc9uwGXqjWwXaUTZAmzMcFZ5858QA2vvx1Ur5xIcixXIP+8LnFDgRplU30us6teqdlskFfu+ae4K79Ooew==} engines: {node: '>= 0.8.0'} + type-is@1.6.18: + resolution: {integrity: sha512-TkRKr9sUTxEH8MdfuCSP7VizJyzRNMjj2J2do2Jr3Kym598JVdEksuzPQCnlFPW4ky9Q+iA+ma9BGm06XQBy8g==} + engines: {node: '>= 0.6'} + type-is@2.0.1: resolution: {integrity: sha512-OZs6gsjF4vMp32qrCbiVSkrFmXtG/AZhY3t0iAMrMBiAZyV9oALtXO8hsrHbMXF9x6L3grlFuwW2oAz7cav+Gw==} engines: {node: '>= 
0.6'} @@ -2069,7 +2167,6 @@ snapshots: '@ai-sdk/provider': 1.1.3 '@ai-sdk/provider-utils': 2.2.8(zod@3.25.67) zod: 3.25.67 - optional: true '@ai-sdk/azure@1.3.23(zod@3.25.67)': dependencies: @@ -2128,7 +2225,6 @@ snapshots: '@ai-sdk/provider': 1.1.3 '@ai-sdk/provider-utils': 2.2.8(zod@3.25.67) zod: 3.25.67 - optional: true '@ai-sdk/perplexity@1.1.9(zod@3.25.67)': dependencies: @@ -2654,8 +2750,18 @@ snapshots: base64-js@1.5.1: {} + basic-auth@0.0.1: {} + + basic-auth@2.0.1: + dependencies: + safe-buffer: 5.1.2 + bignumber.js@9.3.0: {} + bluebird@2.11.0: {} + + bluebird@3.7.2: {} + body-parser@2.2.0: dependencies: bytes: 3.1.2 @@ -2722,6 +2828,13 @@ snapshots: slice-ansi: 5.0.0 string-width: 7.2.0 + co-bluebird@1.1.0: + dependencies: + bluebird: 2.11.0 + co-use: 1.1.0 + + co-use@1.1.0: {} + color-convert@2.0.1: dependencies: color-name: 1.1.4 @@ -2734,6 +2847,8 @@ snapshots: dependencies: delayed-stream: 1.0.0 + commander@11.1.0: {} + commander@14.0.0: {} concat-map@0.0.1: {} @@ -3071,6 +3186,18 @@ snapshots: dependencies: eventsource-parser: 3.0.3 + execa@8.0.1: + dependencies: + cross-spawn: 7.0.6 + get-stream: 8.0.1 + human-signals: 5.0.0 + is-stream: 3.0.0 + merge-stream: 2.0.0 + npm-run-path: 5.3.0 + onetime: 6.0.0 + signal-exit: 4.1.0 + strip-final-newline: 3.0.0 + express-rate-limit@7.5.1(express@5.1.0): dependencies: express: 5.1.0 @@ -3253,6 +3380,8 @@ snapshots: dunder-proto: 1.0.1 es-object-atoms: 1.1.1 + get-stream@8.0.1: {} + get-symbol-description@1.1.0: dependencies: call-bound: 1.0.4 @@ -3354,6 +3483,8 @@ snapshots: transitivePeerDependencies: - supports-color + human-signals@5.0.0: {} + humanize-ms@1.2.1: dependencies: ms: 2.1.3 @@ -3451,6 +3582,8 @@ snapshots: has-tostringtag: 1.0.2 safe-regex-test: 1.1.0 + is-generator@1.0.3: {} + is-glob@4.0.3: dependencies: is-extglob: 2.1.1 @@ -3483,6 +3616,8 @@ snapshots: is-stream@2.0.1: {} + is-stream@3.0.0: {} + is-string@1.1.1: dependencies: call-bound: 1.0.4 @@ -3607,6 +3742,8 @@ snapshots: 
lodash.merge@4.6.2: {} + lodash@4.17.19: {} + log-update@6.1.0: dependencies: ansi-escapes: 7.0.0 @@ -3621,10 +3758,30 @@ snapshots: math-intrinsics@1.1.0: {} + mcpvals@0.0.1(react@19.1.0): + dependencies: + '@ai-sdk/anthropic': 1.2.12(zod@3.25.67) + '@ai-sdk/openai': 1.3.22(zod@3.25.67) + '@modelcontextprotocol/sdk': 1.13.1 + ai: 4.3.16(react@19.1.0)(zod@3.25.67) + chalk: 5.4.1 + commander: 11.1.0 + execa: 8.0.1 + node-oauth2-server: 2.4.0 + oauth2-server: 3.1.1 + zod: 3.25.67 + transitivePeerDependencies: + - react + - supports-color + + media-typer@0.3.0: {} + media-typer@1.1.0: {} merge-descriptors@2.0.0: {} + merge-stream@2.0.0: {} + merge2@1.4.1: {} micromatch@4.0.8: @@ -3644,6 +3801,8 @@ snapshots: dependencies: mime-db: 1.54.0 + mimic-fn@4.0.0: {} + mimic-function@5.0.1: {} minimatch@3.1.2: @@ -3672,6 +3831,23 @@ snapshots: dependencies: whatwg-url: 5.0.0 + node-oauth2-server@2.4.0: + dependencies: + basic-auth: 0.0.1 + + npm-run-path@5.3.0: + dependencies: + path-key: 4.0.0 + + oauth2-server@3.1.1: + dependencies: + basic-auth: 2.0.1 + bluebird: 3.7.2 + lodash: 4.17.19 + promisify-any: 2.0.1 + statuses: 1.5.0 + type-is: 1.6.18 + object-assign@4.1.1: {} object-inspect@1.13.4: {} @@ -3727,6 +3903,10 @@ snapshots: dependencies: wrappy: 1.0.2 + onetime@6.0.0: + dependencies: + mimic-fn: 4.0.0 + onetime@7.0.0: dependencies: mimic-function: 5.0.1 @@ -3784,6 +3964,8 @@ snapshots: path-key@3.1.1: {} + path-key@4.0.0: {} + path-parse@1.0.7: {} path-to-regexp@8.2.0: {} @@ -3848,6 +4030,12 @@ snapshots: process-warning@5.0.0: {} + promisify-any@2.0.1: + dependencies: + bluebird: 2.11.0 + co-bluebird: 1.1.0 + is-generator: 1.0.3 + prop-types@15.8.1: dependencies: loose-envify: 1.4.0 @@ -3960,6 +4148,8 @@ snapshots: has-symbols: 1.1.0 isarray: 2.0.5 + safe-buffer@5.1.2: {} + safe-buffer@5.2.1: {} safe-push-apply@1.0.0: @@ -4097,6 +4287,8 @@ snapshots: split2@4.2.0: {} + statuses@1.5.0: {} + statuses@2.0.1: {} statuses@2.0.2: {} @@ -4162,6 +4354,8 @@ snapshots: 
dependencies: ansi-regex: 6.1.0 + strip-final-newline@3.0.0: {} + strip-json-comments@3.1.1: {} supports-color@7.2.0: @@ -4215,6 +4409,11 @@ snapshots: dependencies: prelude-ls: 1.2.1 + type-is@1.6.18: + dependencies: + media-typer: 0.3.0 + mime-types: 2.1.35 + type-is@2.0.1: dependencies: content-type: 1.0.5 diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index 053bcf6..33dd6e3 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -1,5 +1,2 @@ packages: - - . - -overrides: - mcpvals: link:../../mcpvals + - . \ No newline at end of file