salesforcecli · mdonnalley · Jun 10, 2025 · Jun 10, 2025 · Jun 10, 2025 · Jun 10, 2025
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -7,6 +7,23 @@ on:
 jobs:
   yarn-lockfile-check:
     uses: salesforcecli/github-workflows/.github/workflows/lockFileCheck.yml@main
+
+  # Detect which files have changed; the `confidence-changed` output gates the confidence-tests job
+  changes:
+    runs-on: ubuntu-latest
+    outputs:
+      confidence-changed: ${{ steps.changes.outputs.confidence }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: dorny/paths-filter@v3
+        id: changes
+        with:
+          filters: |
+            confidence:
+              - 'confidence/**'
+              - 'test/confidence/**'
+              - 'src/tools/**'
+
   # Since the Windows unit tests take much longer, we run the linux unit tests first and then run the windows unit tests in parallel with NUTs
   linux-unit-tests:
     needs: yarn-lockfile-check
@@ -15,6 +32,31 @@ jobs:
     needs: linux-unit-tests
     uses: salesforcecli/github-workflows/.github/workflows/unitTestsWindows.yml@main
 
+  # Run the confidence tests after the Linux unit tests, and only when confidence-related files changed
+  confidence-tests:
+    needs: [linux-unit-tests, changes]
+    runs-on: ubuntu-latest
+    if: ${{ needs.changes.outputs.confidence-changed == 'true' }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-node@v4
+        with:
+          node-version: lts/*
+          cache: yarn
+      - run: yarn install --frozen-lockfile
+      # Note: we cannot parallelize confidence tests since we don't have the rate limits to support it;
+      # the test runner has rate limiting built-in to prevent hitting the API limits within that test run
+      - name: Run confidence tests
+        env: # Scoped to this step so the install step's lifecycle scripts cannot read the secrets
+          SF_MCP_CONFIDENCE_CONSUMER_KEY: ${{ secrets.SF_MCP_CONFIDENCE_CONSUMER_KEY }}
+          SF_MCP_CONFIDENCE_CONSUMER_SECRET: ${{ secrets.SF_MCP_CONFIDENCE_CONSUMER_SECRET }}
+          SF_MCP_CONFIDENCE_INSTANCE_URL: ${{ secrets.SF_MCP_CONFIDENCE_INSTANCE_URL }}
+        run: |
+          for file in test/confidence/*.yml; do
+            echo "Running confidence test for $file"
+            yarn test:confidence --file "$file"
+          done
+
   # Uncomment to enable NUT testing in Github Actions
   # nuts:
   #   needs: linux-unit-tests

diff --git a/DEVELOPING.md b/DEVELOPING.md
@@ -124,9 +124,59 @@ mcp-inspector --cli node bin/run.js --orgs DEFAULT_TARGET_ORG --method tools/lis
 
 Unit tests are run with `yarn test` and use the Mocha test framework. Tests are located in the `test` directory and are named with the pattern, `test/**/*.test.ts`.
 
+### Confidence Tests
+
+Confidence tests validate that the MCP server tools are accurately invoked by various LLM models through the Salesforce LLM Gateway API. These tests ensure that natural language prompts correctly trigger the expected tools with appropriate parameters, maintaining the quality of the AI-powered tool selection.
+
+#### Running Confidence Tests Locally
+
+1. **Set up API access**: Follow this [documentation](https://developer.salesforce.com/docs/einstein/genai/guide/access-models-api-with-rest.html) to set up an External Client App that gives you access to the Models API. Once you have the consumer key and secret from the External Client App, export them, along with your instance URL, as environment variables:
+
+   ```shell
+   export SF_MCP_CONFIDENCE_CONSUMER_KEY=your_client_id_here
+   export SF_MCP_CONFIDENCE_CONSUMER_SECRET=your_client_secret_here
+   export SF_MCP_CONFIDENCE_INSTANCE_URL=https://your_instance.salesforce.com
+   ```
+
+   These environment variables are used to generate a JWT that authenticates requests to the Models API.
+
+2. **Run a confidence test**:
+   ```shell
+   yarn test:confidence --file test/confidence/sf-deploy-metadata.yml
+   ```
+
+#### Test Structure
+
+Confidence tests are defined in YAML files located in `test/confidence/`. Each test file specifies:
+
+- **Models**: Which LLM models to test against. See the Agentforce Developer Guide for [available models](https://developer.salesforce.com/docs/einstein/genai/guide/supported-models.html).
+- **Initial Context**: Background information provided to the model
+- **Test Cases**: Natural language utterances with expected tool invocations and confidence thresholds
+
+The tests run multiple iterations (default: 5) to calculate confidence levels and ensure consistent tool selection across different model runs. This can be adjusted by passing the `--runs` flag when running the tests, like this:
+
+```shell
+yarn test:confidence --file test/confidence/sf-deploy-metadata.yml --runs 2
+```
+
+#### Understanding Test Results
+
+Tests measure two types of confidence:
+
+- **Tool Confidence**: Whether the correct tool was invoked
+- **Parameter Confidence**: Whether the tool was called with the expected parameters
+
+Failed tests indicate that either:
+
+1. The model selected the wrong tool for a given prompt
+2. The model selected the correct tool but with incorrect parameters
+3. The confidence level fell below the specified threshold
+
+These failures help identify areas where tool descriptions or agent instructions need improvement.
+
 ## Debugging
 
-> [!NOTE]  
+> [!NOTE]
 > This section assumes you're using Visual Studio Code (VS Code).
 
 You can use the VS Code debugger with the MCP Inspector CLI to step through the code of your MCP tools:
@@ -150,7 +200,7 @@ MCP_SERVER_REQUEST_TIMEOUT=120000 mcp-inspector --cli node --inspect-brk bin/run
 We suggest you set `MCP_SERVER_REQUEST_TIMEOUT` to 120000ms (2 minutes) to allow longer debugging sessions without having the MCP Inspector client timeout.
 For other configuration values see: https://github.com/modelcontextprotocol/inspector?tab=readme-ov-file#configuration
 
-> [!IMPORTANT]  
+> [!IMPORTANT]
 > You must compile the local MCP server using `yarn compile` after every change in a TypeScript file, otherwise breakpoints in the TypeScript files might not match the running JavaScript code.
 
 ## Useful yarn Commands

diff --git a/confidence/.eslintrc.cjs b/confidence/.eslintrc.cjs
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2025, Salesforce, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+module.exports = {
+  extends: '../.eslintrc.cjs',
+  parserOptions: {
+    project: [
+      './tsconfig.json',
+      './test/tsconfig.json',
+      './confidence/tsconfig.json', // NOTE(review): confirm these paths resolve from the repo root, not from this confidence/ directory
+    ],
+  },
+  rules: {
+    'import/no-extraneous-dependencies': ['error', { devDependencies: true }],
+  },
+};
diff --git a/confidence/bin/dev.js b/confidence/bin/dev.js
@@ -0,0 +1,23 @@
+#!/usr/bin/env -S node --loader ts-node/esm --disable-warning=ExperimentalWarning
+
+import { dirname } from 'node:path';
+import { execute } from '@oclif/core';
+
+process.env.NODE_TLS_REJECT_UNAUTHORIZED = '0'; // Turns off TLS certificate verification for this process; intended for local testing only
+await execute({
+  development: true,
+  dir: import.meta.url,
+  loadOptions: {
+    root: dirname(import.meta.dirname),
+    pjson: {
+      name: 'mcp-test',
+      version: '1.0.0',
+      oclif: {
+        bin: 'mcp-test',
+        dirname: 'mcp-test',
+        commands: './lib/commands',
+        topicSeparator: ' ',
+      },
+    },
+  },
+});
diff --git a/confidence/bin/run.js b/confidence/bin/run.js
@@ -0,0 +1,22 @@
+#!/usr/bin/env node
+
+import { dirname } from 'node:path';
+import { execute } from '@oclif/core';
+
+process.env.NODE_TLS_REJECT_UNAUTHORIZED = '0'; // Turns off TLS certificate verification for this process. NOTE(review): meant for local testing, yet this is the non-dev entry point — confirm
+await execute({
+  dir: import.meta.url,
+  loadOptions: {
+    root: dirname(import.meta.dirname),
+    pjson: {
+      name: 'mcp-test',
+      version: '1.0.0',
+      oclif: {
+        bin: 'mcp-test',
+        dirname: 'mcp-test',
+        commands: './lib/commands',
+        topicSeparator: ' ',
+      },
+    },
+  },
+});