Skip to content

Commit 6ee1759

Browse files
jirispilkaclaude
andauthored
fix: Log MCP client errors and disconnects as softFail (#679)
* fix: Implement soft-fail logging for MCP client errors * fix: sanitize errMessage in softFail paths of logHttpError Both softFail branches passed the raw error.message to the errMessage key. Per the Mezmo promotion rule, any message containing " error:" (e.g. SDK ParseError: "Parse error: ...") gets re-promoted to error level, defeating the purpose of softFail. Apply .replace(/ error:/gi, ' failure:') once at the top and reuse for both the HTTP < 500 path and the new MCP soft-code path. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent da68c7c commit 6ee1759

2 files changed

Lines changed: 90 additions & 22 deletions

File tree

src/mcp/server.ts

Lines changed: 35 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -406,10 +406,14 @@ export class ActorsMcpServer {
406406

407407
private setupErrorHandling(setupSIGINTHandler = true): void {
408408
this.server.onerror = (error) => {
409-
if (error.message?.includes('No connection established')) {
409+
// Client-disconnect noise from the MCP SDK (protocol.js). Two messages we see in prod:
410+
// - "No connection established" (sendRequest before transport attached)
411+
// - "Failed to send response: Error: Not connected" (client vanished mid-flight)
412+
// Both are expected; log as softFail so they don't flood Mezmo error alerts.
413+
if (/Not connected|No connection established/i.test(error.message ?? '')) {
410414
// Mezmo (logDNA) promotes log entries to errors when the message contains "error".
411415
// Use errMessage key and sanitize the string to preserve the soft-fail log level.
412-
const errMessage = error.message.replace(/ error:/gi, ' failure:');
416+
const errMessage = (error.message ?? '').replace(/ error:/gi, ' failure:');
413417
log.softFail('MCP client disconnected before response could be sent', { errMessage });
414418
} else {
415419
log.error('[MCP Error]', { error });
@@ -1286,16 +1290,35 @@ export class ActorsMcpServer {
12861290
failure_detail: failureDetail,
12871291
...buildActorFields(actorName, actorId),
12881292
};
1289-
log.error('Error executing tool for task', {
1290-
taskId,
1291-
toolName: tool.name,
1292-
toolStatus,
1293-
mcpSessionId,
1294-
failureCategory: callDiagnostics.failure_category,
1295-
failureHttpStatus: callDiagnostics.failure_http_status,
1296-
actorName: callDiagnostics.actor_name,
1297-
error,
1298-
});
1293+
// Log level follows the already-classified toolStatus:
1294+
// SOFT_FAIL (e.g. 402/403 user quota, client-side issues) → softFail
1295+
// FAILED/ABORTED/other → error
1296+
if (toolStatus === TOOL_STATUS.SOFT_FAIL) {
1297+
// Mezmo promotes on "error" in message/keys — use errMessage key, sanitized.
1298+
const errMessage = (error instanceof Error ? error.message : String(error))
1299+
.replace(/ error:/gi, ' failure:');
1300+
log.softFail('Tool execution soft-failed for task', {
1301+
taskId,
1302+
toolName: tool.name,
1303+
toolStatus,
1304+
mcpSessionId,
1305+
failureCategory: callDiagnostics.failure_category,
1306+
failureHttpStatus: callDiagnostics.failure_http_status,
1307+
actorName: callDiagnostics.actor_name,
1308+
errMessage,
1309+
});
1310+
} else {
1311+
log.error('Error executing tool for task', {
1312+
taskId,
1313+
toolName: tool.name,
1314+
toolStatus,
1315+
mcpSessionId,
1316+
failureCategory: callDiagnostics.failure_category,
1317+
failureHttpStatus: callDiagnostics.failure_http_status,
1318+
actorName: callDiagnostics.actor_name,
1319+
error,
1320+
});
1321+
}
12991322
const userText = getToolCallErrorUserText(tool.name, error);
13001323

13011324
// Check if task was cancelled before storing result

src/utils/logging.ts

Lines changed: 55 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import { ErrorCode } from '@modelcontextprotocol/sdk/types.js';
2+
13
import log from '@apify/log';
24

35
/**
@@ -29,28 +31,71 @@ export function getHttpStatusCode(error: unknown): number | undefined {
2931
}
3032

3133
/**
32-
* Logs HTTP errors based on status code, following apify-core pattern.
33-
* Uses `softFail` for status < 500 (API client errors) and `exception` for status >= 500 (API server errors).
34+
* Client/caller faults and transient transport conditions that shouldn't trigger error alerts.
35+
* Anything else in the JSON-RPC reserved range (-32768..-32000) is treated as a server fault.
36+
*/
37+
const SOFT_MCP_ERROR_CODES: ReadonlySet<number> = new Set([
38+
ErrorCode.ParseError,
39+
ErrorCode.InvalidRequest,
40+
ErrorCode.MethodNotFound,
41+
ErrorCode.InvalidParams,
42+
ErrorCode.ConnectionClosed,
43+
ErrorCode.RequestTimeout,
44+
]);
45+
46+
/**
47+
* Extract a JSON-RPC error code from an `McpError`-shaped object.
48+
* Returns `undefined` if the `code` field is absent or outside the JSON-RPC reserved range.
49+
*/
50+
function getMcpErrorCode(error: unknown): number | undefined {
51+
if (typeof error !== 'object' || error === null || !('code' in error)) return undefined;
52+
const { code } = error as { code?: unknown };
53+
if (typeof code === 'number' && code >= -32768 && code <= -32000) return code;
54+
return undefined;
55+
}
56+
57+
/**
58+
* Logs HTTP or MCP errors at the appropriate level:
59+
* - Client errors (HTTP < 500, or JSON-RPC client/transient codes) → softFail (no stack).
60+
* - Server errors (HTTP >= 500, or JSON-RPC server codes) → exception (with stack).
61+
* - Anything unclassifiable → error.
3462
*
3563
* @param error - The error object
3664
* @param message - The log message
3765
* @param data - Additional data to include in the log
3866
*/
3967
export function logHttpError<T extends object>(error: unknown, message: string, data?: T): void {
4068
const statusCode = getHttpStatusCode(error);
41-
const errorMessage = error instanceof Error ? error.message : String(error);
69+
const rawErrorMessage = error instanceof Error ? error.message : String(error);
70+
// Mezmo (logDNA) promotes log entries to error level when message/keys contain "error".
71+
// Sanitize for softFail paths (see CONTRIBUTING.md § Logging → Mezmo promotion rule).
72+
const softErrMessage = rawErrorMessage.replace(/ error:/gi, ' failure:');
4273

4374
if (statusCode !== undefined && statusCode < 500) {
44-
// Client errors (< 500) - log as softFail without stack trace
45-
log.softFail(message, { errMessage: errorMessage, statusCode, ...data });
46-
} else if (statusCode !== undefined && statusCode >= 500) {
47-
// Server errors (>= 500) - log as exception with full error (includes stack trace)
75+
// HTTP client errors (< 500) - softFail without stack trace
76+
log.softFail(message, { errMessage: softErrMessage, statusCode, ...data });
77+
return;
78+
}
79+
if (statusCode !== undefined && statusCode >= 500) {
80+
// HTTP server errors (>= 500) - exception with full error (includes stack trace)
4881
const errorObj = error instanceof Error ? error : new Error(String(error));
4982
log.exception(errorObj, message, { statusCode, ...data });
50-
} else {
51-
// No status code available - log as error
52-
log.error(message, { error, ...data });
83+
return;
5384
}
85+
86+
const mcpErrorCode = getMcpErrorCode(error);
87+
if (mcpErrorCode !== undefined) {
88+
if (SOFT_MCP_ERROR_CODES.has(mcpErrorCode)) {
89+
log.softFail(message, { errMessage: softErrMessage, mcpErrorCode, ...data });
90+
} else {
91+
const errorObj = error instanceof Error ? error : new Error(String(error));
92+
log.exception(errorObj, message, { mcpErrorCode, ...data });
93+
}
94+
return;
95+
}
96+
97+
// No status code available - log as error
98+
log.error(message, { error, ...data });
5499
}
55100

56101
const SKYFIRE_PAY_ID_KEY = 'skyfire-pay-id';

0 commit comments

Comments
 (0)