Skip to content

Commit 99743d9

Browse files
waleedlatif1 and claude committed
fix(knowledge): cap Gemini batches at 100 items, add singular GEMINI_API_KEY fallback
- Gemini's batchEmbedContents API rejects requests with more than 100 items. The token-based batcher could pack hundreds of short chunks into a single request, causing 400s. Add maxItemsPerRequest on ResolvedProvider and split token batches further when set.
- Mirror resolveOpenAIKey by accepting GEMINI_API_KEY (singular) as a fallback before requiring the rotating GEMINI_API_KEY_1/2/3 keys.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1 parent 64a17db commit 99743d9

2 files changed

Lines changed: 24 additions & 2 deletions

File tree

apps/sim/lib/core/config/env.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ export const env = createEnv({
9696
ANTHROPIC_API_KEY_1: z.string().min(1).optional(), // Primary Anthropic Claude API key
9797
ANTHROPIC_API_KEY_2: z.string().min(1).optional(), // Additional Anthropic API key for load balancing
9898
ANTHROPIC_API_KEY_3: z.string().min(1).optional(), // Additional Anthropic API key for load balancing
99+
GEMINI_API_KEY: z.string().min(1).optional(), // Singular Gemini API key (used as fallback when rotation keys are unset)
99100
GEMINI_API_KEY_1: z.string().min(1).optional(), // Primary Gemini API key
100101
GEMINI_API_KEY_2: z.string().min(1).optional(), // Additional Gemini API key for load balancing
101102
GEMINI_API_KEY_3: z.string().min(1).optional(), // Additional Gemini API key for load balancing

apps/sim/lib/knowledge/embeddings.ts

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,14 @@ interface ResolvedProvider {
5151
isBYOK: boolean
5252
/** Tokenizer used to estimate tokens when the API does not return a usage field. */
5353
tokenizerProvider: TokenizerProviderId
54+
/** Hard per-request item cap enforced by the provider (e.g. Gemini caps at 100). */
55+
maxItemsPerRequest?: number
5456
buildRequest: (inputs: string[], inputType: EmbeddingInputType) => ProviderRequest
5557
}
5658

59+
/** Gemini's `batchEmbedContents` rejects requests with more than 100 items. */
60+
const GEMINI_MAX_ITEMS_PER_REQUEST = 100
61+
5762
async function resolveOpenAIKey(workspaceId?: string | null): Promise<{
5863
apiKey: string
5964
isBYOK: boolean
@@ -86,11 +91,14 @@ async function resolveGeminiKey(workspaceId?: string | null): Promise<{
8691
return { apiKey: byokResult.apiKey, isBYOK: true }
8792
}
8893
}
94+
if (env.GEMINI_API_KEY) {
95+
return { apiKey: env.GEMINI_API_KEY, isBYOK: false }
96+
}
8997
try {
9098
return { apiKey: getRotatingApiKey('gemini'), isBYOK: false }
9199
} catch {
92100
throw new Error(
93-
'GEMINI_API_KEY_1, GEMINI_API_KEY_2, or GEMINI_API_KEY_3 must be configured for Gemini embeddings'
101+
'GEMINI_API_KEY (or GEMINI_API_KEY_1/2/3 for rotation) must be configured for Gemini embeddings'
94102
)
95103
}
96104
}
@@ -243,6 +251,7 @@ async function resolveProvider(
243251
pricingId: info.pricingId,
244252
isBYOK,
245253
tokenizerProvider: info.tokenizerProvider,
254+
maxItemsPerRequest: GEMINI_MAX_ITEMS_PER_REQUEST,
246255
buildRequest: buildGeminiProvider(embeddingModel, apiKey),
247256
}
248257
}
@@ -304,6 +313,15 @@ async function callEmbeddingAPI(
304313
)
305314
}
306315

316+
function splitByItemLimit<T>(items: T[], limit: number): T[][] {
317+
if (items.length <= limit) return [items]
318+
const result: T[][] = []
319+
for (let i = 0; i < items.length; i += limit) {
320+
result.push(items.slice(i, i + limit))
321+
}
322+
return result
323+
}
324+
307325
async function processWithConcurrency<T, R>(
308326
items: T[],
309327
concurrency: number,
@@ -342,7 +360,10 @@ export async function generateEmbeddings(
342360
): Promise<GenerateEmbeddingsResult> {
343361
const provider = await resolveProvider(embeddingModel, workspaceId)
344362

345-
const batches = batchByTokenLimit(texts, MAX_TOKENS_PER_REQUEST, embeddingModel)
363+
const tokenBatches = batchByTokenLimit(texts, MAX_TOKENS_PER_REQUEST, embeddingModel)
364+
const batches = provider.maxItemsPerRequest
365+
? tokenBatches.flatMap((batch) => splitByItemLimit(batch, provider.maxItemsPerRequest!))
366+
: tokenBatches
346367

347368
const batchResults = await processWithConcurrency(
348369
batches,

0 commit comments

Comments (0)