Skip to content

Commit 99743d9

Browse files
waleedlatif1 and claude committed
fix(knowledge): cap Gemini batches at 100 items, add singular GEMINI_API_KEY fallback
- Gemini's batchEmbedContents API rejects requests with more than 100 items. The token-based batcher could pack hundreds of short chunks into a single request, causing 400s. Add maxItemsPerRequest on ResolvedProvider and split token batches further when set.
- Mirror resolveOpenAIKey by accepting GEMINI_API_KEY (singular) as a fallback before requiring the rotating GEMINI_API_KEY_1/2/3 keys.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1 parent 64a17db commit 99743d9

2 files changed

Lines changed: 24 additions & 2 deletions

File tree

apps/sim/lib/core/config/env.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ export const env = createEnv({
9696
ANTHROPIC_API_KEY_1: z.string().min(1).optional(), // Primary Anthropic Claude API key
9797
ANTHROPIC_API_KEY_2: z.string().min(1).optional(), // Additional Anthropic API key for load balancing
9898
ANTHROPIC_API_KEY_3: z.string().min(1).optional(), // Additional Anthropic API key for load balancing
99+
GEMINI_API_KEY: z.string().min(1).optional(), // Singular Gemini API key (used as fallback when rotation keys are unset)
99100
GEMINI_API_KEY_1: z.string().min(1).optional(), // Primary Gemini API key
100101
GEMINI_API_KEY_2: z.string().min(1).optional(), // Additional Gemini API key for load balancing
101102
GEMINI_API_KEY_3: z.string().min(1).optional(), // Additional Gemini API key for load balancing

apps/sim/lib/knowledge/embeddings.ts

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,14 @@ interface ResolvedProvider {
5151
isBYOK: boolean
5252
/** Tokenizer used to estimate tokens when the API does not return a usage field. */
5353
tokenizerProvider: TokenizerProviderId
54+
/** Hard per-request item cap enforced by the provider (e.g. Gemini caps at 100). */
55+
maxItemsPerRequest?: number
5456
buildRequest: (inputs: string[], inputType: EmbeddingInputType) => ProviderRequest
5557
}
5658

59+
/** Gemini's `batchEmbedContents` rejects requests with more than 100 items. */
60+
const GEMINI_MAX_ITEMS_PER_REQUEST = 100
61+
5762
async function resolveOpenAIKey(workspaceId?: string | null): Promise<{
5863
apiKey: string
5964
isBYOK: boolean
@@ -86,11 +91,14 @@ async function resolveGeminiKey(workspaceId?: string | null): Promise<{
8691
return { apiKey: byokResult.apiKey, isBYOK: true }
8792
}
8893
}
94+
if (env.GEMINI_API_KEY) {
95+
return { apiKey: env.GEMINI_API_KEY, isBYOK: false }
96+
}
8997
try {
9098
return { apiKey: getRotatingApiKey('gemini'), isBYOK: false }
9199
} catch {
92100
throw new Error(
93-
'GEMINI_API_KEY_1, GEMINI_API_KEY_2, or GEMINI_API_KEY_3 must be configured for Gemini embeddings'
101+
'GEMINI_API_KEY (or GEMINI_API_KEY_1/2/3 for rotation) must be configured for Gemini embeddings'
94102
)
95103
}
96104
}
@@ -243,6 +251,7 @@ async function resolveProvider(
243251
pricingId: info.pricingId,
244252
isBYOK,
245253
tokenizerProvider: info.tokenizerProvider,
254+
maxItemsPerRequest: GEMINI_MAX_ITEMS_PER_REQUEST,
246255
buildRequest: buildGeminiProvider(embeddingModel, apiKey),
247256
}
248257
}
@@ -304,6 +313,15 @@ async function callEmbeddingAPI(
304313
)
305314
}
306315

316+
function splitByItemLimit<T>(items: T[], limit: number): T[][] {
317+
if (items.length <= limit) return [items]
318+
const result: T[][] = []
319+
for (let i = 0; i < items.length; i += limit) {
320+
result.push(items.slice(i, i + limit))
321+
}
322+
return result
323+
}
324+
307325
async function processWithConcurrency<T, R>(
308326
items: T[],
309327
concurrency: number,
@@ -342,7 +360,10 @@ export async function generateEmbeddings(
342360
): Promise<GenerateEmbeddingsResult> {
343361
const provider = await resolveProvider(embeddingModel, workspaceId)
344362

345-
const batches = batchByTokenLimit(texts, MAX_TOKENS_PER_REQUEST, embeddingModel)
363+
const tokenBatches = batchByTokenLimit(texts, MAX_TOKENS_PER_REQUEST, embeddingModel)
364+
const batches = provider.maxItemsPerRequest
365+
? tokenBatches.flatMap((batch) => splitByItemLimit(batch, provider.maxItemsPerRequest!))
366+
: tokenBatches
346367

347368
const batchResults = await processWithConcurrency(
348369
batches,

0 commit comments

Comments (0)