simstudioai · waleedlatif1 · Apr 30, 2026 · Apr 30, 2026 · Apr 30, 2026 · Apr 30, 2026
diff --git a/apps/docs/content/docs/en/tools/firecrawl.mdx b/apps/docs/content/docs/en/tools/firecrawl.mdx
@@ -254,6 +254,8 @@ Parse uploaded documents (PDF, DOCX, HTML, etc.) into clean markdown using Firec
 | `proxy` | string | No | Proxy mode: "basic" or "auto" |
 | `zeroDataRetention` | boolean | No | Enable zero data retention. Defaults to false. |
 | `apiKey` | string | Yes | Firecrawl API key |
+| `pricing` | custom | No | No description |
+| `metadata` | string | No | No description |
 | `rateLimit` | string | No | No description |
 
 #### Output

diff --git a/apps/docs/content/docs/en/tools/knowledge.mdx b/apps/docs/content/docs/en/tools/knowledge.mdx
@@ -47,6 +47,8 @@ Search for similar content in a knowledge base using vector similarity
 | `properties` | string | No | No description |
 | `tagName` | string | No | No description |
 | `tagValue` | string | No | No description |
+| `rerankerEnabled` | boolean | No | Whether to apply Cohere reranking to vector search results |
+| `rerankerModel` | string | No | Cohere rerank model to use \(one of: rerank-v4.0-pro, rerank-v4.0-fast, rerank-v3.5\) |
 | `tagFilters` | string | No | No description |
 
 #### Output

diff --git a/apps/sim/app/api/knowledge/[id]/documents/[documentId]/chunks/route.ts b/apps/sim/app/api/knowledge/[id]/documents/[documentId]/chunks/route.ts
@@ -213,9 +213,11 @@ export const POST = withRouteHandler(
           accessCheck.knowledgeBase?.workspaceId
         )
 
+        const chunkEmbeddingModel =
+          accessCheck.knowledgeBase?.embeddingModel ?? 'text-embedding-3-small'
         let cost = null
         try {
-          cost = calculateCost('text-embedding-3-small', newChunk.tokenCount, 0, false)
+          cost = calculateCost(chunkEmbeddingModel, newChunk.tokenCount, 0, false)
         } catch (error) {
           logger.warn(`[${requestId}] Failed to calculate cost for chunk upload`, {
             error: error instanceof Error ? error.message : 'Unknown error',
@@ -240,7 +242,7 @@ export const POST = withRouteHandler(
                       completion: 0,
                       total: newChunk.tokenCount,
                     },
-                    model: 'text-embedding-3-small',
+                    model: chunkEmbeddingModel,
                     pricing: cost.pricing,
                   },
                 }

diff --git a/apps/sim/app/api/knowledge/route.ts b/apps/sim/app/api/knowledge/route.ts
@@ -6,6 +6,11 @@ import { getSession } from '@/lib/auth'
 import { PlatformEvents } from '@/lib/core/telemetry'
 import { generateRequestId } from '@/lib/core/utils/request'
 import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
+import {
+  DEFAULT_EMBEDDING_MODEL,
+  EMBEDDING_DIMENSIONS,
+  SUPPORTED_EMBEDDING_MODEL_IDS,
+} from '@/lib/knowledge/embeddings'
 import {
   createKnowledgeBase,
   getKnowledgeBases,
@@ -20,8 +25,10 @@ const CreateKnowledgeBaseSchema = z.object({
   name: z.string().min(1, 'Name is required'),
   description: z.string().optional(),
   workspaceId: z.string().min(1, 'Workspace ID is required'),
-  embeddingModel: z.literal('text-embedding-3-small').default('text-embedding-3-small'),
-  embeddingDimension: z.literal(1536).default(1536),
+  embeddingModel: z
+    .enum(SUPPORTED_EMBEDDING_MODEL_IDS as [string, ...string[]])
+    .default(DEFAULT_EMBEDDING_MODEL),
+  embeddingDimension: z.literal(EMBEDDING_DIMENSIONS).default(EMBEDDING_DIMENSIONS),
   chunkingConfig: z
     .object({
       maxSize: z.number().min(100).max(4000).default(1024),

diff --git a/apps/sim/app/api/knowledge/search/route.test.ts b/apps/sim/app/api/knowledge/search/route.test.ts
@@ -432,6 +432,7 @@ describe('Knowledge Search API Route', () => {
           userId: 'user-123',
           name: 'Test KB',
           deletedAt: null,
+          embeddingModel: 'text-embedding-3-small',
         },
       })
 
@@ -524,6 +525,7 @@ describe('Knowledge Search API Route', () => {
             userId: 'user-123',
             name: 'Test KB',
             deletedAt: null,
+            embeddingModel: 'text-embedding-3-small',
           },
         })
 
@@ -571,6 +573,7 @@ describe('Knowledge Search API Route', () => {
             userId: 'user-123',
             name: 'Test KB',
             deletedAt: null,
+            embeddingModel: 'text-embedding-3-small',
           },
         })
 
@@ -625,6 +628,7 @@ describe('Knowledge Search API Route', () => {
             userId: 'user-123',
             name: 'Test KB',
             deletedAt: null,
+            embeddingModel: 'text-embedding-3-small',
           },
         })
 
@@ -694,6 +698,7 @@ describe('Knowledge Search API Route', () => {
           userId: 'user-123',
           name: 'Test KB',
           deletedAt: null,
+          embeddingModel: 'text-embedding-3-small',
         },
       })
 
@@ -739,6 +744,7 @@ describe('Knowledge Search API Route', () => {
           userId: 'user-123',
           name: 'Test KB',
           deletedAt: null,
+          embeddingModel: 'text-embedding-3-small',
         },
       })
 
@@ -877,6 +883,7 @@ describe('Knowledge Search API Route', () => {
           userId: 'user-123',
           name: 'Test KB',
           deletedAt: null,
+          embeddingModel: 'text-embedding-3-small',
         },
       })
 
@@ -921,11 +928,17 @@ describe('Knowledge Search API Route', () => {
             userId: 'user-123',
             name: 'Test KB',
             deletedAt: null,
+            embeddingModel: 'text-embedding-3-small',
           },
         })
         .mockResolvedValueOnce({
           hasAccess: true,
-          knowledgeBase: { id: 'kb-456', userId: 'user-123', name: 'Test KB 2' },
+          knowledgeBase: {
+            id: 'kb-456',
+            userId: 'user-123',
+            name: 'Test KB 2',
+            embeddingModel: 'text-embedding-3-small',
+          },
         })
 
       mockGetDocumentTagDefinitions.mockResolvedValue(mockTagDefinitions)

diff --git a/apps/sim/app/api/knowledge/search/route.ts b/apps/sim/app/api/knowledge/search/route.ts
@@ -7,6 +7,8 @@ import { PlatformEvents } from '@/lib/core/telemetry'
 import { generateRequestId } from '@/lib/core/utils/request'
 import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
 import { ALL_TAG_SLOTS } from '@/lib/knowledge/constants'
+import { getEmbeddingModelInfo } from '@/lib/knowledge/embedding-models'
+import { DEFAULT_RERANKER_MODEL, rerank, SUPPORTED_RERANKER_MODELS } from '@/lib/knowledge/reranker'
 import { getDocumentTagDefinitions } from '@/lib/knowledge/tags/service'
 import { buildUndefinedTagsError, validateTagValue } from '@/lib/knowledge/tags/utils'
 import type { StructuredFilter } from '@/lib/knowledge/types'
@@ -20,7 +22,8 @@ import {
   handleVectorOnlySearch,
   type SearchResult,
 } from '@/app/api/knowledge/search/utils'
-import { checkKnowledgeBaseAccess } from '@/app/api/knowledge/utils'
+import { checkKnowledgeBaseAccess, type KnowledgeBaseAccessResult } from '@/app/api/knowledge/utils'
+import { getRerankModelPricing } from '@/providers/models'
 import { calculateCost } from '@/providers/utils'
 
 const logger = createLogger('VectorSearchAPI')
@@ -59,6 +62,11 @@ const VectorSearchSchema = z
       .optional()
       .nullable()
       .transform((val) => val || undefined),
+    rerankerEnabled: z.boolean().optional().default(false),
+    rerankerModel: z
+      .enum(SUPPORTED_RERANKER_MODELS as unknown as [string, ...string[]])
+      .optional()
+      .default(DEFAULT_RERANKER_MODEL),
   })
   .refine(
     (data) => {
@@ -235,11 +243,29 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
         )
       }
 
-      const workspaceId = accessChecks.find((ac) => ac?.hasAccess)?.knowledgeBase?.workspaceId
+      const accessibleKbs = accessChecks
+        .filter((ac): ac is KnowledgeBaseAccessResult => Boolean(ac?.hasAccess))
+        .map((ac) => ac.knowledgeBase)
+      const workspaceId = accessibleKbs[0]?.workspaceId
+
+      const useReranker = validatedData.rerankerEnabled && Boolean(validatedData.query?.trim())
+      const rerankerModel = useReranker ? validatedData.rerankerModel : null
+
+      const embeddingModels = Array.from(new Set(accessibleKbs.map((kb) => kb.embeddingModel)))
+      if (embeddingModels.length > 1) {
+        return NextResponse.json(
+          {
+            error:
+              'Selected knowledge bases use different embedding models and cannot be searched together. Search them separately.',
+          },
+          { status: 400 }
+        )
+      }
+      const queryEmbeddingModel = embeddingModels[0]
 
       const hasQuery = validatedData.query && validatedData.query.trim().length > 0
       const queryEmbeddingPromise = hasQuery
-        ? generateSearchEmbedding(validatedData.query!, undefined, workspaceId)
+        ? generateSearchEmbedding(validatedData.query!, queryEmbeddingModel, workspaceId)
         : Promise.resolve(null)
 
       // Check if any requested knowledge bases were not accessible
@@ -278,6 +304,10 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
 
       const hasFilters = structuredFilters && structuredFilters.length > 0
 
+      // Oversample candidates when reranking so the reranker has more to choose from.
+      // Cap at 100 to bound Cohere request cost (1 search unit = ≤100 docs).
+      const candidateTopK = useReranker ? Math.min(100, validatedData.topK * 4) : validatedData.topK
+
       if (!hasQuery && hasFilters) {
         // Tag-only search without vector similarity
         results = await handleTagOnlySearch({
@@ -291,24 +321,24 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
           `[${requestId}] Executing tag + vector search with filters:`,
           structuredFilters
         )
-        const strategy = getQueryStrategy(accessibleKbIds.length, validatedData.topK)
+        const strategy = getQueryStrategy(accessibleKbIds.length, candidateTopK)
         const queryVector = JSON.stringify(await queryEmbeddingPromise)
 
         results = await handleTagAndVectorSearch({
           knowledgeBaseIds: accessibleKbIds,
-          topK: validatedData.topK,
+          topK: candidateTopK,
           structuredFilters,
           queryVector,
           distanceThreshold: strategy.distanceThreshold,
         })
       } else if (hasQuery && !hasFilters) {
         // Vector-only search
-        const strategy = getQueryStrategy(accessibleKbIds.length, validatedData.topK)
+        const strategy = getQueryStrategy(accessibleKbIds.length, candidateTopK)
         const queryVector = JSON.stringify(await queryEmbeddingPromise)
 
         results = await handleVectorOnlySearch({
           knowledgeBaseIds: accessibleKbIds,
-          topK: validatedData.topK,
+          topK: candidateTopK,
           queryVector,
           distanceThreshold: strategy.distanceThreshold,
         })
@@ -323,13 +353,61 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
         )
       }
 
+      // Optional Cohere rerank pass on top of vector results.
+      const rerankedScores = new Map<string, number>()
+      // `rerankBilled` = Cohere was successfully called (even with 0 results) and we owe the search unit.
+      let rerankBilled = false
+      let rerankIsBYOK = false
+      if (useReranker && rerankerModel && results.length > 0) {
+        const candidateCount = results.length
+        try {
+          const { results: ranked, isBYOK } = await rerank(
+            validatedData.query!,
+            results.map((r) => ({ id: r.id, text: r.content })),
+            { model: rerankerModel, topN: validatedData.topK, workspaceId }
+          )
+          rerankBilled = true
+          rerankIsBYOK = isBYOK
+          if (ranked.length === 0) {
+            logger.warn(
+              `[${requestId}] Reranker returned 0 results; falling back to vector ordering`,
+              { model: rerankerModel, candidateCount }
+            )
+            results = results.slice(0, validatedData.topK)
+          } else {
+            const idToResult = new Map(results.map((r) => [r.id, r]))
+            results = ranked
+              .map((r) => idToResult.get(r.item.id))
+              .filter((r): r is SearchResult => Boolean(r))
+            for (const r of ranked) rerankedScores.set(r.item.id, r.relevanceScore)
+            logger.info(`[${requestId}] Reranked ${candidateCount} → ${results.length} results`, {
+              model: rerankerModel,
+            })
+          }
+        } catch (error) {
+          logger.warn(`[${requestId}] Reranker failed; falling back to vector ordering`, {
+            error: error instanceof Error ? error.message : 'Unknown error',
+            model: rerankerModel,
+            candidateCount,
+            workspaceId,
+          })
+          results = results.slice(0, validatedData.topK)
+        }
+      } else if (useReranker) {
+        results = results.slice(0, validatedData.topK)
+      }
+
       // Calculate cost for the embedding (with fallback if calculation fails)
       let cost = null
       let tokenCount = null
       if (hasQuery) {
         try {
-          tokenCount = estimateTokenCount(validatedData.query!, 'openai')
-          cost = calculateCost('text-embedding-3-small', tokenCount.count, 0, false)
+          // Use the tokenizer matching the actual embedding provider so token counts
+          // (and the input cost derived from them) reflect how the provider tokenizes.
+          const tokenizerProvider =
+            getEmbeddingModelInfo(queryEmbeddingModel).provider === 'gemini' ? 'google' : 'openai'
+          tokenCount = estimateTokenCount(validatedData.query!, tokenizerProvider)
+          cost = calculateCost(queryEmbeddingModel, tokenCount.count, 0, false)
         } catch (error) {
           logger.warn(`[${requestId}] Failed to calculate cost for search query`, {
             error: error instanceof Error ? error.message : 'Unknown error',
@@ -338,6 +416,32 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
         }
       }
 
+      // Add Cohere rerank cost (1 search unit per successful call, since we cap candidates ≤100).
+      // Bill on every successful API response — Cohere charges even when 0 results are returned.
+      let rerankerCost = 0
+      if (rerankBilled && rerankerModel && !rerankIsBYOK) {
+        const pricing = getRerankModelPricing(rerankerModel)
+        if (pricing) {
+          rerankerCost = pricing.perSearchUnit
+          if (cost) {
+            cost = {
+              ...cost,
+              input: cost.input + rerankerCost,
+              total: cost.total + rerankerCost,
+            }
+          } else {
+            cost = {
+              input: rerankerCost,
+              output: 0,
+              total: rerankerCost,
+              pricing: { input: 0, output: 0, updatedAt: pricing.updatedAt },
+            }
+          }
+        } else {
+          logger.warn(`[${requestId}] No pricing entry for rerank model ${rerankerModel}`)
+        }
+      }
+
       // Fetch tag definitions for display name mapping (reuse the same fetch from filtering)
       const tagDefsResults = await Promise.all(
         accessibleKbIds.map(async (kbId) => {
@@ -400,33 +504,38 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
               }
             })
 
+            const rerankerScore = rerankedScores.get(result.id)
             return {
               documentId: result.documentId,
               documentName: documentNameMap[result.documentId] || undefined,
               content: result.content,
               chunkIndex: result.chunkIndex,
               metadata: tags, // Clean display name mapped tags
               similarity: hasQuery ? 1 - result.distance : 1, // Perfect similarity for tag-only searches
+              ...(rerankerScore !== undefined && { rerankerScore }),
             }
           }),
           query: validatedData.query || '',
           knowledgeBaseIds: accessibleKbIds,
           knowledgeBaseId: accessibleKbIds[0],
           topK: validatedData.topK,
           totalResults: results.length,
-          ...(cost && tokenCount
+          ...(cost
             ? {
                 cost: {
                   input: cost.input,
                   output: cost.output,
                   total: cost.total,
                   tokens: {
-                    prompt: tokenCount.count,
+                    prompt: tokenCount?.count ?? 0,
                     completion: 0,
-                    total: tokenCount.count,
+                    total: tokenCount?.count ?? 0,
                   },
-                  model: 'text-embedding-3-small',
+                  model: queryEmbeddingModel,
                   pricing: cost.pricing,
+                  ...(rerankBilled && !rerankIsBYOK
+                    ? { rerankerCost, rerankerModel, rerankerSearchUnits: 1 }
+                    : {}),
                 },
               }
             : {}),

diff --git a/apps/sim/app/api/knowledge/search/utils.test.ts b/apps/sim/app/api/knowledge/search/utils.test.ts
@@ -282,7 +282,7 @@ describe('Knowledge Search Utils', () => {
       Object.keys(env).forEach((key) => delete (env as any)[key])
 
       await expect(generateSearchEmbedding('test query')).rejects.toThrow(
-        'Either OPENAI_API_KEY or Azure OpenAI configuration (AZURE_OPENAI_API_KEY + AZURE_OPENAI_ENDPOINT) must be configured'
+        'OPENAI_API_KEY is not configured'
       )
     })
 
@@ -354,6 +354,7 @@ describe('Knowledge Search Utils', () => {
           body: JSON.stringify({
             input: ['test query'],
             encoding_format: 'float',
+            dimensions: 1536,
           }),
         })
       )