Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
9614a28
feat(knowledge): add embedding model selection and Cohere reranker
waleedlatif1 Apr 30, 2026
c56b7a4
fix(knowledge): split reranker model constants into client-safe module
waleedlatif1 Apr 30, 2026
5d1446f
fix(knowledge): bill rerank on every successful API call and fix MDX …
waleedlatif1 Apr 30, 2026
553021a
test(knowledge): align embedding tests with provider abstraction changes
waleedlatif1 Apr 30, 2026
542d2ed
fix(knowledge): require explicit Azure deployment per OpenAI embeddin…
waleedlatif1 Apr 30, 2026
d70ac8f
fix(knowledge): skip platform reranker billing for BYOK Cohere keys
waleedlatif1 Apr 30, 2026
57589cb
fix(knowledge): match search tokenizer to embedding provider; remove …
waleedlatif1 Apr 30, 2026
96cf4dd
fix(knowledge): match chunk tokenizer to KB embedding provider
waleedlatif1 Apr 30, 2026
14538a1
refactor(knowledge): centralize tokenizer mapping on EmbeddingModelInfo
waleedlatif1 Apr 30, 2026
7a9ba8b
refactor(knowledge): lock embedding model to KB_EMBEDDING_MODEL env var
waleedlatif1 Apr 30, 2026
53624f3
fix(knowledge): use provider tokenizer for chunks and bound rerank in…
waleedlatif1 Apr 30, 2026
97fa4d5
fix(knowledge): use .count from estimateTokenCount return value
waleedlatif1 Apr 30, 2026
b1643ee
fix(knowledge): only enforce single embedding model when query is pre…
waleedlatif1 Apr 30, 2026
78d6af7
fix(knowledge): use getConfiguredEmbeddingModel in copilot KB creation
waleedlatif1 Apr 30, 2026
687b7f5
fix(knowledge): make EMBEDDING_DIMENSIONS a literal type
waleedlatif1 Apr 30, 2026
f7eef61
fix(knowledge): use per-KB embedding model in v1 search route
waleedlatif1 Apr 30, 2026
cb1cab7
chore(knowledge): polish embedding/reranker implementation
waleedlatif1 Apr 30, 2026
5dfaed2
fix(knowledge): resolve type errors and unhandled rejection in search…
waleedlatif1 Apr 30, 2026
b73c218
fix(knowledge): pass Gemini API key via x-goog-api-key header
waleedlatif1 Apr 30, 2026
64a17db
fix(knowledge): default Azure deployment name to embedding model name
waleedlatif1 Apr 30, 2026
99743d9
fix(knowledge): cap Gemini batches at 100 items, add singular GEMINI_…
waleedlatif1 Apr 30, 2026
8fd0557
fix(knowledge): prefer singular Cohere key before rotation
waleedlatif1 Apr 30, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions apps/docs/content/docs/en/tools/firecrawl.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,8 @@ Parse uploaded documents (PDF, DOCX, HTML, etc.) into clean markdown using Firec
| `proxy` | string | No | Proxy mode: "basic" or "auto" |
| `zeroDataRetention` | boolean | No | Enable zero data retention. Defaults to false. |
| `apiKey` | string | Yes | Firecrawl API key |
| `pricing` | custom | No | No description |
| `metadata` | string | No | No description |
| `rateLimit` | string | No | No description |

#### Output
Expand Down
2 changes: 2 additions & 0 deletions apps/docs/content/docs/en/tools/knowledge.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ Search for similar content in a knowledge base using vector similarity
| `properties` | string | No | No description |
| `tagName` | string | No | No description |
| `tagValue` | string | No | No description |
| `rerankerEnabled` | boolean | No | Whether to apply Cohere reranking to vector search results |
| `rerankerModel` | string | No | Cohere rerank model to use \(one of: rerank-v4.0-pro, rerank-v4.0-fast, rerank-v3.5\) |
| `tagFilters` | string | No | No description |

#### Output
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -213,9 +213,11 @@ export const POST = withRouteHandler(
accessCheck.knowledgeBase?.workspaceId
)

const chunkEmbeddingModel =
accessCheck.knowledgeBase?.embeddingModel ?? 'text-embedding-3-small'
let cost = null
try {
cost = calculateCost('text-embedding-3-small', newChunk.tokenCount, 0, false)
cost = calculateCost(chunkEmbeddingModel, newChunk.tokenCount, 0, false)
} catch (error) {
logger.warn(`[${requestId}] Failed to calculate cost for chunk upload`, {
error: error instanceof Error ? error.message : 'Unknown error',
Expand All @@ -240,7 +242,7 @@ export const POST = withRouteHandler(
completion: 0,
total: newChunk.tokenCount,
},
model: 'text-embedding-3-small',
model: chunkEmbeddingModel,
pricing: cost.pricing,
},
}
Expand Down
11 changes: 9 additions & 2 deletions apps/sim/app/api/knowledge/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@ import { getSession } from '@/lib/auth'
import { PlatformEvents } from '@/lib/core/telemetry'
import { generateRequestId } from '@/lib/core/utils/request'
import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
import {
DEFAULT_EMBEDDING_MODEL,
EMBEDDING_DIMENSIONS,
SUPPORTED_EMBEDDING_MODEL_IDS,
} from '@/lib/knowledge/embeddings'
import {
createKnowledgeBase,
getKnowledgeBases,
Expand All @@ -20,8 +25,10 @@ const CreateKnowledgeBaseSchema = z.object({
name: z.string().min(1, 'Name is required'),
description: z.string().optional(),
workspaceId: z.string().min(1, 'Workspace ID is required'),
embeddingModel: z.literal('text-embedding-3-small').default('text-embedding-3-small'),
embeddingDimension: z.literal(1536).default(1536),
embeddingModel: z
.enum(SUPPORTED_EMBEDDING_MODEL_IDS as [string, ...string[]])
.default(DEFAULT_EMBEDDING_MODEL),
embeddingDimension: z.literal(EMBEDDING_DIMENSIONS).default(EMBEDDING_DIMENSIONS),
chunkingConfig: z
.object({
maxSize: z.number().min(100).max(4000).default(1024),
Expand Down
15 changes: 14 additions & 1 deletion apps/sim/app/api/knowledge/search/route.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,7 @@ describe('Knowledge Search API Route', () => {
userId: 'user-123',
name: 'Test KB',
deletedAt: null,
embeddingModel: 'text-embedding-3-small',
},
})

Expand Down Expand Up @@ -524,6 +525,7 @@ describe('Knowledge Search API Route', () => {
userId: 'user-123',
name: 'Test KB',
deletedAt: null,
embeddingModel: 'text-embedding-3-small',
},
})

Expand Down Expand Up @@ -571,6 +573,7 @@ describe('Knowledge Search API Route', () => {
userId: 'user-123',
name: 'Test KB',
deletedAt: null,
embeddingModel: 'text-embedding-3-small',
},
})

Expand Down Expand Up @@ -625,6 +628,7 @@ describe('Knowledge Search API Route', () => {
userId: 'user-123',
name: 'Test KB',
deletedAt: null,
embeddingModel: 'text-embedding-3-small',
},
})

Expand Down Expand Up @@ -694,6 +698,7 @@ describe('Knowledge Search API Route', () => {
userId: 'user-123',
name: 'Test KB',
deletedAt: null,
embeddingModel: 'text-embedding-3-small',
},
})

Expand Down Expand Up @@ -739,6 +744,7 @@ describe('Knowledge Search API Route', () => {
userId: 'user-123',
name: 'Test KB',
deletedAt: null,
embeddingModel: 'text-embedding-3-small',
},
})

Expand Down Expand Up @@ -877,6 +883,7 @@ describe('Knowledge Search API Route', () => {
userId: 'user-123',
name: 'Test KB',
deletedAt: null,
embeddingModel: 'text-embedding-3-small',
},
})

Expand Down Expand Up @@ -921,11 +928,17 @@ describe('Knowledge Search API Route', () => {
userId: 'user-123',
name: 'Test KB',
deletedAt: null,
embeddingModel: 'text-embedding-3-small',
},
})
.mockResolvedValueOnce({
hasAccess: true,
knowledgeBase: { id: 'kb-456', userId: 'user-123', name: 'Test KB 2' },
knowledgeBase: {
id: 'kb-456',
userId: 'user-123',
name: 'Test KB 2',
embeddingModel: 'text-embedding-3-small',
},
})

mockGetDocumentTagDefinitions.mockResolvedValue(mockTagDefinitions)
Expand Down
135 changes: 122 additions & 13 deletions apps/sim/app/api/knowledge/search/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ import { PlatformEvents } from '@/lib/core/telemetry'
import { generateRequestId } from '@/lib/core/utils/request'
import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
import { ALL_TAG_SLOTS } from '@/lib/knowledge/constants'
import { getEmbeddingModelInfo } from '@/lib/knowledge/embedding-models'
import { DEFAULT_RERANKER_MODEL, rerank, SUPPORTED_RERANKER_MODELS } from '@/lib/knowledge/reranker'
import { getDocumentTagDefinitions } from '@/lib/knowledge/tags/service'
import { buildUndefinedTagsError, validateTagValue } from '@/lib/knowledge/tags/utils'
import type { StructuredFilter } from '@/lib/knowledge/types'
Expand All @@ -20,7 +22,8 @@ import {
handleVectorOnlySearch,
type SearchResult,
} from '@/app/api/knowledge/search/utils'
import { checkKnowledgeBaseAccess } from '@/app/api/knowledge/utils'
import { checkKnowledgeBaseAccess, type KnowledgeBaseAccessResult } from '@/app/api/knowledge/utils'
import { getRerankModelPricing } from '@/providers/models'
import { calculateCost } from '@/providers/utils'

const logger = createLogger('VectorSearchAPI')
Expand Down Expand Up @@ -59,6 +62,11 @@ const VectorSearchSchema = z
.optional()
.nullable()
.transform((val) => val || undefined),
rerankerEnabled: z.boolean().optional().default(false),
rerankerModel: z
.enum(SUPPORTED_RERANKER_MODELS as unknown as [string, ...string[]])
.optional()
.default(DEFAULT_RERANKER_MODEL),
})
.refine(
(data) => {
Expand Down Expand Up @@ -235,11 +243,29 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
)
}

const workspaceId = accessChecks.find((ac) => ac?.hasAccess)?.knowledgeBase?.workspaceId
const accessibleKbs = accessChecks
.filter((ac): ac is KnowledgeBaseAccessResult => Boolean(ac?.hasAccess))
.map((ac) => ac.knowledgeBase)
const workspaceId = accessibleKbs[0]?.workspaceId

const useReranker = validatedData.rerankerEnabled && Boolean(validatedData.query?.trim())
const rerankerModel = useReranker ? validatedData.rerankerModel : null

const embeddingModels = Array.from(new Set(accessibleKbs.map((kb) => kb.embeddingModel)))
if (embeddingModels.length > 1) {
return NextResponse.json(
{
error:
'Selected knowledge bases use different embedding models and cannot be searched together. Search them separately.',
},
{ status: 400 }
)
}
Comment thread
waleedlatif1 marked this conversation as resolved.
const queryEmbeddingModel = embeddingModels[0]

const hasQuery = validatedData.query && validatedData.query.trim().length > 0
const queryEmbeddingPromise = hasQuery
? generateSearchEmbedding(validatedData.query!, undefined, workspaceId)
? generateSearchEmbedding(validatedData.query!, queryEmbeddingModel, workspaceId)
: Promise.resolve(null)

// Check if any requested knowledge bases were not accessible
Expand Down Expand Up @@ -278,6 +304,10 @@ export const POST = withRouteHandler(async (request: NextRequest) => {

const hasFilters = structuredFilters && structuredFilters.length > 0

// Oversample candidates when reranking so the reranker has more to choose from.
// Cap at 100 to bound Cohere request cost (1 search unit = ≤100 docs).
const candidateTopK = useReranker ? Math.min(100, validatedData.topK * 4) : validatedData.topK

if (!hasQuery && hasFilters) {
// Tag-only search without vector similarity
results = await handleTagOnlySearch({
Expand All @@ -291,24 +321,24 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
`[${requestId}] Executing tag + vector search with filters:`,
structuredFilters
)
const strategy = getQueryStrategy(accessibleKbIds.length, validatedData.topK)
const strategy = getQueryStrategy(accessibleKbIds.length, candidateTopK)
const queryVector = JSON.stringify(await queryEmbeddingPromise)

results = await handleTagAndVectorSearch({
knowledgeBaseIds: accessibleKbIds,
topK: validatedData.topK,
topK: candidateTopK,
structuredFilters,
queryVector,
distanceThreshold: strategy.distanceThreshold,
})
} else if (hasQuery && !hasFilters) {
// Vector-only search
const strategy = getQueryStrategy(accessibleKbIds.length, validatedData.topK)
const strategy = getQueryStrategy(accessibleKbIds.length, candidateTopK)
const queryVector = JSON.stringify(await queryEmbeddingPromise)

results = await handleVectorOnlySearch({
knowledgeBaseIds: accessibleKbIds,
topK: validatedData.topK,
topK: candidateTopK,
queryVector,
distanceThreshold: strategy.distanceThreshold,
})
Expand All @@ -323,13 +353,61 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
)
}

// Optional Cohere rerank pass on top of vector results.
const rerankedScores = new Map<string, number>()
// `rerankBilled` = Cohere was successfully called (even with 0 results) and we owe the search unit.
let rerankBilled = false
let rerankIsBYOK = false
if (useReranker && rerankerModel && results.length > 0) {
const candidateCount = results.length
try {
const { results: ranked, isBYOK } = await rerank(
validatedData.query!,
results.map((r) => ({ id: r.id, text: r.content })),
{ model: rerankerModel, topN: validatedData.topK, workspaceId }
)
rerankBilled = true
rerankIsBYOK = isBYOK
if (ranked.length === 0) {
logger.warn(
`[${requestId}] Reranker returned 0 results; falling back to vector ordering`,
{ model: rerankerModel, candidateCount }
)
results = results.slice(0, validatedData.topK)
} else {
const idToResult = new Map(results.map((r) => [r.id, r]))
results = ranked
.map((r) => idToResult.get(r.item.id))
.filter((r): r is SearchResult => Boolean(r))
for (const r of ranked) rerankedScores.set(r.item.id, r.relevanceScore)
logger.info(`[${requestId}] Reranked ${candidateCount} → ${results.length} results`, {
model: rerankerModel,
})
}
} catch (error) {
logger.warn(`[${requestId}] Reranker failed; falling back to vector ordering`, {
error: error instanceof Error ? error.message : 'Unknown error',
model: rerankerModel,
candidateCount,
workspaceId,
})
results = results.slice(0, validatedData.topK)
}
} else if (useReranker) {
results = results.slice(0, validatedData.topK)
}

// Calculate cost for the embedding (with fallback if calculation fails)
let cost = null
let tokenCount = null
if (hasQuery) {
try {
tokenCount = estimateTokenCount(validatedData.query!, 'openai')
cost = calculateCost('text-embedding-3-small', tokenCount.count, 0, false)
// Use the tokenizer matching the actual embedding provider so token counts
// (and the input cost derived from them) reflect how the provider tokenizes.
const tokenizerProvider =
getEmbeddingModelInfo(queryEmbeddingModel).provider === 'gemini' ? 'google' : 'openai'
tokenCount = estimateTokenCount(validatedData.query!, tokenizerProvider)
cost = calculateCost(queryEmbeddingModel, tokenCount.count, 0, false)
Comment thread
waleedlatif1 marked this conversation as resolved.
} catch (error) {
logger.warn(`[${requestId}] Failed to calculate cost for search query`, {
error: error instanceof Error ? error.message : 'Unknown error',
Expand All @@ -338,6 +416,32 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
}
}

// Add Cohere rerank cost (1 search unit per successful call, since we cap candidates ≤100).
// Bill on every successful API response — Cohere charges even when 0 results are returned.
let rerankerCost = 0
if (rerankBilled && rerankerModel && !rerankIsBYOK) {
const pricing = getRerankModelPricing(rerankerModel)
if (pricing) {
rerankerCost = pricing.perSearchUnit
if (cost) {
cost = {
...cost,
input: cost.input + rerankerCost,
total: cost.total + rerankerCost,
}
Comment thread
waleedlatif1 marked this conversation as resolved.
} else {
cost = {
input: rerankerCost,
output: 0,
total: rerankerCost,
pricing: { input: 0, output: 0, updatedAt: pricing.updatedAt },
}
}
} else {
logger.warn(`[${requestId}] No pricing entry for rerank model ${rerankerModel}`)
}
}

// Fetch tag definitions for display name mapping (reuse the same fetch from filtering)
const tagDefsResults = await Promise.all(
accessibleKbIds.map(async (kbId) => {
Expand Down Expand Up @@ -400,33 +504,38 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
}
})

const rerankerScore = rerankedScores.get(result.id)
return {
documentId: result.documentId,
documentName: documentNameMap[result.documentId] || undefined,
content: result.content,
chunkIndex: result.chunkIndex,
metadata: tags, // Clean display name mapped tags
similarity: hasQuery ? 1 - result.distance : 1, // Perfect similarity for tag-only searches
...(rerankerScore !== undefined && { rerankerScore }),
}
}),
query: validatedData.query || '',
knowledgeBaseIds: accessibleKbIds,
knowledgeBaseId: accessibleKbIds[0],
topK: validatedData.topK,
totalResults: results.length,
...(cost && tokenCount
...(cost
? {
cost: {
input: cost.input,
output: cost.output,
total: cost.total,
tokens: {
prompt: tokenCount.count,
prompt: tokenCount?.count ?? 0,
completion: 0,
total: tokenCount.count,
total: tokenCount?.count ?? 0,
},
model: 'text-embedding-3-small',
model: queryEmbeddingModel,
pricing: cost.pricing,
...(rerankBilled && !rerankIsBYOK
? { rerankerCost, rerankerModel, rerankerSearchUnits: 1 }
: {}),
},
}
: {}),
Expand Down
3 changes: 2 additions & 1 deletion apps/sim/app/api/knowledge/search/utils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ describe('Knowledge Search Utils', () => {
Object.keys(env).forEach((key) => delete (env as any)[key])

await expect(generateSearchEmbedding('test query')).rejects.toThrow(
'Either OPENAI_API_KEY or Azure OpenAI configuration (AZURE_OPENAI_API_KEY + AZURE_OPENAI_ENDPOINT) must be configured'
'OPENAI_API_KEY is not configured'
)
})

Expand Down Expand Up @@ -354,6 +354,7 @@ describe('Knowledge Search Utils', () => {
body: JSON.stringify({
input: ['test query'],
encoding_format: 'float',
dimensions: 1536,
}),
})
)
Expand Down
Loading
Loading