|
1 | 1 | { |
2 | | - "version": "1.3", |
| 2 | + "version": "1.4", |
3 | 3 | "testCases": [ |
4 | 4 | { |
5 | 5 | "id": "fetch-actor-details-1", |
|
42 | 42 | "id": "fetch-actor-details-7", |
43 | 43 | "category": "fetch-actor-details", |
44 | 44 | "query": "What parameters does apify/instagram-scraper accept?", |
45 | | - "expectedTools": ["fetch-actor-details"] |
| 45 | + "expectedTools": ["fetch-actor-details", "call-actor"], |
| 46 | + "reference": "Both fetch-actor-details and call-actor with step='info' are valid for getting Actor parameters." |
46 | 47 | }, |
47 | 48 | { |
48 | 49 | "id": "fetch-actor-details-8", |
|
65 | 66 | { |
66 | 67 | "id": "search-actors-1", |
67 | 68 | "category": "search-actors", |
68 | | - "query": "How to scrape Instagram posts", |
69 | | - "expectedTools": [], |
70 | | - "reference": "Either it should explain how to scrape Instagram posts or call 'search-actors' tool with the query: 'Instagram posts' or similar" |
| 69 | + "query": "What Actors can scrape Instagram posts?", |
| 70 | + "expectedTools": ["search-actors"], |
| 71 | + "reference": "It should call the 'search-actors' tool with the query: 'Instagram posts' or similar. The query explicitly asks about Actors."
71 | 72 | }, |
72 | 73 | { |
73 | 74 | "id": "search-actors-2", |
|
100 | 101 | { |
101 | 102 | "id": "search-actors-6", |
102 | 103 | "category": "search-actors", |
103 | | - "query": "Get Facebook data", |
| 104 | + "query": "Find an Actor to get Facebook data", |
104 | 105 | "expectedTools": ["search-actors"], |
105 | | - "reference": "It must call the 'search-actors' tool with the query: 'Facebook' or similar." |
| 106 | + "reference": "It must call the 'search-actors' tool with the query: 'Facebook' or similar. Added 'Actor' to make intent clear." |
106 | 107 | }, |
107 | 108 | { |
108 | 109 | "id": "search-actors-7", |
|
140 | 141 | { |
141 | 142 | "id": "search-actors-12", |
142 | 143 | "category": "search-actors", |
143 | | - "query": "Fetch posts from Twitter about AI", |
| 144 | + "query": "Find an Actor to fetch posts from Twitter about AI", |
144 | 145 | "expectedTools": ["search-actors"], |
145 | | - "reference": "It must call the 'search-actors' tool with the query: 'Twitter posts' or similar" |
| 146 | + "reference": "It must call the 'search-actors' tool with the query: 'Twitter posts' or similar. Added 'Actor' to clarify tool search intent." |
146 | 147 | }, |
147 | 148 | { |
148 | 149 | "id": "search-actors-13", |
149 | 150 | "category": "search-actors", |
150 | | - "query": "Get flight information from Skyscanner", |
151 | | - "expectedTools": ["search-actors"] |
| 151 | + "query": "Find an Actor to get flight information from Skyscanner", |
| 152 | + "expectedTools": ["search-actors"], |
| 153 | + "reference": "Added 'Actor' to clarify this is a tool search, not immediate data retrieval." |
152 | 154 | }, |
153 | 155 | { |
154 | 156 | "id": "search-actors-14", |
|
160 | 162 | "id": "search-actors-15", |
161 | 163 | "category": "search-actors", |
162 | 164 | "query": "Find actors for data extraction tasks", |
163 | | - "expectedTools": [], |
164 | | - "reference": "It should not call any tools, because the query is too general. It should suggest to be more specific about the platform or data type needed." |
| 165 | + "expectedTools": ["search-actors"], |
| 166 | + "reference": "While the query is general, it explicitly asks about 'actors', so search-actors is appropriate. Changed from [] to allow a tool call."
165 | 167 | }, |
166 | 168 | { |
167 | 169 | "id": "rag-web-browser-1", |
168 | 170 | "category": "apify-slash-rag-web-browser", |
169 | | - "query": "Search articles about AI from tech blogs", |
170 | | - "expectedTools": ["apify-slash-rag-web-browser"] |
| 171 | + "query": "Find recent articles about AI from tech blogs", |
| 172 | + "expectedTools": ["apify-slash-rag-web-browser"], |
| 173 | + "reference": "Added 'recent' to signal immediate data need rather than tool search." |
171 | 174 | }, |
172 | 175 | { |
173 | 176 | "id": "rag-web-browser-2", |
|
210 | 213 | { |
211 | 214 | "id": "search-vs-rag-3", |
212 | 215 | "category": "apify-slash-rag-web-browser", |
213 | | - "query": "Search for AI articles on tech blogs", |
214 | | - "expectedTools": ["apify-slash-rag-web-browser"] |
| 216 | + "query": "Find recent AI articles on tech blogs", |
| 217 | + "expectedTools": ["apify-slash-rag-web-browser"], |
| 218 | + "reference": "Added 'recent' to signal immediate data retrieval rather than tool search." |
215 | 219 | }, |
216 | 220 | { |
217 | 221 | "id": "search-vs-rag-4", |
218 | 222 | "category": "apify-slash-rag-web-browser", |
219 | | - "query": "Fetch articles about AI from Wired and The Verge", |
220 | | - "expectedTools": ["apify-slash-rag-web-browser"] |
| 223 | + "query": "Get current articles about AI from Wired and The Verge", |
| 224 | + "expectedTools": ["apify-slash-rag-web-browser"], |
| 225 | + "reference": "Added 'current' to signal immediate data need." |
221 | 226 | }, |
222 | 227 | { |
223 | 228 | "id": "search-vs-rag-5", |
|
232 | 237 | "expectedTools": ["search-actors"] |
233 | 238 | }, |
234 | 239 | { |
235 | | - "id": "search-vs-rag-7", |
| 240 | + "id": "search-vs-rag-7a", |
| 241 | + "category": "apify-slash-rag-web-browser", |
| 242 | + "query": "Get flight prices from New York to London for tomorrow", |
| 243 | + "expectedTools": ["apify-slash-rag-web-browser"], |
| 244 | + "reference": "Time indicator 'tomorrow' signals immediate data need. Split from ambiguous search-vs-rag-7." |
| 245 | + }, |
| 246 | + { |
| 247 | + "id": "search-vs-rag-7b", |
236 | 248 | "category": "search-actors", |
237 | | - "query": "Find one way flights from New York to London tomorrow", |
238 | | - "expectedTools": ["search-actors"] |
| 249 | + "query": "Find an Actor that scrapes flight data from booking sites", |
| 250 | + "expectedTools": ["search-actors"], |
| 251 | + "reference": "Explicit 'Actor' mention signals tool search. Split from ambiguous search-vs-rag-7." |
239 | 252 | }, |
240 | 253 | { |
241 | 254 | "id": "search-vs-rag-8", |
|
394 | 407 | "query": "What's the weather like today in San Francisco?", |
395 | 408 | "expectedTools": ["apify-slash-rag-web-browser"] |
396 | 409 | }, |
397 | | - { |
398 | | - "id": "misleading-query-2", |
399 | | - "category": "misleading", |
400 | | - "query": "How do I scrape Instagram without using Apify?", |
401 | | - "expectedTools": ["search-actors"] |
402 | | - }, |
403 | 410 | { |
404 | 411 | "id": "misleading-query-3", |
405 | 412 | "category": "search-apify-docs", |
406 | | - "query": "I need to build my own scraper from scratch", |
407 | | - "expectedTools": ["search-apify-docs"] |
| 413 | + "query": "I need to build my own Apify Actor from scratch", |
| 414 | + "expectedTools": ["search-apify-docs"], |
| 415 | + "reference": "Added 'Apify Actor' to clarify seeking Apify-specific documentation." |
408 | 416 | }, |
409 | 417 | { |
410 | 418 | "id": "ambiguous-query-1", |
411 | 419 | "category": "search-actors", |
412 | | - "query": "Get instagram posts", |
| 420 | + "query": "Find an Actor to get instagram posts", |
413 | 421 | "expectedTools": ["search-actors"], |
414 | | - "reference": "It must call the 'search-actors' tool with the query: 'Instagram posts' or similar" |
| 422 | + "reference": "It must call the 'search-actors' tool with the query: 'Instagram posts' or similar. Added 'Actor' to remove ambiguity." |
415 | 423 | }, |
416 | 424 | { |
417 | 425 | "id": "ambiguous-query-3", |
418 | 426 | "category": "ambiguous", |
419 | | - "query": "documentation", |
420 | | - "expectedTools": ["search-apify-docs"] |
| 427 | + "query": "Show me Apify Actor documentation", |
| 428 | + "expectedTools": ["search-apify-docs"], |
| 429 | + "reference": "Made the query less vague by adding 'Apify Actor' context."
421 | 430 | }, |
422 | 431 | { |
423 | 432 | "id": "tool-selection-confusion-1", |
|
428 | 437 | { |
429 | 438 | "id": "tool-selection-confusion-2", |
430 | 439 | "category": "tool-selection", |
431 | | - "query": "Search for AI articles on tech blogs", |
432 | | - "expectedTools": ["apify-slash-rag-web-browser"] |
| 440 | + "query": "Find recent AI articles on tech blogs", |
| 441 | + "expectedTools": ["apify-slash-rag-web-browser"], |
| 442 | + "reference": "Added 'recent' to signal immediate data need." |
433 | 443 | }, |
434 | 444 | { |
435 | 445 | "id": "tool-selection-confusion-3", |
|
0 commit comments