|
1 | 1 | { |
2 | | - "version": "1.3", |
| 2 | + "version": "1.4", |
3 | 3 | "testCases": [ |
4 | 4 | { |
5 | 5 | "id": "fetch-actor-details-1", |
|
42 | 42 | "id": "fetch-actor-details-7", |
43 | 43 | "category": "fetch-actor-details", |
44 | 44 | "query": "What parameters does apify/instagram-scraper accept?", |
45 | | - "expectedTools": ["fetch-actor-details"] |
| 45 | + "expectedTools": ["fetch-actor-details", "call-actor"], |
| 46 | + "reference": "Either fetch-actor-details or call-actor with step='info' is valid for getting Actor parameters." |
46 | 47 | }, |
47 | 48 | { |
48 | 49 | "id": "fetch-actor-details-8", |
|
65 | 66 | { |
66 | 67 | "id": "search-actors-1", |
67 | 68 | "category": "search-actors", |
68 | | - "query": "How to scrape Instagram posts", |
69 | | - "expectedTools": [], |
70 | | - "reference": "Either it should explain how to scrape Instagram posts or call 'search-actors' tool with the query: 'Instagram posts' or similar" |
| 69 | + "query": "What Actors can scrape Instagram posts?", |
| 70 | + "expectedTools": ["search-actors"], |
| 71 | + "reference": "It should call the 'search-actors' tool with the query: 'Instagram posts' or similar. The user's query explicitly asks about Actors." |
71 | 72 | }, |
72 | 73 | { |
73 | 74 | "id": "search-actors-2", |
|
100 | 101 | { |
101 | 102 | "id": "search-actors-6", |
102 | 103 | "category": "search-actors", |
103 | | - "query": "Get Facebook data", |
| 104 | + "query": "Find an Actor to get Facebook data", |
104 | 105 | "expectedTools": ["search-actors"], |
105 | 106 | "reference": "It must call the 'search-actors' tool with the query: 'Facebook' or similar." |
106 | 107 | }, |
|
140 | 141 | { |
141 | 142 | "id": "search-actors-12", |
142 | 143 | "category": "search-actors", |
143 | | - "query": "Fetch posts from Twitter about AI", |
| 144 | + "query": "Find an Actor to fetch posts from Twitter about AI", |
144 | 145 | "expectedTools": ["search-actors"], |
145 | | - "reference": "It must call the 'search-actors' tool with the query: 'Twitter posts' or similar" |
| 146 | + "reference": "It must call the 'search-actors' tool with the query: 'Twitter posts' or similar." |
146 | 147 | }, |
147 | 148 | { |
148 | 149 | "id": "search-actors-13", |
149 | 150 | "category": "search-actors", |
150 | | - "query": "Get flight information from Skyscanner", |
| 151 | + "query": "Find an Actor to get flight information from Skyscanner", |
151 | 152 | "expectedTools": ["search-actors"] |
152 | 153 | }, |
153 | 154 | { |
|
160 | 161 | "id": "search-actors-15", |
161 | 162 | "category": "search-actors", |
162 | 163 | "query": "Find actors for data extraction tasks", |
163 | | - "expectedTools": [], |
164 | | - "reference": "It should not call any tools, because the query is too general. It should suggest to be more specific about the platform or data type needed." |
| 164 | + "expectedTools": ["search-actors"], |
| 165 | + "reference": "While the query is general, it explicitly asks about 'actors', so search-actors is appropriate." |
165 | 166 | }, |
166 | 167 | { |
167 | 168 | "id": "rag-web-browser-1", |
168 | 169 | "category": "apify-slash-rag-web-browser", |
169 | | - "query": "Search articles about AI from tech blogs", |
| 170 | + "query": "Find recent articles about AI from tech blogs", |
170 | 171 | "expectedTools": ["apify-slash-rag-web-browser"] |
171 | 172 | }, |
172 | 173 | { |
|
210 | 211 | { |
211 | 212 | "id": "search-vs-rag-3", |
212 | 213 | "category": "apify-slash-rag-web-browser", |
213 | | - "query": "Search for AI articles on tech blogs", |
| 214 | + "query": "Find recent AI articles on tech blogs", |
214 | 215 | "expectedTools": ["apify-slash-rag-web-browser"] |
215 | 216 | }, |
216 | 217 | { |
217 | 218 | "id": "search-vs-rag-4", |
218 | 219 | "category": "apify-slash-rag-web-browser", |
219 | | - "query": "Fetch articles about AI from Wired and The Verge", |
| 220 | + "query": "Get current articles about AI from Wired and The Verge", |
220 | 221 | "expectedTools": ["apify-slash-rag-web-browser"] |
221 | 222 | }, |
222 | 223 | { |
|
232 | 233 | "expectedTools": ["search-actors"] |
233 | 234 | }, |
234 | 235 | { |
235 | | - "id": "search-vs-rag-7", |
| 236 | + "id": "search-vs-rag-7a", |
| 237 | + "category": "apify-slash-rag-web-browser", |
| 238 | + "query": "Get flight prices from New York to London for tomorrow", |
| 239 | + "expectedTools": ["apify-slash-rag-web-browser"] |
| 240 | + }, |
| 241 | + { |
| 242 | + "id": "search-vs-rag-7b", |
236 | 243 | "category": "search-actors", |
237 | | - "query": "Find one way flights from New York to London tomorrow", |
| 244 | + "query": "Find an Actor that scrapes flight data from booking sites", |
238 | 245 | "expectedTools": ["search-actors"] |
239 | 246 | }, |
240 | 247 | { |
|
394 | 401 | "query": "What's the weather like today in San Francisco?", |
395 | 402 | "expectedTools": ["apify-slash-rag-web-browser"] |
396 | 403 | }, |
397 | | - { |
398 | | - "id": "misleading-query-2", |
399 | | - "category": "misleading", |
400 | | - "query": "How do I scrape Instagram without using Apify?", |
401 | | - "expectedTools": ["search-actors"] |
402 | | - }, |
403 | 404 | { |
404 | 405 | "id": "misleading-query-3", |
405 | 406 | "category": "search-apify-docs", |
406 | | - "query": "I need to build my own scraper from scratch", |
| 407 | + "query": "I need to build my own Apify Actor from scratch", |
407 | 408 | "expectedTools": ["search-apify-docs"] |
408 | 409 | }, |
409 | 410 | { |
410 | 411 | "id": "ambiguous-query-1", |
411 | 412 | "category": "search-actors", |
412 | | - "query": "Get instagram posts", |
| 413 | + "query": "Find an Actor to get instagram posts", |
413 | 414 | "expectedTools": ["search-actors"], |
414 | | - "reference": "It must call the 'search-actors' tool with the query: 'Instagram posts' or similar" |
| 415 | + "reference": "It must call the 'search-actors' tool with the query: 'Instagram posts' or similar." |
415 | 416 | }, |
416 | 417 | { |
417 | 418 | "id": "ambiguous-query-3", |
418 | 419 | "category": "ambiguous", |
419 | | - "query": "documentation", |
| 420 | + "query": "Show me Apify Actor documentation", |
420 | 421 | "expectedTools": ["search-apify-docs"] |
421 | 422 | }, |
422 | 423 | { |
|
428 | 429 | { |
429 | 430 | "id": "tool-selection-confusion-2", |
430 | 431 | "category": "tool-selection", |
431 | | - "query": "Search for AI articles on tech blogs", |
| 432 | + "query": "Find recent AI articles on tech blogs", |
432 | 433 | "expectedTools": ["apify-slash-rag-web-browser"] |
433 | 434 | }, |
434 | 435 | { |
|
0 commit comments