-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathparquetSql.js
More file actions
99 lines (88 loc) · 3.95 KB
/
parquetSql.js
File metadata and controls
99 lines (88 loc) · 3.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import { asyncBufferFromFile, asyncBufferFromUrl, parquetMetadataAsync } from 'hyparquet'
import { compressors } from 'hyparquet-compressors'
import { collect, executeSql } from 'squirreling'
import { parquetDataSource } from 'hyperparam'
import { markdownTable } from './markdownTable.js'
const maxRows = 100
/**
* @import { ToolHandler } from '../types.d.ts'
* @type {ToolHandler}
*/
export const parquetSql = {
emoji: '🛢️',
tool: {
type: 'function',
name: 'parquet_sql',
description: 'Execute SQL queries against a parquet file using ANSI SQL syntax.'
+ ' Cell values are truncated by default to 1000 characters (or 10,000 if truncate=false).'
+ ' If a cell is truncated due to length, the column header will say "(truncated)".'
+ ' You can get subsequent pages of long text by using SUBSTR on long columns.'
+ ' Examples:'
+ '\n - `SELECT * FROM table LIMIT 10`'
+ '\n - `SELECT "First Name", "Last Name" FROM table WHERE age > 30 ORDER BY age DESC`.'
+ '\n - `SELECT country, COUNT(*) as total FROM table GROUP BY country`.'
+ '\n - `SELECT SUBSTR(long_column, 10001, 20000) as short_column FROM table`.',
parameters: {
type: 'object',
properties: {
file: {
type: 'string',
description: 'The parquet file to query either local file path or url.',
},
query: {
type: 'string',
description: 'The SQL query string. Use standard SQL syntax with WHERE clauses, ORDER BY, LIMIT, GROUP BY, aggregate functions, etc. Wrap column names containing spaces in double quotes: "column name". String literals should be single-quoted. Always use "table" as the table name in your FROM clause.',
},
truncate: {
type: 'boolean',
description: 'Whether to truncate long string values in the results. If true (default), each string cell is limited to 1000 characters. If false, each string cell is limited to 10,000 characters.',
},
},
required: ['file', 'query'],
},
},
/**
* @param {Record<string, unknown>} args
* @returns {Promise<string>}
*/
async handleToolCall({ file, query, truncate = true }) {
if (typeof file !== 'string') {
throw new Error('Expected file to be a string')
}
if (typeof query !== 'string' || query.trim().length === 0) {
throw new Error('Query parameter must be a non-empty string')
}
try {
const startTime = performance.now()
// Load parquet file and create data source
const asyncBuffer = file.startsWith('http://') || file.startsWith('https://')
? await asyncBufferFromUrl({ url: file })
: await asyncBufferFromFile(file)
const metadata = await parquetMetadataAsync(asyncBuffer)
const table = parquetDataSource(asyncBuffer, metadata, compressors)
// Execute SQL query
const results = await collect(executeSql({ tables: { table }, query }))
const queryTime = (performance.now() - startTime) / 1000
// Handle empty results
if (results.length === 0) {
return `Query executed successfully but returned no results in ${queryTime.toFixed(1)} seconds.`
}
// Format results
const rowCount = results.length
const displayRows = results.slice(0, maxRows)
// Determine max characters per string cell based on truncate parameter
const maxChars = truncate ? 1000 : 10000
// Convert to formatted string
let content = `Query returned ${rowCount} row${rowCount === 1 ? '' : 's'} in ${queryTime.toFixed(1)} seconds.`
content += '\n\n'
content += markdownTable(displayRows, maxChars)
if (rowCount > maxRows) {
content += `\n\n... and ${rowCount - maxRows} more row${rowCount - maxRows === 1 ? '' : 's'} (showing first ${maxRows} rows)`
}
return content
} catch (error) {
const message = error instanceof Error ? error.message : String(error)
return `SQL query failed: ${message}`
}
},
}