Skip to content

Commit 8df12be

Browse files
authored
Merge pull request #253 from Canner/fix/jsonsify-huggingface-extension-output
Fix: change the output format to JSON string of hugging face extension
2 parents bf4b1a2 + bae5c5c commit 8df12be

5 files changed

Lines changed: 119 additions & 27 deletions

File tree

labs/playground1/Makefile

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ start: build test-data/moma.db ../../node_modules
66
@vulcan start
77

88
# build the required packages
9-
build: pkg-core pkg-build pkg-serve pkg-catalog-server pkg-cli pkg-extension-driver-duckdb pkg-extension-authenticator-canner pkg-extension-driver-clickhouse
9+
build: pkg-core pkg-build pkg-serve pkg-catalog-server pkg-cli pkg-extension-driver-duckdb pkg-extension-authenticator-canner pkg-extension-driver-clickhouse pkg-extension-huggingface
1010

1111

1212
# build for core package
@@ -62,8 +62,15 @@ pkg-extension-driver-clickhouse: ../../node_modules
6262
mkdir -p ./labs/playground1/node_modules/@vulcan-sql; \
6363
rm -rf ./labs/playground1/node_modules/@vulcan-sql/extension-driver-clickhouse; \
6464
cp -R ./dist/packages/extension-driver-clickhouse ./labs/playground1/node_modules/@vulcan-sql; \
65-
cp -R ./packages/extension-driver-clickhouse/node_modules/@clickhouse ./labs/playground1/node_modules
65+
cp -R ./packages/extension-driver-clickhouse/node_modules ./labs/playground1
6666

67+
pkg-extension-huggingface: ../../node_modules
68+
@cd ../..; \
69+
yarn nx build extension-huggingface; \
70+
mkdir -p ./labs/playground1/node_modules/@vulcan-sql; \
71+
rm -rf ./labs/playground1/node_modules/@vulcan-sql/extension-huggingface; \
72+
cp -R ./dist/packages/extension-huggingface ./labs/playground1/node_modules/@vulcan-sql; \
73+
cp -R ./packages/extension-huggingface/node_modules ./labs/playground1
6774

6875
# build and install for cli package
6976
pkg-cli: ../../node_modules

packages/doc/docs/extensions/huggingface-table-question-answering.mdx

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,9 @@ The [Table Question Answering](https://huggingface.co/docs/api-inference/detaile
4242
4343
Using the `huggingface_table_question_answering` filter.
4444

45-
Sample 1:
45+
The `huggingface_table_question_answering` filter returns its result as a JSON string. You can parse the JSON string and use the fields of the parsed result as needed.
46+
47+
**Sample 1 - send the data from variable by [set tag](https://vulcansql.com/docs/develop/advance#set-variables):**
4648

4749
```sql
4850
{% set data = [
@@ -67,14 +69,36 @@ Sample 1:
6769
SELECT {{ data | huggingface_table_question_answering(query="How many repositories related to data-lake topic?") }}
6870
```
6971

70-
Sample 2:
72+
**Sample 1 - Response:**
73+
74+
```json
75+
[
76+
{
77+
"result": "{\"answer\":\"COUNT > vulcan-sql, accio\",\"coordinates\":[[0,0],[1,0]],\"cells\":[\"vulcan-sql\",\"accio\"],\"aggregator\":\"COUNT\"}"
78+
}
79+
]
80+
```
81+
82+
**Sample 2 - send the data from [req tag](https://vulcansql.com/docs/develop/predefined-queries):**
7183

7284
```sql
73-
{% req products %}
74-
SELECT * FROM products
85+
{% req artists %}
86+
SELECT * FROM artists
7587
{% endreq %}
7688
77-
SELECT {{ products.value() | huggingface_table_question_answering(query="How many products related to 3C type?", model="microsoft/tapex-base-finetuned-wtq", wait_for_model=true, use_cache=true) }}
89+
{% set question = "List display name where gender are female?" %}
90+
91+
SELECT {{ artists.value() | huggingface_table_question_answering(query=question, model="microsoft/tapex-base-finetuned-wtq", wait_for_model=true, use_cache=true) }}
92+
```
93+
94+
**Sample 2 - Response:**
95+
96+
```json
97+
[
98+
{
99+
"result": "{\"answer\":\"Irene Aronson, Ruth Asawa, Isidora Aschheim, Geneviève Asse, Dana Atchley, Aino Aalto, Berenice Abbott\",\"coordinates\":[[8,1],[16,1],[17,1],[23,1],[25,1],[29,1],[35,1]],\"cells\":[\"Irene Aronson\",\"Ruth Asawa\",\"Isidora Aschheim\",\"Geneviève Asse\",\"Dana Atchley\",\"Aino Aalto\",\"Berenice Abbott\"],\"aggregator\":\"NONE\"}"
100+
}
101+
]
78102
```
79103

80104
### Arguments

packages/extension-huggingface/README.md

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,9 @@ The [Table Question Answering](https://huggingface.co/docs/api-inference/detaile
3333
3434
Using the `huggingface_table_question_answering` filter.
3535

36-
Sample 1:
36+
The `huggingface_table_question_answering` filter returns its result as a JSON string. You can parse the JSON string and use the fields of the parsed result as needed.
37+
38+
**Sample 1 - send the data from variable by [set tag](https://vulcansql.com/docs/develop/advance#set-variables):**
3739

3840
```sql
3941
{% set data = [
@@ -55,18 +57,40 @@ Sample 1:
5557
] %}
5658
5759
-- The source data for "huggingface_table_question_answering" needs to be an array of objects.
58-
SELECT {{ data | huggingface_table_question_answering(query="How many repositories related to data-lake topic?") }}
60+
SELECT {{ data | huggingface_table_question_answering(query="How many repositories related to data-lake topic?") }} as result
61+
```
62+
63+
**Sample 1 - Response:**
64+
65+
```json
66+
[
67+
{
68+
"result": "{\"answer\":\"COUNT > vulcan-sql, accio\",\"coordinates\":[[0,0],[1,0]],\"cells\":[\"vulcan-sql\",\"accio\"],\"aggregator\":\"COUNT\"}"
69+
}
70+
]
5971
```
6072

61-
Sample 2:
73+
**Sample 2 - send the data from [req tag](https://vulcansql.com/docs/develop/predefined-queries):**
6274

6375
```sql
64-
{% req products %}
65-
SELECT * FROM products
76+
{% req artists %}
77+
SELECT * FROM artists
6678
{% endreq %}
6779
80+
{% set question = "List display name where gender are female?" %}
81+
6882
-- The "model" keyword argument is optional. If not provided, the default value is 'google/tapas-base-finetuned-wtq'.
6983
-- The "wait_for_model" keyword argument is optional. If not provided, the default value is false.
7084
-- The "use_cache" keyword argument is optional. If not provided, the default value is true.
71-
SELECT {{ products.value() | huggingface_table_question_answering(query="How many products related to 3C type?", model="microsoft/tapex-base-finetuned-wtq", wait_for_model=true, use_cache=true) }}
85+
SELECT {{ artists.value() | huggingface_table_question_answering(query=question, model="microsoft/tapex-base-finetuned-wtq", wait_for_model=true, use_cache=true) }}
86+
```
87+
88+
**Sample 2 - Response:**
89+
90+
```json
91+
[
92+
{
93+
"result": "{\"answer\":\"Irene Aronson, Ruth Asawa, Isidora Aschheim, Geneviève Asse, Dana Atchley, Aino Aalto, Berenice Abbott\",\"coordinates\":[[8,1],[16,1],[17,1],[23,1],[25,1],[29,1],[35,1]],\"cells\":[\"Irene Aronson\",\"Ruth Asawa\",\"Isidora Aschheim\",\"Geneviève Asse\",\"Dana Atchley\",\"Aino Aalto\",\"Berenice Abbott\"],\"aggregator\":\"NONE\"}"
94+
}
95+
]
7296
```

packages/extension-huggingface/src/lib/filters/tableQuestionAnswering.ts

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -79,11 +79,8 @@ export const TableQuestionAnsweringFilter: FunctionalFilter = async ({
7979

8080
try {
8181
const results = await request(url, context, token);
82-
// result format, convert to suitable FunctionalFilter response => https://huggingface.co/docs/api-inference/detailed_parameters#question-answering-task
83-
if (!results.aggregator || results.aggregator === 'NONE')
84-
// trim the beginning & ending space if model returned answer exist the space, e.g: ' hello world'
85-
return (results.answer as string).trim();
86-
return results.cells.join(', ');
82+
// convert the whole response to a JSON string so users can parse it in SQL and access every field of the result
83+
return JSON.stringify(results);
8784
} catch (error) {
8885
throw new InternalError(
8986
`Error when sending data to Hugging Face for executing TableQuestionAnswering tasks, details: ${

packages/extension-huggingface/test/tableQuestionAnswering.spec.ts

Lines changed: 49 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ const data = [
2626
description: 'Query Your Data Warehouse Like Exploring One Big View.',
2727
},
2828
{
29-
repository: 'hell-word',
29+
repository: 'hello-world',
3030
stars: 0,
3131
topic: [],
3232
description: 'Sample repository for testing',
@@ -151,6 +151,10 @@ it(
151151
it(
152152
'Should get correct expected value when provided "neulab/omnitab-large-1024shot-finetuned-wtq-1024shot" model and wait it for model',
153153
async () => {
154+
const expected = JSON.stringify({
155+
// neulab/omnitab-large-1024shot-finetuned-wtq-1024shot will return the result including space in the beginning of the vulcan-sql -> ' vulcan-sql'
156+
answer: ' vulcan-sql',
157+
});
154158
const token = process.env['HF_ACCESS_TOKEN'];
155159
const { compileAndLoad, execute, getExecutedQueries, getCreatedBinding } =
156160
await getTestCompiler({
@@ -173,28 +177,62 @@ it(
173177
const bindings = await getCreatedBinding();
174178

175179
expect(queries[0]).toBe('SELECT $1');
176-
expect(bindings[0].get('$1')).toEqual('vulcan-sql');
180+
expect(bindings[0].get('$1')).toEqual(expected);
177181
},
178182
50 * 1000
179183
);
180184

181185
it.each([
182-
{ question: 'what repository has most stars?', expected: 'vulcan-sql' },
183-
{ question: 'what repository has lowest stars?', expected: 'hell-word' },
186+
{
187+
question: 'what repository has most stars?',
188+
expected: {
189+
answer: 'vulcan-sql',
190+
coordinates: [[0, 0]],
191+
cells: ['vulcan-sql'],
192+
aggregator: 'NONE',
193+
},
194+
},
195+
{
196+
question: 'what repository has lowest stars?',
197+
expected: {
198+
answer: 'hello-world',
199+
coordinates: [[2, 0]],
200+
cells: ['hello-world'],
201+
aggregator: 'NONE',
202+
},
203+
},
184204
{
185205
question: 'How many stars does the vulcan-sql repository have?',
186-
expected: '1000',
206+
expected: {
207+
answer: 'SUM > 1000',
208+
coordinates: [[0, 1]],
209+
cells: ['1000'],
210+
aggregator: 'SUM',
211+
},
187212
},
188213
{
189214
question: 'How many stars does the accio repository have?',
190-
expected: '500',
215+
expected: {
216+
answer: 'AVERAGE > 500',
217+
coordinates: [[1, 1]],
218+
cells: ['500'],
219+
aggregator: 'AVERAGE',
220+
},
191221
},
192222
{
193223
question: 'How many repositories related to data-lake topic?',
194-
expected: 'vulcan-sql, accio',
224+
expected: {
225+
answer: 'COUNT > vulcan-sql, accio',
226+
coordinates: [
227+
[0, 0],
228+
[1, 0],
229+
],
230+
cells: ['vulcan-sql', 'accio'],
231+
aggregator: 'COUNT',
232+
},
195233
},
196234
])(
197-
'Should get correct expected $answer when asking $question',
235+
'Should get correct expected answer when asking question',
198236
async ({ question, expected }) => {
199237
// Arrange
200238

@@ -220,7 +258,9 @@ it.each([
220258
const bindings = await getCreatedBinding();
221259

222260
expect(queries[0]).toBe('SELECT $1');
223-
expect(bindings[0].get('$1')).toEqual(expected);
261+
// parse the result to object and match the expected value
262+
const result = JSON.parse(bindings[0].get('$1'));
263+
expect(result).toEqual(expected);
224264
},
225265
50 * 1000
226266
);

0 commit comments

Comments
 (0)