Skip to content

Commit d8865d6

Browse files
authored
Merge pull request #243 from Canner/feature/huggingface-table-question-answering
Feature: HuggingFace TableQuestionAnswering filter
2 parents 9025811 + 9375bfe commit d8865d6

24 files changed

Lines changed: 870 additions & 8 deletions

packages/cli/test/cli.spec.ts

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -47,14 +47,18 @@ afterEach(async () => {
4747
await runShutdownJobs();
4848
});
4949

50-
it('Init command should create new folder with default config', async () => {
51-
// Action
52-
const config: any = jsYAML.load(
53-
await fs.readFile(path.resolve(projectRoot, 'vulcan.yaml'), 'utf8')
54-
);
55-
// Assert
56-
expect(config.name).toBe(projectName);
57-
});
50+
it(
51+
'Init command should create new folder with default config',
52+
async () => {
53+
// Action
54+
const config: any = jsYAML.load(
55+
await fs.readFile(path.resolve(projectRoot, 'vulcan.yaml'), 'utf8')
56+
);
57+
// Assert
58+
expect(config.name).toBe(projectName);
59+
},
60+
10 * 1000
61+
);
5862

5963
it('Build command should make result.json', async () => {
6064
// Action

packages/core/src/lib/functional-extensions/filter.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ export interface FunctionalFilterOptions {
1010
value: any;
1111
args: Record<string, any>;
1212
metadata: NunjucksExecutionMetadata;
13+
// The options from configuration for the filter extension
14+
options: Record<string, any> | Array<Record<string, any>>;
1315
}
1416

1517
export type FunctionalFilter = (
@@ -32,6 +34,7 @@ export const createFilterExtension = (
3234
value: options.value,
3335
args: options.args[0],
3436
metadata: options.metadata,
37+
options: this.getConfig(),
3538
});
3639
}
3740
}

packages/core/src/lib/functional-extensions/tag.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ export interface FunctionalTagOptions {
1111
sql: string;
1212
args: Record<string, any>;
1313
metadata: NunjucksExecutionMetadata;
14+
// The options from configuration for the tag extension
15+
options: Record<string, any> | Array<Record<string, any>>;
1416
}
1517

1618
export type FunctionalTag = (options: FunctionalTagOptions) => Promise<string>;
@@ -57,6 +59,7 @@ export const createTagExtension = (
5759
sql,
5860
args: args[0] as any,
5961
metadata,
62+
options: this.getConfig(),
6063
});
6164
return new nunjucks.runtime.SafeString(result);
6265
}
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
# Hugging Face
2+
3+
## Installation
4+
5+
1. Install the package:
6+
7+
**If you are developing with the VulcanSQL binary, the package is already bundled in it, so you can skip this step.**
8+
9+
```bash
10+
npm i @vulcan-sql/extension-huggingface
11+
```
12+
13+
2. Update your `vulcan.yaml` file to enable the extension:
14+
15+
```yaml
16+
extensions:
17+
...
18+
// highlight-next-line
19+
hf: '@vulcan-sql/extension-huggingface'
20+
21+
// highlight-next-line
22+
hf:
23+
// highlight-next-line
24+
# Required: Hugging Face access token, see: https://huggingface.co/docs/hub/security-tokens
25+
// highlight-next-line
26+
accessToken: 'your-huggingface-access-token'
27+
```
28+
29+
## Using Hugging Face
30+
31+
VulcanSQL supports Hugging Face tasks through the [VulcanSQL Filters](https://vulcansql.com/docs/develop/advance#filters) statement.
32+
33+
:::caution
34+
Hugging Face enforces a [rate limit](https://huggingface.co/docs/api-inference/faq#rate-limits), so you cannot send large datasets to the Hugging Face Inference API for processing.
35+
36+
In addition, using a different Hugging Face model may yield different results or even fail.
37+
:::
38+
39+
### Table Question Answering
40+
41+
[Table Question Answering](https://huggingface.co/docs/api-inference/detailed_parameters#table-question-answering-task) is one of the Natural Language Processing tasks supported by Hugging Face.
42+
43+
Use the `huggingface_table_question_answering` filter to run this task.
44+
45+
Sample 1:
46+
47+
```sql
48+
{% set data = [
49+
{
50+
"repository": "vulcan-sql",
51+
"topic": ["analytics", "data-lake", "data-warehouse", "api-builder"],
52+
"description":"Create and share Data APIs fast! Data API framework for DuckDB, ClickHouse, Snowflake, BigQuery, PostgreSQL"
53+
},
54+
{
55+
"repository": "accio",
56+
"topic": ["data-analytics", "data-lake", "data-warehouse", "bussiness-intelligence"],
57+
"description": "Query Your Data Warehouse Like Exploring One Big View."
58+
},
59+
{
60+
"repository": "hell-word",
61+
"topic": [],
62+
"description": "Sample repository for testing"
63+
}
64+
] %}
65+
66+
-- The source data for "huggingface_table_question_answering" needs to be an array of objects.
67+
SELECT {{ data | huggingface_table_question_answering(query="How many repositories related to data-lake topic?") }}
68+
```
69+
70+
Sample 2:
71+
72+
```sql
73+
{% req products %}
74+
SELECT * FROM products
75+
{% endreq %}
76+
77+
SELECT {{ products.value() | huggingface_table_question_answering(query="How many products related to 3C type?", model="microsoft/tapex-base-finetuned-wtq", wait_for_model=true, use_cache=true) }}
78+
```
79+
80+
### Arguments
81+
82+
Please check [Table Question Answering](https://huggingface.co/docs/api-inference/detailed_parameters#table-question-answering-task) for further information.
83+
84+
| Name | Required | Default | Description |
85+
| -------------- | -------- | ------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- |
86+
| query | Y | | The query in plain text that you want to ask the table. |
87+
| model | N | google/tapas-base-finetuned-wtq | The model id of a pretrained model hosted inside a model repo on huggingface.co. See: https://huggingface.co/models?pipeline_tag=table-question-answering |
88+
| use_cache | N | true | There is a cache layer on the inference API to speed up requests we have already seen |
89+
| wait_for_model | N | false | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done |
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"extends": ["../../.eslintrc.json"],
3+
"ignorePatterns": ["!**/*"],
4+
"overrides": [
5+
{
6+
"files": ["*.ts", "*.tsx", "*.js", "*.jsx"],
7+
"rules": {}
8+
},
9+
{
10+
"files": ["*.ts", "*.tsx"],
11+
"rules": {}
12+
},
13+
{
14+
"files": ["*.js", "*.jsx"],
15+
"rules": {}
16+
}
17+
]
18+
}
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Local .env file used when running the test cases
2+
.env
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
# extension-huggingface
2+
3+
Adds support for Hugging Face Inference API tasks to VulcanSQL, provided by [Canner](https://canner.io/).
4+
5+
## Installation
6+
7+
1. Install the package:
8+
9+
```bash
10+
npm i @vulcan-sql/extension-huggingface
11+
```
12+
13+
2. Update your `vulcan.yaml` file to enable the extension:
14+
15+
```yaml
16+
extensions:
17+
hf: '@vulcan-sql/extension-huggingface'
18+
19+
hf:
20+
# Required: Hugging Face access token, see: https://huggingface.co/docs/hub/security-tokens
21+
accessToken: 'your-huggingface-access-token'
22+
```
23+
24+
## Using Hugging Face
25+
26+
VulcanSQL supports Hugging Face tasks through the [VulcanSQL Filters](https://vulcansql.com/docs/develop/advance#filters) statement.
27+
28+
**⚠️ Caution**: Hugging Face enforces a [rate limit](https://huggingface.co/docs/api-inference/faq#rate-limits), so you cannot send large datasets to the Hugging Face Inference API for processing. In addition, using a different Hugging Face model may yield different results or even fail.
29+
30+
### Table Question Answering
31+
32+
[Table Question Answering](https://huggingface.co/docs/api-inference/detailed_parameters#table-question-answering-task) is one of the Natural Language Processing tasks supported by Hugging Face.
33+
34+
Use the `huggingface_table_question_answering` filter to run this task.
35+
36+
Sample 1:
37+
38+
```sql
39+
{% set data = [
40+
{
41+
"repository": "vulcan-sql",
42+
"topic": ["analytics", "data-lake", "data-warehouse", "api-builder"],
43+
"description":"Create and share Data APIs fast! Data API framework for DuckDB, ClickHouse, Snowflake, BigQuery, PostgreSQL"
44+
},
45+
{
46+
"repository": "accio",
47+
"topic": ["data-analytics", "data-lake", "data-warehouse", "bussiness-intelligence"],
48+
"description": "Query Your Data Warehouse Like Exploring One Big View."
49+
},
50+
{
51+
"repository": "hell-word",
52+
"topic": [],
53+
"description": "Sample repository for testing"
54+
}
55+
] %}
56+
57+
-- The source data for "huggingface_table_question_answering" needs to be an array of objects.
58+
SELECT {{ data | huggingface_table_question_answering(query="How many repositories related to data-lake topic?") }}
59+
```
60+
61+
Sample 2:
62+
63+
```sql
64+
{% req products %}
65+
SELECT * FROM products
66+
{% endreq %}
67+
68+
-- The "model" keyword argument is optional. If not provided, the default value is 'google/tapas-base-finetuned-wtq'.
69+
-- The "wait_for_model" keyword argument is optional. If not provided, the default value is false.
70+
-- The "use_cache" keyword argument is optional. If not provided, the default value is true.
71+
SELECT {{ products.value() | huggingface_table_question_answering(query="How many products related to 3C type?", model="microsoft/tapex-base-finetuned-wtq", wait_for_model=true, use_cache=true) }}
72+
```
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
module.exports = {
2+
displayName: 'extension-huggingface',
3+
preset: '../../jest.preset.ts',
4+
testEnvironment: 'node',
5+
globals: {
6+
'ts-jest': {
7+
tsconfig: '<rootDir>/tsconfig.spec.json',
8+
},
9+
},
10+
transform: {
11+
'^.+\\.[tj]s$': 'ts-jest',
12+
},
13+
moduleFileExtensions: ['ts', 'js', 'html'],
14+
coverageDirectory: '../../coverage/packages/extension-huggingface',
15+
};
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
{
2+
"name": "@vulcan-sql/extension-huggingface",
3+
"description": "Hugging Face feature for VulcanSQL",
4+
"version": "0.6.0",
5+
"type": "commonjs",
6+
"publishConfig": {
7+
"access": "public"
8+
},
9+
"keywords": [
10+
"vulcan",
11+
"vulcan-sql",
12+
"data",
13+
"sql",
14+
"database",
15+
"data-warehouse",
16+
"data-lake",
17+
"api-builder",
18+
"huggingface"
19+
],
20+
"repository": {
21+
"type": "git",
22+
"url": "https://github.com/Canner/vulcan.git"
23+
},
24+
"license": "MIT",
25+
"peerDependencies": {
26+
"@vulcan-sql/core": "~0.6.0-0"
27+
}
28+
}
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
{
2+
"root": "packages/extension-huggingface",
3+
"sourceRoot": "packages/extension-huggingface/src",
4+
"targets": {
5+
"build": {
6+
"executor": "@nrwl/workspace:run-commands",
7+
"options": {
8+
"command": "yarn ts-node ./tools/scripts/replaceAlias.ts extension-huggingface"
9+
},
10+
"dependsOn": [
11+
{
12+
"projects": "self",
13+
"target": "tsc"
14+
},
15+
{
16+
"projects": "self",
17+
"target": "install-dependencies"
18+
}
19+
]
20+
},
21+
"tsc": {
22+
"executor": "@nrwl/js:tsc",
23+
"outputs": ["{options.outputPath}"],
24+
"options": {
25+
"outputPath": "dist/packages/extension-huggingface",
26+
"main": "packages/extension-huggingface/src/index.ts",
27+
"tsConfig": "packages/extension-huggingface/tsconfig.lib.json",
28+
"assets": ["packages/extension-huggingface/*.md"],
29+
"buildableProjectDepsInPackageJsonType": "dependencies"
30+
},
31+
"dependsOn": [
32+
{
33+
"projects": "dependencies",
34+
"target": "build"
35+
},
36+
{
37+
"projects": "self",
38+
"target": "install-dependencies"
39+
}
40+
]
41+
},
42+
"lint": {
43+
"executor": "@nrwl/linter:eslint",
44+
"outputs": ["{options.outputFile}"],
45+
"options": {
46+
"lintFilePatterns": ["packages/extension-huggingface/**/*.ts"]
47+
}
48+
},
49+
"test": {
50+
"executor": "@nrwl/jest:jest",
51+
"outputs": ["coverage/packages/extension-huggingface"],
52+
"options": {
53+
"jestConfig": "packages/extension-huggingface/jest.config.ts",
54+
"passWithNoTests": true
55+
},
56+
"dependsOn": [
57+
{
58+
"projects": "self",
59+
"target": "install-dependencies"
60+
}
61+
]
62+
},
63+
"publish": {
64+
"executor": "@nrwl/workspace:run-commands",
65+
"options": {
66+
"command": "node ../../../tools/scripts/publish.mjs {args.tag} {args.version}",
67+
"cwd": "dist/packages/extension-huggingface"
68+
},
69+
"dependsOn": [
70+
{
71+
"projects": "self",
72+
"target": "build"
73+
}
74+
]
75+
},
76+
"install-dependencies": {
77+
"executor": "@nrwl/workspace:run-commands",
78+
"options": {
79+
"command": "yarn",
80+
"cwd": "packages/extension-huggingface"
81+
}
82+
}
83+
},
84+
"tags": []
85+
}

0 commit comments

Comments
 (0)