chore(extension-huggingface): add logic to check that "query" has a value, with test cases for the huggingface filters

kokokuo · kokokuo · commit 5e22e5117c10 · 2023-08-02T22:25:15.000+08:00
- fix grammar in README.
- fix the notice section of the text generation document.
- add logic for checking that "query" has a value, with test cases
diff --git a/packages/doc/docs/extensions/huggingface/huggingface-text-generation.mdx b/packages/doc/docs/extensions/huggingface/huggingface-text-generation.mdx
@@ -6,8 +6,8 @@ The [Text Generation](https://huggingface.co/docs/api-inference/detailed_paramet
 
 The result will be a string from `huggingface_text_generation`.
 
-:::📢 Notice
- The **Text Generation** default model is **gpt2**, If you would like to use the [Meta LLama2](https://huggingface.co/meta-llama) models, you have two method to do:
+:::info
+ The **Text Generation** default model is **gpt2**. If you would like to use the [Meta LLama2](https://huggingface.co/meta-llama) models, there are two ways to do so:
 
  1. Subscribe to the [Pro Account](https://huggingface.co/pricing#pro). 
   - Set the Meta LLama2 model using the `model` keyword argument in `huggingface_text_generation`, e.g: `meta-llama/Llama-2-13b-chat-hf`.
diff --git a/packages/extension-huggingface/README.md b/packages/extension-huggingface/README.md
@@ -112,7 +112,7 @@ The [Text Generation](https://huggingface.co/docs/api-inference/detailed_paramet
 
 Using the `huggingface_text_generation` filter. The result will be a string from `huggingface_text_generation`.
 
-**📢 Notice**: The **Text Generation** default model is **gpt2**, If you would like to use the [Meta LLama2](https://huggingface.co/meta-llama) models, you have two method to do:
+**📢 Notice**: The **Text Generation** default model is **gpt2**. If you would like to use the [Meta LLama2](https://huggingface.co/meta-llama) models, there are two ways to do so:
 
 1. Subscribe to the [Pro Account](https://huggingface.co/pricing#pro). 
  - Set the Meta LLama2 model using the `model` keyword argument in `huggingface_text_generation`, e.g: `meta-llama/Llama-2-13b-chat-hf`.
diff --git a/packages/extension-huggingface/src/index.ts b/packages/extension-huggingface/src/index.ts
@@ -5,13 +5,13 @@ import {
 } from './lib/filters/tableQuestionAnswering';
 
 import {
-  Builder as TextGenerationFilterBuilder,
-  Runner as TextGenerationFilterRunner,
+  Builder as HuggingFaceTextGenerationFilterBuilder,
+  Runner as HuggingFaceTextGenerationFilterRunner,
 } from './lib/filters/textGeneration';
 
 export default [
   HuggingFaceTableQuestionAnsweringFilterBuilder,
   HuggingFaceTableQuestionAnsweringFilterRunner,
-  TextGenerationFilterBuilder,
-  TextGenerationFilterRunner,
+  HuggingFaceTextGenerationFilterBuilder,
+  HuggingFaceTextGenerationFilterRunner,
 ];
diff --git a/packages/extension-huggingface/src/lib/filters/tableQuestionAnswering.ts b/packages/extension-huggingface/src/lib/filters/tableQuestionAnswering.ts
@@ -40,6 +40,8 @@ export const TableQuestionAnsweringFilter: FunctionalFilter = async ({
     throw new InternalError('Input value must be an array of object');
   if (!(typeof args === 'object') || !has(args, 'query'))
     throw new InternalError('Must provide "query" keyword argument');
+  if (!args['query'])
+    throw new InternalError('The "query" argument must have value');
 
   // Convert the data result format to table value format
   const table = convertToHuggingFaceTable(value);
diff --git a/packages/extension-huggingface/src/lib/filters/textGeneration.ts b/packages/extension-huggingface/src/lib/filters/textGeneration.ts
@@ -55,6 +55,8 @@ export const TextGenerationFilter: FunctionalFilter = async ({
     throw new InternalError('Input value must be an array of object');
   if (!(typeof args === 'object') || !has(args, 'query'))
     throw new InternalError('Must provide "query" keyword argument');
+  if (!args['query'])
+    throw new InternalError('The "query" argument must have value');
 
   // Convert the data result to JSON string as question context
   const context = JSON.stringify(value);
diff --git a/packages/extension-huggingface/test/tableQuestionAnswering.spec.ts b/packages/extension-huggingface/test/tableQuestionAnswering.spec.ts
@@ -34,6 +34,58 @@ describe('Test "huggingface_table_question_answering" filter', () => {
     50 * 1000
   );
 
+  it(
+    'Should throw error when pass the "query" argument but value is undefined',
+    async () => {
+      const token = process.env['HF_ACCESS_TOKEN'];
+      const { compileAndLoad, execute } = await getTestCompiler({
+        extensions: { huggingface: path.join(__dirname, '..', 'src') },
+        huggingface: {
+          accessToken: token,
+        },
+      });
+
+      const sql = `{% set data = ${JSON.stringify(
+        repositories
+      )} %}SELECT {{ data | huggingface_table_question_answering(query=undefined) }}`;
+
+      // Act
+      await compileAndLoad(sql);
+
+      // Assert
+      await expect(execute({})).rejects.toThrow(
+        'The "query" argument must have value'
+      );
+    },
+    50 * 1000
+  );
+
+  it(
+    'Should throw error when pass the "query" argument but value is empty string',
+    async () => {
+      const token = process.env['HF_ACCESS_TOKEN'];
+      const { compileAndLoad, execute } = await getTestCompiler({
+        extensions: { huggingface: path.join(__dirname, '..', 'src') },
+        huggingface: {
+          accessToken: token,
+        },
+      });
+
+      const sql = `{% set data = ${JSON.stringify(
+        repositories
+      )} %}SELECT {{ data | huggingface_table_question_answering(query='') }}`;
+
+      // Act
+      await compileAndLoad(sql);
+
+      // Assert
+      await expect(execute({})).rejects.toThrow(
+        'The "query" argument must have value'
+      );
+    },
+    50 * 1000
+  );
+
   it('Should throw error when input value not be array of object', async () => {
     const token = process.env['HF_ACCESS_TOKEN'];
     const { compileAndLoad, execute } = await getTestCompiler({
diff --git a/packages/extension-huggingface/test/textGeneration.spec.ts b/packages/extension-huggingface/test/textGeneration.spec.ts
@@ -33,6 +33,58 @@ describe('Test "huggingface_text_generation" filter', () => {
     50 * 1000
   );
 
+  it(
+    'Should throw error when pass the "query" argument but value is undefined',
+    async () => {
+      const token = process.env['HF_ACCESS_TOKEN'];
+      const { compileAndLoad, execute } = await getTestCompiler({
+        extensions: { huggingface: path.join(__dirname, '..', 'src') },
+        huggingface: {
+          accessToken: token,
+        },
+      });
+
+      const sql = `{% set data = ${JSON.stringify(
+        repositories
+      )} %}SELECT {{ data | huggingface_text_generation(query=undefined) }}`;
+
+      // Act
+      await compileAndLoad(sql);
+
+      // Assert
+      await expect(execute({})).rejects.toThrow(
+        'The "query" argument must have value'
+      );
+    },
+    50 * 1000
+  );
+
+  it(
+    'Should throw error when pass the "query" argument but value is empty string',
+    async () => {
+      const token = process.env['HF_ACCESS_TOKEN'];
+      const { compileAndLoad, execute } = await getTestCompiler({
+        extensions: { huggingface: path.join(__dirname, '..', 'src') },
+        huggingface: {
+          accessToken: token,
+        },
+      });
+
+      const sql = `{% set data = ${JSON.stringify(
+        repositories
+      )} %}SELECT {{ data | huggingface_text_generation(query='') }}`;
+
+      // Act
+      await compileAndLoad(sql);
+
+      // Assert
+      await expect(execute({})).rejects.toThrow(
+        'The "query" argument must have value'
+      );
+    },
+    50 * 1000
+  );
+
   it('Should throw error when input value not be array of object', async () => {
     const token = process.env['HF_ACCESS_TOKEN'];
     const { compileAndLoad, execute } = await getTestCompiler({
@@ -75,27 +127,27 @@ describe('Test "huggingface_text_generation" filter', () => {
     },
     50 * 1000
   );
-  
+
   it(
-    'Should not throw when pass the "query" argument by dynamic parameter through HuggingFace recommended model',
+    'Should not throw when passing the "query" argument by dynamic parameter through HuggingFace default recommended "gpt2" model',
     async () => {
       const token = process.env['HF_ACCESS_TOKEN'];
-      const { compileAndLoad, execute } =
-        await getTestCompiler({
-          extensions: { huggingface: path.join(__dirname, '..', 'src') },
-          huggingface: {
-            accessToken: token,
-          },
-        });
+      const { compileAndLoad, execute } = await getTestCompiler({
+        extensions: { huggingface: path.join(__dirname, '..', 'src') },
+        huggingface: {
+          accessToken: token,
+        },
+      });
 
       const sql = `{% set data = ${JSON.stringify(
         repositories
       )} %}SELECT {{ data | huggingface_text_generation(query=context.params.value, wait_for_model=true, use_cache=false) }}`;
 
-      
       await compileAndLoad(sql);
       // Assert
-      await expect(execute({ value: 'what repository has most stars?' })).resolves.not.toThrow();
+      await expect(
+        execute({ value: 'what repository has most stars?' })
+      ).resolves.not.toThrow();
     },
     100 * 1000
   );
@@ -117,16 +169,17 @@ describe('Test "huggingface_text_generation" filter', () => {
         repositories
       )} %}SELECT {{ data | huggingface_text_generation(query=context.params.value,model="meta-llama/Llama-2-13b-chat-hf", wait_for_model=true, use_cache=false) }}`;
 
-      
       await compileAndLoad(sql);
       await execute({ value: 'what repository has most stars?' });
 
       // Assert
       const queries = await getExecutedQueries();
       const bindings = await getCreatedBinding();
-      
+
       expect(queries[0]).toBe('SELECT $1');
-      expect(bindings[0].get('$1')).toEqual('Answer: Based on the information provided, the repository with the most stars is "vulcan-sql" with 1000 stars.');
+      expect(bindings[0].get('$1')).toEqual(
+        'Answer: Based on the information provided, the repository with the most stars is "vulcan-sql" with 1000 stars.'
+      );
     },
     100 * 1000
   );
@@ -135,19 +188,23 @@ describe('Test "huggingface_text_generation" filter', () => {
   it.skip.each([
     {
       question: 'what repository has most stars?',
-      expected: 'Answer: Based on the information provided, the repository with the most stars is "vulcan-sql" with 1000 stars.'
+      expected:
+        'Answer: Based on the information provided, the repository with the most stars is "vulcan-sql" with 1000 stars.',
     },
     {
       question: 'what repository has lowest stars?',
-      expected: 'Answer: Based on the information provided, the repository with the lowest stars is "hello-world" with 0 stars.'
+      expected:
+        'Answer: Based on the information provided, the repository with the lowest stars is "hello-world" with 0 stars.',
     },
     {
       question: 'How many stars does the vulcan-sql repository have?',
-      expected: 'Answer: Based on the information provided, the vulcan-sql repository has 1000 stars.'
+      expected:
+        'Answer: Based on the information provided, the vulcan-sql repository has 1000 stars.',
     },
     {
       question: 'How many stars does the accio repository have?',
-      expected: 'Answer: Based on the information provided, the accio repository has 500 stars.'
+      expected:
+        'Answer: Based on the information provided, the accio repository has 500 stars.',
     },
     {
       question: 'How many repositories related to data-lake topic?',
@@ -156,7 +213,7 @@ describe('Test "huggingface_text_generation" filter', () => {
       1. vulcan-sql
       2. accio
       
-      Both of these repositories have the data-lake topic in their description.`
+      Both of these repositories have the data-lake topic in their description.`,
     },
   ])(
     'Should get "$expected" answer when asking "$question" through "meta-llama/Llama-2-13b-chat-hf" model',