Skip to content

Commit 75b8625

Browse files
committed
feat(extension-huggingface): refactor for reusing common feature and refactor huggingface filter logic
1 parent 180f8a6 commit 75b8625

6 files changed

Lines changed: 295 additions & 272 deletions

File tree

packages/extension-huggingface/src/lib/filters/tableQuestionAnswering.ts

Lines changed: 23 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -3,82 +3,56 @@ import {
33
InternalError,
44
createFilterExtension,
55
} from '@vulcan-sql/core';
6-
import axios, { AxiosError } from 'axios';
7-
import { convertToHuggingFaceTable } from '../utils';
8-
import { isArray } from 'class-validator';
9-
import { has } from 'lodash';
106

11-
type HuggingFaceOptions = {
12-
accessToken: string;
13-
};
7+
import { convertToHuggingFaceTable, postRequest } from '../utils';
8+
import { has, isArray, isEmpty, omit } from 'lodash';
9+
import {
10+
InferenceNLPOptions,
11+
HuggingFaceOptions,
12+
apiInferenceEndpoint,
13+
} from '../model';
1414

15-
// More information described the options, see: https://huggingface.co/docs/api-inference/detailed_parameters#table-question-answering-task
15+
// More information describing the options. See: https://huggingface.co/docs/api-inference/detailed_parameters#table-question-answering-task
1616
type TableQuestionAnsweringOptions = {
1717
inputs: {
1818
query: string;
1919
table: Record<string, string[]>;
2020
};
21-
options: {
22-
use_cache: boolean;
23-
wait_for_model: boolean;
24-
};
25-
};
26-
27-
const request = async (url: string, data: any, token: string) => {
28-
try {
29-
const result = await axios.post(url, data, {
30-
headers: { Authorization: `Bearer ${token}` },
31-
});
32-
return result.data;
33-
} catch (error) {
34-
const axiosError = error as AxiosError;
35-
// https://axios-http.com/docs/handling_errors
36-
// if response has error, throw the response error, or throw the request error
37-
if (axiosError.response)
38-
throw new Error(JSON.stringify(axiosError.response?.data));
39-
throw new Error(axiosError.message);
40-
}
21+
options?: InferenceNLPOptions;
4122
};
4223

43-
// default recommended model, see https://huggingface.co/docs/api-inference/detailed_parameters#table-question-answering-task
24+
/**
25+
* Get the table question answering URL. Uses the recommended model as the default value.
26+
* See: https://huggingface.co/docs/api-inference/detailed_parameters#table-question-answering-task
27+
* */
4428
const getUrl = (model = 'google/tapas-base-finetuned-wtq') =>
45-
`https://api-inference.huggingface.co/models/${model}`;
29+
`${apiInferenceEndpoint}/${model}`;
4630

4731
export const TableQuestionAnsweringFilter: FunctionalFilter = async ({
4832
args,
4933
value,
5034
options,
5135
}) => {
52-
if (!options || !(options as HuggingFaceOptions).accessToken)
53-
throw new InternalError('please given access token');
36+
const token = (options as HuggingFaceOptions)?.accessToken;
37+
if (!token) throw new InternalError('please given access token');
5438

5539
if (!isArray(value))
5640
throw new InternalError('Input value must be an array of object');
57-
5841
if (!(typeof args === 'object') || !has(args, 'query'))
5942
throw new InternalError('Must provide "query" keyword argument');
60-
if (!args['query'])
61-
throw new InternalError('The "query" argument must have value');
6243

63-
const token = (options as HuggingFaceOptions).accessToken;
6444
// Convert the data result format to table value format
6545
const table = convertToHuggingFaceTable(value);
66-
const context = {
67-
inputs: {
68-
query: args['query'],
69-
table,
70-
},
71-
options: {
72-
use_cache: args['use_cache'] ? args['use_cache'] : true,
73-
wait_for_model: args['wait_for_model'] ? args['wait_for_model'] : false,
74-
},
46+
// omit the hidden value '__keywords' from args; it is generated by nunjucks and is not related to HuggingFace.
47+
const { query, model, ...inferenceOptions } = omit(args, '__keywords');
48+
const payload = {
49+
inputs: { query, table },
7550
} as TableQuestionAnsweringOptions;
76-
77-
// Get table question answering url
78-
const url = args['model'] ? getUrl(args['model']) : getUrl();
51+
if (!isEmpty(inferenceOptions)) payload.options = inferenceOptions;
7952

8053
try {
81-
const results = await request(url, context, token);
54+
const url = getUrl(model);
55+
const results = await postRequest(url, payload, token);
8256
// convert to a JSON string so the user can get the whole result after parsing it in SQL
8357
return JSON.stringify(results);
8458
} catch (error) {
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
export type HuggingFaceOptions = {
2+
accessToken: string;
3+
};
4+
5+
export const apiInferenceEndpoint =
6+
'https://api-inference.huggingface.co/models';
7+
8+
// For more information, see: https://huggingface.co/docs/api-inference/detailed_parameters#natural-language-processing
9+
export type InferenceNLPOptions = {
10+
// Default: true. There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as-is, as models are deterministic (meaning the results will be the same anyway).
11+
use_cache?: boolean;
12+
// Default: false. If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done.
13+
wait_for_model?: boolean;
14+
};
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
export * from './converter';
2+
export * from './request';
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
import axios, { AxiosError } from 'axios';
2+
3+
export const postRequest = async (url: string, data: any, token: string) => {
4+
try {
5+
const result = await axios.post(url, data, {
6+
headers: { Authorization: `Bearer ${token}` },
7+
});
8+
return result.data;
9+
} catch (error) {
10+
const axiosError = error as AxiosError;
11+
// https://axios-http.com/docs/handling_errors
12+
// if response has error, throw the response error, or throw the request error
13+
if (axiosError.response)
14+
throw new Error(JSON.stringify(axiosError.response?.data));
15+
throw new Error(axiosError.message);
16+
}
17+
};

0 commit comments

Comments
 (0)