|
18 | 18 | class AbsEmbedder(ABC): |
19 | 19 | """ |
20 | 20 | Base class for embedder. |
21 | | - Extend this class and implement :meth:`encode_queries`, :meth:`encode_passages`, :meth:`encode` for custom embedders. |
| 21 | + Extend this class and implement :meth:`encode_queries`, :meth:`encode_corpus`, :meth:`encode` for custom embedders. |
22 | 22 |
|
23 | 23 | Args: |
24 | 24 | model_name_or_path (str): If it's a path to a local model, it loads the model from the path. Otherwise tries to download and |
25 | 25 | load a model from HuggingFace Hub with the name. |
26 | | - normalize_embeddings (bool, optional): If True, normalize the embedding vector. Default: `True`. |
| 26 | + normalize_embeddings (bool, optional): If True, normalize the embedding vector. Defaults to :data:`True`. |
27 | 27 | use_fp16 (bool, optional): If true, use half-precision floating-point to speed up computation with a slight performance |
28 | | - degradation. Default: `True`. |
| 28 | + degradation. Defaults to :data:`True`. |
29 | 29 | query_instruction_for_retrieval: (Optional[str], optional): Query instruction for retrieval tasks, which will be used with |
30 | | - with :attr:`query_instruction_format`. Default: `None`. |
31 | | - query_instruction_format: (str, optional): The template for :attr:`query_instruction_for_retrieval`. Default: `"{}{}"`. |
32 | | - devices (Optional[Union[str, int, List[str], List[int]]], optional): Devices to use for model inference. Default: `None`. |
33 | | - batch_size (int, optional): Batch size for inference. Default: `256`. |
34 | | - query_max_length (int, optional): Maximum length for query. Default: `512`. |
35 | | - passage_max_length (int, optional): Maximum length for passage. Default: `512`. |
36 | | - instruction (Optional[str], optional): Instruction for embedding with :attr:`instruction_format`. Default: `None`. |
37 | | - instruction_format (str, optional): Instruction format when using :attr:`instruction`. Default: `"{}{}"`. |
| 30 | + :attr:`query_instruction_format`. Defaults to :data:`None`. |
| 31 | + query_instruction_format (str, optional): The template for :attr:`query_instruction_for_retrieval`. Defaults to :data:`"{}{}"`. |
| 32 | + devices (Optional[Union[str, int, List[str], List[int]]], optional): Devices to use for model inference. Defaults to :data:`None`. |
| 33 | + batch_size (int, optional): Batch size for inference. Defaults to :data:`256`. |
| 34 | + query_max_length (int, optional): Maximum length for query. Defaults to :data:`512`. |
| 35 | + passage_max_length (int, optional): Maximum length for passage. Defaults to :data:`512`. |
38 | 36 | convert_to_numpy (bool, optional): If True, the output embedding will be a Numpy array. Otherwise, it will be a Torch Tensor. |
39 | | - Default: `True`. |
| 37 | + Defaults to :data:`True`. |
40 | 38 | kwargs (Dict[Any], optional): Additional parameters for HuggingFace Transformers config or children classes. |
41 | 39 | """ |
42 | 40 |
|
@@ -139,10 +137,10 @@ def encode_queries( |
139 | 137 |
|
140 | 138 | Args: |
141 | 139 | queries (Union[List[str], str]): Input queries to encode. |
142 | | - batch_size (Optional[int], optional): Number of sentences for each iter. Defaults to None. |
143 | | - max_length (Optional[int], optional): Maximum length of tokens. Defaults to None. |
| 140 | + batch_size (Optional[int], optional): Number of sentences for each iter. Defaults to :data:`None`. |
| 141 | + max_length (Optional[int], optional): Maximum length of tokens. Defaults to :data:`None`. |
144 | 142 | convert_to_numpy (Optional[bool], optional): If True, the output embedding will be a Numpy array. Otherwise, it will |
145 | | - be a Torch Tensor. Defaults to None. |
| 143 | + be a Torch Tensor. Defaults to :data:`None`. |
146 | 144 |
|
147 | 145 | Returns: |
148 | 146 | Union[torch.Tensor, np.ndarray]: Return the embedding vectors in a numpy array or tensor. |
@@ -173,10 +171,10 @@ def encode_corpus( |
173 | 171 |
|
174 | 172 | Args: |
175 | 173 | corpus (Union[List[str], str]): Input corpus to encode. |
176 | | - batch_size (Optional[int], optional): Number of sentences for each iter. Defaults to None. |
177 | | - max_length (Optional[int], optional): Maximum length of tokens. Defaults to None. |
| 174 | + batch_size (Optional[int], optional): Number of sentences for each iter. Defaults to :data:`None`. |
| 175 | + max_length (Optional[int], optional): Maximum length of tokens. Defaults to :data:`None`. |
178 | 176 | convert_to_numpy (Optional[bool], optional): If True, the output embedding will be a Numpy array. Otherwise, it will |
179 | | - be a Torch Tensor. Defaults to None. |
| 177 | + be a Torch Tensor. Defaults to :data:`None`. |
180 | 178 |
|
181 | 179 | Returns: |
182 | 180 | Union[torch.Tensor, np.ndarray]: Return the embedding vectors in a numpy array or tensor. |
@@ -212,12 +210,12 @@ def encode( |
212 | 210 |
|
213 | 211 | Args: |
214 | 212 | sentences (Union[List[str], str]): Input sentences to encode. |
215 | | - batch_size (Optional[int], optional): Number of sentences for each iter. Defaults to None. |
216 | | - max_length (Optional[int], optional): Maximum length of tokens. Defaults to None. |
| 213 | + batch_size (Optional[int], optional): Number of sentences for each iter. Defaults to :data:`None`. |
| 214 | + max_length (Optional[int], optional): Maximum length of tokens. Defaults to :data:`None`. |
217 | 215 | convert_to_numpy (Optional[bool], optional): If True, the output embedding will be a Numpy array. Otherwise, it will |
218 | | - be a Torch Tensor. Defaults to None. |
219 | | - instruction (Optional[str], optional): The text of instruction. Defaults to None. |
220 | | - instruction_format (Optional[str], optional): Format for instruction. Defaults to None. |
| 216 | + be a Torch Tensor. Defaults to :data:`None`. |
| 217 | + instruction (Optional[str], optional): The text of instruction. Defaults to :data:`None`. |
| 218 | + instruction_format (Optional[str], optional): Format for instruction. Defaults to :data:`None`. |
221 | 219 |
|
222 | 220 | Returns: |
223 | 221 | Union[torch.Tensor, np.ndarray]: return the embedding vectors in a numpy array or tensor. |
@@ -396,7 +394,7 @@ def _concatenate_results_from_multi_process(self, results_list: List[Union[torch |
396 | 394 | """concatenate and return the results from all the processes |
397 | 395 |
|
398 | 396 | Args: |
399 | | - results_list (List[Union[torch.Tensor, np.ndarray, Any]]): a list of results from all the processes |
| 397 | + results_list (List[Union[torch.Tensor, np.ndarray, Any]]): A list of results from all the processes. |
400 | 398 |
|
401 | 399 | Raises: |
402 | 400 | NotImplementedError: Unsupported type for results_list |
|
0 commit comments