Skip to content

Commit d2be215

Browse files
committed
docstring
1 parent c94016f commit d2be215

1 file changed

Lines changed: 70 additions & 0 deletions

File tree

FlagEmbedding/abc/inference/AbsEmbedder.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,17 @@ def __init__(
8383

8484
@staticmethod
8585
def get_target_devices(devices: Union[str, int, List[str], List[int]]) -> List[str]:
86+
"""Get the list of target devices from the given device specification.
87+
88+
Args:
89+
devices (Union[str, int, List[str], List[int]]): Specified devices; can be a `str`, an `int`, a list of `str`, or a list of `int`. If `None` and CUDA is available, all visible CUDA devices are used.
90+
91+
Raises:
92+
ValueError: If `devices` is not a string, an integer, a list of strings, or a list of integers.
93+
94+
Returns:
95+
List[str]: A list of target devices as device strings, e.g. `cuda:0`.
96+
"""
8697
if devices is None:
8798
if torch.cuda.is_available():
8899
return [f"cuda:{i}" for i in range(torch.cuda.device_count())]
@@ -108,6 +119,16 @@ def get_target_devices(devices: Union[str, int, List[str], List[int]]) -> List[s
108119

109120
@staticmethod
110121
def get_detailed_instruct(instruction_format: str, instruction: str, sentence: str):
122+
"""Combine the instruction and sentence along with the instruction format.
123+
124+
Args:
125+
instruction_format (str): Format string with two positional fields, filled with the instruction and the sentence in that order (e.g. `"{}{}"`).
126+
instruction (str): The text of instruction.
127+
sentence (str): The sentence to concatenate with.
128+
129+
Returns:
130+
str: The complete sentence with the instruction applied.
131+
"""
111132
return instruction_format.format(instruction, sentence)
112133

113134
def encode_queries(
@@ -118,6 +139,18 @@ def encode_queries(
118139
convert_to_numpy: Optional[bool] = None,
119140
**kwargs: Any
120141
):
142+
"""Encode the queries, using the instruction if provided.
143+
144+
Args:
145+
queries (Union[List[str], str]): Input queries to encode.
146+
batch_size (Optional[int], optional): Number of sentences for each iteration. If None, `self.batch_size` is used. Defaults to None.
147+
max_length (Optional[int], optional): Maximum length of tokens. If None, `self.query_max_length` is used. Defaults to None.
148+
convert_to_numpy (Optional[bool], optional): If True, the output embedding will be a Numpy array. Otherwise, it will
149+
be a Torch Tensor. Defaults to None.
150+
151+
Returns:
152+
Union[torch.Tensor, np.ndarray]: return the embedding vectors in a numpy array or tensor.
153+
"""
121154
if batch_size is None: batch_size = self.batch_size
122155
if max_length is None: max_length = self.query_max_length
123156
if convert_to_numpy is None: convert_to_numpy = self.convert_to_numpy
@@ -140,6 +173,18 @@ def encode_corpus(
140173
convert_to_numpy: Optional[bool] = None,
141174
**kwargs: Any
142175
):
176+
"""Encode the corpus, using the instruction if provided.
177+
178+
Args:
179+
corpus (Union[List[str], str]): Input corpus to encode.
180+
batch_size (Optional[int], optional): Number of sentences for each iter. Defaults to None.
181+
max_length (Optional[int], optional): Maximum length of tokens. Defaults to None.
182+
convert_to_numpy (Optional[bool], optional): If True, the output embedding will be a Numpy array. Otherwise, it will
183+
be a Torch Tensor. Defaults to None.
184+
185+
Returns:
186+
Union[torch.Tensor, np.ndarray]: return the embedding vectors in a numpy array or tensor.
187+
"""
143188
passage_instruction_for_retrieval = self.kwargs.get("passage_instruction_for_retrieval", None)
144189
passage_instruction_format = self.kwargs.get("passage_instruction_format", "{}{}")
145190

@@ -167,6 +212,20 @@ def encode(
167212
instruction_format: Optional[str] = None,
168213
**kwargs: Any
169214
):
215+
"""Encode the input sentences with the embedding model.
216+
217+
Args:
218+
sentences (Union[List[str], str]): Input sentences to encode.
219+
batch_size (Optional[int], optional): Number of sentences for each iteration. If None, `self.batch_size` is used. Defaults to None.
220+
max_length (Optional[int], optional): Maximum length of tokens. If None, `self.passage_max_length` is used. Defaults to None.
221+
convert_to_numpy (Optional[bool], optional): If True, the output embedding will be a Numpy array. Otherwise, it will
222+
be a Torch Tensor. Defaults to None.
223+
instruction (Optional[str], optional): The text of instruction. Defaults to None.
224+
instruction_format (Optional[str], optional): Format for instruction. Defaults to None.
225+
226+
Returns:
227+
Union[torch.Tensor, np.ndarray]: return the embedding vectors in a numpy array or tensor.
228+
"""
170229
if batch_size is None: batch_size = self.batch_size
171230
if max_length is None: max_length = self.passage_max_length
172231
if convert_to_numpy is None: convert_to_numpy = self.convert_to_numpy
@@ -338,6 +397,17 @@ def encode_multi_process(
338397
return embeddings
339398

340399
def _concatenate_results_from_multi_process(self, results_list: List[Union[torch.Tensor, np.ndarray, Any]]):
400+
"""Concatenate and return the results from all the processes.
401+
402+
Args:
403+
results_list (List[Union[torch.Tensor, np.ndarray, Any]]): a list of results from all the processes
404+
405+
Raises:
406+
NotImplementedError: If the type of the results in `results_list` is unsupported.
407+
408+
Returns:
409+
Union[torch.Tensor, np.ndarray]: return the embedding vectors in a numpy array or tensor.
410+
"""
341411
if isinstance(results_list[0], torch.Tensor):
342412
return torch.cat(results_list, dim=0)
343413
elif isinstance(results_list[0], np.ndarray):

0 commit comments

Comments
 (0)