@@ -83,6 +83,17 @@ def __init__(
8383
8484 @staticmethod
8585 def get_target_devices (devices : Union [str , int , List [str ], List [int ]]) -> List [str ]:
86+ """Convert the specified devices into a list of target device strings.
87+
88+ Args:
89+ devices (Union[str, int, List[str], List[int]]): specified devices, can be `str`, `int`, list of `str`, or list of `int`.
90+
91+ Raises:
92+ ValueError: Devices should be a string or an integer or a list of strings or a list of integers.
93+
94+ Returns:
95+ List[str]: A list of target device strings, for example ``cuda:0`` for the first CUDA device.
96+ """
8697 if devices is None :
8798 if torch .cuda .is_available ():
8899 return [f"cuda:{ i } " for i in range (torch .cuda .device_count ())]
@@ -108,6 +119,16 @@ def get_target_devices(devices: Union[str, int, List[str], List[int]]) -> List[s
108119
@staticmethod
def get_detailed_instruct(instruction_format: str, instruction: str, sentence: str):
    """Fill the instruction template with an instruction and a sentence.

    Args:
        instruction_format (str): Template handed to ``str.format``; the
            instruction is supplied as the first positional field and the
            sentence as the second.
        instruction (str): The instruction text.
        sentence (str): The sentence to combine with the instruction.

    Returns:
        str: The formatted template containing the instruction and sentence.
    """
    # Positional order matters: field 0 is the instruction, field 1 the sentence.
    template_fields = (instruction, sentence)
    return instruction_format.format(*template_fields)
112133
113134 def encode_queries (
@@ -118,6 +139,18 @@ def encode_queries(
118139 convert_to_numpy : Optional [bool ] = None ,
119140 ** kwargs : Any
120141 ):
142+ """Encode the queries using the instruction if provided.
143+
144+ Args:
145+ queries (Union[List[str], str]): Input queries to encode.
146+ batch_size (Optional[int], optional): Number of sentences for each iter. Defaults to None.
147+ max_length (Optional[int], optional): Maximum length of tokens. Defaults to None.
148+ convert_to_numpy (Optional[bool], optional): If True, the output embedding will be a Numpy array. Otherwise, it will
149+ be a Torch Tensor. Defaults to None.
150+
151+ Returns:
152+ Union[torch.Tensor, np.ndarray]: Return the embedding vectors in a numpy array or tensor.
153+ """
121154 if batch_size is None : batch_size = self .batch_size
122155 if max_length is None : max_length = self .query_max_length
123156 if convert_to_numpy is None : convert_to_numpy = self .convert_to_numpy
@@ -140,6 +173,18 @@ def encode_corpus(
140173 convert_to_numpy : Optional [bool ] = None ,
141174 ** kwargs : Any
142175 ):
176+ """Encode the corpus using the instruction if provided.
177+
178+ Args:
179+ corpus (Union[List[str], str]): Input corpus to encode.
180+ batch_size (Optional[int], optional): Number of sentences for each iter. Defaults to None.
181+ max_length (Optional[int], optional): Maximum length of tokens. Defaults to None.
182+ convert_to_numpy (Optional[bool], optional): If True, the output embedding will be a Numpy array. Otherwise, it will
183+ be a Torch Tensor. Defaults to None.
184+
185+ Returns:
186+ Union[torch.Tensor, np.ndarray]: Return the embedding vectors in a numpy array or tensor.
187+ """
143188 passage_instruction_for_retrieval = self .kwargs .get ("passage_instruction_for_retrieval" , None )
144189 passage_instruction_format = self .kwargs .get ("passage_instruction_format" , "{}{}" )
145190
@@ -167,6 +212,20 @@ def encode(
167212 instruction_format : Optional [str ] = None ,
168213 ** kwargs : Any
169214 ):
215+ """Encode the input sentences with the embedding model.
216+
217+ Args:
218+ sentences (Union[List[str], str]): Input sentences to encode.
219+ batch_size (Optional[int], optional): Number of sentences for each iter. Defaults to None.
220+ max_length (Optional[int], optional): Maximum length of tokens. Defaults to None.
221+ convert_to_numpy (Optional[bool], optional): If True, the output embedding will be a Numpy array. Otherwise, it will
222+ be a Torch Tensor. Defaults to None.
223+ instruction (Optional[str], optional): The text of instruction. Defaults to None.
224+ instruction_format (Optional[str], optional): Format for instruction. Defaults to None.
225+
226+ Returns:
227+ Union[torch.Tensor, np.ndarray]: return the embedding vectors in a numpy array or tensor.
228+ """
170229 if batch_size is None : batch_size = self .batch_size
171230 if max_length is None : max_length = self .passage_max_length
172231 if convert_to_numpy is None : convert_to_numpy = self .convert_to_numpy
@@ -338,6 +397,17 @@ def encode_multi_process(
338397 return embeddings
339398
340399 def _concatenate_results_from_multi_process (self , results_list : List [Union [torch .Tensor , np .ndarray , Any ]]):
400+ """Concatenate and return the results from all the processes.
401+
402+ Args:
403+ results_list (List[Union[torch.Tensor, np.ndarray, Any]]): a list of results from all the processes
404+
405+ Raises:
406+ NotImplementedError: Unsupported type for results_list
407+
408+ Returns:
409+ Union[torch.Tensor, np.ndarray]: return the embedding vectors in a numpy array or tensor.
410+ """
341411 if isinstance (results_list [0 ], torch .Tensor ):
342412 return torch .cat (results_list , dim = 0 )
343413 elif isinstance (results_list [0 ], np .ndarray ):
0 commit comments