Skip to content

Commit e513431

Browse files
committed
Merge branch 'new-flagembedding-v1' of https://github.com/hanhainebula/FlagEmbedding into new-flagembedding-v1
2 parents f683f2f + 33df5c6 commit e513431

85 files changed

Lines changed: 1743 additions & 778 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

FlagEmbedding/abc/inference/AbsEmbedder.py

Lines changed: 63 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -39,22 +39,23 @@ class AbsEmbedder(ABC):
3939
Default: `True`.
4040
kwargs (Dict[Any], optional): Additional parameters for HuggingFace Transformers config or children classes.
4141
"""
42+
4243
def __init__(
43-
self,
44-
model_name_or_path: str,
45-
normalize_embeddings: bool = True,
46-
use_fp16: bool = True,
47-
query_instruction_for_retrieval: Optional[str] = None,
48-
query_instruction_format: str = "{}{}", # specify the format of query_instruction_for_retrieval
49-
devices: Optional[Union[str, int, List[str], List[int]]] = None,
50-
# inference
51-
batch_size: int = 256,
52-
query_max_length: int = 512,
53-
passage_max_length: int = 512,
54-
instruction: Optional[str] = None,
55-
instruction_format: str = "{}{}",
56-
convert_to_numpy: bool = True,
57-
**kwargs: Any,
44+
self,
45+
model_name_or_path: str,
46+
normalize_embeddings: bool = True,
47+
use_fp16: bool = True,
48+
query_instruction_for_retrieval: Optional[str] = None,
49+
query_instruction_format: str = "{}{}", # specify the format of query_instruction_for_retrieval
50+
devices: Optional[Union[str, int, List[str], List[int]]] = None,
51+
# inference
52+
batch_size: int = 256,
53+
query_max_length: int = 512,
54+
passage_max_length: int = 512,
55+
instruction: Optional[str] = None,
56+
instruction_format: str = "{}{}",
57+
convert_to_numpy: bool = True,
58+
**kwargs: Any,
5859
):
5960
self.model_name_or_path = model_name_or_path
6061
self.normalize_embeddings = normalize_embeddings
@@ -78,6 +79,7 @@ def __init__(
7879
# tokenizer and model are initialized in the child class
7980
self.tokenizer = None
8081
self.model = None
82+
self.pool = None
8183

8284
@staticmethod
8385
def get_target_devices(devices: Union[str, int, List[str], List[int]]) -> List[str]:
@@ -109,12 +111,12 @@ def get_detailed_instruct(instruction_format: str, instruction: str, sentence: s
109111
return instruction_format.format(instruction, sentence)
110112

111113
def encode_queries(
112-
self,
113-
queries: Union[List[str], str],
114-
batch_size: Optional[int] = None,
115-
max_length: Optional[int] = None,
116-
convert_to_numpy: Optional[bool] = None,
117-
**kwargs: Any
114+
self,
115+
queries: Union[List[str], str],
116+
batch_size: Optional[int] = None,
117+
max_length: Optional[int] = None,
118+
convert_to_numpy: Optional[bool] = None,
119+
**kwargs: Any
118120
):
119121
if batch_size is None: batch_size = self.batch_size
120122
if max_length is None: max_length = self.query_max_length
@@ -131,12 +133,12 @@ def encode_queries(
131133
)
132134

133135
def encode_corpus(
134-
self,
135-
corpus: Union[List[str], str],
136-
batch_size: Optional[int] = None,
137-
max_length: Optional[int] = None,
138-
convert_to_numpy: Optional[bool] = None,
139-
**kwargs: Any
136+
self,
137+
corpus: Union[List[str], str],
138+
batch_size: Optional[int] = None,
139+
max_length: Optional[int] = None,
140+
convert_to_numpy: Optional[bool] = None,
141+
**kwargs: Any
140142
):
141143
passage_instruction_for_retrieval = self.kwargs.get("passage_instruction_for_retrieval", None)
142144
passage_instruction_format = self.kwargs.get("passage_instruction_format", "{}{}")
@@ -156,23 +158,27 @@ def encode_corpus(
156158
)
157159

158160
def encode(
159-
self,
160-
sentences: Union[List[str], str],
161-
batch_size: Optional[int] = None,
162-
max_length: Optional[int] = None,
163-
convert_to_numpy: Optional[bool] = None,
164-
instruction: Optional[str] = None,
165-
instruction_format: Optional[str] = None,
166-
**kwargs: Any
161+
self,
162+
sentences: Union[List[str], str],
163+
batch_size: Optional[int] = None,
164+
max_length: Optional[int] = None,
165+
convert_to_numpy: Optional[bool] = None,
166+
instruction: Optional[str] = None,
167+
instruction_format: Optional[str] = None,
168+
**kwargs: Any
167169
):
168170
if instruction is None: instruction = self.instruction
169171
if instruction_format is None: instruction_format = self.instruction_format
172+
if batch_size is None: batch_size = self.batch_size
173+
if max_length is None: max_length = self.passage_max_length
174+
if convert_to_numpy is None: convert_to_numpy = self.convert_to_numpy
170175

171176
if instruction is not None:
172177
if isinstance(sentences, str):
173178
sentences = self.get_detailed_instruct(instruction_format, instruction, sentences)
174179
else:
175-
sentences = [self.get_detailed_instruct(instruction_format, instruction, sentence) for sentence in sentences]
180+
sentences = [self.get_detailed_instruct(instruction_format, instruction, sentence) for sentence in
181+
sentences]
176182

177183
if isinstance(sentences, str) or len(self.target_devices) == 1:
178184
return self.encode_single_device(
@@ -184,27 +190,31 @@ def encode(
184190
**kwargs
185191
)
186192

187-
pool = self.start_multi_process_pool(AbsEmbedder._encode_multi_process_worker)
193+
if self.pool is None:
194+
self.pool = self.start_multi_process_pool(AbsEmbedder._encode_multi_process_worker)
188195
embeddings = self.encode_multi_process(
189196
sentences,
190-
pool,
197+
self.pool,
191198
batch_size=batch_size,
192199
max_length=max_length,
193200
convert_to_numpy=convert_to_numpy,
194201
**kwargs
195202
)
196-
self.stop_multi_process_pool(pool)
197203
return embeddings
198204

205+
def __del__(self):
206+
if self.pool is not None:
207+
self.stop_multi_process_pool(self.pool)
208+
199209
@abstractmethod
200210
def encode_single_device(
201-
self,
202-
sentences: Union[List[str], str],
203-
batch_size: int = 256,
204-
max_length: int = 512,
205-
convert_to_numpy: bool = True,
206-
device: Optional[str] = None,
207-
**kwargs: Any,
211+
self,
212+
sentences: Union[List[str], str],
213+
batch_size: int = 256,
214+
max_length: int = 512,
215+
convert_to_numpy: bool = True,
216+
device: Optional[str] = None,
217+
**kwargs: Any,
208218
):
209219
"""
210220
This method should encode sentences and return embeddings on a single device.
@@ -213,8 +223,8 @@ def encode_single_device(
213223

214224
# adapted from https://github.com/UKPLab/sentence-transformers/blob/1802076d4eae42ff0a5629e1b04e75785d4e193b/sentence_transformers/SentenceTransformer.py#L807
215225
def start_multi_process_pool(
216-
self,
217-
process_target_func: Any,
226+
self,
227+
process_target_func: Any,
218228
) -> Dict[Literal["input", "output", "processes"], Any]:
219229
"""
220230
Starts a multi-process pool to process the encoding with several independent processes
@@ -253,7 +263,7 @@ def start_multi_process_pool(
253263
# adapted from https://github.com/UKPLab/sentence-transformers/blob/1802076d4eae42ff0a5629e1b04e75785d4e193b/sentence_transformers/SentenceTransformer.py#L976
254264
@staticmethod
255265
def _encode_multi_process_worker(
256-
target_device: str, model: 'AbsEmbedder', input_queue: Queue, results_queue: Queue
266+
target_device: str, model: 'AbsEmbedder', input_queue: Queue, results_queue: Queue
257267
) -> None:
258268
"""
259269
Internal working process to encode sentences in multi-process setup
@@ -297,10 +307,10 @@ def stop_multi_process_pool(pool: Dict[Literal["input", "output", "processes"],
297307

298308
# adapted from https://github.com/UKPLab/sentence-transformers/blob/1802076d4eae42ff0a5629e1b04e75785d4e193b/sentence_transformers/SentenceTransformer.py#L877
299309
def encode_multi_process(
300-
self,
301-
sentences: List[str],
302-
pool: Dict[Literal["input", "output", "processes"], Any],
303-
**kwargs
310+
self,
311+
sentences: List[str],
312+
pool: Dict[Literal["input", "output", "processes"], Any],
313+
**kwargs
304314
):
305315
chunk_size = math.ceil(len(sentences) / len(pool["processes"]))
306316

FlagEmbedding/abc/inference/AbsReranker.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ def __init__(
5757
# tokenizer and model are initialized in the child class
5858
self.model = None
5959
self.tokenizer = None
60+
self.pool = None
6061

6162
@staticmethod
6263
def get_target_devices(devices: Union[str, int, List[str], List[int]]) -> List[str]:
@@ -137,13 +138,17 @@ def compute_score(
137138
**kwargs
138139
)
139140

140-
pool = self.start_multi_process_pool()
141+
if self.pool is None:
142+
self.pool = self.start_multi_process_pool()
141143
scores = self.encode_multi_process(sentence_pairs,
142-
pool,
144+
self.pool,
143145
**kwargs)
144-
self.stop_multi_process_pool(pool)
145146
return scores
146147

148+
def __del__(self):
149+
if self.pool is not None:
150+
self.stop_multi_process_pool(self.pool)
151+
147152
@abstractmethod
148153
def compute_score_single_gpu(
149154
self,

FlagEmbedding/inference/embedder/encoder_only/m3.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,12 @@ def encode(
198198
Literal["dense_vecs", "lexical_weights", "colbert_vecs"],
199199
Union[np.ndarray, List[Dict[str, float]], List[np.ndarray]]
200200
]:
201+
if batch_size is None: batch_size = self.batch_size
202+
if max_length is None: max_length = self.passage_max_length
203+
if return_dense is None: return_dense = self.return_dense
204+
if return_sparse is None: return_sparse = self.return_sparse
205+
if return_colbert_vecs is None: return_colbert_vecs = self.return_colbert_vecs
206+
201207
return super().encode(
202208
queries,
203209
batch_size=batch_size,

0 commit comments

Comments (0)