def prepare_for_model_compat(
    tokenizer,
    first_ids: List[int],
    second_ids: List[int],
    max_length: Optional[int],
    add_special_tokens: bool = False
) -> dict:
    """Build pair inputs for tokenizers that lack an id-level ``prepare_for_model``.

    With ``add_special_tokens=False`` (the decoder-only reranker path) the two
    id sequences are concatenated directly; only the second sequence is
    truncated so the total length stays within ``max_length``, mirroring
    ``truncation='only_second'``. With ``add_special_tokens=True`` the ids are
    decoded back to text and re-tokenized as a pair so the tokenizer can apply
    its own special-token template.

    Returns a dict containing at least ``input_ids``.
    """
    if add_special_tokens:
        # Round-trip through text so the tokenizer inserts its pair template.
        query_text = tokenizer.decode(
            first_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
        )
        doc_text = tokenizer.decode(
            second_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
        )
        # Only request token_type_ids when the tokenizer's model expects them.
        needs_type_ids = "token_type_ids" in getattr(tokenizer, "model_input_names", [])
        return tokenizer(
            query_text,
            doc_text,
            truncation='only_second',
            max_length=max_length,
            padding=False,
            return_attention_mask=False,
            return_token_type_ids=needs_type_ids,
            add_special_tokens=add_special_tokens
        )

    if max_length is not None:
        # Truncate only the second segment; never let the budget go negative.
        budget = max_length - len(first_ids)
        if budget < 0:
            budget = 0
        second_ids = second_ids[:budget]
    return {"input_ids": first_ids + second_ids}
else: - item = self.tokenizer.prepare_for_model( + item = prepare_for_model_compat( + self.tokenizer, query_inputs['input_ids'], self.sep_inputs + passage_inputs['input_ids'], - truncation='only_second', max_length=self.encode_max_length, - padding=False, - return_attention_mask=False, - return_token_type_ids=False, add_special_tokens=False ) item['input_ids'] = item['input_ids'] + self.sep_inputs + self.prompt_inputs @@ -371,25 +392,19 @@ def compute_score_single_gpu( all_passages_inputs_sorted[:min(len(all_passages_inputs_sorted), batch_size)] ): if self.tokenizer.bos_token_id is not None and self.tokenizer.bos_token_id != self.tokenizer.pad_token_id: - item = self.tokenizer.prepare_for_model( + item = prepare_for_model_compat( + self.tokenizer, [self.tokenizer.bos_token_id] + query_inputs['input_ids'], sep_inputs + passage_inputs['input_ids'], - truncation='only_second', max_length=encode_max_length, - padding=False, - return_attention_mask=False, - return_token_type_ids=False, add_special_tokens=False ) else: - item = self.tokenizer.prepare_for_model( + item = prepare_for_model_compat( + self.tokenizer, query_inputs['input_ids'], sep_inputs + passage_inputs['input_ids'], - truncation='only_second', max_length=encode_max_length, - padding=False, - return_attention_mask=False, - return_token_type_ids=False, add_special_tokens=False ) item['input_ids'] = item['input_ids'] + sep_inputs + prompt_inputs @@ -452,25 +467,19 @@ def compute_score_single_gpu( batch_inputs = [] for query_inputs, passage_inputs in zip(queries_inputs, passages_inputs): if self.tokenizer.bos_token_id is not None and self.tokenizer.bos_token_id != self.tokenizer.pad_token_id: - item = self.tokenizer.prepare_for_model( + item = prepare_for_model_compat( + self.tokenizer, [self.tokenizer.bos_token_id] + query_inputs['input_ids'], sep_inputs + passage_inputs['input_ids'], - truncation='only_second', max_length=encode_max_length, - padding=False, - return_attention_mask=False, - 
return_token_type_ids=False, add_special_tokens=False ) else: - item = self.tokenizer.prepare_for_model( + item = prepare_for_model_compat( + self.tokenizer, query_inputs['input_ids'], sep_inputs + passage_inputs['input_ids'], - truncation='only_second', max_length=encode_max_length, - padding=False, - return_attention_mask=False, - return_token_type_ids=False, add_special_tokens=False ) item['input_ids'] = item['input_ids'] + sep_inputs + prompt_inputs diff --git a/FlagEmbedding/inference/reranker/decoder_only/layerwise.py b/FlagEmbedding/inference/reranker/decoder_only/layerwise.py index 4b75da36a..1dece738f 100644 --- a/FlagEmbedding/inference/reranker/decoder_only/layerwise.py +++ b/FlagEmbedding/inference/reranker/decoder_only/layerwise.py @@ -10,7 +10,7 @@ from FlagEmbedding.abc.inference import AbsReranker from FlagEmbedding.inference.reranker.encoder_only.base import sigmoid -from FlagEmbedding.inference.reranker.decoder_only.base import DatasetForReranker, Collater +from FlagEmbedding.inference.reranker.decoder_only.base import DatasetForReranker, Collater, prepare_for_model_compat from .models.modeling_minicpm_reranker import LayerWiseMiniCPMForCausalLM @@ -252,14 +252,11 @@ def compute_score_single_gpu( all_queries_inputs_sorted[:min(len(all_queries_inputs_sorted), batch_size)], all_passages_inputs_sorted[:min(len(all_passages_inputs_sorted), batch_size)] ): - item = self.tokenizer.prepare_for_model( + item = prepare_for_model_compat( + self.tokenizer, [self.tokenizer.bos_token_id] + query_inputs['input_ids'], sep_inputs + passage_inputs['input_ids'], - truncation='only_second', max_length=encode_max_length, - padding=False, - return_attention_mask=False, - return_token_type_ids=False, add_special_tokens=False ) item['input_ids'] = item['input_ids'] + sep_inputs + prompt_inputs @@ -329,14 +326,11 @@ def compute_score_single_gpu( batch_inputs = [] for query_inputs, passage_inputs in zip(queries_inputs, passages_inputs): - item = 
self.tokenizer.prepare_for_model( + item = prepare_for_model_compat( + self.tokenizer, [self.tokenizer.bos_token_id] + query_inputs['input_ids'], sep_inputs + passage_inputs['input_ids'], - truncation='only_second', max_length=encode_max_length, - padding=False, - return_attention_mask=False, - return_token_type_ids=False, add_special_tokens=False ) item['input_ids'] = item['input_ids'] + sep_inputs + prompt_inputs diff --git a/FlagEmbedding/inference/reranker/decoder_only/lightweight.py b/FlagEmbedding/inference/reranker/decoder_only/lightweight.py index 000478afb..2d5d5b511 100644 --- a/FlagEmbedding/inference/reranker/decoder_only/lightweight.py +++ b/FlagEmbedding/inference/reranker/decoder_only/lightweight.py @@ -10,6 +10,7 @@ from FlagEmbedding.abc.inference import AbsReranker from FlagEmbedding.inference.reranker.encoder_only.base import sigmoid +from FlagEmbedding.inference.reranker.decoder_only.base import prepare_for_model_compat def last_logit_pool_lightweight(logits: Tensor, @@ -333,14 +334,11 @@ def compute_score_single_gpu( all_queries_inputs_sorted[:min(len(all_queries_inputs_sorted), batch_size)], all_passages_inputs_sorted[:min(len(all_passages_inputs_sorted), batch_size)] ): - item = self.tokenizer.prepare_for_model( + item = prepare_for_model_compat( + self.tokenizer, [self.tokenizer.bos_token_id] + query_inputs['input_ids'], sep_inputs + passage_inputs['input_ids'], - truncation='only_second', max_length=encode_max_length, - padding=False, - return_attention_mask=False, - return_token_type_ids=False, add_special_tokens=False ) item['input_ids'] = item['input_ids'] + sep_inputs + prompt_inputs @@ -388,14 +386,11 @@ def compute_score_single_gpu( query_lengths = [] prompt_lengths = [] for query_inputs, passage_inputs in zip(queries_inputs, passages_inputs): - item = self.tokenizer.prepare_for_model( + item = prepare_for_model_compat( + self.tokenizer, [self.tokenizer.bos_token_id] + query_inputs['input_ids'], sep_inputs + 
passage_inputs['input_ids'], - truncation='only_second', max_length=encode_max_length, - padding=False, - return_attention_mask=False, - return_token_type_ids=False, add_special_tokens=False ) item['input_ids'] = item['input_ids'] + sep_inputs + prompt_inputs diff --git a/FlagEmbedding/inference/reranker/encoder_only/base.py b/FlagEmbedding/inference/reranker/encoder_only/base.py index 1a4d8b6a4..b20b9e403 100644 --- a/FlagEmbedding/inference/reranker/encoder_only/base.py +++ b/FlagEmbedding/inference/reranker/encoder_only/base.py @@ -74,6 +74,38 @@ def __init__( cache_dir=cache_dir ) + def _prepare_pair_inputs( + self, + q_inp: List[int], + d_inp: List[int], + max_length: int + ) -> dict: + """Build model inputs for a tokenized pair with v4/v5 tokenizer compatibility.""" + prepare_for_model = getattr(self.tokenizer, "prepare_for_model", None) + if callable(prepare_for_model): + return prepare_for_model( + q_inp, + d_inp, + truncation='only_second', + max_length=max_length, + padding=False, + ) + + # v5 tokenizers may not expose id-level pair builders. 
+ query_text = self.tokenizer.decode(q_inp, skip_special_tokens=True, clean_up_tokenization_spaces=False) + doc_text = self.tokenizer.decode(d_inp, skip_special_tokens=True, clean_up_tokenization_spaces=False) + want_tti = "token_type_ids" in getattr(self.tokenizer, "model_input_names", []) + return self.tokenizer( + query_text, + doc_text, + truncation='only_second', + max_length=max_length, + padding=False, + return_attention_mask=False, + return_token_type_ids=want_tti, + add_special_tokens=True, + ) + @torch.no_grad() def compute_score_single_gpu( self, @@ -144,13 +176,7 @@ def compute_score_single_gpu( **kwargs )['input_ids'] for q_inp, d_inp in zip(queries_inputs_batch, passages_inputs_batch): - item = self.tokenizer.prepare_for_model( - q_inp, - d_inp, - truncation='only_second', - max_length=max_length, - padding=False, - ) + item = self._prepare_pair_inputs(q_inp, d_inp, max_length=max_length) all_inputs.append(item) # sort by length for less padding length_sorted_idx = np.argsort([-len(x['input_ids']) for x in all_inputs])