FlagOpen · nedeadinside · Apr 16, 2026 · Apr 16, 2026 · Apr 16, 2026
diff --git a/FlagEmbedding/inference/reranker/decoder_only/base.py b/FlagEmbedding/inference/reranker/decoder_only/base.py
@@ -297,7 +297,8 @@ def compute_score_single_gpu(
             device = self.target_devices[0]
 
         if device == "cpu": self.use_fp16 = False
-        if self.use_fp16: self.model.half()
+        if self.use_fp16 and next(self.model.parameters()).dtype != torch.float16:
+            self.model.half()
 
         self.model.to(device)
         self.model.eval()

diff --git a/FlagEmbedding/inference/reranker/decoder_only/layerwise.py b/FlagEmbedding/inference/reranker/decoder_only/layerwise.py
@@ -179,7 +179,8 @@ def compute_score_single_gpu(
             device = self.target_devices[0]
 
         if device == "cpu": self.use_fp16 = False
-        if self.use_fp16: self.model.half()
+        if self.use_fp16 and next(self.model.parameters()).dtype != torch.float16:
+            self.model.half()
 
         self.model.to(device)
         self.model.eval()

diff --git a/FlagEmbedding/inference/reranker/decoder_only/lightweight.py b/FlagEmbedding/inference/reranker/decoder_only/lightweight.py
@@ -258,7 +258,8 @@ def compute_score_single_gpu(
             device = self.target_devices[0]
 
         if device == "cpu": self.use_fp16 = False
-        if self.use_fp16: self.model.half()
+        if self.use_fp16 and next(self.model.parameters()).dtype != torch.float16:
+            self.model.half()
 
         self.model.to(device)
         self.model.eval()

diff --git a/FlagEmbedding/inference/reranker/encoder_only/base.py b/FlagEmbedding/inference/reranker/encoder_only/base.py
@@ -111,7 +111,8 @@ def compute_score_single_gpu(
             device = self.target_devices[0]
 
         if device == "cpu": self.use_fp16 = False
-        if self.use_fp16: self.model.half()
+        if self.use_fp16 and next(self.model.parameters()).dtype != torch.float16:
+            self.model.half()
 
         self.model.to(device)
         self.model.eval()

diff --git a/research/Matroyshka_reranker/inference/rank_model.py b/research/Matroyshka_reranker/inference/rank_model.py
@@ -203,7 +203,8 @@ def compute_score_single_gpu(
             device = self.target_devices[0]
 
         if device == "cpu": self.use_fp16 = False
-        if self.use_fp16: self.model.half()
+        if self.use_fp16 and next(self.model.parameters()).dtype != torch.float16:
+            self.model.half()
 
         self.model.to(device)
         self.model.eval()