Skip to content

Commit ee3437f

Browse files
committed
2 parents eade969 + ce88f0f commit ee3437f

33 files changed

Lines changed: 23 additions & 28 deletions

FlagEmbedding/inference/embedder/decoder_only/base.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,6 @@ class BaseLLMEmbedder(AbsEmbedder):
4747
batch_size (int, optional): Batch size for inference. Defaults to :data:`256`.
4848
query_max_length (int, optional): Maximum length for query. Defaults to :data:`512`.
4949
passage_max_length (int, optional): Maximum length for passage. Defaults to :data:`512`.
50-
instruction (Optional[str], optional): Instruction for embedding with :attr:`instruction_format`. Defaults to :data:`None`.
51-
instruction_format (str, optional): Instruction format when using :attr:`instruction`. Defaults to :data:`"{}{}"`.
5250
convert_to_numpy (bool, optional): If True, the output embedding will be a Numpy array. Otherwise, it will be a Torch Tensor.
5351
Defaults to :data:`True`.
5452
@@ -72,8 +70,6 @@ def __init__(
7270
batch_size: int = 256,
7371
query_max_length: int = 512,
7472
passage_max_length: int = 512,
75-
instruction: Optional[str] = None,
76-
instruction_format: str = "{}{}",
7773
convert_to_numpy: bool = True,
7874
**kwargs: Any,
7975
):
@@ -87,8 +83,6 @@ def __init__(
8783
batch_size=batch_size,
8884
query_max_length=query_max_length,
8985
passage_max_length=passage_max_length,
90-
instruction=instruction,
91-
instruction_format=instruction_format,
9286
convert_to_numpy=convert_to_numpy,
9387
**kwargs
9488
)

FlagEmbedding/inference/embedder/decoder_only/icl.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ class ICLLLMEmbedder(AbsEmbedder):
5454
batch_size (int, optional): Batch size for inference. Defaults to :data:`256`.
5555
query_max_length (int, optional): Maximum length for query. Defaults to :data:`512`.
5656
passage_max_length (int, optional): Maximum length for passage. Defaults to :data:`512`.
57+
convert_to_numpy (bool, optional): If True, the output embedding will be a Numpy array. Otherwise, it will be a Torch Tensor.
58+
Defaults to :data:`True`.
5759
5860
Attributes:
5961
DEFAULT_POOLING_METHOD: The default pooling method when running the model.
@@ -77,8 +79,6 @@ def __init__(
7779
batch_size: int = 256,
7880
query_max_length: int = 512,
7981
passage_max_length: int = 512,
80-
instruction: Optional[str] = None,
81-
instruction_format: str = "{}{}",
8282
convert_to_numpy: bool = True,
8383
**kwargs: Any,
8484
):
@@ -92,10 +92,8 @@ def __init__(
9292
batch_size=batch_size,
9393
query_max_length=query_max_length,
9494
passage_max_length=passage_max_length,
95-
instruction=instruction,
96-
instruction_format=instruction_format,
9795
convert_to_numpy=convert_to_numpy,
98-
kwargs=kwargs
96+
**kwargs
9997
)
10098

10199
self.tokenizer = AutoTokenizer.from_pretrained(

FlagEmbedding/inference/embedder/encoder_only/base.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,6 @@ class BaseEmbedder(AbsEmbedder):
2828
batch_size (int, optional): Batch size for inference. Defaults to :data:`256`.
2929
query_max_length (int, optional): Maximum length for query. Defaults to :data:`512`.
3030
passage_max_length (int, optional): Maximum length for passage. Defaults to :data:`512`.
31-
instruction (Optional[str], optional): Instruction for embedding with :attr:`instruction_format`. Defaults to :data:`None`.
32-
instruction_format (str, optional): Instruction format when using :attr:`instruction`. Defaults to :data:`"{}{}"`.
3331
convert_to_numpy (bool, optional): If True, the output embedding will be a Numpy array. Otherwise, it will be a Torch Tensor.
3432
Defaults to :data:`True`.
3533
@@ -55,8 +53,6 @@ def __init__(
5553
batch_size: int = 256,
5654
query_max_length: int = 512,
5755
passage_max_length: int = 512,
58-
instruction: Optional[str] = None,
59-
instruction_format: str = "{}{}",
6056
convert_to_numpy: bool = True,
6157
**kwargs: Any,
6258
):
@@ -70,8 +66,6 @@ def __init__(
7066
batch_size=batch_size,
7167
query_max_length=query_max_length,
7268
passage_max_length=passage_max_length,
73-
instruction=instruction,
74-
instruction_format=instruction_format,
7569
convert_to_numpy=convert_to_numpy,
7670
**kwargs
7771
)
@@ -201,9 +195,6 @@ def encode_single_device(
201195
if device == "cpu": self.use_fp16 = False
202196
if self.use_fp16: self.model.half()
203197

204-
if device == "cpu": self.use_fp16 = False
205-
if self.use_fp16: self.model.half()
206-
207198
self.model.to(device)
208199
self.model.eval()
209200

FlagEmbedding/inference/embedder/encoder_only/m3.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,6 @@ class M3Embedder(AbsEmbedder):
3838
batch_size (int, optional): Batch size for inference. Defaults to :data:`256`.
3939
query_max_length (int, optional): Maximum length for query. Defaults to :data:`512`.
4040
passage_max_length (int, optional): Maximum length for passage. Defaults to :data:`512`.
41-
instruction (Optional[str], optional): Instruction for embedding with :attr:`instruction_format`. Defaults to :data:`None`.
42-
instruction_format (str, optional): Instruction format when using :attr:`instruction`. Defaults to :data:`"{}{}"`.
4341
return_dense (bool, optional): If true, will return the dense embedding. Defaults to :data:`True`.
4442
return_sparse (bool, optional): If true, will return the sparce embedding. Defaults to :data:`False`.
4543
return_colbert_vecs (bool, optional): If true, will return the colbert vectors. Defaults to :data:`False`.
@@ -66,8 +64,6 @@ def __init__(
6664
batch_size: int = 256,
6765
query_max_length: int = 512,
6866
passage_max_length: int = 512,
69-
instruction: Optional[str] = None,
70-
instruction_format: str = "{}{}",
7167
return_dense: bool = True,
7268
return_sparse: bool = False,
7369
return_colbert_vecs: bool = False,
@@ -83,8 +79,6 @@ def __init__(
8379
batch_size=batch_size,
8480
query_max_length=query_max_length,
8581
passage_max_length=passage_max_length,
86-
instruction=instruction,
87-
instruction_format=instruction_format,
8882
return_dense=return_dense,
8983
return_sparse=return_sparse,
9084
return_colbert_vecs=return_colbert_vecs,

research/BGE_M3/modeling.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ def forward(self, query: Dict[str, Tensor] = None, passage: Dict[str, Tensor] =
221221
if teacher_scores is not None:
222222
# print("Use soft-label distillation...")
223223
teacher_targets = F.softmax(teacher_scores, dim=-1) # B N
224-
group_size = p_sparse_vecs.size(0) // q_sparse_vecs.size(0)
224+
group_size = p_dense_vecs.size(0) // q_dense_vecs.size(0)
225225

226226
# dense loss
227227
dense_scores = self.dense_score(q_dense_vecs, p_dense_vecs) # B, B * N

research/visual_bge/setup.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
from setuptools import setup, find_packages
2+
3+
setup(
4+
name="visual_bge",
5+
version="0.1.0",
6+
description='visual_bge',
7+
long_description="./README.md",
8+
long_description_content_type="text/markdown",
9+
url='https://github.com/FlagOpen/FlagEmbedding/tree/master/research/visual_bge',
10+
packages=find_packages(),
11+
install_requires=[
12+
'torchvision',
13+
'timm',
14+
'einops',
15+
'ftfy'
16+
],
17+
python_requires='>=3.6',
18+
)
File renamed without changes.

research/visual_bge/eva_clip/bpe_simple_vocab_16e6.txt.gz renamed to research/visual_bge/visual_bge/eva_clip/bpe_simple_vocab_16e6.txt.gz

File renamed without changes.
File renamed without changes.
File renamed without changes.

0 commit comments

Comments (0)