Skip to content

Commit 134a1ad

Browse files
committed
abc evaluator
1 parent 035bbf5 commit 134a1ad

1 file changed

Lines changed: 32 additions & 1 deletion

File tree

FlagEmbedding/abc/evaluation/evaluator.py

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,18 @@ def __call__(
111111
dataset_name: Optional[str] = None,
112112
**kwargs,
113113
):
114+
"""Run the whole evaluation process.
115+
116+
Args:
117+
splits (Union[str, List[str]]): Splits of datasets.
118+
search_results_save_dir (str): Directory to save the search results.
119+
retriever (EvalRetriever): Object of :class:EvalRetriever.
120+
reranker (Optional[EvalReranker], optional): Object of :class:EvalReranker. Defaults to None.
121+
corpus_embd_save_dir (Optional[str], optional): Directory to save the embedded corpus. Defaults to None.
122+
ignore_identical_ids (bool, optional): If True, will ignore identical ids in search results. Defaults to False.
123+
k_values (List[int], optional): Cutoffs. Defaults to [1, 3, 5, 10, 100, 1000].
124+
dataset_name (Optional[str], optional): Name of the datasets. Defaults to None.
125+
"""
114126
# Check Splits
115127
checked_splits = self.data_loader.check_splits(splits, dataset_name=dataset_name)
116128
if len(checked_splits) == 0:
@@ -263,7 +275,7 @@ def save_search_results(
263275
eval_name (str): The experiment name of current evaluation.
264276
model_name (str): Name of model used.
265277
reranker_name (str): Name of reranker used.
266-
search_results (Dict[str, Dict[str, float]]): The search results.
278+
search_results (Dict[str, Dict[str, float]]): Dictionary of search results.
267279
output_path (str): Output path to write the results.
268280
split (str): Split used in searching.
269281
dataset_name (Optional[str], optional): Name of dataset used. Defaults to None.
@@ -304,6 +316,16 @@ def compute_metrics(
304316
search_results: Dict[str, Dict[str, float]],
305317
k_values: List[int],
306318
):
319+
"""Evaluate the model with metrics.
320+
321+
Args:
322+
qrels (Dict[str, Dict[str, int]]): Ground truth relevance of queries and documents.
323+
search_results (Dict[str, Dict[str, float]]): Dictionary of search results.
324+
k_values (List[int]): Cutoffs.
325+
326+
Returns:
327+
dict: The results of the metrics.
328+
"""
307329
ndcg, _map, recall, precision = evaluate_metrics(
308330
qrels=qrels,
309331
results=search_results,
@@ -328,6 +350,15 @@ def evaluate_results(
328350
search_results_save_dir: str,
329351
k_values: List[int] = [1, 3, 5, 10, 100, 1000]
330352
):
353+
"""Compute metrics according to the results in the directory.
354+
355+
Args:
356+
search_results_save_dir (str): Path to the search results.
357+
k_values (List[int], optional): Cutoffs. Defaults to [1, 3, 5, 10, 100, 1000].
358+
359+
Returns:
360+
dict: Evaluation results computed from the saved search results, keyed by result file.
361+
"""
331362
eval_results_dict = {}
332363

333364
for file in os.listdir(search_results_save_dir):

0 commit comments

Comments
 (0)