1414
1515
1616class AbsEvalRunner :
17+ """
18+ Abstract class of evaluation runner.
19+
20+ Args:
21+ eval_args (AbsEvalArgs): :class:`AbsEvalArgs` object with the evaluation arguments.
22+ model_args (AbsEvalModelArgs): :class:`AbsEvalModelArgs` object with the model arguments.
23+ """
1724 def __init__ (
1825 self ,
1926 eval_args : AbsEvalArgs ,
@@ -28,6 +35,15 @@ def __init__(
2835
2936 @staticmethod
3037 def get_models (model_args : AbsEvalModelArgs ) -> Tuple [FlagAutoModel , Union [FlagAutoReranker , None ]]:
38+ """Get the embedding model and, if a reranker path is provided, the reranker model.
39+
40+ Args:
41+ model_args (AbsEvalModelArgs): :class:`AbsEvalModelArgs` object with the model arguments.
42+
43+ Returns:
44+ Tuple[FlagAutoModel, Union[FlagAutoReranker, None]]: A :class:`FlagAutoModel` object for the embedding model, and a
45+ :class:`FlagAutoReranker` object for the reranker model if a path is provided, otherwise ``None``.
46+ """
3147 embedder = FlagAutoModel .from_finetuned (
3248 model_name_or_path = model_args .embedder_name_or_path ,
3349 model_class = model_args .embedder_model_class ,
@@ -74,6 +90,12 @@ def get_models(model_args: AbsEvalModelArgs) -> Tuple[FlagAutoModel, Union[FlagA
7490 return embedder , reranker
7591
7692 def load_retriever_and_reranker (self ) -> Tuple [EvalDenseRetriever , Union [EvalReranker , None ]]:
93+ """Load retriever and reranker for evaluation
94+
95+ Returns:
96+ Tuple[EvalDenseRetriever, Union[EvalReranker, None]]: A :class:`EvalDenseRetriever` object for retrieval, and a
97+ :class:`EvalReranker` object if a reranker is provided, otherwise ``None``.
98+ """
7799 embedder , reranker = self .get_models (self .model_args )
78100 retriever = EvalDenseRetriever (
79101 embedder ,
@@ -85,6 +107,11 @@ def load_retriever_and_reranker(self) -> Tuple[EvalDenseRetriever, Union[EvalRer
85107 return retriever , reranker
86108
87109 def load_data_loader (self ) -> AbsEvalDataLoader :
110+ """Load the data loader
111+
112+ Returns:
113+ AbsEvalDataLoader: Data loader object for that specific task.
114+ """
88115 data_loader = AbsEvalDataLoader (
89116 eval_name = self .eval_args .eval_name ,
90117 dataset_dir = self .eval_args .dataset_dir ,
@@ -95,6 +122,11 @@ def load_data_loader(self) -> AbsEvalDataLoader:
95122 return data_loader
96123
97124 def load_evaluator (self ) -> AbsEvaluator :
125+ """Load the evaluator for evaluation
126+
127+ Returns:
128+ AbsEvaluator: The evaluator used to run the evaluation.
129+ """
98130 evaluator = AbsEvaluator (
99131 eval_name = self .eval_args .eval_name ,
100132 data_loader = self .data_loader ,
@@ -109,6 +141,18 @@ def evaluate_metrics(
109141 output_path : str = "./eval_dev_results.md" ,
110142 metrics : Union [str , List [str ]] = ["ndcg_at_10" , "recall_at_10" ]
111143 ):
144+ """Evaluate the provided metrics and write the results.
145+
146+ Args:
147+ search_results_save_dir (str): Path to save the search results.
148+ output_method (str, optional): Output results to `json` or `markdown`. Defaults to "markdown".
149+ output_path (str, optional): Path to write the output. Defaults to "./eval_dev_results.md".
150+ metrics (Union[str, List[str]], optional): Metrics to use. Defaults to ["ndcg_at_10", "recall_at_10"].
151+
152+ Raises:
153+ FileNotFoundError: Eval results not found
154+ ValueError: Invalid output method
155+ """
112156 eval_results_dict = {}
113157 for model_name in sorted (os .listdir (search_results_save_dir )):
114158 model_search_results_save_dir = os .path .join (search_results_save_dir , model_name )
@@ -136,6 +180,9 @@ def evaluate_metrics(
136180 raise ValueError (f"Invalid output method: { output_method } . Available methods: ['json', 'markdown']" )
137181
138182 def run (self ):
183+ """
184+ Run the whole evaluation.
185+ """
139186 if self .eval_args .dataset_names is None :
140187 dataset_names = self .data_loader .available_dataset_names ()
141188 else :
0 commit comments