Skip to content

Commit 2bfc922

Browse files
committed
update C-MTEB matching MTEB new structure
1 parent 0abb8af commit 2bfc922

6 files changed

Lines changed: 1594 additions & 607 deletions

File tree

Lines changed: 281 additions & 78 deletions
Original file line number | Diff line number | Diff line change
@@ -1,101 +1,304 @@
1-
from mteb import AbsTaskClassification
1+
from __future__ import annotations
2+
3+
from mteb.abstasks.AbsTaskClassification import AbsTaskClassification
4+
from mteb.abstasks.TaskMetadata import TaskMetadata
5+
26

37
class TNews(AbsTaskClassification):
8+
metadata = TaskMetadata(
9+
name="TNews",
10+
description="Short Text Classification for News",
11+
reference="https://www.cluebenchmarks.com/introduce.html",
12+
dataset={
13+
"path": "C-MTEB/TNews-classification",
14+
"revision": "317f262bf1e6126357bbe89e875451e4b0938fe4",
15+
},
16+
type="Classification",
17+
category="s2s",
18+
modalities=["text"],
19+
eval_splits=["validation"],
20+
eval_langs=["cmn-Hans"],
21+
main_score="accuracy",
22+
date=None,
23+
domains=None,
24+
task_subtypes=None,
25+
license=None,
26+
annotations_creators=None,
27+
dialect=None,
28+
sample_creation=None,
29+
bibtex_citation="""@inproceedings {xu-etal-2020-clue,
30+
title = "{CLUE}: A {C}hinese Language Understanding Evaluation Benchmark",
31+
author = "Xu, Liang and
32+
Hu, Hai and
33+
Zhang, Xuanwei and
34+
Li, Lu and
35+
Cao, Chenjie and
36+
Li, Yudong and
37+
Xu, Yechen and
38+
Sun, Kai and
39+
Yu, Dian and
40+
Yu, Cong and
41+
Tian, Yin and
42+
Dong, Qianqian and
43+
Liu, Weitang and
44+
Shi, Bo and
45+
Cui, Yiming and
46+
Li, Junyi and
47+
Zeng, Jun and
48+
Wang, Rongzhao and
49+
Xie, Weijian and
50+
Li, Yanting and
51+
Patterson, Yina and
52+
Tian, Zuoyu and
53+
Zhang, Yiwen and
54+
Zhou, He and
55+
Liu, Shaoweihua and
56+
Zhao, Zhe and
57+
Zhao, Qipeng and
58+
Yue, Cong and
59+
Zhang, Xinrui and
60+
Yang, Zhengliang and
61+
Richardson, Kyle and
62+
Lan, Zhenzhong ",
63+
booktitle = "Proceedings of the 28th International Conference on Computational Linguistics",
64+
month = dec,
65+
year = "2020",
66+
address = "Barcelona, Spain (Online)",
67+
publisher = "International Committee on Computational Linguistics",
68+
url = "https://aclanthology.org/2020.coling-main.419",
69+
doi = "10.18653/v1/2020.coling-main.419",
70+
pages = "4762--4772",
71+
}""",
72+
descriptive_stats={"n_samples": None, "avg_character_length": None},
73+
)
74+
475
@property
5-
def description(self):
6-
return {
7-
'name': 'TNews',
8-
'hf_hub_name': 'C-MTEB/TNews-classification',
9-
'description': 'Short Text Classification for News',
10-
"reference": "https://www.cluebenchmarks.com/introduce.html",
11-
'type': 'Classification',
12-
'category': 's2s',
13-
'eval_splits': ['validation'],
14-
'eval_langs': ['zh'],
15-
'main_score': 'accuracy',
16-
'samples_per_label': 32,
17-
}
76+
def metadata_dict(self) -> dict[str, str]:
77+
metadata_dict = super().metadata_dict
78+
metadata_dict["samples_per_label"] = 32
79+
return metadata_dict
1880

1981

2082
class IFlyTek(AbsTaskClassification):
83+
metadata = TaskMetadata(
84+
name="IFlyTek",
85+
description="Long Text classification for the description of Apps",
86+
reference="https://www.cluebenchmarks.com/introduce.html",
87+
dataset={
88+
"path": "C-MTEB/IFlyTek-classification",
89+
"revision": "421605374b29664c5fc098418fe20ada9bd55f8a",
90+
},
91+
type="Classification",
92+
category="s2s",
93+
modalities=["text"],
94+
eval_splits=["validation"],
95+
eval_langs=["cmn-Hans"],
96+
main_score="accuracy",
97+
date=None,
98+
domains=None,
99+
task_subtypes=None,
100+
license=None,
101+
annotations_creators=None,
102+
dialect=None,
103+
sample_creation=None,
104+
bibtex_citation="""@inproceedings {xu-etal-2020-clue,
105+
title = "{CLUE}: A {C}hinese Language Understanding Evaluation Benchmark",
106+
author = "Xu, Liang and
107+
Hu, Hai and
108+
Zhang, Xuanwei and
109+
Li, Lu and
110+
Cao, Chenjie and
111+
Li, Yudong and
112+
Xu, Yechen and
113+
Sun, Kai and
114+
Yu, Dian and
115+
Yu, Cong and
116+
Tian, Yin and
117+
Dong, Qianqian and
118+
Liu, Weitang and
119+
Shi, Bo and
120+
Cui, Yiming and
121+
Li, Junyi and
122+
Zeng, Jun and
123+
Wang, Rongzhao and
124+
Xie, Weijian and
125+
Li, Yanting and
126+
Patterson, Yina and
127+
Tian, Zuoyu and
128+
Zhang, Yiwen and
129+
Zhou, He and
130+
Liu, Shaoweihua and
131+
Zhao, Zhe and
132+
Zhao, Qipeng and
133+
Yue, Cong and
134+
Zhang, Xinrui and
135+
Yang, Zhengliang and
136+
Richardson, Kyle and
137+
Lan, Zhenzhong ",
138+
booktitle = "Proceedings of the 28th International Conference on Computational Linguistics",
139+
month = dec,
140+
year = "2020",
141+
address = "Barcelona, Spain (Online)",
142+
publisher = "International Committee on Computational Linguistics",
143+
url = "https://aclanthology.org/2020.coling-main.419",
144+
doi = "10.18653/v1/2020.coling-main.419",
145+
pages = "4762--4772",
146+
abstract = "The advent of natural language understanding (NLU) benchmarks for English, such as GLUE and SuperGLUE allows new NLU models to be evaluated across a diverse set of tasks. These comprehensive benchmarks have facilitated a broad range of research and applications in natural language processing (NLP). The problem, however, is that most such benchmarks are limited to English, which has made it difficult to replicate many of the successes in English NLU for other languages. To help remedy this issue, we introduce the first large-scale Chinese Language Understanding Evaluation (CLUE) benchmark. CLUE is an open-ended, community-driven project that brings together 9 tasks spanning several well-established single-sentence/sentence-pair classification tasks, as well as machine reading comprehension, all on original Chinese text. To establish results on these tasks, we report scores using an exhaustive set of current state-of-the-art pre-trained Chinese models (9 in total). We also introduce a number of supplementary datasets and additional tools to help facilitate further progress on Chinese NLU. Our benchmark is released at https://www.cluebenchmarks.com",
147+
}""",
148+
descriptive_stats={"n_samples": None, "avg_character_length": None},
149+
)
150+
21151
@property
22-
def description(self):
23-
return {
24-
'name': 'IFlyTek',
25-
'hf_hub_name': 'C-MTEB/IFlyTek-classification',
26-
'description': 'Long Text classification for the description of Apps',
27-
"reference": "https://www.cluebenchmarks.com/introduce.html",
28-
'type': 'Classification',
29-
'category': 's2s',
30-
'eval_splits': ['validation'],
31-
'eval_langs': ['zh'],
32-
'main_score': 'accuracy',
33-
'samples_per_label': 32,
34-
'n_experiments': 5
35-
}
152+
def metadata_dict(self) -> dict[str, str]:
153+
metadata_dict = super().metadata_dict
154+
metadata_dict["samples_per_label"] = 32
155+
metadata_dict["n_experiments"] = 5
156+
return metadata_dict
36157

37158

38159
class MultilingualSentiment(AbsTaskClassification):
39-
@property
40-
def description(self):
41-
return {
42-
'name': 'MultilingualSentiment',
43-
'hf_hub_name': 'C-MTEB/MultilingualSentiment-classification',
44-
'description': 'A collection of multilingual sentiments datasets grouped into 3 classes -- positive, neutral, negative',
45-
"reference": "https://github.com/tyqiangz/multilingual-sentiment-datasets",
46-
'category': 's2s',
47-
'type': 'Classification',
48-
'eval_splits': ['validation'],
49-
'eval_langs': ['zh'],
50-
'main_score': 'accuracy',
51-
'samples_per_label': 32,
52-
}
160+
metadata = TaskMetadata(
161+
name="MultilingualSentiment",
162+
description="A collection of multilingual sentiments datasets grouped into 3 classes -- positive, neutral, negative",
163+
reference="https://github.com/tyqiangz/multilingual-sentiment-datasets",
164+
dataset={
165+
"path": "C-MTEB/MultilingualSentiment-classification",
166+
"revision": "46958b007a63fdbf239b7672c25d0bea67b5ea1a",
167+
},
168+
type="Classification",
169+
category="s2s",
170+
modalities=["text"],
171+
eval_splits=["validation", "test"],
172+
eval_langs=["cmn-Hans"],
173+
main_score="accuracy",
174+
date=None,
175+
domains=None,
176+
task_subtypes=None,
177+
license=None,
178+
annotations_creators=None,
179+
dialect=None,
180+
sample_creation=None,
181+
bibtex_citation=None,
182+
descriptive_stats={"n_samples": None, "avg_character_length": None},
183+
)
53184

185+
@property
186+
def metadata_dict(self) -> dict[str, str]:
187+
metadata_dict = super().metadata_dict
188+
metadata_dict["samples_per_label"] = 32
189+
return metadata_dict
54190

55191

56192
class JDReview(AbsTaskClassification):
193+
metadata = TaskMetadata(
194+
name="JDReview",
195+
description="review for iphone",
196+
reference="https://aclanthology.org/2023.nodalida-1.20/",
197+
dataset={
198+
"path": "C-MTEB/JDReview-classification",
199+
"revision": "b7c64bd89eb87f8ded463478346f76731f07bf8b",
200+
},
201+
type="Classification",
202+
category="s2s",
203+
modalities=["text"],
204+
eval_splits=["test"],
205+
eval_langs=["cmn-Hans"],
206+
main_score="accuracy",
207+
date=None,
208+
domains=None,
209+
task_subtypes=None,
210+
license=None,
211+
annotations_creators=None,
212+
dialect=None,
213+
sample_creation=None,
214+
bibtex_citation="""@article{xiao2023c,
215+
title={C-pack: Packaged resources to advance general chinese embedding},
216+
author={Xiao, Shitao and Liu, Zheng and Zhang, Peitian and Muennighof, Niklas},
217+
journal={arXiv preprint arXiv:2309.07597},
218+
year={2023}
219+
}""",
220+
descriptive_stats={"n_samples": None, "avg_character_length": None},
221+
)
222+
57223
@property
58-
def description(self):
59-
return {
60-
'name': 'JDReview',
61-
'hf_hub_name': 'C-MTEB/JDReview-classification',
62-
'description': 'review for iphone',
63-
'category': 's2s',
64-
'type': 'Classification',
65-
'eval_splits': ['test'],
66-
'eval_langs': ['zh'],
67-
'main_score': 'accuracy',
68-
'samples_per_label': 32,
69-
}
224+
def metadata_dict(self) -> dict[str, str]:
225+
metadata_dict = super().metadata_dict
226+
metadata_dict["samples_per_label"] = 32
227+
return metadata_dict
70228

71229

72230
class OnlineShopping(AbsTaskClassification):
231+
metadata = TaskMetadata(
232+
name="OnlineShopping",
233+
description="Sentiment Analysis of User Reviews on Online Shopping Websites",
234+
reference="https://aclanthology.org/2023.nodalida-1.20/",
235+
dataset={
236+
"path": "C-MTEB/OnlineShopping-classification",
237+
"revision": "e610f2ebd179a8fda30ae534c3878750a96db120",
238+
},
239+
type="Classification",
240+
category="s2s",
241+
modalities=["text"],
242+
eval_splits=["test"],
243+
eval_langs=["cmn-Hans"],
244+
main_score="accuracy",
245+
date=None,
246+
domains=None,
247+
task_subtypes=None,
248+
license=None,
249+
annotations_creators=None,
250+
dialect=None,
251+
sample_creation=None,
252+
bibtex_citation="""@article{xiao2023c,
253+
title={C-pack: Packaged resources to advance general chinese embedding},
254+
author={Xiao, Shitao and Liu, Zheng and Zhang, Peitian and Muennighof, Niklas},
255+
journal={arXiv preprint arXiv:2309.07597},
256+
year={2023}
257+
}""",
258+
descriptive_stats={"n_samples": None, "avg_character_length": None},
259+
)
260+
73261
@property
74-
def description(self):
75-
return {
76-
'name': 'OnlineShopping',
77-
'hf_hub_name': 'C-MTEB/OnlineShopping-classification',
78-
'description': 'Sentiment Analysis of User Reviews on Online Shopping Websites',
79-
'category': 's2s',
80-
'type': 'Classification',
81-
'eval_splits': ['test'],
82-
'eval_langs': ['zh'],
83-
'main_score': 'accuracy',
84-
'samples_per_label': 32,
85-
}
262+
def metadata_dict(self) -> dict[str, str]:
263+
metadata_dict = super().metadata_dict
264+
metadata_dict["samples_per_label"] = 32
265+
return metadata_dict
86266

87267

88268
class Waimai(AbsTaskClassification):
269+
metadata = TaskMetadata(
270+
name="Waimai",
271+
description="Sentiment Analysis of user reviews on takeaway platforms",
272+
reference="https://aclanthology.org/2023.nodalida-1.20/",
273+
dataset={
274+
"path": "C-MTEB/waimai-classification",
275+
"revision": "339287def212450dcaa9df8c22bf93e9980c7023",
276+
},
277+
type="Classification",
278+
category="s2s",
279+
modalities=["text"],
280+
eval_splits=["test"],
281+
eval_langs=["cmn-Hans"],
282+
main_score="accuracy",
283+
date=None,
284+
domains=None,
285+
task_subtypes=None,
286+
license=None,
287+
annotations_creators=None,
288+
dialect=None,
289+
sample_creation=None,
290+
bibtex_citation="""@article{xiao2023c,
291+
title={C-pack: Packaged resources to advance general chinese embedding},
292+
author={Xiao, Shitao and Liu, Zheng and Zhang, Peitian and Muennighof, Niklas},
293+
journal={arXiv preprint arXiv:2309.07597},
294+
year={2023}
295+
}""",
296+
descriptive_stats={"n_samples": None, "avg_character_length": None},
297+
)
298+
89299
@property
90-
def description(self):
91-
return {
92-
'name': 'Waimai',
93-
'hf_hub_name': 'C-MTEB/waimai-classification',
94-
'description': 'Sentiment Analysis of user reviews on takeaway platforms',
95-
'category': 's2s',
96-
'type': 'Classification',
97-
'eval_splits': ['test'],
98-
'eval_langs': ['zh'],
99-
'main_score': 'accuracy',
100-
'samples_per_label': 32,
101-
}
300+
def metadata_dict(self) -> dict[str, str]:
301+
metadata_dict = super().metadata_dict
302+
metadata_dict["samples_per_label"] = 32
303+
304+
return metadata_dict

0 commit comments

Comments
 (0)