Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for FinMTEB benchmark #1379

Open
wants to merge 12 commits into
base: v2.0.0
Choose a base branch
from
1 change: 1 addition & 0 deletions docs/adding_a_dataset.md
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ The domains follow the categories used in the [Universal Dependencies project](h
| Religious | Religious text e.g. bibles |
| Blog | [Blogpost, weblog etc.](https://en.wikipedia.org/wiki/Blog) |
| Fiction | Works of [fiction](https://en.wikipedia.org/wiki/Fiction) |
| Finance | Financial documents, reports etc. |
| Government | Governmental communication, websites or similar |
| Legal | Legal documents, laws etc. |
| Medical | Doctors' notes, medical procedures or similar |
Expand Down
1 change: 0 additions & 1 deletion mteb/abstasks/AbsTaskClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@ class AbsTaskClassification(AbsTask):
k: int = 3
train_split = "train"
sentence_column: str = "text"

def evaluate(
self,
model: Encoder,
Expand Down
4 changes: 3 additions & 1 deletion mteb/abstasks/TaskMetadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
"Constructed",
"Encyclopaedic",
"Fiction",
"Finance",
"Government",
"Legal",
"Medical",
Expand Down Expand Up @@ -207,6 +208,7 @@
"mpl-2.0",
"msr-la-nc",
"multiple",
"acm"
]
)

Expand Down Expand Up @@ -274,7 +276,7 @@ class TaskMetadata(BaseModel):
huggingface dataset contain different languages).
main_score: The main score used for evaluation.
date: The date when the data was collected. Specified as a tuple of two dates.
domains: The domains of the data. These includes "Non-fiction", "Social", "Fiction", "News", "Academic", "Blog", "Encyclopaedic",
domains: The domains of the data. These include "Non-fiction", "Social", "Fiction", "Finance", "News", "Academic", "Blog", "Encyclopaedic",
"Government", "Legal", "Medical", "Poetry", "Religious", "Reviews", "Web", "Spoken", "Written". A dataset can belong to multiple domains.
task_subtypes: The subtypes of the task. E.g. includes "Sentiment/Hate speech", "Thematic Clustering". Feel free to update the list as needed.
license: The license of the data specified as lowercase, e.g. "cc-by-nc-4.0". If the license is not specified, use "not specified". For custom licenses a URL is used.
Expand Down
34 changes: 31 additions & 3 deletions mteb/tasks/Classification/eng/ESGClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,43 @@
class ESGClassification(AbsTaskClassification):
metadata = TaskMetadata(
name="ESGClassification",
description="A finance dataset performs sentence classification under the environmental, social, and corporate governance (ESG) framework.",
reference="https://arxiv.org/abs/2309.13064",
dataset={
"path": "FinanceMTEB/ESG",
"revision": "521d56feabadda80b11d6adcc6b335d4c5ad8285",
},
type="Classification",
description="A finance dataset performs sentence classification under the environmental, social, and corporate governance (ESG) framework.",
reference="https://arxiv.org/abs/2309.13064",
category="s2s",
modalities=["text"],
type="Classification",
eval_splits=["test"],
eval_langs=["eng-Latn"],
main_score="accuracy",
date=("2023-09-23", "2023-09-23"),
domains=["Finance"],
task_subtypes=[],
license="not specified",
annotations_creators="derived", # the annotations are a mix of derived, LM-generated and reviewed and expert-annotated. but derived is the predominant source.
bibtex_citation="""@misc{yang2023investlmlargelanguagemodel,
title={InvestLM: A Large Language Model for Investment using Financial Domain Instruction Tuning},
author={Yi Yang and Yixuan Tang and Kar Yan Tam},
year={2023},
eprint={2309.13064},
archivePrefix={arXiv},
primaryClass={q-fin.GN},
url={https://arxiv.org/abs/2309.13064},
}""",
descriptive_stats={
"num_samples": {"test": 1000},
"average_text_length": {"test": 170.817},
"unique_labels": {"test": 4},
"labels": {
"test": {
"2": {"count": 497},
"0": {"count": 190},
"3": {"count": 276},
"1": {"count": 37},
}
},
},
)
24 changes: 21 additions & 3 deletions mteb/tasks/Classification/eng/FLSClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,33 @@
class FLSClassification(AbsTaskClassification):
metadata = TaskMetadata(
name="FLSClassification",
description="A finance dataset detects whether the sentence is a forward-looking statement.",
reference="https://arxiv.org/abs/2309.13064",
dataset={
"path": "FinanceMTEB/FLS",
"revision": "39b6719f1d7197df4498fea9fce20d4ad782a083",
},
type="Classification",
description="A finance dataset detects whether the sentence is a forward-looking statement.",
reference="https://arxiv.org/abs/2309.13064",
category="s2s",
modalities=["text"],
type="Classification",
eval_splits=["test"],
eval_langs=["eng-Latn"],
main_score="accuracy",
date=("2023-09-23", "2023-09-23"),
domains=["Finance"],
task_subtypes=[],
license="not specified",
annotations_creators="derived",
descriptive_stats={
"num_samples": {"test": 1000},
"average_text_length": {"test": 187.923},
"unique_labels": {"test": 3},
"labels": {
"test": {
"2": {"count": 292},
"1": {"count": 539},
"0": {"count": 169},
}
},
},
Comment on lines +27 to +38
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now, the descriptive statistics are saved as JSON and do not need to be added in this way.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah gotcha. let me rework this.

thanks for the heads-up!

)
24 changes: 21 additions & 3 deletions mteb/tasks/Classification/eng/FOMCClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,35 @@
class FOMCClassification(AbsTaskClassification):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

General comment: Metadata is required to be filled out.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah understood. Working on it.

metadata = TaskMetadata(
name="FOMCClassification",
description="A task of hawkish-dovish classification in finance domain.",
reference="https://github.com/gtfintechlab/fomc-hawkish-dovish",
dataset={
"path": "FinanceMTEB/FOMC",
"revision": "cdaf1306a24bc5e7441c7c871343efdf4c721bc2",
},
type="Classification",
description="A task of hawkish-dovish classification in finance domain.",
reference="https://github.com/gtfintechlab/fomc-hawkish-dovish",
category="s2s",
modalities=["text"],
type="Classification",
eval_splits=["test"],
eval_langs=["eng-Latn"],
main_score="accuracy",
date=("1996-01-01", "2022-10-15"),
domains=["Finance"],
task_subtypes=[],
license="cc-by-nc-4.0",
annotations_creators="human-annotated",
descriptive_stats={
"num_samples": {"test": 1000},
"average_text_length": {"test": 199.403},
"unique_labels": {"test": 3},
"labels": {
"test": {
"1": {"count": 263},
"2": {"count": 466},
"0": {"count": 271},
}
},
},
)

def dataset_transform(self):
Expand Down
18 changes: 15 additions & 3 deletions mteb/tasks/Classification/eng/FiQAClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,27 @@
class FiQAClassification(AbsTaskClassification):
metadata = TaskMetadata(
name="FiQAClassification",
description="Polar sentiment dataset of sentences from financial news, categorized by sentiment into positive, negative, or neutral.",
reference="https://sites.google.com/view/fiqa/home",
dataset={
"path": "FinanceMTEB/FiQA_ABSA",
"revision": "afa907ab4c6441afb8ee70bd99802bb707d3d2ab",
},
type="Classification",
description="Polar sentiment dataset of sentences from financial news, categorized by sentiment into positive, negative, or neutral.",
reference="https://sites.google.com/view/fiqa/home",
category="s2s",
modalities=["text"],
type="Classification",
eval_splits=["test"],
eval_langs=["eng-Latn"],
main_score="accuracy",
date=("2018-04-23", "2018-04-27"),
domains=["Finance"],
task_subtypes=[],
license="not specified",
annotations_creators="human-annotated",
descriptive_stats={
"num_samples": {"test": 352},
"average_text_length": {"test": 140.9005681818182},
"unique_labels": {"test": 2},
"labels": {"test": {"1": {"count": 236}, "0": {"count": 116}}},
},
)
22 changes: 19 additions & 3 deletions mteb/tasks/Classification/eng/FinSentClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,31 @@
class FinSentClassification(AbsTaskClassification):
metadata = TaskMetadata(
name="FinSentClassification",
description="Polar sentiment dataset of sentences from financial news, categorized by sentiment into positive, negative, or neutral.",
reference="https://finsent.hkust.edu.hk/",
dataset={
"path": "FinanceMTEB/FinSent",
"revision": "68ee0f0abf596e371ef6a308f685071e3b737bbb",
},
type="Classification",
description="Polar sentiment dataset of sentences from financial news, categorized by sentiment into positive, negative, or neutral.",
reference="https://finsent.hkust.edu.hk/",
category="s2s",
modalities=["text"],
type="Classification",
eval_splits=["test"],
eval_langs=["eng-Latn"],
main_score="accuracy",
date=("2023-09-23", "2023-09-23"),
domains=["Finance"],
task_subtypes=[],
license="not specified",
annotations_creators="derived", # the annotations are a mix of derived, LM-generated and reviewed and expert-annotated. but derived is the predominant source.
descriptive_stats={
"num_samples": {"test": 1000},
"average_text_length": {"test": 138.939},
"unique_labels": {"test": 3},
"labels": {
"0": {"test": {"count": 465}},
"1": {"test": {"count": 358}},
"2": {"test": {"count": 177}},
},
},
)
22 changes: 22 additions & 0 deletions mteb/tasks/Classification/eng/FinancialFraudClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,31 @@ class FinancialFraudClassification(AbsTaskClassification):
},
type="Classification",
category="s2s",
modalities=["text"],
eval_splits=["test"],
eval_langs=["eng-Latn"],
main_score="accuracy",
date=("1999-01-01", "2019-12-31"),
domains=["Finance"],
task_subtypes=[],
license="mit",
annotations_creators="derived",
bibtex_citation="""@mastersthesis{kedia2023enhancing,
author = {Kedia, Amit Shushil},
title = {Enhancing Financial Fraud Detection: A Comparative Analysis of Large Language Models and Traditional Machine Learning and Deep Learning Approaches},
school = {Brunel University London},
year = {2023},
address = {Uxbridge, Middlesex UB8 3PH, United Kingdom},
type = {MSc Thesis},
department = {Department of Computer Science},
program = {MSc Data Science and Analytics}
}""",
descriptive_stats={
"num_samples": {"test": 51},
"average_text_length": {"test": 1096025.2156862745},
"unique_labels": {"test": 2},
"labels": {"test": {"0": {"count": 32}, "1": {"count": 19}}},
},
)

def dataset_transform(self):
Expand Down
21 changes: 0 additions & 21 deletions mteb/tasks/Classification/eng/FinancialPhraseBankClassification.py

This file was deleted.

24 changes: 21 additions & 3 deletions mteb/tasks/Classification/eng/SemEva2017Classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,35 @@
class SemEva2017Classification(AbsTaskClassification):
metadata = TaskMetadata(
name="SemEva2017Classification",
description="Polar sentiment dataset of sentences from financial news, categorized by sentiment into positive, negative, or neutral.",
reference="https://alt.qcri.org/semeval2017/task5/",
dataset={
"path": "FinanceMTEB/SemEva2017_Headline",
"revision": "f0e198ba04c23d949ef803ce32ee1e4f2d8d3696",
},
type="Classification",
description="Polar sentiment dataset of sentences from financial news, categorized by sentiment into positive, negative, or neutral.",
reference="https://alt.qcri.org/semeval2017/task5/",
category="s2s",
modalities=["text"],
type="Classification",
eval_splits=["test"],
eval_langs=["eng-Latn"],
main_score="accuracy",
date=("2016-07-01", "2017-12-31"),
domains=["Finance"],
task_subtypes=[],
license="cc-by-4.0",
annotations_creators="expert-annotated",
descriptive_stats={
"num_samples": {"test": 343},
"average_text_length": {"test": 59.80466472303207},
"unique_labels": {"test": 3},
"labels": {
"test": {
"0": {"count": 122},
"2": {"count": 204},
"1": {"count": 17},
}
},
},
)

def dataset_transform(self):
Expand Down
28 changes: 28 additions & 0 deletions mteb/tasks/Classification/zho/FinChinaSentimentClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,37 @@ class FinChinaSentimentClassification(AbsTaskClassification):
},
type="Classification",
category="s2s",
modalities=["text"],
eval_splits=["test"],
eval_langs=["cmn-Hans"],
main_score="accuracy",
date=("2023-06-23", "2023-09-15"),
domains=["Finance"],
license="apache-2.0",
annotations_creators="expert-annotated",
dialect=[],
bibtex_citation="""@misc{lu2023bbtfincomprehensiveconstructionchinese,
title={BBT-Fin: Comprehensive Construction of Chinese Financial Domain Pre-trained Language Model, Corpus and Benchmark},
author={Dakuan Lu and Hengkui Wu and Jiaqing Liang and Yipei Xu and Qianyu He and Yipeng Geng and Mengkun Han and Yingsi Xin and Yanghua Xiao},
year={2023},
eprint={2302.09432},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2302.09432},
}""",
descriptive_stats={
"num_samples": {"test": 1000},
"average_text_length": {"test": 1202.622},
"unique_labels": {"test": 4},
"labels": {
"test": {
"-1": {"count": 762},
"-2": {"count": 118},
"0": {"count": 102},
"-3": {"count": 18},
}
},
},
)

def dataset_transform(self):
Expand Down
26 changes: 26 additions & 0 deletions mteb/tasks/Classification/zho/FinFEClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,35 @@ class FinFEClassification(AbsTaskClassification):
},
type="Classification",
category="s2s",
modalities=["text"],
eval_splits=["test"],
eval_langs=["cmn-Hans"],
main_score="accuracy",
date=("2023-06-23", "2023-09-15"),
domains=["Finance"],
license="apache-2.0",
annotations_creators="expert-annotated",
bibtex_citation="""@misc{lu2023bbtfincomprehensiveconstructionchinese,
title={BBT-Fin: Comprehensive Construction of Chinese Financial Domain Pre-trained Language Model, Corpus and Benchmark},
author={Dakuan Lu and Hengkui Wu and Jiaqing Liang and Yipei Xu and Qianyu He and Yipeng Geng and Mengkun Han and Yingsi Xin and Yanghua Xiao},
year={2023},
eprint={2302.09432},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2302.09432},
}""",
descriptive_stats={
"num_samples": {"test": 1000},
"average_text_length": {"test": 20.767},
"unique_labels": {"test": 3},
"labels": {
"test": {
"0": {"count": 287},
"2": {"count": 462},
"1": {"count": 251},
}
},
},
)

def dataset_transform(self):
Expand Down
Loading
Loading