embeddings-benchmark · alt-glitch · Nov 4, 2024 · Nov 4, 2024 · Nov 4, 2024 · Nov 7, 2024
diff --git a/docs/adding_a_dataset.md b/docs/adding_a_dataset.md
@@ -180,6 +180,7 @@ The domains follow the categories used in the [Universal Dependencies project](h
 | Religious     | Religious text e.g. bibles                                       |
 | Blog          | [Blogpost, weblog etc.](https://en.wikipedia.org/wiki/Blog)      |
 | Fiction       | Works of [fiction](https://en.wikipedia.org/wiki/Fiction)        |
+| Finance       | Financial documents, reports etc.                                |
 | Government    | Governmental communication, websites or similar                  |
 | Legal         | Legal documents, laws etc.                                       |
 | Medical       | doctors notes, medical procedures or similar                     |

diff --git a/mteb/abstasks/AbsTaskClassification.py b/mteb/abstasks/AbsTaskClassification.py
@@ -76,7 +76,6 @@ class AbsTaskClassification(AbsTask):
     k: int = 3
     train_split = "train"
     sentence_column: str = "text"
-
     def evaluate(
         self,
         model: Encoder,

diff --git a/mteb/abstasks/TaskMetadata.py b/mteb/abstasks/TaskMetadata.py
@@ -68,6 +68,7 @@
     "Constructed",
     "Encyclopaedic",
     "Fiction",
+    "Finance",
     "Government",
     "Legal",
     "Medical",
@@ -207,6 +208,7 @@
         "mpl-2.0",
         "msr-la-nc",
         "multiple",
+        "acm",
     ]
 )
 
@@ -274,7 +276,7 @@ class TaskMetadata(BaseModel):
             huggingface dataset contain different languages).
         main_score: The main score used for evaluation.
         date: The date when the data was collected. Specified as a tuple of two dates.
-        domains: The domains of the data. These includes "Non-fiction", "Social", "Fiction", "News", "Academic", "Blog", "Encyclopaedic",
+        domains: The domains of the data. These include "Non-fiction", "Social", "Fiction", "Finance", "News", "Academic", "Blog", "Encyclopaedic",
             "Government", "Legal", "Medical", "Poetry", "Religious", "Reviews", "Web", "Spoken", "Written". A dataset can belong to multiple domains.
         task_subtypes: The subtypes of the task. E.g. includes "Sentiment/Hate speech", "Thematic Clustering". Feel free to update the list as needed.
         license: The license of the data specified as lowercase, e.g. "cc-by-nc-4.0". If the license is not specified, use "not specified". For custom licenses a URL is used.

diff --git a/mteb/tasks/Classification/eng/ESGClassification.py b/mteb/tasks/Classification/eng/ESGClassification.py
@@ -7,15 +7,43 @@
 class ESGClassification(AbsTaskClassification):
     metadata = TaskMetadata(
         name="ESGClassification",
-        description="A finance dataset performs sentence classification under the environmental, social, and corporate governance (ESG) framework.",
-        reference="https://arxiv.org/abs/2309.13064",
         dataset={
             "path": "FinanceMTEB/ESG",
             "revision": "521d56feabadda80b11d6adcc6b335d4c5ad8285",
         },
-        type="Classification",
+        description="A finance dataset performs sentence classification under the environmental, social, and corporate governance (ESG) framework.",
+        reference="https://arxiv.org/abs/2309.13064",
         category="s2s",
+        modalities=["text"],
+        type="Classification",
         eval_splits=["test"],
         eval_langs=["eng-Latn"],
         main_score="accuracy",
+        date=("2023-09-23", "2023-09-23"),
+        domains=["Finance"],
+        task_subtypes=[],
+        license="not specified",
+        annotations_creators="derived",  # Annotations are a mix of derived, LM-generated and reviewed, and expert-annotated, but derived is the predominant source.
+        bibtex_citation="""@misc{yang2023investlmlargelanguagemodel,
+              title={InvestLM: A Large Language Model for Investment using Financial Domain Instruction Tuning},
+              author={Yi Yang and Yixuan Tang and Kar Yan Tam},
+              year={2023},
+              eprint={2309.13064},
+              archivePrefix={arXiv},
+              primaryClass={q-fin.GN},
+              url={https://arxiv.org/abs/2309.13064},
+        }""",
+        descriptive_stats={
+            "num_samples": {"test": 1000},
+            "average_text_length": {"test": 170.817},
+            "unique_labels": {"test": 4},
+            "labels": {
+                "test": {
+                    "2": {"count": 497},
+                    "0": {"count": 190},
+                    "3": {"count": 276},
+                    "1": {"count": 37},
+                }
+            },
+        },
     )
diff --git a/mteb/tasks/Classification/eng/FLSClassification.py b/mteb/tasks/Classification/eng/FLSClassification.py
@@ -7,15 +7,33 @@
 class FLSClassification(AbsTaskClassification):
     metadata = TaskMetadata(
         name="FLSClassification",
-        description="A finance dataset detects whether the sentence is a forward-looking statement.",
-        reference="https://arxiv.org/abs/2309.13064",
         dataset={
             "path": "FinanceMTEB/FLS",
             "revision": "39b6719f1d7197df4498fea9fce20d4ad782a083",
         },
-        type="Classification",
+        description="A finance dataset detects whether the sentence is a forward-looking statement.",
+        reference="https://arxiv.org/abs/2309.13064",
         category="s2s",
+        modalities=["text"],
+        type="Classification",
         eval_splits=["test"],
         eval_langs=["eng-Latn"],
         main_score="accuracy",
+        date=("2023-09-23", "2023-09-23"),
+        domains=["Finance"],
+        task_subtypes=[],
+        license="not specified",
+        annotations_creators="derived",
+        descriptive_stats={
+            "num_samples": {"test": 1000},
+            "average_text_length": {"test": 187.923},
+            "unique_labels": {"test": 3},
+            "labels": {
+                "test": {
+                    "2": {"count": 292},
+                    "1": {"count": 539},
+                    "0": {"count": 169},
+                }
+            },
+        },
     )
diff --git a/mteb/tasks/Classification/eng/FOMCClassification.py b/mteb/tasks/Classification/eng/FOMCClassification.py
@@ -7,17 +7,35 @@
 class FOMCClassification(AbsTaskClassification):
     metadata = TaskMetadata(
         name="FOMCClassification",
-        description="A task of hawkish-dovish classification in finance domain.",
-        reference="https://github.com/gtfintechlab/fomc-hawkish-dovish",
         dataset={
             "path": "FinanceMTEB/FOMC",
             "revision": "cdaf1306a24bc5e7441c7c871343efdf4c721bc2",
         },
-        type="Classification",
+        description="A task of hawkish-dovish classification in finance domain.",
+        reference="https://github.com/gtfintechlab/fomc-hawkish-dovish",
         category="s2s",
+        modalities=["text"],
+        type="Classification",
         eval_splits=["test"],
         eval_langs=["eng-Latn"],
         main_score="accuracy",
+        date=("1996-01-01", "2022-10-15"),
+        domains=["Finance"],
+        task_subtypes=[],
+        license="cc-by-nc-4.0",
+        annotations_creators="human-annotated",
+        descriptive_stats={
+            "num_samples": {"test": 1000},
+            "average_text_length": {"test": 199.403},
+            "unique_labels": {"test": 3},
+            "labels": {
+                "test": {
+                    "1": {"count": 263},
+                    "2": {"count": 466},
+                    "0": {"count": 271},
+                }
+            },
+        },
     )
 
     def dataset_transform(self):

diff --git a/mteb/tasks/Classification/eng/FiQAClassification.py b/mteb/tasks/Classification/eng/FiQAClassification.py
@@ -7,15 +7,27 @@
 class FiQAClassification(AbsTaskClassification):
     metadata = TaskMetadata(
         name="FiQAClassification",
-        description="Polar sentiment dataset of sentences from financial news, categorized by sentiment into positive, negative, or neutral.",
-        reference="https://sites.google.com/view/fiqa/home",
         dataset={
             "path": "FinanceMTEB/FiQA_ABSA",
             "revision": "afa907ab4c6441afb8ee70bd99802bb707d3d2ab",
         },
-        type="Classification",
+        description="Polar sentiment dataset of sentences from financial news, categorized by sentiment into positive, negative, or neutral.",
+        reference="https://sites.google.com/view/fiqa/home",
         category="s2s",
+        modalities=["text"],
+        type="Classification",
         eval_splits=["test"],
         eval_langs=["eng-Latn"],
         main_score="accuracy",
+        date=("2018-04-23", "2018-04-27"),
+        domains=["Finance"],
+        task_subtypes=[],
+        license="not specified",
+        annotations_creators="human-annotated",
+        descriptive_stats={
+            "num_samples": {"test": 352},
+            "average_text_length": {"test": 140.9005681818182},
+            "unique_labels": {"test": 2},
+            "labels": {"test": {"1": {"count": 236}, "0": {"count": 116}}},
+        },
     )
diff --git a/mteb/tasks/Classification/eng/FinSentClassification.py b/mteb/tasks/Classification/eng/FinSentClassification.py
@@ -7,15 +7,34 @@
 class FinSentClassification(AbsTaskClassification):
     metadata = TaskMetadata(
         name="FinSentClassification",
-        description="Polar sentiment dataset of sentences from financial news, categorized by sentiment into positive, negative, or neutral.",
-        reference="https://finsent.hkust.edu.hk/",
         dataset={
             "path": "FinanceMTEB/FinSent",
             "revision": "68ee0f0abf596e371ef6a308f685071e3b737bbb",
         },
-        type="Classification",
+        description="Polar sentiment dataset of sentences from financial news, categorized by sentiment into positive, negative, or neutral.",
+        reference="https://finsent.hkust.edu.hk/",
         category="s2s",
+        modalities=["text"],
+        type="Classification",
         eval_splits=["test"],
         eval_langs=["eng-Latn"],
         main_score="accuracy",
+        date=("2023-09-23", "2023-09-23"),
+        domains=["Finance"],
+        task_subtypes=[],
+        license="not specified",
+        annotations_creators="derived",  # Annotations are a mix of derived, LM-generated and reviewed, and expert-annotated, but derived is the predominant source.
+        descriptive_stats={
+            "num_samples": {"test": 1000},
+            "average_text_length": {"test": 138.939},
+            "unique_labels": {"test": 3},
+            # Label distribution is keyed by split first, then by label id.
+            "labels": {
+                "test": {
+                    "0": {"count": 465},
+                    "1": {"count": 358},
+                    "2": {"count": 177},
+                }
+            },
+        },
     )
diff --git a/mteb/tasks/Classification/eng/FinancialFraudClassification.py b/mteb/tasks/Classification/eng/FinancialFraudClassification.py
@@ -15,9 +15,31 @@ class FinancialFraudClassification(AbsTaskClassification):
         },
         type="Classification",
         category="s2s",
+        modalities=["text"],
         eval_splits=["test"],
         eval_langs=["eng-Latn"],
         main_score="accuracy",
+        date=("1999-01-01", "2019-12-31"),
+        domains=["Finance"],
+        task_subtypes=[],
+        license="mit",
+        annotations_creators="derived",
+        bibtex_citation="""@mastersthesis{kedia2023enhancing,
+            author = {Kedia, Amit Shushil},
+            title = {Enhancing Financial Fraud Detection: A Comparative Analysis of Large Language Models and Traditional Machine Learning and Deep Learning Approaches},
+            school = {Brunel University London},
+            year = {2023},
+            address = {Uxbridge, Middlesex UB8 3PH, United Kingdom},
+            type = {MSc Thesis},
+            department = {Department of Computer Science},
+            program = {MSc Data Science and Analytics}
+        }""",
+        descriptive_stats={
+            "num_samples": {"test": 51},
+            "average_text_length": {"test": 1096025.2156862745},
+            "unique_labels": {"test": 2},
+            "labels": {"test": {"0": {"count": 32}, "1": {"count": 19}}},
+        },
     )
 
     def dataset_transform(self):

diff --git a/mteb/tasks/Classification/eng/FinancialPhraseBankClassification.py b/mteb/tasks/Classification/eng/FinancialPhraseBankClassification.py
diff --git a/mteb/tasks/Classification/eng/SemEva2017Classification.py b/mteb/tasks/Classification/eng/SemEva2017Classification.py
@@ -7,17 +7,35 @@
 class SemEva2017Classification(AbsTaskClassification):
     metadata = TaskMetadata(
         name="SemEva2017Classification",
-        description="Polar sentiment dataset of sentences from financial news, categorized by sentiment into positive, negative, or neutral.",
-        reference="https://alt.qcri.org/semeval2017/task5/",
         dataset={
             "path": "FinanceMTEB/SemEva2017_Headline",
             "revision": "f0e198ba04c23d949ef803ce32ee1e4f2d8d3696",
         },
-        type="Classification",
+        description="Polar sentiment dataset of sentences from financial news, categorized by sentiment into positive, negative, or neutral.",
+        reference="https://alt.qcri.org/semeval2017/task5/",
         category="s2s",
+        modalities=["text"],
+        type="Classification",
         eval_splits=["test"],
         eval_langs=["eng-Latn"],
         main_score="accuracy",
+        date=("2016-07-01", "2017-12-31"),
+        domains=["Finance"],
+        task_subtypes=[],
+        license="cc-by-4.0",
+        annotations_creators="expert-annotated",
+        descriptive_stats={
+            "num_samples": {"test": 343},
+            "average_text_length": {"test": 59.80466472303207},
+            "unique_labels": {"test": 3},
+            "labels": {
+                "test": {
+                    "0": {"count": 122},
+                    "2": {"count": 204},
+                    "1": {"count": 17},
+                }
+            },
+        },
     )
 
     def dataset_transform(self):

diff --git a/mteb/tasks/Classification/zho/FinChinaSentimentClassification.py b/mteb/tasks/Classification/zho/FinChinaSentimentClassification.py
@@ -15,9 +15,37 @@ class FinChinaSentimentClassification(AbsTaskClassification):
         },
         type="Classification",
         category="s2s",
+        modalities=["text"],
         eval_splits=["test"],
         eval_langs=["cmn-Hans"],
         main_score="accuracy",
+        date=("2023-06-23", "2023-09-15"),
+        domains=["Finance"],
+        license="apache-2.0",
+        annotations_creators="expert-annotated",
+        dialect=[],
+        bibtex_citation="""@misc{lu2023bbtfincomprehensiveconstructionchinese,
+              title={BBT-Fin: Comprehensive Construction of Chinese Financial Domain Pre-trained Language Model, Corpus and Benchmark},
+              author={Dakuan Lu and Hengkui Wu and Jiaqing Liang and Yipei Xu and Qianyu He and Yipeng Geng and Mengkun Han and Yingsi Xin and Yanghua Xiao},
+              year={2023},
+              eprint={2302.09432},
+              archivePrefix={arXiv},
+              primaryClass={cs.CL},
+              url={https://arxiv.org/abs/2302.09432},
+        }""",
+        descriptive_stats={
+            "num_samples": {"test": 1000},
+            "average_text_length": {"test": 1202.622},
+            "unique_labels": {"test": 4},
+            "labels": {
+                "test": {
+                    "-1": {"count": 762},
+                    "-2": {"count": 118},
+                    "0": {"count": 102},
+                    "-3": {"count": 18},
+                }
+            },
+        },
     )
 
     def dataset_transform(self):

diff --git a/mteb/tasks/Classification/zho/FinFEClassification.py b/mteb/tasks/Classification/zho/FinFEClassification.py
@@ -15,9 +15,35 @@ class FinFEClassification(AbsTaskClassification):
         },
         type="Classification",
         category="s2s",
+        modalities=["text"],
         eval_splits=["test"],
         eval_langs=["cmn-Hans"],
         main_score="accuracy",
+        date=("2023-06-23", "2023-09-15"),
+        domains=["Finance"],
+        license="apache-2.0",
+        annotations_creators="expert-annotated",
+        bibtex_citation="""@misc{lu2023bbtfincomprehensiveconstructionchinese,
+              title={BBT-Fin: Comprehensive Construction of Chinese Financial Domain Pre-trained Language Model, Corpus and Benchmark},
+              author={Dakuan Lu and Hengkui Wu and Jiaqing Liang and Yipei Xu and Qianyu He and Yipeng Geng and Mengkun Han and Yingsi Xin and Yanghua Xiao},
+              year={2023},
+              eprint={2302.09432},
+              archivePrefix={arXiv},
+              primaryClass={cs.CL},
+              url={https://arxiv.org/abs/2302.09432},
+        }""",
+        descriptive_stats={
+            "num_samples": {"test": 1000},
+            "average_text_length": {"test": 20.767},
+            "unique_labels": {"test": 3},
+            "labels": {
+                "test": {
+                    "0": {"count": 287},
+                    "2": {"count": 462},
+                    "1": {"count": 251},
+                }
+            },
+        },
     )
 
     def dataset_transform(self):