266 | 266 | " \"top\": search_top_k,\n",
267 | 267 | " \"vector_queries\": [\n",
268 | 268 | " {\n",
269 |     | - " \"kind\": \"vector\",\n",
    | 269 | + " \"kind\": \"text\",\n",
270 | 270 | " \"fields\": vector_field_name,\n",
271 | 271 | " \"k_nearest_neighbors\": search_top_k\n",
272 | 272 | " }\n",

283 | 283 | " \"semantic_configuration_name\": \"en-semantic-config\",\n",
284 | 284 | " \"vector_queries\": [\n",
285 | 285 | " {\n",
286 |     | - " \"kind\": \"vector\",\n",
    | 286 | + " \"kind\": \"text\",\n",
287 | 287 | " \"fields\": vector_field_name,\n",
288 | 288 | " \"k_nearest_neighbors\": search_top_k\n",
289 | 289 | " }\n",
290 | 290 | " ],\n",
291 |     | - " \"score_field\": \"@search.reranker_score\"\n",
    | 291 | + " \"score_field\": \"@search.score\"\n",
292 | 292 | "}"
293 | 293 | ]
294 | 294 | },
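Taken together, these edits switch both vector queries from caller-supplied embeddings to service-side (integrated) vectorization and change which score is reported per result. Assembled from the context above, the configuration after this change would look roughly like the sketch below; `search_top_k`, `vector_field_name`, and their values stand in for variables defined elsewhere in the notebook, and any key not visible in the diff is an assumption:

```python
# A sketch of the semantic hybrid-search configuration after this change.
# Only the keys visible in the diff are confirmed; concrete values are stand-ins.
search_top_k = 50                    # stand-in for the notebook's value
vector_field_name = "text_vector"    # stand-in for the notebook's vector field

semantic_search_configuration = {
    "top": search_top_k,
    "semantic_configuration_name": "en-semantic-config",
    "vector_queries": [
        {
            # "kind": "text" asks Azure AI Search to vectorize the query
            # string service-side (integrated vectorization), instead of the
            # caller supplying a precomputed embedding with "kind": "vector".
            "kind": "text",
            "fields": vector_field_name,
            "k_nearest_neighbors": search_top_k,
        }
    ],
    # Which score gets copied into each result row; this change switches it
    # from the semantic reranker score to the base relevance score.
    "score_field": "@search.score",
}
```

A `"kind": "text"` vector query also carries the query string itself, so the notebook's `search` helper presumably fills that in at query time; the later hunk shows it popping `vector_queries` out of this dict before calling `search_client.search(...)`.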
613 | 613 | " search_configuration.pop(\"vector_queries\")\n",
614 | 614 | " \n",
615 | 615 | " results = search_client.search(search_text=search_text, vector_queries=vector_queries, **search_configuration)\n",
616 |     | - " return [{\"document_id\": result[\"doc_id\"], \"label\": result.get(score_field, None)} for result in results]\n",
    | 616 | + " return [{\"document_id\": result[\"doc_id\"], \"relevance_score\": result.get(score_field, None)} for result in results]\n",
617 | 617 | "\n",
618 | 618 | "def prepare_dataset(search_configuration):\n",
619 | 619 | " # Load the queryset and qrels\n",

623 | 623 | " # Drop negative qrels values and duplicates, and rename columns\n",
624 | 624 | " qrels = qrels.loc[qrels[\"score\"] >= 0]\n",
625 | 625 | " qrels.drop_duplicates(subset=[\"query-id\", \"corpus-id\"], inplace=True)\n",
626 |     | - " qrels.rename(columns={\"corpus-id\": \"document_id\", \"score\": \"label\"}, inplace=True)\n",
    | 626 | + " qrels.rename(columns={\"corpus-id\": \"document_id\", \"score\": \"query_relevance_label\"}, inplace=True)\n",
627 | 627 | " \n",
628 | 628 | " # Group qrels by query ID and generate groundtruth set per query\n",
629 | 629 | " qrels_grouped = qrels.groupby(\"query-id\")\n",
630 |     | - " qrels_aggregated = qrels_grouped[[\"document_id\", \"label\"]].agg(lambda x: list(x))\n",
631 |     | - " qrels_aggregated[\"groundtruth_documents_labels\"] = qrels_aggregated.apply(lambda x: json.dumps([{\"document_id\": doc_id, \"label\": label} for (doc_id, label) in zip(x[\"document_id\"], x[\"label\"])]), axis=1)\n",
    | 630 | + " qrels_aggregated = qrels_grouped[[\"document_id\", \"query_relevance_label\"]].agg(lambda x: list(x))\n",
    | 631 | + " qrels_aggregated[\"retrieval_ground_truth\"] = qrels_aggregated.apply(lambda x: json.dumps([{\"document_id\": doc_id, \"query_relevance_label\": label} for (doc_id, label) in zip(x[\"document_id\"], x[\"query_relevance_label\"])]), axis=1)\n",
632 | 632 | " \n",
633 | 633 | " # Join the queryset and qrels on query ID and doc ID\n",
634 | 634 | " merged = queryset.merge(qrels_aggregated, left_on=\"_id\", right_on=\"query-id\")\n",
635 | 635 | " \n",
636 | 636 | " # Generate search results for each query\n",
637 | 637 | " search_configuration_name = search_configuration.pop(\"name\")\n",
638 | 638 | " score_field = search_configuration.pop(\"score_field\")\n",
639 |     | - " merged[\"retrieved_documents_labels\"] = merged.apply(\n",
    | 639 | + " merged[\"retrieved_documents\"] = merged.apply(\n",
640 | 640 | " lambda x: json.dumps(search(\n",
641 | 641 | " query=x[\"text\"],\n",
642 | 642 | " search_client=search_client,\n",
643 | 643 | " score_field=score_field,\n",
644 | 644 | " **search_configuration\n",
645 | 645 | " )), axis=1)\n",
646 | 646 | " \n",
647 |     | - " merged_final = merged[[\"retrieved_documents_labels\", \"groundtruth_documents_labels\"]]\n",
    | 647 | + " merged_final = merged[[\"retrieved_documents\", \"retrieval_ground_truth\"]]\n",
648 | 648 | " # Save final dataset to a local file in JSON-lines format\n",
649 | 649 | " jsonl_path = os.path.join(\".\", f\"evaluate-beir-{search_configuration_name}.jsonl\")\n",
650 | 650 | " merged_final.to_json(jsonl_path, lines=True, orient=\"records\")\n",
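With the renamed columns, each record that `prepare_dataset` writes to the JSONL file would look roughly like the sketch below; the document IDs, scores, and labels are invented for illustration:

```python
import json

# Illustrative shape of one JSON-lines record produced by prepare_dataset
# (document IDs, scores, and labels are made up).
record = {
    # JSON-encoded search results for one query, as returned by search()
    "retrieved_documents": json.dumps([
        {"document_id": "doc-a", "relevance_score": 12.47},
        {"document_id": "doc-b", "relevance_score": 9.81},
    ]),
    # JSON-encoded qrels for the same query
    "retrieval_ground_truth": json.dumps([
        {"document_id": "doc-a", "query_relevance_label": 2},
        {"document_id": "doc-b", "query_relevance_label": 0},
    ]),
}
print(record)
```

Note that both columns hold JSON strings (produced by the `json.dumps` calls above), not nested objects.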

695 | 695 | "metadata": {},
696 | 696 | "source": [
697 | 697 | "## Run document retrieval evaluation\n",
698 |     | - "In the following cell, we will configure and run the document retrieval evaluator for our dataset. The init params `groundtruth_min`, `groundtruth_max` and `groundtruth_step` help us to configure the qrels scaling for some metrics which depend on a count of labels, such as Fidelity. In this case, the TREC-COVID dataset groundtruth set has 0, 1, and 2 as possible labels, so we set the values of those init params accordingly."
    | 698 | + "In the following cell, we configure and run the document retrieval evaluator for our dataset. The init params `groundtruth_label_min` and `groundtruth_label_max` set the qrels label scale for metrics that depend on label counts, such as Fidelity. The TREC-COVID groundtruth set uses 0, 1, and 2 as possible labels, so we set those init params accordingly."
699 | 699 | ]
700 | 700 | },
701 | 701 | {

715 | 715 | " \"documentretrievalevaluator\": EvaluatorConfiguration(\n",
716 | 716 | " id=DocumentRetrievalEvaluator().id,\n",
717 | 717 | " data_mapping={\n",
718 |     | - " \"groundtruth_documents_labels\": \"${data.groundtruth_documents_labels}\",\n",
719 |     | - " \"retrieved_documents_labels\": \"${data.retrieved_documents_labels}\"\n",
    | 718 | + " \"retrieval_ground_truth\": \"${data.retrieval_ground_truth}\",\n",
    | 719 | + " \"retrieved_documents\": \"${data.retrieved_documents}\"\n",
720 | 720 | " },\n",
721 | 721 | " init_params={\n",
722 |     | - " \"groundtruth_min\": 0,\n",
723 |     | - " \"groundtruth_max\": 2,\n",
724 |     | - " \"groundtruth_step\": 1\n",
    | 722 | + " \"groundtruth_label_min\": 0,\n",
    | 723 | + " \"groundtruth_label_max\": 2\n",
725 | 724 | " }\n",
726 | 725 | " )\n",
727 | 726 | " },\n",
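As a quick sanity check before submitting a cloud evaluation run, the same evaluator can also be exercised locally on a single row. A minimal sketch, assuming `DocumentRetrievalEvaluator` is importable from `azure.ai.evaluation` and accepts the two mapped fields as keyword arguments; the IDs, scores, and labels are invented:

```python
from azure.ai.evaluation import DocumentRetrievalEvaluator

# Label bounds match the TREC-COVID qrels scale (possible labels 0, 1, 2).
evaluator = DocumentRetrievalEvaluator(
    groundtruth_label_min=0,
    groundtruth_label_max=2,
)

# One query's worth of data, shaped like the dataset columns above.
metrics = evaluator(
    retrieval_ground_truth=[
        {"document_id": "doc-a", "query_relevance_label": 2},
        {"document_id": "doc-b", "query_relevance_label": 0},
    ],
    retrieved_documents=[
        {"document_id": "doc-a", "relevance_score": 12.47},
        {"document_id": "doc-b", "relevance_score": 9.81},
    ],
)
print(metrics)  # Fidelity and the other rank-based retrieval metrics
```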