From 63684d29f247b3ece1ec26b1f0e346c9aa85abf7 Mon Sep 17 00:00:00 2001 From: Rado Ondas Date: Thu, 3 Aug 2023 15:33:09 +0200 Subject: [PATCH 1/3] Adds collab book for Image similarity --- notebooks/search/04-image-similarity.ipynb | 1264 ++++++++++++++++++++ 1 file changed, 1264 insertions(+) create mode 100644 notebooks/search/04-image-similarity.ipynb diff --git a/notebooks/search/04-image-similarity.ipynb b/notebooks/search/04-image-similarity.ipynb new file mode 100644 index 00000000..b5069481 --- /dev/null +++ b/notebooks/search/04-image-similarity.ipynb @@ -0,0 +1,1264 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# How to implement Image search using Elasticsearch" + ], + "metadata": { + "id": "CepGq3Kvtdxi" + } + }, + { + "cell_type": "markdown", + "source": [ + "The workbook shows how to implement an Image search using Elasticsearch. You will index documents with image embeddings (generated or pre-generated) and then, using an NLP model, be able to search using a natural language description of the image.\n", + "\n", + "### Prerequisites\n", + "Before you start, make sure you have an Elasticsearch cluster running. The cluster must have at least one machine learning (ML) node with enough (4GB) memory." + ], + "metadata": { + "id": "oMu1SW_TQQrU" + } + }, + { + "cell_type": "markdown", + "source": [ + "### Install Python requirements\n", + "Before you start, you need to install all required Python dependencies." 
+ ], + "metadata": { + "id": "VFcdr8IDQE_H" + } + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "6WosfR55npKU", + "outputId": "033767ff-0eef-48cc-c9e7-efbf73c9cb67" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: sentence-transformers in /usr/local/lib/python3.10/dist-packages (2.2.2)\n", + "Requirement already satisfied: eland in /usr/local/lib/python3.10/dist-packages (8.7.0)\n", + "Requirement already satisfied: elasticsearch in /usr/local/lib/python3.10/dist-packages (8.9.0)\n", + "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.31.0)\n", + "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (2.0.1+cu118)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (4.65.0)\n", + "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (9.4.0)\n", + "Requirement already satisfied: streamlit in /usr/local/lib/python3.10/dist-packages (1.25.0)\n", + "Requirement already satisfied: torchvision in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (0.15.2+cu118)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (1.22.4)\n", + "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (1.2.2)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (1.10.1)\n", + "Requirement already satisfied: nltk in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (3.8.1)\n", + "Requirement already satisfied: sentencepiece in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (0.1.99)\n", + "Requirement already satisfied: huggingface-hub>=0.4.0 in 
/usr/local/lib/python3.10/dist-packages (from sentence-transformers) (0.16.4)\n", + "Requirement already satisfied: pandas>=1.5 in /usr/local/lib/python3.10/dist-packages (from eland) (1.5.3)\n", + "Requirement already satisfied: matplotlib>=3.6 in /usr/local/lib/python3.10/dist-packages (from eland) (3.7.1)\n", + "Requirement already satisfied: elastic-transport<9,>=8 in /usr/local/lib/python3.10/dist-packages (from elasticsearch) (8.4.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.12.2)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (23.1)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.1)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2022.10.31)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.27.1)\n", + "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.13.3)\n", + "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.3.1)\n", + "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch) (4.7.1)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch) (1.11.1)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch) (3.1)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch) (3.1.2)\n", + "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch) (2.0.0)\n", + "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from 
triton==2.0.0->torch) (3.25.2)\n", + "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch) (16.0.6)\n", + "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.10/dist-packages (from streamlit) (4.2.2)\n", + "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/lib/python3/dist-packages (from streamlit) (1.4)\n", + "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.10/dist-packages (from streamlit) (5.3.1)\n", + "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.10/dist-packages (from streamlit) (8.1.6)\n", + "Requirement already satisfied: importlib-metadata<7,>=1.4 in /usr/lib/python3/dist-packages (from streamlit) (4.6.4)\n", + "Requirement already satisfied: protobuf<5,>=3.20 in /usr/local/lib/python3.10/dist-packages (from streamlit) (3.20.3)\n", + "Requirement already satisfied: pyarrow>=6.0 in /usr/local/lib/python3.10/dist-packages (from streamlit) (9.0.0)\n", + "Requirement already satisfied: pympler<2,>=0.9 in /usr/local/lib/python3.10/dist-packages (from streamlit) (1.0.1)\n", + "Requirement already satisfied: python-dateutil<3,>=2.7.3 in /usr/local/lib/python3.10/dist-packages (from streamlit) (2.8.2)\n", + "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.10/dist-packages (from streamlit) (13.4.2)\n", + "Requirement already satisfied: tenacity<9,>=8.1.0 in /usr/local/lib/python3.10/dist-packages (from streamlit) (8.2.2)\n", + "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.10/dist-packages (from streamlit) (0.10.2)\n", + "Requirement already satisfied: tzlocal<5,>=1.1 in /usr/local/lib/python3.10/dist-packages (from streamlit) (4.3.1)\n", + "Requirement already satisfied: validators<1,>=0.2 in /usr/local/lib/python3.10/dist-packages (from streamlit) (0.20.0)\n", + "Requirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.10/dist-packages (from 
streamlit) (3.1.32)\n", + "Requirement already satisfied: pydeck<1,>=0.8 in /usr/local/lib/python3.10/dist-packages (from streamlit) (0.8.0)\n", + "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.10/dist-packages (from streamlit) (6.3.1)\n", + "Requirement already satisfied: watchdog>=2.1.5 in /usr/local/lib/python3.10/dist-packages (from streamlit) (3.0.0)\n", + "Requirement already satisfied: entrypoints in /usr/local/lib/python3.10/dist-packages (from altair<6,>=4.0->streamlit) (0.4)\n", + "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.10/dist-packages (from altair<6,>=4.0->streamlit) (4.3.3)\n", + "Requirement already satisfied: toolz in /usr/local/lib/python3.10/dist-packages (from altair<6,>=4.0->streamlit) (0.12.0)\n", + "Requirement already satisfied: urllib3<2,>=1.26.2 in /usr/local/lib/python3.10/dist-packages (from elastic-transport<9,>=8->elasticsearch) (1.26.16)\n", + "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from elastic-transport<9,>=8->elasticsearch) (2023.7.22)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.10/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit) (4.0.10)\n", + "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.4.0->sentence-transformers) (2023.6.0)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.6->eland) (1.1.0)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.6->eland) (0.11.0)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.6->eland) (4.41.1)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.6->eland) (1.4.4)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in 
/usr/local/lib/python3.10/dist-packages (from matplotlib>=3.6->eland) (3.1.0)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.5->eland) (2022.7.1)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch) (2.1.3)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil<3,>=2.7.3->streamlit) (1.16.0)\n", + "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.12)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.4)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich<14,>=10.14.0->streamlit) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich<14,>=10.14.0->streamlit) (2.14.0)\n", + "Requirement already satisfied: pytz-deprecation-shim in /usr/local/lib/python3.10/dist-packages (from tzlocal<5,>=1.1->streamlit) (0.1.0.post0)\n", + "Requirement already satisfied: decorator>=3.4.0 in /usr/local/lib/python3.10/dist-packages (from validators<1,>=0.2->streamlit) (4.4.2)\n", + "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk->sentence-transformers) (1.3.1)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->sentence-transformers) (3.2.0)\n", + "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch) (1.3.0)\n", + "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.10/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit) (5.0.0)\n", + "Requirement already satisfied: attrs>=17.4.0 in 
/usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit) (23.1.0)\n", + "Requirement already satisfied: pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit) (0.19.3)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit) (0.1.2)\n", + "Requirement already satisfied: tzdata in /usr/local/lib/python3.10/dist-packages (from pytz-deprecation-shim->tzlocal<5,>=1.1->streamlit) (2023.3)\n" + ] + } + ], + "source": [ + "!pip install sentence-transformers eland elasticsearch transformers torch tqdm Pillow streamlit" + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Upload NLP model for querying\n", + "Using the `eland` library you will import the NLP CLIP model. The model will\n", + "transfer your search query into a vector which will be used for the search over the set of images stored in Elasticsearch.\n", + "\n", + "The model used is [clip-ViT-B-32-multilingual-v1](https://huggingface.co/sentence-transformers/clip-ViT-B-32-multilingual-v1) because the image embeddings are also generated by the CLIP model.\n", + "\n", + "How to get the Cloud ID? Go to [ESS cloud](https://cloud.elastic.co/logout?redirectTo=%2Fhome&reason=unauthorised) and `On the deployment overview page, copy down the Cloud ID.`\n", + "\n", + "The authentication uses an API key (`--es-api-key`). 
Learn how to generate [API key](https://www.elastic.co/guide/en/kibana/current/api-keys.html#create-api-key).\n", + "```\n", + "$ eland_import_hub_model --cloud-id $CLOUD_ID \\\n", + " --hub-model-id sentence-transformers/clip-ViT-B-32-multilingual-v1 \\\n", + " --task-type text_embedding --es-api-key $API_KEY --start\n", + "```" + ], + "metadata": { + "id": "eIV5lAnVt9L7" + } + }, + { + "cell_type": "code", + "source": [ + "API_KEY=''\n", + "CLOUD_ID=''\n", + "!eland_import_hub_model --cloud-id $CLOUD_ID --hub-model-id sentence-transformers/clip-ViT-B-32-multilingual-v1 --task-type text_embedding --es-api-key $API_KEY --start" + ], + "metadata": { + "id": "tVhL9jBnuAAQ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Import Python libraries" + ], + "metadata": { + "id": "hVxWnFflUCZv" + } + }, + { + "cell_type": "code", + "source": [ + "from elasticsearch import Elasticsearch\n", + "from elasticsearch.helpers import parallel_bulk\n", + "import requests\n", + "import os\n", + "import sys\n", + "# import shutil\n", + "import zipfile\n", + "from tqdm.auto import tqdm\n", + "import pandas as pd\n", + "from PIL import Image\n", + "from sentence_transformers import SentenceTransformer\n", + "import urllib.request\n", + "# import urllib.error\n", + "import json\n", + "from getpass import getpass" + ], + "metadata": { + "id": "I0pRCbYMuMVn" + }, + "execution_count": 17, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Connect to Elasticsearch cluster\n", + "Use your own cluster details `ELASTIC_CLOUD_ID`, `API_KEY`." 
+ ], + "metadata": { + "id": "Klv3rywdUJBN" + } + }, + { + "cell_type": "code", + "source": [ + "# ESS Cloud connection definition using an API_KEY\n", + "ELASTIC_CLOUD_ID = getpass(\"Elastic Cloud ID: \")\n", + "API_KEY = getpass(\"Elastic API key: \")\n", + "\n", + "# ELASTIC_CLOUD_USER = \"elastic\"\n", + "# CLOUD_PASSWORD = getpass(\"Elastic Password\")\n", + "\n", + "es = Elasticsearch(\n", + " cloud_id=ELASTIC_CLOUD_ID,\n", + " #basic_auth=(ELASTIC_CLOUD_USER, ELASTIC_CLOUD_PASSWORD),\n", + " api_key=API_KEY,\n", + " request_timeout=600\n", + ")\n", + "\n", + "es.info() # should return cluster info" + ], + "metadata": { + "id": "YwN8RmFY3FQI", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "d0d0e31e-2ad2-46fe-ef8c-8c8bce7e1c48" + }, + "execution_count": 19, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Elastic Cloud ID: ··········\n", + "Elastic API key: ··········\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "ObjectApiResponse({'name': 'instance-0000000000', 'cluster_name': 'a597bbe1e0d047c494e7d4015f67ef37', 'cluster_uuid': 'EnT0vwwSSZeAahPw3Vhsuw', 'version': {'number': '8.8.2', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '98e1271edf932a480e4262a471281f1ee295ce6b', 'build_date': '2023-06-26T05:16:16.196344851Z', 'build_snapshot': False, 'lucene_version': '9.6.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'})" + ] + }, + "metadata": {}, + "execution_count": 19 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Create Index and mappings for Images\n", + "Befor you can index documents into Elasticsearch, you need to create an Index with correct mappings." 
+ ], + "metadata": { + "id": "IW-GIlH2OxB4" + } + }, + { + "cell_type": "code", + "source": [ + "# Destination Index name\n", + "INDEX_NAME=\"images\"\n", + "# If you want to delete previous version of the Index\n", + "DELETE_INDEX=False\n", + "\n", + "INDEX_MAPPING = {\n", + " \"properties\": {\n", + " \"image_embedding\": {\n", + " \"type\": \"dense_vector\",\n", + " \"dims\": 512,\n", + " \"index\": True,\n", + " \"similarity\": \"cosine\"\n", + " },\n", + " \"photo_id\": {\n", + " \"type\": \"keyword\"\n", + " },\n", + " \"photo_image_url\": {\n", + " \"type\": \"keyword\"\n", + " },\n", + " \"ai_description\": {\n", + " \"type\": \"text\"\n", + " },\n", + " \"photo_description\": {\n", + " \"type\": \"text\"\n", + " },\n", + " \"photo_url\": {\n", + " \"type\": \"keyword\"\n", + " },\n", + " \"photographer_first_name\": {\n", + " \"type\": \"keyword\"\n", + " },\n", + " \"photographer_last_name\": {\n", + " \"type\": \"keyword\"\n", + " },\n", + " \"photographer_username\": {\n", + " \"type\": \"keyword\"\n", + " },\n", + " \"exif_camera_make\": {\n", + " \"type\": \"keyword\"\n", + " },\n", + " \"exif_camera_model\": {\n", + " \"type\": \"keyword\"\n", + " },\n", + " \"exif_iso\": {\n", + " \"type\": \"integer\"\n", + " }\n", + " }\n", + " }\n", + "\n", + "# Index settings\n", + "INDEX_SETTINGS = {\n", + " \"index\": {\n", + " \"number_of_replicas\": \"1\",\n", + " \"number_of_shards\": \"1\",\n", + " \"refresh_interval\": \"5s\"\n", + " }\n", + "}\n", + "\n", + "if(DELETE_INDEX):\n", + " if es.indices.exists(index=INDEX_NAME):\n", + " print(\"Deleting existing %s\" % INDEX_NAME)\n", + " es.indices.delete(index=INDEX_NAME, ignore=[400, 404])\n", + "\n", + "if not es.indices.exists(index=INDEX_NAME):\n", + " print(\"Creating index %s\" % INDEX_NAME)\n", + " es.indices.create(index=INDEX_NAME, mappings=INDEX_MAPPING, settings=INDEX_SETTINGS,\n", + " ignore=[400, 404])\n" + ], + "metadata": { + "id": "xAkc1OVcOxy3" + }, + "execution_count": null, + "outputs": [] 
+ }, + { + "cell_type": "markdown", + "source": [ + "### Get image dataset and embeddings\n", + "Download:\n", + "- The example image dataset is from [Unsplash](https://github.com/unsplash/datasets)\n", + "- The [Image embeddings](https://github.com/radoondas/flask-elastic-nlp/blob/main/embeddings/blogs/blogs-no-embeddings.json.zip) are pre-generated using CLIP model\n", + "\n", + "Then unzip both files." + ], + "metadata": { + "id": "NKE-j0kPUMn_" + } + }, + { + "cell_type": "code", + "source": [ + "!wget https://unsplash.com/data/lite/1.2.0 -O data/unsplash-research-dataset-lite-1.2.0.zip\n", + "!wget https://raw.githubusercontent.com/radoondas/flask-elastic-nlp/main/embeddings/images/image-embeddings.json.zip -P data" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "zFGaPDRR5mqT", + "outputId": "0114cdd6-a714-41ab-9b46-3013bd36698a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2023-07-25 14:28:32-- https://unsplash.com/data/lite/1.2.0\n", + "Resolving unsplash.com (unsplash.com)... 151.101.65.181, 151.101.1.181, 151.101.129.181, ...\n", + "Connecting to unsplash.com (unsplash.com)|151.101.65.181|:443... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://unsplash-datasets.s3.amazonaws.com/lite/1.2.0/unsplash-research-dataset-lite-1.2.0.zip [following]\n", + "--2023-07-25 14:28:32-- https://unsplash-datasets.s3.amazonaws.com/lite/1.2.0/unsplash-research-dataset-lite-1.2.0.zip\n", + "Resolving unsplash-datasets.s3.amazonaws.com (unsplash-datasets.s3.amazonaws.com)... 52.217.102.84, 3.5.25.253, 52.217.96.188, ...\n", + "Connecting to unsplash-datasets.s3.amazonaws.com (unsplash-datasets.s3.amazonaws.com)|52.217.102.84|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 632351052 (603M) [application/zip]\n", + "Saving to: ‘data/unsplash-research-dataset-lite-1.2.0.zip’\n", + "\n", + "data/unsplash-resea 100%[===================>] 603.06M 14.1MB/s in 42s \n", + "\n", + "2023-07-25 14:29:16 (14.2 MB/s) - ‘data/unsplash-research-dataset-lite-1.2.0.zip’ saved [632351052/632351052]\n", + "\n", + "--2023-07-25 14:29:16-- https://raw.githubusercontent.com/radoondas/flask-elastic-nlp/main/embeddings/images/image-embeddings.json.zip\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 94256217 (90M) [application/zip]\n", + "Saving to: ‘data/image-embeddings.json.zip.1’\n", + "\n", + "image-embeddings.js 100%[===================>] 89.89M 164MB/s in 0.5s \n", + "\n", + "2023-07-25 14:29:16 (164 MB/s) - ‘data/image-embeddings.json.zip.1’ saved [94256217/94256217]\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Unzip downloaded files\n", + "UNSPLASH_ZIP_FILE=\"data/unsplash-research-dataset-lite-1.2.0.zip\"\n", + "EMBEDDINGS_ZIP_FILE=\"data/image-embeddings.json.zip\"\n", + "\n", + "with zipfile.ZipFile(UNSPLASH_ZIP_FILE, 'r') as zip_ref:\n", + " print('Extracting file ', UNSPLASH_ZIP_FILE, '.')\n", + " zip_ref.extractall('data/unsplash/')\n", + "\n", + "with zipfile.ZipFile(EMBEDDINGS_ZIP_FILE, 'r') as zip_ref:\n", + " print('Extracting file ', EMBEDDINGS_ZIP_FILE, '.')\n", + " zip_ref.extractall(\"data/embeddings/\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "MBh4AQ8i7C0-", + "outputId": "17a50b7f-f052-4b72-daa8-0e8fc630326f" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Extracting file data/unsplash-research-dataset-lite-1.2.0.zip 
.\n", + "Extracting file data/image-embeddings.json.zip .\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Image embeddings\n", + "We now have 2 options for how to proceed.\n", + "1. Import all pregenerated image embeddings (~19k). This is the faster option, with a lot of images available in a short time.\n", + "2. Import a small subset of randomly chosen images to see the process of generating image embeddings using the external CLIP model." + ], + "metadata": { + "id": "p6H7QYctQQA7" + } + }, + { + "cell_type": "code", + "source": [ + "# define helper function\n", + "def gen_rows(df):\n", + " for doc in df.to_dict(orient='records'):\n", + " yield doc" + ], + "metadata": { + "id": "03YvC-_JY9OE" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## 1. Import all pregenerated image embeddings\n", + "This option lets you import ~19k documents with pregenerated image embeddings with metadata.\n", + "\n", + "The process downloads files with image information, merges them, and indexes them into Elasticsearch." 
+ ], + "metadata": { + "id": "qhZRdUyAQd-s" + } + }, + { + "cell_type": "code", + "source": [ + "df_unsplash = pd.read_csv('data/unsplash/' + 'photos.tsv000', sep='\\t', header=0)\n", + "\n", + "# follwing 8 lines are fix for inconsistent/incorrect data\n", + "df_unsplash['photo_description'].fillna('', inplace=True)\n", + "df_unsplash['ai_description'].fillna('', inplace=True)\n", + "df_unsplash['photographer_first_name'].fillna('', inplace=True)\n", + "df_unsplash['photographer_last_name'].fillna('', inplace=True)\n", + "df_unsplash['photographer_username'].fillna('', inplace=True)\n", + "df_unsplash['exif_camera_make'].fillna('', inplace=True)\n", + "df_unsplash['exif_camera_model'].fillna('', inplace=True)\n", + "df_unsplash['exif_iso'].fillna(0, inplace=True)\n", + "## end of fix\n", + "\n", + "# read subset of columns from the original/downloaded dataset\n", + "df_unsplash_subset = df_unsplash[\n", + " ['photo_id', 'photo_url', 'photo_image_url', 'photo_description', 'ai_description', 'photographer_first_name',\n", + " 'photographer_last_name', 'photographer_username', 'exif_camera_make', 'exif_camera_model', 'exif_iso']]\n", + "\n", + "# read all pregenerated embeddings\n", + "df_embeddings = pd.read_json('data/embeddings/' + 'image-embeddings.json', lines=True)\n", + "\n", + "df_merged = pd.merge(df_unsplash_subset, df_embeddings,\n", + " on='photo_id',\n", + " how='inner')\n", + "\n", + "count = 0\n", + "for success, info in parallel_bulk(\n", + " client=es,\n", + " actions=gen_rows(df_merged),\n", + " thread_count=5,\n", + " chunk_size=1000,\n", + " index=INDEX_NAME\n", + "):\n", + " if success:\n", + " count += 1\n", + " if count % 1000 == 0:\n", + " print('Indexed %s documents' % str(count), flush=True)\n", + " sys.stdout.flush()\n", + " else:\n", + " print('Doc failed', info)\n", + "\n", + "print('Indexed %s image embeddings documents' % str(count), flush=True)\n", + "sys.stdout.flush()" + ], + "metadata": { + "id": "32xrbSUXTODQ" + }, + 
"execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## 2. Import small number of random\n", + "In this part you will randomly choose small number of images and then generate image embeddings. The script will then index documents into Elasticserach." + ], + "metadata": { + "id": "xypgh4lFQmkc" + } + }, + { + "cell_type": "code", + "source": [ + "NUMBER_OF_IMAGES=20\n", + "INDEX_NAME=\"images-test\"\n", + "\n", + "df_unsplash = pd.read_csv('data/unsplash/' + 'photos.tsv000', sep='\\t', header=0)\n", + "\n", + "## stat fix\n", + "# follwing 8 lines are fix for inconsistent/incorrect data\n", + "df_unsplash['photo_description'].fillna('', inplace=True)\n", + "df_unsplash['ai_description'].fillna('', inplace=True)\n", + "df_unsplash['photographer_first_name'].fillna('', inplace=True)\n", + "df_unsplash['photographer_last_name'].fillna('', inplace=True)\n", + "df_unsplash['photographer_username'].fillna('', inplace=True)\n", + "df_unsplash['exif_camera_make'].fillna('', inplace=True)\n", + "df_unsplash['exif_camera_model'].fillna('', inplace=True)\n", + "df_unsplash['exif_iso'].fillna(0, inplace=True)\n", + "## end of fix\n", + "\n", + "df_unsplash_subset = df_unsplash[\n", + " ['photo_id', 'photo_url', 'photo_image_url', 'photo_description', 'ai_description', 'photographer_first_name',\n", + " 'photographer_last_name', 'photographer_username', 'exif_camera_make', 'exif_camera_model', 'exif_iso']]\n", + "\n", + "df_random_subset = df_unsplash_subset.sample(n=NUMBER_OF_IMAGES, replace=False)\n", + "df_random_subset = df_random_subset.reset_index()\n", + "\n", + "# Load model CLIP\n", + "img_model = SentenceTransformer('clip-ViT-B-32')\n", + "\n", + "# new list of image documents for indexing into ES\n", + "lst = []\n", + "if not os.path.exists(\"data/images\"):\n", + " os.mkdir(\"data/images\")\n", + "\n", + "for index, row in df_random_subset.iterrows():\n", + " #open image from url\n", + " img_path = \"data/images/\" + 
row['photo_id']\n", + " try:\n", + " urllib.request.urlretrieve(row['photo_image_url'], img_path)\n", + " print(row['photo_id'] + \" \" + row['photo_url'])\n", + " except urllib.error.HTTPError as err:\n", + " if err.code == 404:\n", + " print('404 error: Image not found at {}'.format(row['photo_image_url']))\n", + " else:\n", + " raise\n", + "\n", + " img = Image.open(img_path)\n", + " # create doc\n", + " doc = {}\n", + " embedding = img_model.encode(img)\n", + " doc['photo_id'] = row['photo_id']\n", + " doc['image_embedding'] = embedding.tolist()\n", + " lst.append(doc)\n", + " # print(doc)\n", + "\n", + " # Image cleanup.\n", + " # If file exists, delete it.\n", + " if os.path.exists(img_path):\n", + " os.remove(img_path)\n", + "\n", + "# read all pregenerated embeddings\n", + "df_embeddings = pd.read_json('data/embeddings/' + 'image-embeddings.json', lines=True)\n", + "\n", + "df_merged = pd.merge(df_random_subset, pd.DataFrame(lst),\n", + " on='photo_id',\n", + " how='inner')\n", + "# print(df_merged)\n", + "\n", + "count = 0\n", + "for success, info in parallel_bulk(\n", + " client=es,\n", + " actions=gen_rows(df_merged),\n", + " thread_count=5,\n", + " chunk_size=10,\n", + " index=INDEX_NAME\n", + "):\n", + " if success:\n", + " count += 1\n", + " if count % 10 == 0:\n", + " print('Indexed %s documents' % str(count), flush=True)\n", + " sys.stdout.flush()\n", + " else:\n", + " print('Doc failed', info)\n", + "\n", + "print('Indexed %s image embeddings documents' % str(count), flush=True)\n", + "sys.stdout.flush()" + ], + "metadata": { + "id": "r_txQjP2RKnr" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Query the image dataset\n", + "The next step is to run a query to search for images. 
The example query searches for `\"model_text\": \"Valentine day flowers\"` using the model we uploaded to Elasticsearch `sentence-transformers__clip-vit-b-32-multilingual-v1`.\n", + "\n", + "The process is one query even it internaly consists of two tasks. One is to tramsform your search text into a vector using the NLP model and the second task is to run the vector search over the image dataset.\n", + "```\n", + "POST images/_search\n", + "{\n", + " \"knn\": {\n", + " \"field\": \"image_embedding\",\n", + " \"k\": 5,\n", + " \"num_candidates\": 10,\n", + " \"query_vector_builder\": {\n", + " \"text_embedding\": {\n", + " \"model_id\": \"sentence-transformers__clip-vit-b-32-multilingual-v1\",\n", + " \"model_text\": \"Valentine day flowers\"\n", + " }\n", + " }\n", + " },\n", + " \"fields\": [\n", + " \"photo_description\",\n", + " \"ai_description\",\n", + " \"photo_url\"\n", + " ],\n", + " \"_source\": false\n", + "}\n", + "```\n", + "\n" + ], + "metadata": { + "id": "-_i2CIpSz9vw" + } + }, + { + "cell_type": "code", + "source": [ + "# Search queary\n", + "WHAT_ARE_YOU_LOOKING_FOR=\"Valentine day flowers\"\n", + "INDEX_IM_EMBED=\"images\"\n", + "\n", + "source_fields = [\"photo_description\", \"ai_description\", \"photo_url\", \"photo_image_url\", \"photographer_first_name\",\n", + " \"photographer_username\", \"photographer_last_name\", \"photo_id\"]\n", + "query = {\n", + " \"field\": \"image_embedding\",\n", + " \"k\": 5,\n", + " \"num_candidates\": 100,\n", + " \"query_vector_builder\": {\n", + " \"text_embedding\": {\n", + " \"model_id\": \"sentence-transformers__clip-vit-b-32-multilingual-v1\",\n", + " \"model_text\": WHAT_ARE_YOU_LOOKING_FOR\n", + " }\n", + " }\n", + "}\n", + "\n", + "response = es.search(\n", + " index=INDEX_IM_EMBED,\n", + " fields=source_fields,\n", + " knn=query, source=False)\n", + "\n", + "print(response.body)\n", + "\n", + "# the code writes the response into a file for the streamlit UI used in the optional step.\n", + "with 
open('json_data.json', 'w') as outfile:\n", + " json.dump(response.body['hits']['hits'], outfile)\n", + "\n", + "# Use the `loads()` method to load the JSON data\n", + "dfr = json.loads(json.dumps(response.body['hits']['hits']))\n", + "# Pass the generated JSON data into a pandas dataframe\n", + "dfr = pd.DataFrame(dfr)\n", + "# Print the data frame\n", + "dfr\n", + "\n", + "results = pd.json_normalize(json.loads(json.dumps(response.body['hits']['hits'])))\n", + "# results\n", + "results[['_id', '_score', 'fields.photo_id', 'fields.photo_image_url',\n", + " 'fields.photo_description', 'fields.photographer_first_name',\n", + " 'fields.photographer_last_name', 'fields.ai_description',\n", + " 'fields.photo_url']]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 375 + }, + "id": "wdicpvRlzmXG", + "outputId": "00550041-0aed-4f51-ccd3-18eb705ff7ed" + }, + "execution_count": 35, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "{'took': 114, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 5, 'relation': 'eq'}, 'max_score': 0.646751, 'hits': [{'_index': 'images', '_id': 'nK5Fh4kBLg4Kd5ySLbKC', '_score': 0.646751, 'fields': {'photo_image_url': ['https://images.unsplash.com/photo-1587404787163-d03a28fcc1f0'], 'photo_id': ['gQFZxLe3m4g'], 'photographer_first_name': ['Vadim'], 'photo_description': ['instagram.com/vadimsadovski'], 'photographer_last_name': ['Sadovski'], 'photo_url': ['https://unsplash.com/photos/gQFZxLe3m4g'], 'photographer_username': ['vadimsadovski'], 'ai_description': ['']}}, {'_index': 'images', '_id': 'Xa5Eh4kBLg4Kd5yS84Qf', '_score': 0.64675057, 'fields': {'photo_image_url': ['https://images.unsplash.com/photo-1574259148543-dd376a61339f'], 'photo_id': ['g5Mhx29yp-A'], 'photographer_first_name': ['Erin'], 'photo_description': ['Cute but grumpy cat in the Austrian mountains'], 'photographer_last_name': ['East'], 
'photo_url': ['https://unsplash.com/photos/g5Mhx29yp-A'], 'photographer_username': ['mserineast'], 'ai_description': ['brown Persian cat on white bench']}}, {'_index': 'images', '_id': '265Eh4kBLg4Kd5yS84Uf', '_score': 0.64244866, 'fields': {'photo_image_url': ['https://images.unsplash.com/photo-1565772101068-dec21f7b36eb'], 'photo_id': ['9KZ0PGNCxNE'], 'photographer_first_name': ['Perchek'], 'photo_description': [''], 'photographer_last_name': ['Industrie'], 'photo_url': ['https://unsplash.com/photos/9KZ0PGNCxNE'], 'photographer_username': ['perchek_industrie'], 'ai_description': ['siamese cat']}}, {'_index': 'images', '_id': 'xq5Fh4kBLg4Kd5ySEpuC', '_score': 0.64216036, 'fields': {'photo_image_url': ['https://images.unsplash.com/photo-1589994205353-325f40210d02'], 'photo_id': ['VOZQkkKXvY4'], 'photographer_first_name': ['Andrey'], 'photo_description': [''], 'photographer_last_name': ['Svistunov'], 'photo_url': ['https://unsplash.com/photos/VOZQkkKXvY4'], 'photographer_username': ['svistal13'], 'ai_description': ['orange tabby cat on ground covered with snow during daytime']}}, {'_index': 'images', '_id': 'WK5Eh4kBLg4Kd5yS5XcD', '_score': 0.64185303, 'fields': {'photo_image_url': ['https://images.unsplash.com/photo-1556820161-b605d166fce1'], 'photo_id': ['wmz8y6L6c_k'], 'photographer_first_name': ['Phillip'], 'photo_description': [''], 'photographer_last_name': ['Suitcases'], 'photo_url': ['https://unsplash.com/photos/wmz8y6L6c_k'], 'photographer_username': ['nillait'], 'ai_description': ['brown and black kitten close-up photography']}}]}}\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " _id _score fields.photo_id \\\n", + "0 nK5Fh4kBLg4Kd5ySLbKC 0.646751 [gQFZxLe3m4g] \n", + "1 Xa5Eh4kBLg4Kd5yS84Qf 0.646751 [g5Mhx29yp-A] \n", + "2 265Eh4kBLg4Kd5yS84Uf 0.642449 [9KZ0PGNCxNE] \n", + "3 xq5Fh4kBLg4Kd5ySEpuC 0.642160 [VOZQkkKXvY4] \n", + "4 WK5Eh4kBLg4Kd5yS5XcD 0.641853 [wmz8y6L6c_k] \n", + "\n", + " fields.photo_image_url \\\n", + 
"0 [https://images.unsplash.com/photo-15874047871... \n", + "1 [https://images.unsplash.com/photo-15742591485... \n", + "2 [https://images.unsplash.com/photo-15657721010... \n", + "3 [https://images.unsplash.com/photo-15899942053... \n", + "4 [https://images.unsplash.com/photo-1556820161-... \n", + "\n", + " fields.photo_description \\\n", + "0 [instagram.com/vadimsadovski] \n", + "1 [Cute but grumpy cat in the Austrian mountains] \n", + "2 [] \n", + "3 [] \n", + "4 [] \n", + "\n", + " fields.photographer_first_name fields.photographer_last_name \\\n", + "0 [Vadim] [Sadovski] \n", + "1 [Erin] [East] \n", + "2 [Perchek] [Industrie] \n", + "3 [Andrey] [Svistunov] \n", + "4 [Phillip] [Suitcases] \n", + "\n", + " fields.ai_description \\\n", + "0 [] \n", + "1 [brown Persian cat on white bench] \n", + "2 [siamese cat] \n", + "3 [orange tabby cat on ground covered with snow ... \n", + "4 [brown and black kitten close-up photography] \n", + "\n", + " fields.photo_url \n", + "0 [https://unsplash.com/photos/gQFZxLe3m4g] \n", + "1 [https://unsplash.com/photos/g5Mhx29yp-A] \n", + "2 [https://unsplash.com/photos/9KZ0PGNCxNE] \n", + "3 [https://unsplash.com/photos/VOZQkkKXvY4] \n", + "4 [https://unsplash.com/photos/wmz8y6L6c_k] " + ], + "text/html": [ + "\n", + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
_id_scorefields.photo_idfields.photo_image_urlfields.photo_descriptionfields.photographer_first_namefields.photographer_last_namefields.ai_descriptionfields.photo_url
0nK5Fh4kBLg4Kd5ySLbKC0.646751[gQFZxLe3m4g][https://images.unsplash.com/photo-15874047871...[instagram.com/vadimsadovski][Vadim][Sadovski][][https://unsplash.com/photos/gQFZxLe3m4g]
1Xa5Eh4kBLg4Kd5yS84Qf0.646751[g5Mhx29yp-A][https://images.unsplash.com/photo-15742591485...[Cute but grumpy cat in the Austrian mountains][Erin][East][brown Persian cat on white bench][https://unsplash.com/photos/g5Mhx29yp-A]
2265Eh4kBLg4Kd5yS84Uf0.642449[9KZ0PGNCxNE][https://images.unsplash.com/photo-15657721010...[][Perchek][Industrie][siamese cat][https://unsplash.com/photos/9KZ0PGNCxNE]
3xq5Fh4kBLg4Kd5ySEpuC0.642160[VOZQkkKXvY4][https://images.unsplash.com/photo-15899942053...[][Andrey][Svistunov][orange tabby cat on ground covered with snow ...[https://unsplash.com/photos/VOZQkkKXvY4]
4WK5Eh4kBLg4Kd5yS5XcD0.641853[wmz8y6L6c_k][https://images.unsplash.com/photo-1556820161-...[][Phillip][Suitcases][brown and black kitten close-up photography][https://unsplash.com/photos/wmz8y6L6c_k]
\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + "
\n", + " \n", + "
\n", + "\n", + "\n", + "\n", + " \n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 35 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# [Optional] Simple streamlit UI\n", + "In the following section, you will view the response in a simple UI for better visualisation.\n", + "\n", + "The query in the previous step did write down a file response `json_data.json` for the UI to load and visualise.\n", + "\n", + "Follow the steps below to see the results in a table." + ], + "metadata": { + "id": "Ry62sfHFHFi9" + } + }, + { + "cell_type": "markdown", + "source": [ + "### Install tunnel library" + ], + "metadata": { + "id": "iUAbRqr8II-x" + } + }, + { + "cell_type": "code", + "source": [ + "!npm install localtunnel" + ], + "metadata": { + "id": "RGEmAt2DjtN7", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "f6c37d54-7e09-4e59-fc21-8a3db4fa840d" + }, + "execution_count": 12, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[K\u001b[?25h\u001b[37;40mnpm\u001b[0m \u001b[0m\u001b[30;43mWARN\u001b[0m \u001b[0m\u001b[35msaveError\u001b[0m ENOENT: no such file or directory, open '/content/package.json'\n", + "\u001b[0m\u001b[37;40mnpm\u001b[0m \u001b[0m\u001b[34;40mnotice\u001b[0m\u001b[35m\u001b[0m created a lockfile as package-lock.json. 
You should commit this file.\n", + "\u001b[0m\u001b[37;40mnpm\u001b[0m \u001b[0m\u001b[30;43mWARN\u001b[0m \u001b[0m\u001b[35menoent\u001b[0m ENOENT: no such file or directory, open '/content/package.json'\n", + "\u001b[0m\u001b[37;40mnpm\u001b[0m \u001b[0m\u001b[30;43mWARN\u001b[0m\u001b[35m\u001b[0m content No description\n", + "\u001b[0m\u001b[37;40mnpm\u001b[0m \u001b[0m\u001b[30;43mWARN\u001b[0m\u001b[35m\u001b[0m content No repository field.\n", + "\u001b[0m\u001b[37;40mnpm\u001b[0m \u001b[0m\u001b[30;43mWARN\u001b[0m\u001b[35m\u001b[0m content No README data\n", + "\u001b[0m\u001b[37;40mnpm\u001b[0m \u001b[0m\u001b[30;43mWARN\u001b[0m\u001b[35m\u001b[0m content No license field.\n", + "\u001b[0m\n", + "\u001b[K\u001b[?25h+ localtunnel@2.0.2\n", + "added 22 packages from 22 contributors and audited 22 packages in 5.903s\n", + "\n", + "3 packages are looking for funding\n", + " run `npm fund` for details\n", + "\n", + "found \u001b[92m0\u001b[0m vulnerabilities\n", + "\n", + "\u001b[K\u001b[?25h" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Create application" + ], + "metadata": { + "id": "KUAfucnYITka" + } + }, + { + "cell_type": "code", + "source": [ + "%%writefile app.py\n", + "\n", + "import streamlit as st\n", + "import json\n", + "import pandas as pd\n", + "\n", + "\n", + "def get_image_preview(image_url):\n", + " \"\"\"Returns an HTML tag with preview of the image.\"\"\"\n", + " return f\"\"\"\"\"\"\n", + "\n", + "\n", + "def get_url_link(photo_url):\n", + " \"\"\"Returns an HTML tag to the image page.\"\"\"\n", + " return f\"\"\" {photo_url} \"\"\"\n", + "\n", + "\n", + "def main():\n", + " \"\"\"Creates a Streamlit app with a table of images.\"\"\"\n", + " data = json.load(open(\"json_data.json\"))\n", + " table = []\n", + " for image in data:\n", + " image_url = image[\"fields\"][\"photo_image_url\"][0]\n", + " image_preview = get_image_preview(image_url)\n", + " photo_url = image[\"fields\"][\"photo_url\"][0]\n", + " 
photo_url_link = get_url_link(photo_url)\n", + " table.append([image_preview, image[\"fields\"][\"photo_id\"][0],\n", + " image[\"fields\"][\"photographer_first_name\"][0],\n", + " image[\"fields\"][\"photographer_last_name\"][0],\n", + " image[\"fields\"][\"photographer_username\"][0],\n", + " photo_url_link])\n", + "\n", + " st.write(pd.DataFrame(table, columns=[\"Image\", \"ID\", \"First Name\", \"Last Name\",\n", + " \"Photographer username\", \"Photo url\"]).to_html(escape = False),\n", + " unsafe_allow_html=True)\n", + "\n", + "\n", + "if __name__ == \"__main__\":\n", + " main()\n", + "\n" + ], + "metadata": { + "id": "9Wb7GOWMXFnF", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "6db23ef3-b25e-4f80-a3cb-6d08c1c78c16" + }, + "execution_count": 36, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Overwriting app.py\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Run app\n", + "Run the application and check your IP for the tunneling" + ], + "metadata": { + "id": "CjDhvbGhHuiz" + } + }, + { + "cell_type": "code", + "source": [ + "!streamlit run app.py &>/content/logs.txt & curl ipv4.icanhazip.com" + ], + "metadata": { + "id": "851CeYi8jvuF", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "46a64023-e990-4900-f482-5558237f08cc" + }, + "execution_count": 37, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "34.138.156.22\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Create the tunnel\n", + "Run the tunnel and use the link below to connect to the tunnel.\n", + "\n", + "Use the IP from the previous step to connect to the application" + ], + "metadata": { + "id": "4OuSLFHyHy5M" + } + }, + { + "cell_type": "code", + "source": [ + "!npx localtunnel --port 8501" + ], + "metadata": { + "id": "inF7ceBmjyE3", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": 
"559ce180-3f0f-4475-c9a9-46dc91389276" + }, + "execution_count": 38, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[K\u001b[?25hnpx: installed 22 in 2.186s\n", + "your url is: https://nine-facts-act.loca.lt\n", + "^C\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Resources\n", + "\n", + "Blog: https://www.elastic.co/blog/implement-image-similarity-search-elastic\n", + "\n", + "GH : https://github.com/radoondas/flask-elastic-image-search\n" + ], + "metadata": { + "id": "SbxbVzvQ7caR" + } + } + ] +} \ No newline at end of file From 54ccdf95808ef3e49e21aeeefaa7d7b67adac166 Mon Sep 17 00:00:00 2001 From: Miguel Grinberg Date: Thu, 14 Mar 2024 11:05:29 +0000 Subject: [PATCH 2/3] rename and relocate the notebook --- notebooks/images/image-similarity.ipynb | 1284 ++++++++++++++++++++ notebooks/search/04-image-similarity.ipynb | 1264 ------------------- 2 files changed, 1284 insertions(+), 1264 deletions(-) create mode 100644 notebooks/images/image-similarity.ipynb delete mode 100644 notebooks/search/04-image-similarity.ipynb diff --git a/notebooks/images/image-similarity.ipynb b/notebooks/images/image-similarity.ipynb new file mode 100644 index 00000000..46e01eb1 --- /dev/null +++ b/notebooks/images/image-similarity.ipynb @@ -0,0 +1,1284 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# How to implement Image search using Elasticsearch" + ], + "metadata": { + "id": "CepGq3Kvtdxi" + } + }, + { + "cell_type": "markdown", + "source": [ + "The workbook shows how to implement an Image search using Elasticsearch. 
You will index documents with image embeddings (generated or pre-generated) and then, using an NLP model, be able to search using a natural language description of the image.\n", + "\n", + "### Prerequisites\n", + "Before you start make sure you have an Elasticsearch cluster running. The cluster must have at least one machine learning (ML) node with enough (4GB) memory." + ], + "metadata": { + "id": "oMu1SW_TQQrU" + } + }, + { + "cell_type": "markdown", + "source": [ + "### Install Python requirements\n", + "Before you start you need to install all required Python dependencies." + ], + "metadata": { + "id": "VFcdr8IDQE_H" + } + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "6WosfR55npKU", + "outputId": "033767ff-0eef-48cc-c9e7-efbf73c9cb67" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: sentence-transformers in /usr/local/lib/python3.10/dist-packages (2.2.2)\n", + "Requirement already satisfied: eland in /usr/local/lib/python3.10/dist-packages (8.7.0)\n", + "Requirement already satisfied: elasticsearch in /usr/local/lib/python3.10/dist-packages (8.9.0)\n", + "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.31.0)\n", + "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (2.0.1+cu118)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (4.65.0)\n", + "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (9.4.0)\n", + "Requirement already satisfied: streamlit in /usr/local/lib/python3.10/dist-packages (1.25.0)\n", + "Requirement already satisfied: torchvision in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (0.15.2+cu118)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (1.22.4)\n", + "Requirement 
already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (1.2.2)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (1.10.1)\n", + "Requirement already satisfied: nltk in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (3.8.1)\n", + "Requirement already satisfied: sentencepiece in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (0.1.99)\n", + "Requirement already satisfied: huggingface-hub>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (0.16.4)\n", + "Requirement already satisfied: pandas>=1.5 in /usr/local/lib/python3.10/dist-packages (from eland) (1.5.3)\n", + "Requirement already satisfied: matplotlib>=3.6 in /usr/local/lib/python3.10/dist-packages (from eland) (3.7.1)\n", + "Requirement already satisfied: elastic-transport<9,>=8 in /usr/local/lib/python3.10/dist-packages (from elasticsearch) (8.4.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.12.2)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (23.1)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.1)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2022.10.31)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.27.1)\n", + "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.13.3)\n", + "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.3.1)\n", + "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch) 
(4.7.1)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch) (1.11.1)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch) (3.1)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch) (3.1.2)\n", + "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch) (2.0.0)\n", + "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch) (3.25.2)\n", + "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch) (16.0.6)\n", + "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.10/dist-packages (from streamlit) (4.2.2)\n", + "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/lib/python3/dist-packages (from streamlit) (1.4)\n", + "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.10/dist-packages (from streamlit) (5.3.1)\n", + "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.10/dist-packages (from streamlit) (8.1.6)\n", + "Requirement already satisfied: importlib-metadata<7,>=1.4 in /usr/lib/python3/dist-packages (from streamlit) (4.6.4)\n", + "Requirement already satisfied: protobuf<5,>=3.20 in /usr/local/lib/python3.10/dist-packages (from streamlit) (3.20.3)\n", + "Requirement already satisfied: pyarrow>=6.0 in /usr/local/lib/python3.10/dist-packages (from streamlit) (9.0.0)\n", + "Requirement already satisfied: pympler<2,>=0.9 in /usr/local/lib/python3.10/dist-packages (from streamlit) (1.0.1)\n", + "Requirement already satisfied: python-dateutil<3,>=2.7.3 in /usr/local/lib/python3.10/dist-packages (from streamlit) (2.8.2)\n", + "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.10/dist-packages (from streamlit) (13.4.2)\n", + "Requirement already satisfied: tenacity<9,>=8.1.0 in 
/usr/local/lib/python3.10/dist-packages (from streamlit) (8.2.2)\n", + "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.10/dist-packages (from streamlit) (0.10.2)\n", + "Requirement already satisfied: tzlocal<5,>=1.1 in /usr/local/lib/python3.10/dist-packages (from streamlit) (4.3.1)\n", + "Requirement already satisfied: validators<1,>=0.2 in /usr/local/lib/python3.10/dist-packages (from streamlit) (0.20.0)\n", + "Requirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.10/dist-packages (from streamlit) (3.1.32)\n", + "Requirement already satisfied: pydeck<1,>=0.8 in /usr/local/lib/python3.10/dist-packages (from streamlit) (0.8.0)\n", + "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.10/dist-packages (from streamlit) (6.3.1)\n", + "Requirement already satisfied: watchdog>=2.1.5 in /usr/local/lib/python3.10/dist-packages (from streamlit) (3.0.0)\n", + "Requirement already satisfied: entrypoints in /usr/local/lib/python3.10/dist-packages (from altair<6,>=4.0->streamlit) (0.4)\n", + "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.10/dist-packages (from altair<6,>=4.0->streamlit) (4.3.3)\n", + "Requirement already satisfied: toolz in /usr/local/lib/python3.10/dist-packages (from altair<6,>=4.0->streamlit) (0.12.0)\n", + "Requirement already satisfied: urllib3<2,>=1.26.2 in /usr/local/lib/python3.10/dist-packages (from elastic-transport<9,>=8->elasticsearch) (1.26.16)\n", + "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from elastic-transport<9,>=8->elasticsearch) (2023.7.22)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.10/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit) (4.0.10)\n", + "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.4.0->sentence-transformers) (2023.6.0)\n", + "Requirement already satisfied: 
contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.6->eland) (1.1.0)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.6->eland) (0.11.0)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.6->eland) (4.41.1)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.6->eland) (1.4.4)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.6->eland) (3.1.0)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.5->eland) (2022.7.1)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch) (2.1.3)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil<3,>=2.7.3->streamlit) (1.16.0)\n", + "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.12)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.4)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich<14,>=10.14.0->streamlit) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich<14,>=10.14.0->streamlit) (2.14.0)\n", + "Requirement already satisfied: pytz-deprecation-shim in /usr/local/lib/python3.10/dist-packages (from tzlocal<5,>=1.1->streamlit) (0.1.0.post0)\n", + "Requirement already satisfied: decorator>=3.4.0 in /usr/local/lib/python3.10/dist-packages (from validators<1,>=0.2->streamlit) (4.4.2)\n", + "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages 
(from nltk->sentence-transformers) (1.3.1)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->sentence-transformers) (3.2.0)\n", + "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch) (1.3.0)\n", + "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.10/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit) (5.0.0)\n", + "Requirement already satisfied: attrs>=17.4.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit) (23.1.0)\n", + "Requirement already satisfied: pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit) (0.19.3)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit) (0.1.2)\n", + "Requirement already satisfied: tzdata in /usr/local/lib/python3.10/dist-packages (from pytz-deprecation-shim->tzlocal<5,>=1.1->streamlit) (2023.3)\n" + ] + } + ], + "source": [ + "!pip install sentence-transformers eland elasticsearch transformers torch tqdm Pillow streamlit" + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Upload NLP model for querying\n", + "Using an `eland` library you will import NLP CLIP model. The model will tran\n", + "sfer your search query into vector which will be used for the search over the set of images stored in Elasticsearch.\n", + "\n", + "The model used is [clip-ViT-B-32-multilingual-v1](https://huggingface.co/sentence-transformers/clip-ViT-B-32-multilingual-v1) because the image embeddings are also generated by the CLIP model.\n", + "\n", + "How to get cloud id? 
Go to [ESS cloud](https://cloud.elastic.co/logout?redirectTo=%2Fhome&reason=unauthorised) and `On the deployment overview page, copy down the Cloud ID.`\n", + "\n", + "The authentication uses an API key (`--es-api-key`). Learn how to generate an [API key](https://www.elastic.co/guide/en/kibana/current/api-keys.html#create-api-key).\n", + "```\n", + "$ eland_import_hub_model --cloud-id $CLOUD_ID \\\n", + " --hub-model-id sentence-transformers/clip-ViT-B-32-multilingual-v1 \\\n", + " --task-type text_embedding --es-api-key $API_KEY --start\n", + "```" + ], + "metadata": { + "id": "eIV5lAnVt9L7" + } + }, + { + "cell_type": "code", + "source": [ + "API_KEY = \"\"\n", + "CLOUD_ID = \"\"\n", + "!eland_import_hub_model --cloud-id $CLOUD_ID --hub-model-id sentence-transformers/clip-ViT-B-32-multilingual-v1 --task-type text_embedding --es-api-key $API_KEY --start" + ], + "metadata": { + "id": "tVhL9jBnuAAQ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Import Python libraries" + ], + "metadata": { + "id": "hVxWnFflUCZv" + } + }, + { + "cell_type": "code", + "source": [ + "from elasticsearch import Elasticsearch\n", + "from elasticsearch.helpers import parallel_bulk\n", + "import requests\n", + "import os\n", + "import sys\n", + "\n", + "# import shutil\n", + "import zipfile\n", + "from tqdm.auto import tqdm\n", + "import pandas as pd\n", + "from PIL import Image\n", + "from sentence_transformers import SentenceTransformer\n", + "import urllib.request\n", + "\n", + "# import urllib.error\n", + "import json\n", + "from getpass import getpass" + ], + "metadata": { + "id": "I0pRCbYMuMVn" + }, + "execution_count": 17, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Connect to Elasticsearch cluster\n", + "Use your own cluster details `ELASTIC_CLOUD_ID`, `API_KEY`."
+ ], + "metadata": { + "id": "Klv3rywdUJBN" + } + }, + { + "cell_type": "code", + "source": [ + "# ESS Cloud connection definition using an API_KEY\n", + "ELASTIC_CLOUD_ID = getpass(\"Elastic Cloud ID: \")\n", + "API_KEY = getpass(\"Elastic API key: \")\n", + "\n", + "# ELASTIC_CLOUD_USER = \"elastic\"\n", + "# CLOUD_PASSWORD = getpass(\"Elastic Password\")\n", + "\n", + "es = Elasticsearch(\n", + " cloud_id=ELASTIC_CLOUD_ID,\n", + " # basic_auth=(ELASTIC_CLOUD_USER, ELASTIC_CLOUD_PASSWORD),\n", + " api_key=API_KEY,\n", + " request_timeout=600,\n", + ")\n", + "\n", + "es.info() # should return cluster info" + ], + "metadata": { + "id": "YwN8RmFY3FQI", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "d0d0e31e-2ad2-46fe-ef8c-8c8bce7e1c48" + }, + "execution_count": 19, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Elastic Cloud ID: ··········\n", + "Elastic API key: ··········\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "ObjectApiResponse({'name': 'instance-0000000000', 'cluster_name': 'a597bbe1e0d047c494e7d4015f67ef37', 'cluster_uuid': 'EnT0vwwSSZeAahPw3Vhsuw', 'version': {'number': '8.8.2', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '98e1271edf932a480e4262a471281f1ee295ce6b', 'build_date': '2023-06-26T05:16:16.196344851Z', 'build_snapshot': False, 'lucene_version': '9.6.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'})" + ] + }, + "metadata": {}, + "execution_count": 19 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Create Index and mappings for Images\n", + "Befor you can index documents into Elasticsearch, you need to create an Index with correct mappings." 
+ ], + "metadata": { + "id": "IW-GIlH2OxB4" + } + }, + { + "cell_type": "code", + "source": [ + "# Destination Index name\n", + "INDEX_NAME = \"images\"\n", + "# If you want to delete previous version of the Index\n", + "DELETE_INDEX = False\n", + "\n", + "INDEX_MAPPING = {\n", + " \"properties\": {\n", + " \"image_embedding\": {\n", + " \"type\": \"dense_vector\",\n", + " \"dims\": 512,\n", + " \"index\": True,\n", + " \"similarity\": \"cosine\",\n", + " },\n", + " \"photo_id\": {\"type\": \"keyword\"},\n", + " \"photo_image_url\": {\"type\": \"keyword\"},\n", + " \"ai_description\": {\"type\": \"text\"},\n", + " \"photo_description\": {\"type\": \"text\"},\n", + " \"photo_url\": {\"type\": \"keyword\"},\n", + " \"photographer_first_name\": {\"type\": \"keyword\"},\n", + " \"photographer_last_name\": {\"type\": \"keyword\"},\n", + " \"photographer_username\": {\"type\": \"keyword\"},\n", + " \"exif_camera_make\": {\"type\": \"keyword\"},\n", + " \"exif_camera_model\": {\"type\": \"keyword\"},\n", + " \"exif_iso\": {\"type\": \"integer\"},\n", + " }\n", + "}\n", + "\n", + "# Index settings\n", + "INDEX_SETTINGS = {\n", + " \"index\": {\n", + " \"number_of_replicas\": \"1\",\n", + " \"number_of_shards\": \"1\",\n", + " \"refresh_interval\": \"5s\",\n", + " }\n", + "}\n", + "\n", + "if DELETE_INDEX:\n", + " if es.indices.exists(index=INDEX_NAME):\n", + " print(\"Deleting existing %s\" % INDEX_NAME)\n", + " es.indices.delete(index=INDEX_NAME, ignore=[400, 404])\n", + "\n", + "if not es.indices.exists(index=INDEX_NAME):\n", + " print(\"Creating index %s\" % INDEX_NAME)\n", + " es.indices.create(\n", + " index=INDEX_NAME,\n", + " mappings=INDEX_MAPPING,\n", + " settings=INDEX_SETTINGS,\n", + " ignore=[400, 404],\n", + " )" + ], + "metadata": { + "id": "xAkc1OVcOxy3" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Get image dataset and embeddings\n", + "Download:\n", + "- The example image dataset is from 
[Unsplash](https://github.com/unsplash/datasets)\n", + "- The [Image embeddings](https://github.com/radoondas/flask-elastic-nlp/blob/main/embeddings/blogs/blogs-no-embeddings.json.zip) are pre-generated using CLIP model\n", + "\n", + "Then unzip both files." + ], + "metadata": { + "id": "NKE-j0kPUMn_" + } + }, + { + "cell_type": "code", + "source": [ + "!wget https://unsplash.com/data/lite/1.2.0 -O data/unsplash-research-dataset-lite-1.2.0.zip\n", + "!wget https://raw.githubusercontent.com/radoondas/flask-elastic-nlp/main/embeddings/images/image-embeddings.json.zip -P data" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "zFGaPDRR5mqT", + "outputId": "0114cdd6-a714-41ab-9b46-3013bd36698a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2023-07-25 14:28:32-- https://unsplash.com/data/lite/1.2.0\n", + "Resolving unsplash.com (unsplash.com)... 151.101.65.181, 151.101.1.181, 151.101.129.181, ...\n", + "Connecting to unsplash.com (unsplash.com)|151.101.65.181|:443... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://unsplash-datasets.s3.amazonaws.com/lite/1.2.0/unsplash-research-dataset-lite-1.2.0.zip [following]\n", + "--2023-07-25 14:28:32-- https://unsplash-datasets.s3.amazonaws.com/lite/1.2.0/unsplash-research-dataset-lite-1.2.0.zip\n", + "Resolving unsplash-datasets.s3.amazonaws.com (unsplash-datasets.s3.amazonaws.com)... 52.217.102.84, 3.5.25.253, 52.217.96.188, ...\n", + "Connecting to unsplash-datasets.s3.amazonaws.com (unsplash-datasets.s3.amazonaws.com)|52.217.102.84|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 632351052 (603M) [application/zip]\n", + "Saving to: ‘data/unsplash-research-dataset-lite-1.2.0.zip’\n", + "\n", + "data/unsplash-resea 100%[===================>] 603.06M 14.1MB/s in 42s \n", + "\n", + "2023-07-25 14:29:16 (14.2 MB/s) - ‘data/unsplash-research-dataset-lite-1.2.0.zip’ saved [632351052/632351052]\n", + "\n", + "--2023-07-25 14:29:16-- https://raw.githubusercontent.com/radoondas/flask-elastic-nlp/main/embeddings/images/image-embeddings.json.zip\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 94256217 (90M) [application/zip]\n", + "Saving to: ‘data/image-embeddings.json.zip.1’\n", + "\n", + "image-embeddings.js 100%[===================>] 89.89M 164MB/s in 0.5s \n", + "\n", + "2023-07-25 14:29:16 (164 MB/s) - ‘data/image-embeddings.json.zip.1’ saved [94256217/94256217]\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Unzip downloaded files\n", + "UNSPLASH_ZIP_FILE = \"data/unsplash-research-dataset-lite-1.2.0.zip\"\n", + "EMBEDDINGS_ZIP_FILE = \"data/image-embeddings.json.zip\"\n", + "\n", + "with zipfile.ZipFile(UNSPLASH_ZIP_FILE, \"r\") as zip_ref:\n", + " print(\"Extracting file \", UNSPLASH_ZIP_FILE, \".\")\n", + " zip_ref.extractall(\"data/unsplash/\")\n", + "\n", + "with zipfile.ZipFile(EMBEDDINGS_ZIP_FILE, \"r\") as zip_ref:\n", + " print(\"Extracting file \", EMBEDDINGS_ZIP_FILE, \".\")\n", + " zip_ref.extractall(\"data/embeddings/\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "MBh4AQ8i7C0-", + "outputId": "17a50b7f-f052-4b72-daa8-0e8fc630326f" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Extracting file 
data/unsplash-research-dataset-lite-1.2.0.zip .\n", + "Extracting file data/image-embeddings.json.zip .\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Image embeddings\n", + "We have now 2 options how to proceed.\n", + "1. Import all pregenerated image embeddings (~19k). This is faster option with a lot of images available in a short time.\n", + "2. Import a small subset of randomly choosen images to see the process of generating of image embeddings using external Clip model." + ], + "metadata": { + "id": "p6H7QYctQQA7" + } + }, + { + "cell_type": "code", + "source": [ + "# define helper function\n", + "def gen_rows(df):\n", + " for doc in df.to_dict(orient=\"records\"):\n", + " yield doc" + ], + "metadata": { + "id": "03YvC-_JY9OE" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## 1. Import all pregenerated image embeddings\n", + "This option lets you inport ~19k documents woth pregenenerated image embeddings with metadata.\n", + "\n", + "The process downloads files with images information, merge them and index into Elasticsearch." 
+ ], + "metadata": { + "id": "qhZRdUyAQd-s" + } + }, + { + "cell_type": "code", + "source": [ + "df_unsplash = pd.read_csv(\"data/unsplash/\" + \"photos.tsv000\", sep=\"\\t\", header=0)\n", + "\n", + "# follwing 8 lines are fix for inconsistent/incorrect data\n", + "df_unsplash[\"photo_description\"].fillna(\"\", inplace=True)\n", + "df_unsplash[\"ai_description\"].fillna(\"\", inplace=True)\n", + "df_unsplash[\"photographer_first_name\"].fillna(\"\", inplace=True)\n", + "df_unsplash[\"photographer_last_name\"].fillna(\"\", inplace=True)\n", + "df_unsplash[\"photographer_username\"].fillna(\"\", inplace=True)\n", + "df_unsplash[\"exif_camera_make\"].fillna(\"\", inplace=True)\n", + "df_unsplash[\"exif_camera_model\"].fillna(\"\", inplace=True)\n", + "df_unsplash[\"exif_iso\"].fillna(0, inplace=True)\n", + "## end of fix\n", + "\n", + "# read subset of columns from the original/downloaded dataset\n", + "df_unsplash_subset = df_unsplash[\n", + " [\n", + " \"photo_id\",\n", + " \"photo_url\",\n", + " \"photo_image_url\",\n", + " \"photo_description\",\n", + " \"ai_description\",\n", + " \"photographer_first_name\",\n", + " \"photographer_last_name\",\n", + " \"photographer_username\",\n", + " \"exif_camera_make\",\n", + " \"exif_camera_model\",\n", + " \"exif_iso\",\n", + " ]\n", + "]\n", + "\n", + "# read all pregenerated embeddings\n", + "df_embeddings = pd.read_json(\"data/embeddings/\" + \"image-embeddings.json\", lines=True)\n", + "\n", + "df_merged = pd.merge(df_unsplash_subset, df_embeddings, on=\"photo_id\", how=\"inner\")\n", + "\n", + "count = 0\n", + "for success, info in parallel_bulk(\n", + " client=es,\n", + " actions=gen_rows(df_merged),\n", + " thread_count=5,\n", + " chunk_size=1000,\n", + " index=INDEX_NAME,\n", + "):\n", + " if success:\n", + " count += 1\n", + " if count % 1000 == 0:\n", + " print(\"Indexed %s documents\" % str(count), flush=True)\n", + " sys.stdout.flush()\n", + " else:\n", + " print(\"Doc failed\", info)\n", + "\n", + 
"print(\"Indexed %s image embeddings documents\" % str(count), flush=True)\n", + "sys.stdout.flush()" + ], + "metadata": { + "id": "32xrbSUXTODQ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## 2. Import small number of random\n", + "In this part you will randomly choose small number of images and then generate image embeddings. The script will then index documents into Elasticserach." + ], + "metadata": { + "id": "xypgh4lFQmkc" + } + }, + { + "cell_type": "code", + "source": [ + "NUMBER_OF_IMAGES = 20\n", + "INDEX_NAME = \"images-test\"\n", + "\n", + "df_unsplash = pd.read_csv(\"data/unsplash/\" + \"photos.tsv000\", sep=\"\\t\", header=0)\n", + "\n", + "## stat fix\n", + "# follwing 8 lines are fix for inconsistent/incorrect data\n", + "df_unsplash[\"photo_description\"].fillna(\"\", inplace=True)\n", + "df_unsplash[\"ai_description\"].fillna(\"\", inplace=True)\n", + "df_unsplash[\"photographer_first_name\"].fillna(\"\", inplace=True)\n", + "df_unsplash[\"photographer_last_name\"].fillna(\"\", inplace=True)\n", + "df_unsplash[\"photographer_username\"].fillna(\"\", inplace=True)\n", + "df_unsplash[\"exif_camera_make\"].fillna(\"\", inplace=True)\n", + "df_unsplash[\"exif_camera_model\"].fillna(\"\", inplace=True)\n", + "df_unsplash[\"exif_iso\"].fillna(0, inplace=True)\n", + "## end of fix\n", + "\n", + "df_unsplash_subset = df_unsplash[\n", + " [\n", + " \"photo_id\",\n", + " \"photo_url\",\n", + " \"photo_image_url\",\n", + " \"photo_description\",\n", + " \"ai_description\",\n", + " \"photographer_first_name\",\n", + " \"photographer_last_name\",\n", + " \"photographer_username\",\n", + " \"exif_camera_make\",\n", + " \"exif_camera_model\",\n", + " \"exif_iso\",\n", + " ]\n", + "]\n", + "\n", + "df_random_subset = df_unsplash_subset.sample(n=NUMBER_OF_IMAGES, replace=False)\n", + "df_random_subset = df_random_subset.reset_index()\n", + "\n", + "# Load model CLIP\n", + "img_model = 
SentenceTransformer(\"clip-ViT-B-32\")\n", + "\n", + "# new list of image documents for indexing into ES\n", + "lst = []\n", + "if not os.path.exists(\"data/images\"):\n", + " os.mkdir(\"data/images\")\n", + "\n", + "for index, row in df_random_subset.iterrows():\n", + " # open image from url\n", + " img_path = \"data/images/\" + row[\"photo_id\"]\n", + " try:\n", + " urllib.request.urlretrieve(row[\"photo_image_url\"], img_path)\n", + " print(row[\"photo_id\"] + \" \" + row[\"photo_url\"])\n", + " except urllib.error.HTTPError as err:\n", + " if err.code == 404:\n", + " print(\"404 error: Image not found at {}\".format(row[\"photo_image_url\"]))\n", + " else:\n", + " raise\n", + "\n", + " img = Image.open(img_path)\n", + " # create doc\n", + " doc = {}\n", + " embedding = img_model.encode(img)\n", + " doc[\"photo_id\"] = row[\"photo_id\"]\n", + " doc[\"image_embedding\"] = embedding.tolist()\n", + " lst.append(doc)\n", + " # print(doc)\n", + "\n", + " # Image cleanup.\n", + " # If file exists, delete it.\n", + " if os.path.exists(img_path):\n", + " os.remove(img_path)\n", + "\n", + "# read all pregenerated embeddings\n", + "df_embeddings = pd.read_json(\"data/embeddings/\" + \"image-embeddings.json\", lines=True)\n", + "\n", + "df_merged = pd.merge(df_random_subset, pd.DataFrame(lst), on=\"photo_id\", how=\"inner\")\n", + "# print(df_merged)\n", + "\n", + "count = 0\n", + "for success, info in parallel_bulk(\n", + " client=es,\n", + " actions=gen_rows(df_merged),\n", + " thread_count=5,\n", + " chunk_size=10,\n", + " index=INDEX_NAME,\n", + "):\n", + " if success:\n", + " count += 1\n", + " if count % 10 == 0:\n", + " print(\"Indexed %s documents\" % str(count), flush=True)\n", + " sys.stdout.flush()\n", + " else:\n", + " print(\"Doc failed\", info)\n", + "\n", + "print(\"Indexed %s image embeddings documents\" % str(count), flush=True)\n", + "sys.stdout.flush()" + ], + "metadata": { + "id": "r_txQjP2RKnr" + }, + "execution_count": null, + "outputs": [] + }, + { + 
"cell_type": "markdown", + "source": [ + "### Query the image dataset\n", + "The next step is to run a query to search for images. The example query searches for `\"model_text\": \"Valentine day flowers\"` using the model we uploaded to Elasticsearch `sentence-transformers__clip-vit-b-32-multilingual-v1`.\n", + "\n", + "The process is one query even it internaly consists of two tasks. One is to tramsform your search text into a vector using the NLP model and the second task is to run the vector search over the image dataset.\n", + "```\n", + "POST images/_search\n", + "{\n", + " \"knn\": {\n", + " \"field\": \"image_embedding\",\n", + " \"k\": 5,\n", + " \"num_candidates\": 10,\n", + " \"query_vector_builder\": {\n", + " \"text_embedding\": {\n", + " \"model_id\": \"sentence-transformers__clip-vit-b-32-multilingual-v1\",\n", + " \"model_text\": \"Valentine day flowers\"\n", + " }\n", + " }\n", + " },\n", + " \"fields\": [\n", + " \"photo_description\",\n", + " \"ai_description\",\n", + " \"photo_url\"\n", + " ],\n", + " \"_source\": false\n", + "}\n", + "```\n", + "\n" + ], + "metadata": { + "id": "-_i2CIpSz9vw" + } + }, + { + "cell_type": "code", + "source": [ + "# Search queary\n", + "WHAT_ARE_YOU_LOOKING_FOR = \"Valentine day flowers\"\n", + "INDEX_IM_EMBED = \"images\"\n", + "\n", + "source_fields = [\n", + " \"photo_description\",\n", + " \"ai_description\",\n", + " \"photo_url\",\n", + " \"photo_image_url\",\n", + " \"photographer_first_name\",\n", + " \"photographer_username\",\n", + " \"photographer_last_name\",\n", + " \"photo_id\",\n", + "]\n", + "query = {\n", + " \"field\": \"image_embedding\",\n", + " \"k\": 5,\n", + " \"num_candidates\": 100,\n", + " \"query_vector_builder\": {\n", + " \"text_embedding\": {\n", + " \"model_id\": \"sentence-transformers__clip-vit-b-32-multilingual-v1\",\n", + " \"model_text\": WHAT_ARE_YOU_LOOKING_FOR,\n", + " }\n", + " },\n", + "}\n", + "\n", + "response = es.search(\n", + " index=INDEX_IM_EMBED, fields=source_fields, 
knn=query, source=False\n", + ")\n", + "\n", + "print(response.body)\n", + "\n", + "# the code writes the response into a file for the streamlit UI used in the optional step.\n", + "with open(\"json_data.json\", \"w\") as outfile:\n", + " json.dump(response.body[\"hits\"][\"hits\"], outfile)\n", + "\n", + "# Use the `loads()` method to load the JSON data\n", + "dfr = json.loads(json.dumps(response.body[\"hits\"][\"hits\"]))\n", + "# Pass the generated JSON data into a pandas dataframe\n", + "dfr = pd.DataFrame(dfr)\n", + "# Print the data frame\n", + "dfr\n", + "\n", + "results = pd.json_normalize(json.loads(json.dumps(response.body[\"hits\"][\"hits\"])))\n", + "# results\n", + "results[\n", + " [\n", + " \"_id\",\n", + " \"_score\",\n", + " \"fields.photo_id\",\n", + " \"fields.photo_image_url\",\n", + " \"fields.photo_description\",\n", + " \"fields.photographer_first_name\",\n", + " \"fields.photographer_last_name\",\n", + " \"fields.ai_description\",\n", + " \"fields.photo_url\",\n", + " ]\n", + "]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 375 + }, + "id": "wdicpvRlzmXG", + "outputId": "00550041-0aed-4f51-ccd3-18eb705ff7ed" + }, + "execution_count": 35, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "{'took': 114, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 5, 'relation': 'eq'}, 'max_score': 0.646751, 'hits': [{'_index': 'images', '_id': 'nK5Fh4kBLg4Kd5ySLbKC', '_score': 0.646751, 'fields': {'photo_image_url': ['https://images.unsplash.com/photo-1587404787163-d03a28fcc1f0'], 'photo_id': ['gQFZxLe3m4g'], 'photographer_first_name': ['Vadim'], 'photo_description': ['instagram.com/vadimsadovski'], 'photographer_last_name': ['Sadovski'], 'photo_url': ['https://unsplash.com/photos/gQFZxLe3m4g'], 'photographer_username': ['vadimsadovski'], 'ai_description': ['']}}, {'_index': 'images', '_id': 'Xa5Eh4kBLg4Kd5yS84Qf', 
'_score': 0.64675057, 'fields': {'photo_image_url': ['https://images.unsplash.com/photo-1574259148543-dd376a61339f'], 'photo_id': ['g5Mhx29yp-A'], 'photographer_first_name': ['Erin'], 'photo_description': ['Cute but grumpy cat in the Austrian mountains'], 'photographer_last_name': ['East'], 'photo_url': ['https://unsplash.com/photos/g5Mhx29yp-A'], 'photographer_username': ['mserineast'], 'ai_description': ['brown Persian cat on white bench']}}, {'_index': 'images', '_id': '265Eh4kBLg4Kd5yS84Uf', '_score': 0.64244866, 'fields': {'photo_image_url': ['https://images.unsplash.com/photo-1565772101068-dec21f7b36eb'], 'photo_id': ['9KZ0PGNCxNE'], 'photographer_first_name': ['Perchek'], 'photo_description': [''], 'photographer_last_name': ['Industrie'], 'photo_url': ['https://unsplash.com/photos/9KZ0PGNCxNE'], 'photographer_username': ['perchek_industrie'], 'ai_description': ['siamese cat']}}, {'_index': 'images', '_id': 'xq5Fh4kBLg4Kd5ySEpuC', '_score': 0.64216036, 'fields': {'photo_image_url': ['https://images.unsplash.com/photo-1589994205353-325f40210d02'], 'photo_id': ['VOZQkkKXvY4'], 'photographer_first_name': ['Andrey'], 'photo_description': [''], 'photographer_last_name': ['Svistunov'], 'photo_url': ['https://unsplash.com/photos/VOZQkkKXvY4'], 'photographer_username': ['svistal13'], 'ai_description': ['orange tabby cat on ground covered with snow during daytime']}}, {'_index': 'images', '_id': 'WK5Eh4kBLg4Kd5yS5XcD', '_score': 0.64185303, 'fields': {'photo_image_url': ['https://images.unsplash.com/photo-1556820161-b605d166fce1'], 'photo_id': ['wmz8y6L6c_k'], 'photographer_first_name': ['Phillip'], 'photo_description': [''], 'photographer_last_name': ['Suitcases'], 'photo_url': ['https://unsplash.com/photos/wmz8y6L6c_k'], 'photographer_username': ['nillait'], 'ai_description': ['brown and black kitten close-up photography']}}]}}\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " _id _score fields.photo_id \\\n", + "0 
nK5Fh4kBLg4Kd5ySLbKC 0.646751 [gQFZxLe3m4g] \n", + "1 Xa5Eh4kBLg4Kd5yS84Qf 0.646751 [g5Mhx29yp-A] \n", + "2 265Eh4kBLg4Kd5yS84Uf 0.642449 [9KZ0PGNCxNE] \n", + "3 xq5Fh4kBLg4Kd5ySEpuC 0.642160 [VOZQkkKXvY4] \n", + "4 WK5Eh4kBLg4Kd5yS5XcD 0.641853 [wmz8y6L6c_k] \n", + "\n", + " fields.photo_image_url \\\n", + "0 [https://images.unsplash.com/photo-15874047871... \n", + "1 [https://images.unsplash.com/photo-15742591485... \n", + "2 [https://images.unsplash.com/photo-15657721010... \n", + "3 [https://images.unsplash.com/photo-15899942053... \n", + "4 [https://images.unsplash.com/photo-1556820161-... \n", + "\n", + " fields.photo_description \\\n", + "0 [instagram.com/vadimsadovski] \n", + "1 [Cute but grumpy cat in the Austrian mountains] \n", + "2 [] \n", + "3 [] \n", + "4 [] \n", + "\n", + " fields.photographer_first_name fields.photographer_last_name \\\n", + "0 [Vadim] [Sadovski] \n", + "1 [Erin] [East] \n", + "2 [Perchek] [Industrie] \n", + "3 [Andrey] [Svistunov] \n", + "4 [Phillip] [Suitcases] \n", + "\n", + " fields.ai_description \\\n", + "0 [] \n", + "1 [brown Persian cat on white bench] \n", + "2 [siamese cat] \n", + "3 [orange tabby cat on ground covered with snow ... \n", + "4 [brown and black kitten close-up photography] \n", + "\n", + " fields.photo_url \n", + "0 [https://unsplash.com/photos/gQFZxLe3m4g] \n", + "1 [https://unsplash.com/photos/g5Mhx29yp-A] \n", + "2 [https://unsplash.com/photos/9KZ0PGNCxNE] \n", + "3 [https://unsplash.com/photos/VOZQkkKXvY4] \n", + "4 [https://unsplash.com/photos/wmz8y6L6c_k] " + ], + "text/html": [ + "\n", + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
_id_scorefields.photo_idfields.photo_image_urlfields.photo_descriptionfields.photographer_first_namefields.photographer_last_namefields.ai_descriptionfields.photo_url
0nK5Fh4kBLg4Kd5ySLbKC0.646751[gQFZxLe3m4g][https://images.unsplash.com/photo-15874047871...[instagram.com/vadimsadovski][Vadim][Sadovski][][https://unsplash.com/photos/gQFZxLe3m4g]
1Xa5Eh4kBLg4Kd5yS84Qf0.646751[g5Mhx29yp-A][https://images.unsplash.com/photo-15742591485...[Cute but grumpy cat in the Austrian mountains][Erin][East][brown Persian cat on white bench][https://unsplash.com/photos/g5Mhx29yp-A]
2265Eh4kBLg4Kd5yS84Uf0.642449[9KZ0PGNCxNE][https://images.unsplash.com/photo-15657721010...[][Perchek][Industrie][siamese cat][https://unsplash.com/photos/9KZ0PGNCxNE]
3xq5Fh4kBLg4Kd5ySEpuC0.642160[VOZQkkKXvY4][https://images.unsplash.com/photo-15899942053...[][Andrey][Svistunov][orange tabby cat on ground covered with snow ...[https://unsplash.com/photos/VOZQkkKXvY4]
4WK5Eh4kBLg4Kd5yS5XcD0.641853[wmz8y6L6c_k][https://images.unsplash.com/photo-1556820161-...[][Phillip][Suitcases][brown and black kitten close-up photography][https://unsplash.com/photos/wmz8y6L6c_k]
\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + "
\n", + " \n", + "
\n", + "\n", + "\n", + "\n", + " \n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 35 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# [Optional] Simple streamlit UI\n", + "In the following section, you will view the response in a simple UI for better visualisation.\n", + "\n", + "The query in the previous step did write down a file response `json_data.json` for the UI to load and visualise.\n", + "\n", + "Follow the steps below to see the results in a table." + ], + "metadata": { + "id": "Ry62sfHFHFi9" + } + }, + { + "cell_type": "markdown", + "source": [ + "### Install tunnel library" + ], + "metadata": { + "id": "iUAbRqr8II-x" + } + }, + { + "cell_type": "code", + "source": [ + "!npm install localtunnel" + ], + "metadata": { + "id": "RGEmAt2DjtN7", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "f6c37d54-7e09-4e59-fc21-8a3db4fa840d" + }, + "execution_count": 12, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[K\u001b[?25h\u001b[37;40mnpm\u001b[0m \u001b[0m\u001b[30;43mWARN\u001b[0m \u001b[0m\u001b[35msaveError\u001b[0m ENOENT: no such file or directory, open '/content/package.json'\n", + "\u001b[0m\u001b[37;40mnpm\u001b[0m \u001b[0m\u001b[34;40mnotice\u001b[0m\u001b[35m\u001b[0m created a lockfile as package-lock.json. 
You should commit this file.\n", + "\u001b[0m\u001b[37;40mnpm\u001b[0m \u001b[0m\u001b[30;43mWARN\u001b[0m \u001b[0m\u001b[35menoent\u001b[0m ENOENT: no such file or directory, open '/content/package.json'\n", + "\u001b[0m\u001b[37;40mnpm\u001b[0m \u001b[0m\u001b[30;43mWARN\u001b[0m\u001b[35m\u001b[0m content No description\n", + "\u001b[0m\u001b[37;40mnpm\u001b[0m \u001b[0m\u001b[30;43mWARN\u001b[0m\u001b[35m\u001b[0m content No repository field.\n", + "\u001b[0m\u001b[37;40mnpm\u001b[0m \u001b[0m\u001b[30;43mWARN\u001b[0m\u001b[35m\u001b[0m content No README data\n", + "\u001b[0m\u001b[37;40mnpm\u001b[0m \u001b[0m\u001b[30;43mWARN\u001b[0m\u001b[35m\u001b[0m content No license field.\n", + "\u001b[0m\n", + "\u001b[K\u001b[?25h+ localtunnel@2.0.2\n", + "added 22 packages from 22 contributors and audited 22 packages in 5.903s\n", + "\n", + "3 packages are looking for funding\n", + " run `npm fund` for details\n", + "\n", + "found \u001b[92m0\u001b[0m vulnerabilities\n", + "\n", + "\u001b[K\u001b[?25h" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Create application" + ], + "metadata": { + "id": "KUAfucnYITka" + } + }, + { + "cell_type": "code", + "source": [ + "%%writefile app.py\n", + "\n", + "import streamlit as st\n", + "import json\n", + "import pandas as pd\n", + "\n", + "\n", + "def get_image_preview(image_url):\n", + " \"\"\"Returns an HTML tag with preview of the image.\"\"\"\n", + " return f\"\"\"\"\"\"\n", + "\n", + "\n", + "def get_url_link(photo_url):\n", + " \"\"\"Returns an HTML tag to the image page.\"\"\"\n", + " return f\"\"\" {photo_url} \"\"\"\n", + "\n", + "\n", + "def main():\n", + " \"\"\"Creates a Streamlit app with a table of images.\"\"\"\n", + " data = json.load(open(\"json_data.json\"))\n", + " table = []\n", + " for image in data:\n", + " image_url = image[\"fields\"][\"photo_image_url\"][0]\n", + " image_preview = get_image_preview(image_url)\n", + " photo_url = image[\"fields\"][\"photo_url\"][0]\n", + " 
photo_url_link = get_url_link(photo_url)\n", + " table.append([image_preview, image[\"fields\"][\"photo_id\"][0],\n", + " image[\"fields\"][\"photographer_first_name\"][0],\n", + " image[\"fields\"][\"photographer_last_name\"][0],\n", + " image[\"fields\"][\"photographer_username\"][0],\n", + " photo_url_link])\n", + "\n", + " st.write(pd.DataFrame(table, columns=[\"Image\", \"ID\", \"First Name\", \"Last Name\",\n", + " \"Photographer username\", \"Photo url\"]).to_html(escape = False),\n", + " unsafe_allow_html=True)\n", + "\n", + "\n", + "if __name__ == \"__main__\":\n", + " main()\n", + "\n" + ], + "metadata": { + "id": "9Wb7GOWMXFnF", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "6db23ef3-b25e-4f80-a3cb-6d08c1c78c16" + }, + "execution_count": 36, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Overwriting app.py\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Run app\n", + "Run the application and check your IP for the tunneling" + ], + "metadata": { + "id": "CjDhvbGhHuiz" + } + }, + { + "cell_type": "code", + "source": [ + "!streamlit run app.py &>/content/logs.txt & curl ipv4.icanhazip.com" + ], + "metadata": { + "id": "851CeYi8jvuF", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "46a64023-e990-4900-f482-5558237f08cc" + }, + "execution_count": 37, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "34.138.156.22\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Create the tunnel\n", + "Run the tunnel and use the link below to connect to the tunnel.\n", + "\n", + "Use the IP from the previous step to connect to the application" + ], + "metadata": { + "id": "4OuSLFHyHy5M" + } + }, + { + "cell_type": "code", + "source": [ + "!npx localtunnel --port 8501" + ], + "metadata": { + "id": "inF7ceBmjyE3", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": 
"559ce180-3f0f-4475-c9a9-46dc91389276" + }, + "execution_count": 38, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[K\u001b[?25hnpx: installed 22 in 2.186s\n", + "your url is: https://nine-facts-act.loca.lt\n", + "^C\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Resources\n", + "\n", + "Blog: https://www.elastic.co/blog/implement-image-similarity-search-elastic\n", + "\n", + "GH : https://github.com/radoondas/flask-elastic-image-search\n" + ], + "metadata": { + "id": "SbxbVzvQ7caR" + } + } + ] +} \ No newline at end of file diff --git a/notebooks/search/04-image-similarity.ipynb b/notebooks/search/04-image-similarity.ipynb deleted file mode 100644 index b5069481..00000000 --- a/notebooks/search/04-image-similarity.ipynb +++ /dev/null @@ -1,1264 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - } - }, - "cells": [ - { - "cell_type": "markdown", - "source": [ - "# How to implement Image search using Elasticsearch" - ], - "metadata": { - "id": "CepGq3Kvtdxi" - } - }, - { - "cell_type": "markdown", - "source": [ - "The workbook shows how to implement an Image search using Elasticsearch. You will index documents with image embeddings (generated or pre-generated) and then using NLP model be able to search using natural language description of the image.\n", - "\n", - "### Prerequisities\n", - "Before you start make sure you have Elasticsearch cluster running. The cluster must have at least one machine learning (ML) node with enough (4GB) memory." - ], - "metadata": { - "id": "oMu1SW_TQQrU" - } - }, - { - "cell_type": "markdown", - "source": [ - "### Install Python requirements\n", - "Before you start you need to install all required Python dependencies." 
- ], - "metadata": { - "id": "VFcdr8IDQE_H" - } - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "6WosfR55npKU", - "outputId": "033767ff-0eef-48cc-c9e7-efbf73c9cb67" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Requirement already satisfied: sentence-transformers in /usr/local/lib/python3.10/dist-packages (2.2.2)\n", - "Requirement already satisfied: eland in /usr/local/lib/python3.10/dist-packages (8.7.0)\n", - "Requirement already satisfied: elasticsearch in /usr/local/lib/python3.10/dist-packages (8.9.0)\n", - "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.31.0)\n", - "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (2.0.1+cu118)\n", - "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (4.65.0)\n", - "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (9.4.0)\n", - "Requirement already satisfied: streamlit in /usr/local/lib/python3.10/dist-packages (1.25.0)\n", - "Requirement already satisfied: torchvision in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (0.15.2+cu118)\n", - "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (1.22.4)\n", - "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (1.2.2)\n", - "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (1.10.1)\n", - "Requirement already satisfied: nltk in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (3.8.1)\n", - "Requirement already satisfied: sentencepiece in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (0.1.99)\n", - "Requirement already satisfied: huggingface-hub>=0.4.0 in 
/usr/local/lib/python3.10/dist-packages (from sentence-transformers) (0.16.4)\n", - "Requirement already satisfied: pandas>=1.5 in /usr/local/lib/python3.10/dist-packages (from eland) (1.5.3)\n", - "Requirement already satisfied: matplotlib>=3.6 in /usr/local/lib/python3.10/dist-packages (from eland) (3.7.1)\n", - "Requirement already satisfied: elastic-transport<9,>=8 in /usr/local/lib/python3.10/dist-packages (from elasticsearch) (8.4.0)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.12.2)\n", - "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (23.1)\n", - "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.1)\n", - "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2022.10.31)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.27.1)\n", - "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.13.3)\n", - "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.3.1)\n", - "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch) (4.7.1)\n", - "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch) (1.11.1)\n", - "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch) (3.1)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch) (3.1.2)\n", - "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch) (2.0.0)\n", - "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from 
triton==2.0.0->torch) (3.25.2)\n", - "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch) (16.0.6)\n", - "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.10/dist-packages (from streamlit) (4.2.2)\n", - "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/lib/python3/dist-packages (from streamlit) (1.4)\n", - "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.10/dist-packages (from streamlit) (5.3.1)\n", - "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.10/dist-packages (from streamlit) (8.1.6)\n", - "Requirement already satisfied: importlib-metadata<7,>=1.4 in /usr/lib/python3/dist-packages (from streamlit) (4.6.4)\n", - "Requirement already satisfied: protobuf<5,>=3.20 in /usr/local/lib/python3.10/dist-packages (from streamlit) (3.20.3)\n", - "Requirement already satisfied: pyarrow>=6.0 in /usr/local/lib/python3.10/dist-packages (from streamlit) (9.0.0)\n", - "Requirement already satisfied: pympler<2,>=0.9 in /usr/local/lib/python3.10/dist-packages (from streamlit) (1.0.1)\n", - "Requirement already satisfied: python-dateutil<3,>=2.7.3 in /usr/local/lib/python3.10/dist-packages (from streamlit) (2.8.2)\n", - "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.10/dist-packages (from streamlit) (13.4.2)\n", - "Requirement already satisfied: tenacity<9,>=8.1.0 in /usr/local/lib/python3.10/dist-packages (from streamlit) (8.2.2)\n", - "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.10/dist-packages (from streamlit) (0.10.2)\n", - "Requirement already satisfied: tzlocal<5,>=1.1 in /usr/local/lib/python3.10/dist-packages (from streamlit) (4.3.1)\n", - "Requirement already satisfied: validators<1,>=0.2 in /usr/local/lib/python3.10/dist-packages (from streamlit) (0.20.0)\n", - "Requirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.10/dist-packages (from 
streamlit) (3.1.32)\n", - "Requirement already satisfied: pydeck<1,>=0.8 in /usr/local/lib/python3.10/dist-packages (from streamlit) (0.8.0)\n", - "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.10/dist-packages (from streamlit) (6.3.1)\n", - "Requirement already satisfied: watchdog>=2.1.5 in /usr/local/lib/python3.10/dist-packages (from streamlit) (3.0.0)\n", - "Requirement already satisfied: entrypoints in /usr/local/lib/python3.10/dist-packages (from altair<6,>=4.0->streamlit) (0.4)\n", - "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.10/dist-packages (from altair<6,>=4.0->streamlit) (4.3.3)\n", - "Requirement already satisfied: toolz in /usr/local/lib/python3.10/dist-packages (from altair<6,>=4.0->streamlit) (0.12.0)\n", - "Requirement already satisfied: urllib3<2,>=1.26.2 in /usr/local/lib/python3.10/dist-packages (from elastic-transport<9,>=8->elasticsearch) (1.26.16)\n", - "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from elastic-transport<9,>=8->elasticsearch) (2023.7.22)\n", - "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.10/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit) (4.0.10)\n", - "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.4.0->sentence-transformers) (2023.6.0)\n", - "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.6->eland) (1.1.0)\n", - "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.6->eland) (0.11.0)\n", - "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.6->eland) (4.41.1)\n", - "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.6->eland) (1.4.4)\n", - "Requirement already satisfied: pyparsing>=2.3.1 in 
/usr/local/lib/python3.10/dist-packages (from matplotlib>=3.6->eland) (3.1.0)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.5->eland) (2022.7.1)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch) (2.1.3)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil<3,>=2.7.3->streamlit) (1.16.0)\n", - "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.12)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.4)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich<14,>=10.14.0->streamlit) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich<14,>=10.14.0->streamlit) (2.14.0)\n", - "Requirement already satisfied: pytz-deprecation-shim in /usr/local/lib/python3.10/dist-packages (from tzlocal<5,>=1.1->streamlit) (0.1.0.post0)\n", - "Requirement already satisfied: decorator>=3.4.0 in /usr/local/lib/python3.10/dist-packages (from validators<1,>=0.2->streamlit) (4.4.2)\n", - "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk->sentence-transformers) (1.3.1)\n", - "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->sentence-transformers) (3.2.0)\n", - "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch) (1.3.0)\n", - "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.10/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit) (5.0.0)\n", - "Requirement already satisfied: attrs>=17.4.0 in 
/usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit) (23.1.0)\n", - "Requirement already satisfied: pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit) (0.19.3)\n", - "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit) (0.1.2)\n", - "Requirement already satisfied: tzdata in /usr/local/lib/python3.10/dist-packages (from pytz-deprecation-shim->tzlocal<5,>=1.1->streamlit) (2023.3)\n" - ] - } - ], - "source": [ - "!pip install sentence-transformers eland elasticsearch transformers torch tqdm Pillow streamlit" - ] - }, - { - "cell_type": "markdown", - "source": [ - "### Upload NLP model for querying\n", - "Using an `eland` library you will import NLP CLIP model. The model will tran\n", - "sfer your search query into vector which will be used for the search over the set of images stored in Elasticsearch.\n", - "\n", - "The model used is [clip-ViT-B-32-multilingual-v1](https://huggingface.co/sentence-transformers/clip-ViT-B-32-multilingual-v1) because the image embeddings are also generated by the CLIP model.\n", - "\n", - "How to get cloud id? Go to [ESS cloud](https://cloud.elastic.co/logout?redirectTo=%2Fhome&reason=unauthorised) and `On the deployment overview page, copy down the Cloud ID.`\n", - "\n", - "The authentication is using api key (`--es-api-key`). 
Learn how to generate [API key](https://www.elastic.co/guide/en/kibana/current/api-keys.html#create-api-key).\n", - "```\n", - "$ eland_import_hub_model --cloud-id $CLOUD_ID \\\n", - " --hub-model-id sentence-transformers/clip-ViT-B-32-multilingual-v1 \\\n", - " --task-type text_embedding --es-api-key $API_KEY --start\n", - "```" - ], - "metadata": { - "id": "eIV5lAnVt9L7" - } - }, - { - "cell_type": "code", - "source": [ - "API_KEY=''\n", - "CLOUD_ID=''\n", - "!eland_import_hub_model --cloud-id $CLOUD_ID --hub-model-id sentence-transformers/clip-ViT-B-32-multilingual-v1 --task-type text_embedding --es-api-key API_KEY --start" - ], - "metadata": { - "id": "tVhL9jBnuAAQ" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "### Import Python libraries" - ], - "metadata": { - "id": "hVxWnFflUCZv" - } - }, - { - "cell_type": "code", - "source": [ - "from elasticsearch import Elasticsearch\n", - "from elasticsearch.helpers import parallel_bulk\n", - "import requests\n", - "import os\n", - "import sys\n", - "# import shutil\n", - "import zipfile\n", - "from tqdm.auto import tqdm\n", - "import pandas as pd\n", - "from PIL import Image\n", - "from sentence_transformers import SentenceTransformer\n", - "import urllib.request\n", - "# import urllib.error\n", - "import json\n", - "from getpass import getpass" - ], - "metadata": { - "id": "I0pRCbYMuMVn" - }, - "execution_count": 17, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "### Connect to Elasticsearch cluster\n", - "Use your own cluster details `ELASTIC_CLOUD_ID`, `API_KEY`." 
- ], - "metadata": { - "id": "Klv3rywdUJBN" - } - }, - { - "cell_type": "code", - "source": [ - "# ESS Cloud connection definition using an API_KEY\n", - "ELASTIC_CLOUD_ID = getpass(\"Elastic Cloud ID: \")\n", - "API_KEY = getpass(\"Elastic API key: \")\n", - "\n", - "# ELASTIC_CLOUD_USER = \"elastic\"\n", - "# CLOUD_PASSWORD = getpass(\"Elastic Password\")\n", - "\n", - "es = Elasticsearch(\n", - " cloud_id=ELASTIC_CLOUD_ID,\n", - " #basic_auth=(ELASTIC_CLOUD_USER, ELASTIC_CLOUD_PASSWORD),\n", - " api_key=API_KEY,\n", - " request_timeout=600\n", - ")\n", - "\n", - "es.info() # should return cluster info" - ], - "metadata": { - "id": "YwN8RmFY3FQI", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "d0d0e31e-2ad2-46fe-ef8c-8c8bce7e1c48" - }, - "execution_count": 19, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Elastic Cloud ID: ··········\n", - "Elastic API key: ··········\n" - ] - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "ObjectApiResponse({'name': 'instance-0000000000', 'cluster_name': 'a597bbe1e0d047c494e7d4015f67ef37', 'cluster_uuid': 'EnT0vwwSSZeAahPw3Vhsuw', 'version': {'number': '8.8.2', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '98e1271edf932a480e4262a471281f1ee295ce6b', 'build_date': '2023-06-26T05:16:16.196344851Z', 'build_snapshot': False, 'lucene_version': '9.6.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'})" - ] - }, - "metadata": {}, - "execution_count": 19 - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "### Create Index and mappings for Images\n", - "Befor you can index documents into Elasticsearch, you need to create an Index with correct mappings." 
- ], - "metadata": { - "id": "IW-GIlH2OxB4" - } - }, - { - "cell_type": "code", - "source": [ - "# Destination Index name\n", - "INDEX_NAME=\"images\"\n", - "# If you want to delete previous version of the Index\n", - "DELETE_INDEX=False\n", - "\n", - "INDEX_MAPPING = {\n", - " \"properties\": {\n", - " \"image_embedding\": {\n", - " \"type\": \"dense_vector\",\n", - " \"dims\": 512,\n", - " \"index\": True,\n", - " \"similarity\": \"cosine\"\n", - " },\n", - " \"photo_id\": {\n", - " \"type\": \"keyword\"\n", - " },\n", - " \"photo_image_url\": {\n", - " \"type\": \"keyword\"\n", - " },\n", - " \"ai_description\": {\n", - " \"type\": \"text\"\n", - " },\n", - " \"photo_description\": {\n", - " \"type\": \"text\"\n", - " },\n", - " \"photo_url\": {\n", - " \"type\": \"keyword\"\n", - " },\n", - " \"photographer_first_name\": {\n", - " \"type\": \"keyword\"\n", - " },\n", - " \"photographer_last_name\": {\n", - " \"type\": \"keyword\"\n", - " },\n", - " \"photographer_username\": {\n", - " \"type\": \"keyword\"\n", - " },\n", - " \"exif_camera_make\": {\n", - " \"type\": \"keyword\"\n", - " },\n", - " \"exif_camera_model\": {\n", - " \"type\": \"keyword\"\n", - " },\n", - " \"exif_iso\": {\n", - " \"type\": \"integer\"\n", - " }\n", - " }\n", - " }\n", - "\n", - "# Index settings\n", - "INDEX_SETTINGS = {\n", - " \"index\": {\n", - " \"number_of_replicas\": \"1\",\n", - " \"number_of_shards\": \"1\",\n", - " \"refresh_interval\": \"5s\"\n", - " }\n", - "}\n", - "\n", - "if(DELETE_INDEX):\n", - " if es.indices.exists(index=INDEX_NAME):\n", - " print(\"Deleting existing %s\" % INDEX_NAME)\n", - " es.indices.delete(index=INDEX_NAME, ignore=[400, 404])\n", - "\n", - "if not es.indices.exists(index=INDEX_NAME):\n", - " print(\"Creating index %s\" % INDEX_NAME)\n", - " es.indices.create(index=INDEX_NAME, mappings=INDEX_MAPPING, settings=INDEX_SETTINGS,\n", - " ignore=[400, 404])\n" - ], - "metadata": { - "id": "xAkc1OVcOxy3" - }, - "execution_count": null, - "outputs": [] 
- }, - { - "cell_type": "markdown", - "source": [ - "### Get image dataset and embeddings\n", - "Download:\n", - "- The example image dataset is from [Unsplash](https://github.com/unsplash/datasets)\n", - "- The [Image embeddings](https://github.com/radoondas/flask-elastic-nlp/blob/main/embeddings/blogs/blogs-no-embeddings.json.zip) are pre-generated using CLIP model\n", - "\n", - "Then unzip both files." - ], - "metadata": { - "id": "NKE-j0kPUMn_" - } - }, - { - "cell_type": "code", - "source": [ - "!wget https://unsplash.com/data/lite/1.2.0 -O data/unsplash-research-dataset-lite-1.2.0.zip\n", - "!wget https://raw.githubusercontent.com/radoondas/flask-elastic-nlp/main/embeddings/images/image-embeddings.json.zip -P data" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "zFGaPDRR5mqT", - "outputId": "0114cdd6-a714-41ab-9b46-3013bd36698a" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--2023-07-25 14:28:32-- https://unsplash.com/data/lite/1.2.0\n", - "Resolving unsplash.com (unsplash.com)... 151.101.65.181, 151.101.1.181, 151.101.129.181, ...\n", - "Connecting to unsplash.com (unsplash.com)|151.101.65.181|:443... connected.\n", - "HTTP request sent, awaiting response... 302 Found\n", - "Location: https://unsplash-datasets.s3.amazonaws.com/lite/1.2.0/unsplash-research-dataset-lite-1.2.0.zip [following]\n", - "--2023-07-25 14:28:32-- https://unsplash-datasets.s3.amazonaws.com/lite/1.2.0/unsplash-research-dataset-lite-1.2.0.zip\n", - "Resolving unsplash-datasets.s3.amazonaws.com (unsplash-datasets.s3.amazonaws.com)... 52.217.102.84, 3.5.25.253, 52.217.96.188, ...\n", - "Connecting to unsplash-datasets.s3.amazonaws.com (unsplash-datasets.s3.amazonaws.com)|52.217.102.84|:443... connected.\n", - "HTTP request sent, awaiting response... 
200 OK\n", - "Length: 632351052 (603M) [application/zip]\n", - "Saving to: ‘data/unsplash-research-dataset-lite-1.2.0.zip’\n", - "\n", - "data/unsplash-resea 100%[===================>] 603.06M 14.1MB/s in 42s \n", - "\n", - "2023-07-25 14:29:16 (14.2 MB/s) - ‘data/unsplash-research-dataset-lite-1.2.0.zip’ saved [632351052/632351052]\n", - "\n", - "--2023-07-25 14:29:16-- https://raw.githubusercontent.com/radoondas/flask-elastic-nlp/main/embeddings/images/image-embeddings.json.zip\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 94256217 (90M) [application/zip]\n", - "Saving to: ‘data/image-embeddings.json.zip.1’\n", - "\n", - "image-embeddings.js 100%[===================>] 89.89M 164MB/s in 0.5s \n", - "\n", - "2023-07-25 14:29:16 (164 MB/s) - ‘data/image-embeddings.json.zip.1’ saved [94256217/94256217]\n", - "\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "# Unzip downloaded files\n", - "UNSPLASH_ZIP_FILE=\"data/unsplash-research-dataset-lite-1.2.0.zip\"\n", - "EMBEDDINGS_ZIP_FILE=\"data/image-embeddings.json.zip\"\n", - "\n", - "with zipfile.ZipFile(UNSPLASH_ZIP_FILE, 'r') as zip_ref:\n", - " print('Extracting file ', UNSPLASH_ZIP_FILE, '.')\n", - " zip_ref.extractall('data/unsplash/')\n", - "\n", - "with zipfile.ZipFile(EMBEDDINGS_ZIP_FILE, 'r') as zip_ref:\n", - " print('Extracting file ', EMBEDDINGS_ZIP_FILE, '.')\n", - " zip_ref.extractall(\"data/embeddings/\")" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "MBh4AQ8i7C0-", - "outputId": "17a50b7f-f052-4b72-daa8-0e8fc630326f" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Extracting file data/unsplash-research-dataset-lite-1.2.0.zip 
.\n", - "Extracting file data/image-embeddings.json.zip .\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Image embeddings\n", - "We have now 2 options how to proceed.\n", - "1. Import all pregenerated image embeddings (~19k). This is faster option with a lot of images available in a short time.\n", - "2. Import a small subset of randomly choosen images to see the process of generating of image embeddings using external Clip model." - ], - "metadata": { - "id": "p6H7QYctQQA7" - } - }, - { - "cell_type": "code", - "source": [ - "# define helper function\n", - "def gen_rows(df):\n", - " for doc in df.to_dict(orient='records'):\n", - " yield doc" - ], - "metadata": { - "id": "03YvC-_JY9OE" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "## 1. Import all pregenerated image embeddings\n", - "This option lets you inport ~19k documents woth pregenenerated image embeddings with metadata.\n", - "\n", - "The process downloads files with images information, merge them and index into Elasticsearch." 
- ], - "metadata": { - "id": "qhZRdUyAQd-s" - } - }, - { - "cell_type": "code", - "source": [ - "df_unsplash = pd.read_csv('data/unsplash/' + 'photos.tsv000', sep='\\t', header=0)\n", - "\n", - "# follwing 8 lines are fix for inconsistent/incorrect data\n", - "df_unsplash['photo_description'].fillna('', inplace=True)\n", - "df_unsplash['ai_description'].fillna('', inplace=True)\n", - "df_unsplash['photographer_first_name'].fillna('', inplace=True)\n", - "df_unsplash['photographer_last_name'].fillna('', inplace=True)\n", - "df_unsplash['photographer_username'].fillna('', inplace=True)\n", - "df_unsplash['exif_camera_make'].fillna('', inplace=True)\n", - "df_unsplash['exif_camera_model'].fillna('', inplace=True)\n", - "df_unsplash['exif_iso'].fillna(0, inplace=True)\n", - "## end of fix\n", - "\n", - "# read subset of columns from the original/downloaded dataset\n", - "df_unsplash_subset = df_unsplash[\n", - " ['photo_id', 'photo_url', 'photo_image_url', 'photo_description', 'ai_description', 'photographer_first_name',\n", - " 'photographer_last_name', 'photographer_username', 'exif_camera_make', 'exif_camera_model', 'exif_iso']]\n", - "\n", - "# read all pregenerated embeddings\n", - "df_embeddings = pd.read_json('data/embeddings/' + 'image-embeddings.json', lines=True)\n", - "\n", - "df_merged = pd.merge(df_unsplash_subset, df_embeddings,\n", - " on='photo_id',\n", - " how='inner')\n", - "\n", - "count = 0\n", - "for success, info in parallel_bulk(\n", - " client=es,\n", - " actions=gen_rows(df_merged),\n", - " thread_count=5,\n", - " chunk_size=1000,\n", - " index=INDEX_NAME\n", - "):\n", - " if success:\n", - " count += 1\n", - " if count % 1000 == 0:\n", - " print('Indexed %s documents' % str(count), flush=True)\n", - " sys.stdout.flush()\n", - " else:\n", - " print('Doc failed', info)\n", - "\n", - "print('Indexed %s image embeddings documents' % str(count), flush=True)\n", - "sys.stdout.flush()" - ], - "metadata": { - "id": "32xrbSUXTODQ" - }, - 
"execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "## 2. Import small number of random\n", - "In this part you will randomly choose small number of images and then generate image embeddings. The script will then index documents into Elasticserach." - ], - "metadata": { - "id": "xypgh4lFQmkc" - } - }, - { - "cell_type": "code", - "source": [ - "NUMBER_OF_IMAGES=20\n", - "INDEX_NAME=\"images-test\"\n", - "\n", - "df_unsplash = pd.read_csv('data/unsplash/' + 'photos.tsv000', sep='\\t', header=0)\n", - "\n", - "## stat fix\n", - "# follwing 8 lines are fix for inconsistent/incorrect data\n", - "df_unsplash['photo_description'].fillna('', inplace=True)\n", - "df_unsplash['ai_description'].fillna('', inplace=True)\n", - "df_unsplash['photographer_first_name'].fillna('', inplace=True)\n", - "df_unsplash['photographer_last_name'].fillna('', inplace=True)\n", - "df_unsplash['photographer_username'].fillna('', inplace=True)\n", - "df_unsplash['exif_camera_make'].fillna('', inplace=True)\n", - "df_unsplash['exif_camera_model'].fillna('', inplace=True)\n", - "df_unsplash['exif_iso'].fillna(0, inplace=True)\n", - "## end of fix\n", - "\n", - "df_unsplash_subset = df_unsplash[\n", - " ['photo_id', 'photo_url', 'photo_image_url', 'photo_description', 'ai_description', 'photographer_first_name',\n", - " 'photographer_last_name', 'photographer_username', 'exif_camera_make', 'exif_camera_model', 'exif_iso']]\n", - "\n", - "df_random_subset = df_unsplash_subset.sample(n=NUMBER_OF_IMAGES, replace=False)\n", - "df_random_subset = df_random_subset.reset_index()\n", - "\n", - "# Load model CLIP\n", - "img_model = SentenceTransformer('clip-ViT-B-32')\n", - "\n", - "# new list of image documents for indexing into ES\n", - "lst = []\n", - "if not os.path.exists(\"data/images\"):\n", - " os.mkdir(\"data/images\")\n", - "\n", - "for index, row in df_random_subset.iterrows():\n", - " #open image from url\n", - " img_path = \"data/images/\" + 
row['photo_id']\n", - " try:\n", - " urllib.request.urlretrieve(row['photo_image_url'], img_path)\n", - " print(row['photo_id'] + \" \" + row['photo_url'])\n", - " except urllib.error.HTTPError as err:\n", - " if err.code == 404:\n", - " print('404 error: Image not found at {}'.format(row['photo_image_url']))\n", - " else:\n", - " raise\n", - "\n", - " img = Image.open(img_path)\n", - " # create doc\n", - " doc = {}\n", - " embedding = img_model.encode(img)\n", - " doc['photo_id'] = row['photo_id']\n", - " doc['image_embedding'] = embedding.tolist()\n", - " lst.append(doc)\n", - " # print(doc)\n", - "\n", - " # Image cleanup.\n", - " # If file exists, delete it.\n", - " if os.path.exists(img_path):\n", - " os.remove(img_path)\n", - "\n", - "# read all pregenerated embeddings\n", - "df_embeddings = pd.read_json('data/embeddings/' + 'image-embeddings.json', lines=True)\n", - "\n", - "df_merged = pd.merge(df_random_subset, pd.DataFrame(lst),\n", - " on='photo_id',\n", - " how='inner')\n", - "# print(df_merged)\n", - "\n", - "count = 0\n", - "for success, info in parallel_bulk(\n", - " client=es,\n", - " actions=gen_rows(df_merged),\n", - " thread_count=5,\n", - " chunk_size=10,\n", - " index=INDEX_NAME\n", - "):\n", - " if success:\n", - " count += 1\n", - " if count % 10 == 0:\n", - " print('Indexed %s documents' % str(count), flush=True)\n", - " sys.stdout.flush()\n", - " else:\n", - " print('Doc failed', info)\n", - "\n", - "print('Indexed %s image embeddings documents' % str(count), flush=True)\n", - "sys.stdout.flush()" - ], - "metadata": { - "id": "r_txQjP2RKnr" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "### Query the image dataset\n", - "The next step is to run a query to search for images. 
The example query searches for `\"model_text\": \"Valentine day flowers\"` using the model we uploaded to Elasticsearch `sentence-transformers__clip-vit-b-32-multilingual-v1`.\n", - "\n", - "The process is one query even it internaly consists of two tasks. One is to tramsform your search text into a vector using the NLP model and the second task is to run the vector search over the image dataset.\n", - "```\n", - "POST images/_search\n", - "{\n", - " \"knn\": {\n", - " \"field\": \"image_embedding\",\n", - " \"k\": 5,\n", - " \"num_candidates\": 10,\n", - " \"query_vector_builder\": {\n", - " \"text_embedding\": {\n", - " \"model_id\": \"sentence-transformers__clip-vit-b-32-multilingual-v1\",\n", - " \"model_text\": \"Valentine day flowers\"\n", - " }\n", - " }\n", - " },\n", - " \"fields\": [\n", - " \"photo_description\",\n", - " \"ai_description\",\n", - " \"photo_url\"\n", - " ],\n", - " \"_source\": false\n", - "}\n", - "```\n", - "\n" - ], - "metadata": { - "id": "-_i2CIpSz9vw" - } - }, - { - "cell_type": "code", - "source": [ - "# Search queary\n", - "WHAT_ARE_YOU_LOOKING_FOR=\"Valentine day flowers\"\n", - "INDEX_IM_EMBED=\"images\"\n", - "\n", - "source_fields = [\"photo_description\", \"ai_description\", \"photo_url\", \"photo_image_url\", \"photographer_first_name\",\n", - " \"photographer_username\", \"photographer_last_name\", \"photo_id\"]\n", - "query = {\n", - " \"field\": \"image_embedding\",\n", - " \"k\": 5,\n", - " \"num_candidates\": 100,\n", - " \"query_vector_builder\": {\n", - " \"text_embedding\": {\n", - " \"model_id\": \"sentence-transformers__clip-vit-b-32-multilingual-v1\",\n", - " \"model_text\": WHAT_ARE_YOU_LOOKING_FOR\n", - " }\n", - " }\n", - "}\n", - "\n", - "response = es.search(\n", - " index=INDEX_IM_EMBED,\n", - " fields=source_fields,\n", - " knn=query, source=False)\n", - "\n", - "print(response.body)\n", - "\n", - "# the code writes the response into a file for the streamlit UI used in the optional step.\n", - "with 
open('json_data.json', 'w') as outfile:\n", - " json.dump(response.body['hits']['hits'], outfile)\n", - "\n", - "# Use the `loads()` method to load the JSON data\n", - "dfr = json.loads(json.dumps(response.body['hits']['hits']))\n", - "# Pass the generated JSON data into a pandas dataframe\n", - "dfr = pd.DataFrame(dfr)\n", - "# Print the data frame\n", - "dfr\n", - "\n", - "results = pd.json_normalize(json.loads(json.dumps(response.body['hits']['hits'])))\n", - "# results\n", - "results[['_id', '_score', 'fields.photo_id', 'fields.photo_image_url',\n", - " 'fields.photo_description', 'fields.photographer_first_name',\n", - " 'fields.photographer_last_name', 'fields.ai_description',\n", - " 'fields.photo_url']]" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 375 - }, - "id": "wdicpvRlzmXG", - "outputId": "00550041-0aed-4f51-ccd3-18eb705ff7ed" - }, - "execution_count": 35, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "{'took': 114, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 5, 'relation': 'eq'}, 'max_score': 0.646751, 'hits': [{'_index': 'images', '_id': 'nK5Fh4kBLg4Kd5ySLbKC', '_score': 0.646751, 'fields': {'photo_image_url': ['https://images.unsplash.com/photo-1587404787163-d03a28fcc1f0'], 'photo_id': ['gQFZxLe3m4g'], 'photographer_first_name': ['Vadim'], 'photo_description': ['instagram.com/vadimsadovski'], 'photographer_last_name': ['Sadovski'], 'photo_url': ['https://unsplash.com/photos/gQFZxLe3m4g'], 'photographer_username': ['vadimsadovski'], 'ai_description': ['']}}, {'_index': 'images', '_id': 'Xa5Eh4kBLg4Kd5yS84Qf', '_score': 0.64675057, 'fields': {'photo_image_url': ['https://images.unsplash.com/photo-1574259148543-dd376a61339f'], 'photo_id': ['g5Mhx29yp-A'], 'photographer_first_name': ['Erin'], 'photo_description': ['Cute but grumpy cat in the Austrian mountains'], 'photographer_last_name': ['East'], 
'photo_url': ['https://unsplash.com/photos/g5Mhx29yp-A'], 'photographer_username': ['mserineast'], 'ai_description': ['brown Persian cat on white bench']}}, {'_index': 'images', '_id': '265Eh4kBLg4Kd5yS84Uf', '_score': 0.64244866, 'fields': {'photo_image_url': ['https://images.unsplash.com/photo-1565772101068-dec21f7b36eb'], 'photo_id': ['9KZ0PGNCxNE'], 'photographer_first_name': ['Perchek'], 'photo_description': [''], 'photographer_last_name': ['Industrie'], 'photo_url': ['https://unsplash.com/photos/9KZ0PGNCxNE'], 'photographer_username': ['perchek_industrie'], 'ai_description': ['siamese cat']}}, {'_index': 'images', '_id': 'xq5Fh4kBLg4Kd5ySEpuC', '_score': 0.64216036, 'fields': {'photo_image_url': ['https://images.unsplash.com/photo-1589994205353-325f40210d02'], 'photo_id': ['VOZQkkKXvY4'], 'photographer_first_name': ['Andrey'], 'photo_description': [''], 'photographer_last_name': ['Svistunov'], 'photo_url': ['https://unsplash.com/photos/VOZQkkKXvY4'], 'photographer_username': ['svistal13'], 'ai_description': ['orange tabby cat on ground covered with snow during daytime']}}, {'_index': 'images', '_id': 'WK5Eh4kBLg4Kd5yS5XcD', '_score': 0.64185303, 'fields': {'photo_image_url': ['https://images.unsplash.com/photo-1556820161-b605d166fce1'], 'photo_id': ['wmz8y6L6c_k'], 'photographer_first_name': ['Phillip'], 'photo_description': [''], 'photographer_last_name': ['Suitcases'], 'photo_url': ['https://unsplash.com/photos/wmz8y6L6c_k'], 'photographer_username': ['nillait'], 'ai_description': ['brown and black kitten close-up photography']}}]}}\n" - ] - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " _id _score fields.photo_id \\\n", - "0 nK5Fh4kBLg4Kd5ySLbKC 0.646751 [gQFZxLe3m4g] \n", - "1 Xa5Eh4kBLg4Kd5yS84Qf 0.646751 [g5Mhx29yp-A] \n", - "2 265Eh4kBLg4Kd5yS84Uf 0.642449 [9KZ0PGNCxNE] \n", - "3 xq5Fh4kBLg4Kd5ySEpuC 0.642160 [VOZQkkKXvY4] \n", - "4 WK5Eh4kBLg4Kd5yS5XcD 0.641853 [wmz8y6L6c_k] \n", - "\n", - " fields.photo_image_url \\\n", - 
"0 [https://images.unsplash.com/photo-15874047871... \n", - "1 [https://images.unsplash.com/photo-15742591485... \n", - "2 [https://images.unsplash.com/photo-15657721010... \n", - "3 [https://images.unsplash.com/photo-15899942053... \n", - "4 [https://images.unsplash.com/photo-1556820161-... \n", - "\n", - " fields.photo_description \\\n", - "0 [instagram.com/vadimsadovski] \n", - "1 [Cute but grumpy cat in the Austrian mountains] \n", - "2 [] \n", - "3 [] \n", - "4 [] \n", - "\n", - " fields.photographer_first_name fields.photographer_last_name \\\n", - "0 [Vadim] [Sadovski] \n", - "1 [Erin] [East] \n", - "2 [Perchek] [Industrie] \n", - "3 [Andrey] [Svistunov] \n", - "4 [Phillip] [Suitcases] \n", - "\n", - " fields.ai_description \\\n", - "0 [] \n", - "1 [brown Persian cat on white bench] \n", - "2 [siamese cat] \n", - "3 [orange tabby cat on ground covered with snow ... \n", - "4 [brown and black kitten close-up photography] \n", - "\n", - " fields.photo_url \n", - "0 [https://unsplash.com/photos/gQFZxLe3m4g] \n", - "1 [https://unsplash.com/photos/g5Mhx29yp-A] \n", - "2 [https://unsplash.com/photos/9KZ0PGNCxNE] \n", - "3 [https://unsplash.com/photos/VOZQkkKXvY4] \n", - "4 [https://unsplash.com/photos/wmz8y6L6c_k] " - ], - "text/html": [ - "\n", - "\n", - "
\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
_id_scorefields.photo_idfields.photo_image_urlfields.photo_descriptionfields.photographer_first_namefields.photographer_last_namefields.ai_descriptionfields.photo_url
0nK5Fh4kBLg4Kd5ySLbKC0.646751[gQFZxLe3m4g][https://images.unsplash.com/photo-15874047871...[instagram.com/vadimsadovski][Vadim][Sadovski][][https://unsplash.com/photos/gQFZxLe3m4g]
1Xa5Eh4kBLg4Kd5yS84Qf0.646751[g5Mhx29yp-A][https://images.unsplash.com/photo-15742591485...[Cute but grumpy cat in the Austrian mountains][Erin][East][brown Persian cat on white bench][https://unsplash.com/photos/g5Mhx29yp-A]
2265Eh4kBLg4Kd5yS84Uf0.642449[9KZ0PGNCxNE][https://images.unsplash.com/photo-15657721010...[][Perchek][Industrie][siamese cat][https://unsplash.com/photos/9KZ0PGNCxNE]
3xq5Fh4kBLg4Kd5ySEpuC0.642160[VOZQkkKXvY4][https://images.unsplash.com/photo-15899942053...[][Andrey][Svistunov][orange tabby cat on ground covered with snow ...[https://unsplash.com/photos/VOZQkkKXvY4]
4WK5Eh4kBLg4Kd5yS5XcD0.641853[wmz8y6L6c_k][https://images.unsplash.com/photo-1556820161-...[][Phillip][Suitcases][brown and black kitten close-up photography][https://unsplash.com/photos/wmz8y6L6c_k]
\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - "
\n", - " \n", - "
\n", - "\n", - "\n", - "\n", - " \n", - "\n", - " \n", - " \n", - "\n", - " \n", - "
\n", - "
\n" - ] - }, - "metadata": {}, - "execution_count": 35 - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# [Optional] Simple streamlit UI\n", - "In the following section, you will view the response in a simple UI for better visualisation.\n", - "\n", - "The query in the previous step did write down a file response `json_data.json` for the UI to load and visualise.\n", - "\n", - "Follow the steps below to see the results in a table." - ], - "metadata": { - "id": "Ry62sfHFHFi9" - } - }, - { - "cell_type": "markdown", - "source": [ - "### Install tunnel library" - ], - "metadata": { - "id": "iUAbRqr8II-x" - } - }, - { - "cell_type": "code", - "source": [ - "!npm install localtunnel" - ], - "metadata": { - "id": "RGEmAt2DjtN7", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "f6c37d54-7e09-4e59-fc21-8a3db4fa840d" - }, - "execution_count": 12, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\u001b[K\u001b[?25h\u001b[37;40mnpm\u001b[0m \u001b[0m\u001b[30;43mWARN\u001b[0m \u001b[0m\u001b[35msaveError\u001b[0m ENOENT: no such file or directory, open '/content/package.json'\n", - "\u001b[0m\u001b[37;40mnpm\u001b[0m \u001b[0m\u001b[34;40mnotice\u001b[0m\u001b[35m\u001b[0m created a lockfile as package-lock.json. 
You should commit this file.\n", - "\u001b[0m\u001b[37;40mnpm\u001b[0m \u001b[0m\u001b[30;43mWARN\u001b[0m \u001b[0m\u001b[35menoent\u001b[0m ENOENT: no such file or directory, open '/content/package.json'\n", - "\u001b[0m\u001b[37;40mnpm\u001b[0m \u001b[0m\u001b[30;43mWARN\u001b[0m\u001b[35m\u001b[0m content No description\n", - "\u001b[0m\u001b[37;40mnpm\u001b[0m \u001b[0m\u001b[30;43mWARN\u001b[0m\u001b[35m\u001b[0m content No repository field.\n", - "\u001b[0m\u001b[37;40mnpm\u001b[0m \u001b[0m\u001b[30;43mWARN\u001b[0m\u001b[35m\u001b[0m content No README data\n", - "\u001b[0m\u001b[37;40mnpm\u001b[0m \u001b[0m\u001b[30;43mWARN\u001b[0m\u001b[35m\u001b[0m content No license field.\n", - "\u001b[0m\n", - "\u001b[K\u001b[?25h+ localtunnel@2.0.2\n", - "added 22 packages from 22 contributors and audited 22 packages in 5.903s\n", - "\n", - "3 packages are looking for funding\n", - " run `npm fund` for details\n", - "\n", - "found \u001b[92m0\u001b[0m vulnerabilities\n", - "\n", - "\u001b[K\u001b[?25h" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "### Create application" - ], - "metadata": { - "id": "KUAfucnYITka" - } - }, - { - "cell_type": "code", - "source": [ - "%%writefile app.py\n", - "\n", - "import streamlit as st\n", - "import json\n", - "import pandas as pd\n", - "\n", - "\n", - "def get_image_preview(image_url):\n", - " \"\"\"Returns an HTML tag with preview of the image.\"\"\"\n", - " return f\"\"\"\"\"\"\n", - "\n", - "\n", - "def get_url_link(photo_url):\n", - " \"\"\"Returns an HTML tag to the image page.\"\"\"\n", - " return f\"\"\" {photo_url} \"\"\"\n", - "\n", - "\n", - "def main():\n", - " \"\"\"Creates a Streamlit app with a table of images.\"\"\"\n", - " data = json.load(open(\"json_data.json\"))\n", - " table = []\n", - " for image in data:\n", - " image_url = image[\"fields\"][\"photo_image_url\"][0]\n", - " image_preview = get_image_preview(image_url)\n", - " photo_url = image[\"fields\"][\"photo_url\"][0]\n", - " 
photo_url_link = get_url_link(photo_url)\n", - " table.append([image_preview, image[\"fields\"][\"photo_id\"][0],\n", - " image[\"fields\"][\"photographer_first_name\"][0],\n", - " image[\"fields\"][\"photographer_last_name\"][0],\n", - " image[\"fields\"][\"photographer_username\"][0],\n", - " photo_url_link])\n", - "\n", - " st.write(pd.DataFrame(table, columns=[\"Image\", \"ID\", \"First Name\", \"Last Name\",\n", - " \"Photographer username\", \"Photo url\"]).to_html(escape = False),\n", - " unsafe_allow_html=True)\n", - "\n", - "\n", - "if __name__ == \"__main__\":\n", - " main()\n", - "\n" - ], - "metadata": { - "id": "9Wb7GOWMXFnF", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "6db23ef3-b25e-4f80-a3cb-6d08c1c78c16" - }, - "execution_count": 36, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Overwriting app.py\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "### Run app\n", - "Run the application and check your IP for the tunneling" - ], - "metadata": { - "id": "CjDhvbGhHuiz" - } - }, - { - "cell_type": "code", - "source": [ - "!streamlit run app.py &>/content/logs.txt & curl ipv4.icanhazip.com" - ], - "metadata": { - "id": "851CeYi8jvuF", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "46a64023-e990-4900-f482-5558237f08cc" - }, - "execution_count": 37, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "34.138.156.22\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "### Create the tunnel\n", - "Run the tunnel and use the link below to connect to the tunnel.\n", - "\n", - "Use the IP from the previous step to connect to the application" - ], - "metadata": { - "id": "4OuSLFHyHy5M" - } - }, - { - "cell_type": "code", - "source": [ - "!npx localtunnel --port 8501" - ], - "metadata": { - "id": "inF7ceBmjyE3", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": 
"559ce180-3f0f-4475-c9a9-46dc91389276" - }, - "execution_count": 38, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\u001b[K\u001b[?25hnpx: installed 22 in 2.186s\n", - "your url is: https://nine-facts-act.loca.lt\n", - "^C\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Resources\n", - "\n", - "Blog: https://www.elastic.co/blog/implement-image-similarity-search-elastic\n", - "\n", - "GH : https://github.com/radoondas/flask-elastic-image-search\n" - ], - "metadata": { - "id": "SbxbVzvQ7caR" - } - } - ] -} \ No newline at end of file From 9817f9fe0faa230ea9ddb50588ac84e711b09f98 Mon Sep 17 00:00:00 2001 From: Miguel Grinberg Date: Tue, 19 Mar 2024 14:56:32 +0000 Subject: [PATCH 3/3] fixed some typos and simplified the instructions --- bin/find-notebooks-to-test.sh | 1 + notebooks/images/image-similarity.ipynb | 1113 ++++++----------------- 2 files changed, 273 insertions(+), 841 deletions(-) diff --git a/bin/find-notebooks-to-test.sh b/bin/find-notebooks-to-test.sh index 76840866..7e9513ce 100755 --- a/bin/find-notebooks-to-test.sh +++ b/bin/find-notebooks-to-test.sh @@ -4,6 +4,7 @@ EXEMPT_NOTEBOOKS=( "notebooks/esql/esql-getting-started.ipynb" "notebooks/search/07-inference.ipynb" "notebooks/search/08-learning-to-rank.ipynb" + "notebooks/images/image-similarity.ipynb" "notebooks/langchain/langchain-vector-store.ipynb" "notebooks/langchain/self-query-retriever-examples/chatbot-example.ipynb" "notebooks/langchain/self-query-retriever-examples/chatbot-with-bm25-only-example.ipynb" diff --git a/notebooks/images/image-similarity.ipynb b/notebooks/images/image-similarity.ipynb index 46e01eb1..78be723f 100644 --- a/notebooks/images/image-similarity.ipynb +++ b/notebooks/images/image-similarity.ipynb @@ -1,53 +1,41 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": 
"python" - } - }, "cells": [ { "cell_type": "markdown", - "source": [ - "# How to implement Image search using Elasticsearch" - ], "metadata": { "id": "CepGq3Kvtdxi" - } + }, + "source": [ + "# How to implement Image search using Elasticsearch" + ] }, { "cell_type": "markdown", + "metadata": { + "id": "oMu1SW_TQQrU" + }, "source": [ "The workbook shows how to implement an Image search using Elasticsearch. You will index documents with image embeddings (generated or pre-generated) and then using NLP model be able to search using natural language description of the image.\n", "\n", - "### Prerequisities\n", - "Before you start make sure you have Elasticsearch cluster running. The cluster must have at least one machine learning (ML) node with enough (4GB) memory." - ], - "metadata": { - "id": "oMu1SW_TQQrU" - } + "## Prerequisities\n", + "Before we begin, create an elastic cloud deployment and [autoscale](https://www.elastic.co/guide/en/cloud/current/ec-autoscaling.html) to have least one machine learning (ML) node with enough (4GB) memory. Also ensure that the Elasticsearch cluster is running. \n", + "\n", + "If you don't already have an Elastic deployment, you can sign up for a free [Elastic Cloud trial](https://cloud.elastic.co/registration?utm_source=github&utm_content=elasticsearch-labs-notebook)." + ] }, { "cell_type": "markdown", + "metadata": { + "id": "VFcdr8IDQE_H" + }, "source": [ "### Install Python requirements\n", "Before you start you need to install all required Python dependencies." 
- ], - "metadata": { - "id": "VFcdr8IDQE_H" - } + ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -55,143 +43,18 @@ "id": "6WosfR55npKU", "outputId": "033767ff-0eef-48cc-c9e7-efbf73c9cb67" }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Requirement already satisfied: sentence-transformers in /usr/local/lib/python3.10/dist-packages (2.2.2)\n", - "Requirement already satisfied: eland in /usr/local/lib/python3.10/dist-packages (8.7.0)\n", - "Requirement already satisfied: elasticsearch in /usr/local/lib/python3.10/dist-packages (8.9.0)\n", - "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.31.0)\n", - "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (2.0.1+cu118)\n", - "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (4.65.0)\n", - "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (9.4.0)\n", - "Requirement already satisfied: streamlit in /usr/local/lib/python3.10/dist-packages (1.25.0)\n", - "Requirement already satisfied: torchvision in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (0.15.2+cu118)\n", - "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (1.22.4)\n", - "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (1.2.2)\n", - "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (1.10.1)\n", - "Requirement already satisfied: nltk in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (3.8.1)\n", - "Requirement already satisfied: sentencepiece in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (0.1.99)\n", - "Requirement already satisfied: 
huggingface-hub>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (0.16.4)\n", - "Requirement already satisfied: pandas>=1.5 in /usr/local/lib/python3.10/dist-packages (from eland) (1.5.3)\n", - "Requirement already satisfied: matplotlib>=3.6 in /usr/local/lib/python3.10/dist-packages (from eland) (3.7.1)\n", - "Requirement already satisfied: elastic-transport<9,>=8 in /usr/local/lib/python3.10/dist-packages (from elasticsearch) (8.4.0)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.12.2)\n", - "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (23.1)\n", - "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.1)\n", - "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2022.10.31)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.27.1)\n", - "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.13.3)\n", - "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.3.1)\n", - "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch) (4.7.1)\n", - "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch) (1.11.1)\n", - "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch) (3.1)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch) (3.1.2)\n", - "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch) (2.0.0)\n", - "Requirement already satisfied: cmake in 
/usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch) (3.25.2)\n", - "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch) (16.0.6)\n", - "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.10/dist-packages (from streamlit) (4.2.2)\n", - "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/lib/python3/dist-packages (from streamlit) (1.4)\n", - "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.10/dist-packages (from streamlit) (5.3.1)\n", - "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.10/dist-packages (from streamlit) (8.1.6)\n", - "Requirement already satisfied: importlib-metadata<7,>=1.4 in /usr/lib/python3/dist-packages (from streamlit) (4.6.4)\n", - "Requirement already satisfied: protobuf<5,>=3.20 in /usr/local/lib/python3.10/dist-packages (from streamlit) (3.20.3)\n", - "Requirement already satisfied: pyarrow>=6.0 in /usr/local/lib/python3.10/dist-packages (from streamlit) (9.0.0)\n", - "Requirement already satisfied: pympler<2,>=0.9 in /usr/local/lib/python3.10/dist-packages (from streamlit) (1.0.1)\n", - "Requirement already satisfied: python-dateutil<3,>=2.7.3 in /usr/local/lib/python3.10/dist-packages (from streamlit) (2.8.2)\n", - "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.10/dist-packages (from streamlit) (13.4.2)\n", - "Requirement already satisfied: tenacity<9,>=8.1.0 in /usr/local/lib/python3.10/dist-packages (from streamlit) (8.2.2)\n", - "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.10/dist-packages (from streamlit) (0.10.2)\n", - "Requirement already satisfied: tzlocal<5,>=1.1 in /usr/local/lib/python3.10/dist-packages (from streamlit) (4.3.1)\n", - "Requirement already satisfied: validators<1,>=0.2 in /usr/local/lib/python3.10/dist-packages (from streamlit) (0.20.0)\n", - "Requirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in 
/usr/local/lib/python3.10/dist-packages (from streamlit) (3.1.32)\n", - "Requirement already satisfied: pydeck<1,>=0.8 in /usr/local/lib/python3.10/dist-packages (from streamlit) (0.8.0)\n", - "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.10/dist-packages (from streamlit) (6.3.1)\n", - "Requirement already satisfied: watchdog>=2.1.5 in /usr/local/lib/python3.10/dist-packages (from streamlit) (3.0.0)\n", - "Requirement already satisfied: entrypoints in /usr/local/lib/python3.10/dist-packages (from altair<6,>=4.0->streamlit) (0.4)\n", - "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.10/dist-packages (from altair<6,>=4.0->streamlit) (4.3.3)\n", - "Requirement already satisfied: toolz in /usr/local/lib/python3.10/dist-packages (from altair<6,>=4.0->streamlit) (0.12.0)\n", - "Requirement already satisfied: urllib3<2,>=1.26.2 in /usr/local/lib/python3.10/dist-packages (from elastic-transport<9,>=8->elasticsearch) (1.26.16)\n", - "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from elastic-transport<9,>=8->elasticsearch) (2023.7.22)\n", - "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.10/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit) (4.0.10)\n", - "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.4.0->sentence-transformers) (2023.6.0)\n", - "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.6->eland) (1.1.0)\n", - "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.6->eland) (0.11.0)\n", - "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.6->eland) (4.41.1)\n", - "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.6->eland) (1.4.4)\n", - 
"Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.6->eland) (3.1.0)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.5->eland) (2022.7.1)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch) (2.1.3)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil<3,>=2.7.3->streamlit) (1.16.0)\n", - "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.12)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.4)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich<14,>=10.14.0->streamlit) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich<14,>=10.14.0->streamlit) (2.14.0)\n", - "Requirement already satisfied: pytz-deprecation-shim in /usr/local/lib/python3.10/dist-packages (from tzlocal<5,>=1.1->streamlit) (0.1.0.post0)\n", - "Requirement already satisfied: decorator>=3.4.0 in /usr/local/lib/python3.10/dist-packages (from validators<1,>=0.2->streamlit) (4.4.2)\n", - "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk->sentence-transformers) (1.3.1)\n", - "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->sentence-transformers) (3.2.0)\n", - "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch) (1.3.0)\n", - "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.10/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit) (5.0.0)\n", - "Requirement already 
satisfied: attrs>=17.4.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit) (23.1.0)\n", - "Requirement already satisfied: pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit) (0.19.3)\n", - "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit) (0.1.2)\n", - "Requirement already satisfied: tzdata in /usr/local/lib/python3.10/dist-packages (from pytz-deprecation-shim->tzlocal<5,>=1.1->streamlit) (2023.3)\n" - ] - } - ], + "outputs": [], "source": [ "!pip install sentence-transformers eland elasticsearch transformers torch tqdm Pillow streamlit" ] }, - { - "cell_type": "markdown", - "source": [ - "### Upload NLP model for querying\n", - "Using an `eland` library you will import NLP CLIP model. The model will tran\n", - "sfer your search query into vector which will be used for the search over the set of images stored in Elasticsearch.\n", - "\n", - "The model used is [clip-ViT-B-32-multilingual-v1](https://huggingface.co/sentence-transformers/clip-ViT-B-32-multilingual-v1) because the image embeddings are also generated by the CLIP model.\n", - "\n", - "How to get cloud id? Go to [ESS cloud](https://cloud.elastic.co/logout?redirectTo=%2Fhome&reason=unauthorised) and `On the deployment overview page, copy down the Cloud ID.`\n", - "\n", - "The authentication is using api key (`--es-api-key`). 
Learn how to generate [API key](https://www.elastic.co/guide/en/kibana/current/api-keys.html#create-api-key).\n", - "```\n", - "$ eland_import_hub_model --cloud-id $CLOUD_ID \\\n", - " --hub-model-id sentence-transformers/clip-ViT-B-32-multilingual-v1 \\\n", - " --task-type text_embedding --es-api-key $API_KEY --start\n", - "```" - ], - "metadata": { - "id": "eIV5lAnVt9L7" - } - }, { "cell_type": "code", - "source": [ - "API_KEY = \"\"\n", - "CLOUD_ID = \"\"\n", - "!eland_import_hub_model --cloud-id $CLOUD_ID --hub-model-id sentence-transformers/clip-ViT-B-32-multilingual-v1 --task-type text_embedding --es-api-key API_KEY --start" - ], + "execution_count": 3, "metadata": { - "id": "tVhL9jBnuAAQ" + "id": "I0pRCbYMuMVn" }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "### Import Python libraries" - ], - "metadata": { - "id": "hVxWnFflUCZv" - } - }, - { - "cell_type": "code", + "outputs": [], "source": [ "from elasticsearch import Elasticsearch\n", "from elasticsearch.helpers import parallel_bulk\n", @@ -199,7 +62,6 @@ "import os\n", "import sys\n", "\n", - "# import shutil\n", "import zipfile\n", "from tqdm.auto import tqdm\n", "import pandas as pd\n", @@ -210,88 +72,139 @@ "# import urllib.error\n", "import json\n", "from getpass import getpass" - ], + ] + }, + { + "cell_type": "markdown", "metadata": { - "id": "I0pRCbYMuMVn" + "id": "eIV5lAnVt9L7" }, - "execution_count": 17, - "outputs": [] + "source": [ + "### Upload NLP model for querying\n", + "\n", + "Using the [`eland_import_hub_model`](https://www.elastic.co/guide/en/elasticsearch/client/eland/current/machine-learning.html#ml-nlp-pytorch) script, download and install the [clip-ViT-B-32-multilingual-v1](https://huggingface.co/sentence-transformers/clip-ViT-B-32-multilingual-v1) model, will transfer your search query into vector which will be used for the search over the set of images stored in Elasticsearch.\n", + "\n", + "To get your cloud id, go to [Elastic 
cloud](https://cloud.elastic.co) and `On the deployment overview page, copy down the Cloud ID.`\n", + "\n", + "To authenticate your request, You could use [API key](https://www.elastic.co/guide/en/kibana/current/api-keys.html#create-api-key). Alternatively, you can use your cloud deployment username and password." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# https://www.elastic.co/search-labs/tutorials/install-elasticsearch/elastic-cloud#finding-your-cloud-id\n", + "ELASTIC_CLOUD_ID = getpass(\"Elastic Cloud ID: \")\n", + "\n", + "# https://www.elastic.co/search-labs/tutorials/install-elasticsearch/elastic-cloud#creating-an-api-key\n", + "ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tVhL9jBnuAAQ" + }, + "outputs": [], + "source": [ + "!eland_import_hub_model --cloud-id $ELASTIC_CLOUD_ID --hub-model-id sentence-transformers/clip-ViT-B-32-multilingual-v1 --task-type text_embedding --es-api-key $ELASTIC_API_KEY --start --clear-previous" + ] }, { "cell_type": "markdown", + "metadata": { + "id": "Klv3rywdUJBN" + }, "source": [ "### Connect to Elasticsearch cluster\n", "Use your own cluster details `ELASTIC_CLOUD_ID`, `API_KEY`." 
- ], - "metadata": { - "id": "Klv3rywdUJBN" - } + ] }, { "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "YwN8RmFY3FQI", + "outputId": "d0d0e31e-2ad2-46fe-ef8c-8c8bce7e1c48" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "ObjectApiResponse({'name': 'instance-0000000001', 'cluster_name': 'a72482be54904952ba46d53c3def7740', 'cluster_uuid': 'g8BE52TtT32pGBbRzP_oKA', 'version': {'number': '8.12.2', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '48a287ab9497e852de30327444b0809e55d46466', 'build_date': '2024-02-19T10:04:32.774273190Z', 'build_snapshot': False, 'lucene_version': '9.9.2', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'})" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# ESS Cloud connection definition using an API_KEY\n", - "ELASTIC_CLOUD_ID = getpass(\"Elastic Cloud ID: \")\n", - "API_KEY = getpass(\"Elastic API key: \")\n", - "\n", - "# ELASTIC_CLOUD_USER = \"elastic\"\n", - "# CLOUD_PASSWORD = getpass(\"Elastic Password\")\n", - "\n", "es = Elasticsearch(\n", " cloud_id=ELASTIC_CLOUD_ID,\n", " # basic_auth=(ELASTIC_CLOUD_USER, ELASTIC_CLOUD_PASSWORD),\n", - " api_key=API_KEY,\n", + " api_key=ELASTIC_API_KEY,\n", " request_timeout=600,\n", ")\n", "\n", "es.info() # should return cluster info" - ], + ] + }, + { + "cell_type": "markdown", "metadata": { - "id": "YwN8RmFY3FQI", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "d0d0e31e-2ad2-46fe-ef8c-8c8bce7e1c48" + "id": "IW-GIlH2OxB4" + }, + "source": [ + "### Create Index and mappings for Images\n", + "Befor you can index documents into Elasticsearch, you need to create an Index with correct mappings." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "xAkc1OVcOxy3" }, - "execution_count": 19, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Elastic Cloud ID: ··········\n", - "Elastic API key: ··········\n" + "Creating index images\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/b0/0h5fbhnd0tz563nl779m3jv80000gn/T/ipykernel_57417/1485784368.py:45: DeprecationWarning: Passing transport options in the API method is deprecated. Use 'Elasticsearch.options()' instead.\n", + " es.indices.create(\n" ] }, { - "output_type": "execute_result", "data": { "text/plain": [ - "ObjectApiResponse({'name': 'instance-0000000000', 'cluster_name': 'a597bbe1e0d047c494e7d4015f67ef37', 'cluster_uuid': 'EnT0vwwSSZeAahPw3Vhsuw', 'version': {'number': '8.8.2', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '98e1271edf932a480e4262a471281f1ee295ce6b', 'build_date': '2023-06-26T05:16:16.196344851Z', 'build_snapshot': False, 'lucene_version': '9.6.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'})" + "ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'images'})" ] }, + "execution_count": 10, "metadata": {}, - "execution_count": 19 + "output_type": "execute_result" } - ] - }, - { - "cell_type": "markdown", - "source": [ - "### Create Index and mappings for Images\n", - "Befor you can index documents into Elasticsearch, you need to create an Index with correct mappings." 
], - "metadata": { - "id": "IW-GIlH2OxB4" - } - }, - { - "cell_type": "code", "source": [ "# Destination Index name\n", "INDEX_NAME = \"images\"\n", - "# If you want to delete previous version of the Index\n", - "DELETE_INDEX = False\n", + "\n", + "# flag to check if index has to be deleted before creating\n", + "SHOULD_DELETE_INDEX = True\n", "\n", "INDEX_MAPPING = {\n", " \"properties\": {\n", @@ -324,28 +237,23 @@ " }\n", "}\n", "\n", - "if DELETE_INDEX:\n", + "# check if we want to delete index before creating the index\n", + "if SHOULD_DELETE_INDEX:\n", " if es.indices.exists(index=INDEX_NAME):\n", " print(\"Deleting existing %s\" % INDEX_NAME)\n", " es.indices.delete(index=INDEX_NAME, ignore=[400, 404])\n", "\n", - "if not es.indices.exists(index=INDEX_NAME):\n", - " print(\"Creating index %s\" % INDEX_NAME)\n", - " es.indices.create(\n", - " index=INDEX_NAME,\n", - " mappings=INDEX_MAPPING,\n", - " settings=INDEX_SETTINGS,\n", - " ignore=[400, 404],\n", - " )" - ], - "metadata": { - "id": "xAkc1OVcOxy3" - }, - "execution_count": null, - "outputs": [] + "print(\"Creating index %s\" % INDEX_NAME)\n", + "es.indices.create(\n", + " index=INDEX_NAME, mappings=INDEX_MAPPING, settings=INDEX_SETTINGS, ignore=[400, 404]\n", + ")" + ] }, { "cell_type": "markdown", + "metadata": { + "id": "NKE-j0kPUMn_" + }, "source": [ "### Get image dataset and embeddings\n", "Download:\n", @@ -353,17 +261,11 @@ "- The [Image embeddings](https://github.com/radoondas/flask-elastic-nlp/blob/main/embeddings/blogs/blogs-no-embeddings.json.zip) are pre-generated using CLIP model\n", "\n", "Then unzip both files." 
- ], - "metadata": { - "id": "NKE-j0kPUMn_" - } + ] }, { "cell_type": "code", - "source": [ - "!wget https://unsplash.com/data/lite/1.2.0 -O data/unsplash-research-dataset-lite-1.2.0.zip\n", - "!wget https://raw.githubusercontent.com/radoondas/flask-elastic-nlp/main/embeddings/images/image-embeddings.json.zip -P data" - ], + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -371,49 +273,27 @@ "id": "zFGaPDRR5mqT", "outputId": "0114cdd6-a714-41ab-9b46-3013bd36698a" }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--2023-07-25 14:28:32-- https://unsplash.com/data/lite/1.2.0\n", - "Resolving unsplash.com (unsplash.com)... 151.101.65.181, 151.101.1.181, 151.101.129.181, ...\n", - "Connecting to unsplash.com (unsplash.com)|151.101.65.181|:443... connected.\n", - "HTTP request sent, awaiting response... 302 Found\n", - "Location: https://unsplash-datasets.s3.amazonaws.com/lite/1.2.0/unsplash-research-dataset-lite-1.2.0.zip [following]\n", - "--2023-07-25 14:28:32-- https://unsplash-datasets.s3.amazonaws.com/lite/1.2.0/unsplash-research-dataset-lite-1.2.0.zip\n", - "Resolving unsplash-datasets.s3.amazonaws.com (unsplash-datasets.s3.amazonaws.com)... 52.217.102.84, 3.5.25.253, 52.217.96.188, ...\n", - "Connecting to unsplash-datasets.s3.amazonaws.com (unsplash-datasets.s3.amazonaws.com)|52.217.102.84|:443... connected.\n", - "HTTP request sent, awaiting response... 
200 OK\n", - "Length: 632351052 (603M) [application/zip]\n", - "Saving to: ‘data/unsplash-research-dataset-lite-1.2.0.zip’\n", - "\n", - "data/unsplash-resea 100%[===================>] 603.06M 14.1MB/s in 42s \n", - "\n", - "2023-07-25 14:29:16 (14.2 MB/s) - ‘data/unsplash-research-dataset-lite-1.2.0.zip’ saved [632351052/632351052]\n", - "\n", - "--2023-07-25 14:29:16-- https://raw.githubusercontent.com/radoondas/flask-elastic-nlp/main/embeddings/images/image-embeddings.json.zip\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 94256217 (90M) [application/zip]\n", - "Saving to: ‘data/image-embeddings.json.zip.1’\n", - "\n", - "image-embeddings.js 100%[===================>] 89.89M 164MB/s in 0.5s \n", - "\n", - "2023-07-25 14:29:16 (164 MB/s) - ‘data/image-embeddings.json.zip.1’ saved [94256217/94256217]\n", - "\n" - ] - } + "outputs": [], + "source": [ + "!curl -L https://unsplash.com/data/lite/1.2.0 -o unsplash-research-dataset-lite-1.2.0.zip\n", + "!curl -L https://raw.githubusercontent.com/radoondas/flask-elastic-nlp/main/embeddings/images/image-embeddings.json.zip -o image-embeddings.json.zip" ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "MBh4AQ8i7C0-", + "outputId": "17a50b7f-f052-4b72-daa8-0e8fc630326f" + }, + "outputs": [], "source": [ "# Unzip downloaded files\n", - "UNSPLASH_ZIP_FILE = \"data/unsplash-research-dataset-lite-1.2.0.zip\"\n", - "EMBEDDINGS_ZIP_FILE = \"data/image-embeddings.json.zip\"\n", + "UNSPLASH_ZIP_FILE = \"unsplash-research-dataset-lite-1.2.0.zip\"\n", + "EMBEDDINGS_ZIP_FILE = \"image-embeddings.json.zip\"\n", "\n", "with zipfile.ZipFile(UNSPLASH_ZIP_FILE, \"r\") as zip_ref:\n", " 
print(\"Extracting file \", UNSPLASH_ZIP_FILE, \".\")\n", @@ -422,66 +302,54 @@ "with zipfile.ZipFile(EMBEDDINGS_ZIP_FILE, \"r\") as zip_ref:\n", " print(\"Extracting file \", EMBEDDINGS_ZIP_FILE, \".\")\n", " zip_ref.extractall(\"data/embeddings/\")" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "MBh4AQ8i7C0-", - "outputId": "17a50b7f-f052-4b72-daa8-0e8fc630326f" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Extracting file data/unsplash-research-dataset-lite-1.2.0.zip .\n", - "Extracting file data/image-embeddings.json.zip .\n" - ] - } ] }, { "cell_type": "markdown", - "source": [ - "# Image embeddings\n", - "We have now 2 options how to proceed.\n", - "1. Import all pregenerated image embeddings (~19k). This is faster option with a lot of images available in a short time.\n", - "2. Import a small subset of randomly choosen images to see the process of generating of image embeddings using external Clip model." - ], "metadata": { - "id": "p6H7QYctQQA7" - } - }, - { - "cell_type": "code", - "source": [ - "# define helper function\n", - "def gen_rows(df):\n", - " for doc in df.to_dict(orient=\"records\"):\n", - " yield doc" - ], - "metadata": { - "id": "03YvC-_JY9OE" + "id": "qhZRdUyAQd-s" }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", "source": [ - "## 1. Import all pregenerated image embeddings\n", - "This option lets you inport ~19k documents woth pregenenerated image embeddings with metadata.\n", + "# Import all pregenerated image embeddings\n", + "In this section you will import ~19k documents worth of pregenenerated image embeddings with metadata.\n", "\n", "The process downloads files with images information, merge them and index into Elasticsearch." 
- ], - "metadata": { - "id": "qhZRdUyAQd-s" - } + ] }, { "cell_type": "code", + "execution_count": 20, + "metadata": { + "id": "32xrbSUXTODQ" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Indexed 1000 documents\n", + "Indexed 2000 documents\n", + "Indexed 3000 documents\n", + "Indexed 4000 documents\n", + "Indexed 5000 documents\n", + "Indexed 6000 documents\n", + "Indexed 7000 documents\n", + "Indexed 8000 documents\n", + "Indexed 9000 documents\n", + "Indexed 10000 documents\n", + "Indexed 11000 documents\n", + "Indexed 12000 documents\n", + "Indexed 13000 documents\n", + "Indexed 14000 documents\n", + "Indexed 15000 documents\n", + "Indexed 16000 documents\n", + "Indexed 17000 documents\n", + "Indexed 18000 documents\n", + "Indexed 19000 documents\n", + "Indexed 19833 image embeddings documents\n" + ] + } + ], "source": [ "df_unsplash = pd.read_csv(\"data/unsplash/\" + \"photos.tsv000\", sep=\"\\t\", header=0)\n", "\n", @@ -536,134 +404,19 @@ "\n", "print(\"Indexed %s image embeddings documents\" % str(count), flush=True)\n", "sys.stdout.flush()" - ], - "metadata": { - "id": "32xrbSUXTODQ" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "## 2. Import small number of random\n", - "In this part you will randomly choose small number of images and then generate image embeddings. The script will then index documents into Elasticserach." 
- ], - "metadata": { - "id": "xypgh4lFQmkc" - } - }, - { - "cell_type": "code", - "source": [ - "NUMBER_OF_IMAGES = 20\n", - "INDEX_NAME = \"images-test\"\n", - "\n", - "df_unsplash = pd.read_csv(\"data/unsplash/\" + \"photos.tsv000\", sep=\"\\t\", header=0)\n", - "\n", - "## stat fix\n", - "# follwing 8 lines are fix for inconsistent/incorrect data\n", - "df_unsplash[\"photo_description\"].fillna(\"\", inplace=True)\n", - "df_unsplash[\"ai_description\"].fillna(\"\", inplace=True)\n", - "df_unsplash[\"photographer_first_name\"].fillna(\"\", inplace=True)\n", - "df_unsplash[\"photographer_last_name\"].fillna(\"\", inplace=True)\n", - "df_unsplash[\"photographer_username\"].fillna(\"\", inplace=True)\n", - "df_unsplash[\"exif_camera_make\"].fillna(\"\", inplace=True)\n", - "df_unsplash[\"exif_camera_model\"].fillna(\"\", inplace=True)\n", - "df_unsplash[\"exif_iso\"].fillna(0, inplace=True)\n", - "## end of fix\n", - "\n", - "df_unsplash_subset = df_unsplash[\n", - " [\n", - " \"photo_id\",\n", - " \"photo_url\",\n", - " \"photo_image_url\",\n", - " \"photo_description\",\n", - " \"ai_description\",\n", - " \"photographer_first_name\",\n", - " \"photographer_last_name\",\n", - " \"photographer_username\",\n", - " \"exif_camera_make\",\n", - " \"exif_camera_model\",\n", - " \"exif_iso\",\n", - " ]\n", - "]\n", - "\n", - "df_random_subset = df_unsplash_subset.sample(n=NUMBER_OF_IMAGES, replace=False)\n", - "df_random_subset = df_random_subset.reset_index()\n", - "\n", - "# Load model CLIP\n", - "img_model = SentenceTransformer(\"clip-ViT-B-32\")\n", - "\n", - "# new list of image documents for indexing into ES\n", - "lst = []\n", - "if not os.path.exists(\"data/images\"):\n", - " os.mkdir(\"data/images\")\n", - "\n", - "for index, row in df_random_subset.iterrows():\n", - " # open image from url\n", - " img_path = \"data/images/\" + row[\"photo_id\"]\n", - " try:\n", - " urllib.request.urlretrieve(row[\"photo_image_url\"], img_path)\n", - " print(row[\"photo_id\"] + 
\" \" + row[\"photo_url\"])\n", - " except urllib.error.HTTPError as err:\n", - " if err.code == 404:\n", - " print(\"404 error: Image not found at {}\".format(row[\"photo_image_url\"]))\n", - " else:\n", - " raise\n", - "\n", - " img = Image.open(img_path)\n", - " # create doc\n", - " doc = {}\n", - " embedding = img_model.encode(img)\n", - " doc[\"photo_id\"] = row[\"photo_id\"]\n", - " doc[\"image_embedding\"] = embedding.tolist()\n", - " lst.append(doc)\n", - " # print(doc)\n", - "\n", - " # Image cleanup.\n", - " # If file exists, delete it.\n", - " if os.path.exists(img_path):\n", - " os.remove(img_path)\n", - "\n", - "# read all pregenerated embeddings\n", - "df_embeddings = pd.read_json(\"data/embeddings/\" + \"image-embeddings.json\", lines=True)\n", - "\n", - "df_merged = pd.merge(df_random_subset, pd.DataFrame(lst), on=\"photo_id\", how=\"inner\")\n", - "# print(df_merged)\n", - "\n", - "count = 0\n", - "for success, info in parallel_bulk(\n", - " client=es,\n", - " actions=gen_rows(df_merged),\n", - " thread_count=5,\n", - " chunk_size=10,\n", - " index=INDEX_NAME,\n", - "):\n", - " if success:\n", - " count += 1\n", - " if count % 10 == 0:\n", - " print(\"Indexed %s documents\" % str(count), flush=True)\n", - " sys.stdout.flush()\n", - " else:\n", - " print(\"Doc failed\", info)\n", - "\n", - "print(\"Indexed %s image embeddings documents\" % str(count), flush=True)\n", - "sys.stdout.flush()" - ], "metadata": { - "id": "r_txQjP2RKnr" + "id": "-_i2CIpSz9vw" }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", "source": [ - "### Query the image dataset\n", - "The next step is to run a query to search for images. The example query searches for `\"model_text\": \"Valentine day flowers\"` using the model we uploaded to Elasticsearch `sentence-transformers__clip-vit-b-32-multilingual-v1`.\n", + "# Query the image dataset\n", + "The next step is to run a query to search for images. 
The example query searches for `\"model_text\": \"Valentine day flowers\"` using the model `sentence-transformers__clip-vit-b-32-multilingual-v1` that we uploaded to Elasticsearch earlier.\n", + "\n", + "The process is carried out with a single query, even though internally it consists of two tasks. One is to transform your search text into a vector using the NLP model and the second task is to run the vector search over the image dataset.\n", + "\n", - "The process is one query even it internaly consists of two tasks. One is to tramsform your search text into a vector using the NLP model and the second task is to run the vector search over the image dataset.\n", "```\n", "POST images/_search\n", "{\n", @@ -687,17 +440,23 @@ "}\n", "```\n", "\n" - ], - "metadata": { - "id": "-_i2CIpSz9vw" - } + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 375 + }, + "id": "wdicpvRlzmXG", + "outputId": "00550041-0aed-4f51-ccd3-18eb705ff7ed" + }, + "outputs": [], "source": [ "# Search queary\n", "WHAT_ARE_YOU_LOOKING_FOR = \"Valentine day flowers\"\n", - "INDEX_IM_EMBED = \"images\"\n", "\n", "source_fields = [\n", "    \"photo_description\",\n", @@ -721,9 +480,7 @@ "    },\n", "}\n", "\n", - "response = es.search(\n", - "    index=INDEX_IM_EMBED, fields=source_fields, knn=query, source=False\n", - ")\n", + "response = es.search(index=INDEX_NAME, fields=source_fields, knn=query, source=False)\n", "\n", "print(response.body)\n", "\n", @@ -753,319 +510,13 @@ "    \"fields.photo_url\",\n", "    ]\n", "]" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 375 - }, - "id": "wdicpvRlzmXG", - "outputId": "00550041-0aed-4f51-ccd3-18eb705ff7ed" - }, - "execution_count": 35, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "{'took': 114, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 5, 
'relation': 'eq'}, 'max_score': 0.646751, 'hits': [{'_index': 'images', '_id': 'nK5Fh4kBLg4Kd5ySLbKC', '_score': 0.646751, 'fields': {'photo_image_url': ['https://images.unsplash.com/photo-1587404787163-d03a28fcc1f0'], 'photo_id': ['gQFZxLe3m4g'], 'photographer_first_name': ['Vadim'], 'photo_description': ['instagram.com/vadimsadovski'], 'photographer_last_name': ['Sadovski'], 'photo_url': ['https://unsplash.com/photos/gQFZxLe3m4g'], 'photographer_username': ['vadimsadovski'], 'ai_description': ['']}}, {'_index': 'images', '_id': 'Xa5Eh4kBLg4Kd5yS84Qf', '_score': 0.64675057, 'fields': {'photo_image_url': ['https://images.unsplash.com/photo-1574259148543-dd376a61339f'], 'photo_id': ['g5Mhx29yp-A'], 'photographer_first_name': ['Erin'], 'photo_description': ['Cute but grumpy cat in the Austrian mountains'], 'photographer_last_name': ['East'], 'photo_url': ['https://unsplash.com/photos/g5Mhx29yp-A'], 'photographer_username': ['mserineast'], 'ai_description': ['brown Persian cat on white bench']}}, {'_index': 'images', '_id': '265Eh4kBLg4Kd5yS84Uf', '_score': 0.64244866, 'fields': {'photo_image_url': ['https://images.unsplash.com/photo-1565772101068-dec21f7b36eb'], 'photo_id': ['9KZ0PGNCxNE'], 'photographer_first_name': ['Perchek'], 'photo_description': [''], 'photographer_last_name': ['Industrie'], 'photo_url': ['https://unsplash.com/photos/9KZ0PGNCxNE'], 'photographer_username': ['perchek_industrie'], 'ai_description': ['siamese cat']}}, {'_index': 'images', '_id': 'xq5Fh4kBLg4Kd5ySEpuC', '_score': 0.64216036, 'fields': {'photo_image_url': ['https://images.unsplash.com/photo-1589994205353-325f40210d02'], 'photo_id': ['VOZQkkKXvY4'], 'photographer_first_name': ['Andrey'], 'photo_description': [''], 'photographer_last_name': ['Svistunov'], 'photo_url': ['https://unsplash.com/photos/VOZQkkKXvY4'], 'photographer_username': ['svistal13'], 'ai_description': ['orange tabby cat on ground covered with snow during daytime']}}, {'_index': 'images', '_id': 'WK5Eh4kBLg4Kd5yS5XcD', 
'_score': 0.64185303, 'fields': {'photo_image_url': ['https://images.unsplash.com/photo-1556820161-b605d166fce1'], 'photo_id': ['wmz8y6L6c_k'], 'photographer_first_name': ['Phillip'], 'photo_description': [''], 'photographer_last_name': ['Suitcases'], 'photo_url': ['https://unsplash.com/photos/wmz8y6L6c_k'], 'photographer_username': ['nillait'], 'ai_description': ['brown and black kitten close-up photography']}}]}}\n" - ] - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " _id _score fields.photo_id \\\n", - "0 nK5Fh4kBLg4Kd5ySLbKC 0.646751 [gQFZxLe3m4g] \n", - "1 Xa5Eh4kBLg4Kd5yS84Qf 0.646751 [g5Mhx29yp-A] \n", - "2 265Eh4kBLg4Kd5yS84Uf 0.642449 [9KZ0PGNCxNE] \n", - "3 xq5Fh4kBLg4Kd5ySEpuC 0.642160 [VOZQkkKXvY4] \n", - "4 WK5Eh4kBLg4Kd5yS5XcD 0.641853 [wmz8y6L6c_k] \n", - "\n", - " fields.photo_image_url \\\n", - "0 [https://images.unsplash.com/photo-15874047871... \n", - "1 [https://images.unsplash.com/photo-15742591485... \n", - "2 [https://images.unsplash.com/photo-15657721010... \n", - "3 [https://images.unsplash.com/photo-15899942053... \n", - "4 [https://images.unsplash.com/photo-1556820161-... \n", - "\n", - " fields.photo_description \\\n", - "0 [instagram.com/vadimsadovski] \n", - "1 [Cute but grumpy cat in the Austrian mountains] \n", - "2 [] \n", - "3 [] \n", - "4 [] \n", - "\n", - " fields.photographer_first_name fields.photographer_last_name \\\n", - "0 [Vadim] [Sadovski] \n", - "1 [Erin] [East] \n", - "2 [Perchek] [Industrie] \n", - "3 [Andrey] [Svistunov] \n", - "4 [Phillip] [Suitcases] \n", - "\n", - " fields.ai_description \\\n", - "0 [] \n", - "1 [brown Persian cat on white bench] \n", - "2 [siamese cat] \n", - "3 [orange tabby cat on ground covered with snow ... 
\n", - "4 [brown and black kitten close-up photography] \n", - "\n", - " fields.photo_url \n", - "0 [https://unsplash.com/photos/gQFZxLe3m4g] \n", - "1 [https://unsplash.com/photos/g5Mhx29yp-A] \n", - "2 [https://unsplash.com/photos/9KZ0PGNCxNE] \n", - "3 [https://unsplash.com/photos/VOZQkkKXvY4] \n", - "4 [https://unsplash.com/photos/wmz8y6L6c_k] " - ], - "text/html": [ - "\n", - "\n", - "
\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
_id_scorefields.photo_idfields.photo_image_urlfields.photo_descriptionfields.photographer_first_namefields.photographer_last_namefields.ai_descriptionfields.photo_url
0nK5Fh4kBLg4Kd5ySLbKC0.646751[gQFZxLe3m4g][https://images.unsplash.com/photo-15874047871...[instagram.com/vadimsadovski][Vadim][Sadovski][][https://unsplash.com/photos/gQFZxLe3m4g]
1Xa5Eh4kBLg4Kd5yS84Qf0.646751[g5Mhx29yp-A][https://images.unsplash.com/photo-15742591485...[Cute but grumpy cat in the Austrian mountains][Erin][East][brown Persian cat on white bench][https://unsplash.com/photos/g5Mhx29yp-A]
2265Eh4kBLg4Kd5yS84Uf0.642449[9KZ0PGNCxNE][https://images.unsplash.com/photo-15657721010...[][Perchek][Industrie][siamese cat][https://unsplash.com/photos/9KZ0PGNCxNE]
3xq5Fh4kBLg4Kd5ySEpuC0.642160[VOZQkkKXvY4][https://images.unsplash.com/photo-15899942053...[][Andrey][Svistunov][orange tabby cat on ground covered with snow ...[https://unsplash.com/photos/VOZQkkKXvY4]
4WK5Eh4kBLg4Kd5yS5XcD0.641853[wmz8y6L6c_k][https://images.unsplash.com/photo-1556820161-...[][Phillip][Suitcases][brown and black kitten close-up photography][https://unsplash.com/photos/wmz8y6L6c_k]
\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - "
\n", - " \n", - "
\n", - "\n", - "\n", - "\n", - " \n", - "\n", - " \n", - " \n", - "\n", - " \n", - "
\n", - "
\n" - ] - }, - "metadata": {}, - "execution_count": 35 - } ] }, { "cell_type": "markdown", + "metadata": { + "id": "Ry62sfHFHFi9" + }, "source": [ "# [Optional] Simple streamlit UI\n", "In the following section, you will view the response in a simple UI for better visualisation.\n", @@ -1073,70 +524,52 @@ "The query in the previous step did write down a file response `json_data.json` for the UI to load and visualise.\n", "\n", "Follow the steps below to see the results in a table." - ], - "metadata": { - "id": "Ry62sfHFHFi9" - } + ] }, { "cell_type": "markdown", - "source": [ - "### Install tunnel library" - ], "metadata": { "id": "iUAbRqr8II-x" - } + }, + "source": [ + "### Install tunnel library" + ] }, { "cell_type": "code", - "source": [ - "!npm install localtunnel" - ], + "execution_count": null, "metadata": { - "id": "RGEmAt2DjtN7", "colab": { "base_uri": "https://localhost:8080/" }, + "id": "RGEmAt2DjtN7", "outputId": "f6c37d54-7e09-4e59-fc21-8a3db4fa840d" }, - "execution_count": 12, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\u001b[K\u001b[?25h\u001b[37;40mnpm\u001b[0m \u001b[0m\u001b[30;43mWARN\u001b[0m \u001b[0m\u001b[35msaveError\u001b[0m ENOENT: no such file or directory, open '/content/package.json'\n", - "\u001b[0m\u001b[37;40mnpm\u001b[0m \u001b[0m\u001b[34;40mnotice\u001b[0m\u001b[35m\u001b[0m created a lockfile as package-lock.json. 
You should commit this file.\n", - "\u001b[0m\u001b[37;40mnpm\u001b[0m \u001b[0m\u001b[30;43mWARN\u001b[0m \u001b[0m\u001b[35menoent\u001b[0m ENOENT: no such file or directory, open '/content/package.json'\n", - "\u001b[0m\u001b[37;40mnpm\u001b[0m \u001b[0m\u001b[30;43mWARN\u001b[0m\u001b[35m\u001b[0m content No description\n", - "\u001b[0m\u001b[37;40mnpm\u001b[0m \u001b[0m\u001b[30;43mWARN\u001b[0m\u001b[35m\u001b[0m content No repository field.\n", - "\u001b[0m\u001b[37;40mnpm\u001b[0m \u001b[0m\u001b[30;43mWARN\u001b[0m\u001b[35m\u001b[0m content No README data\n", - "\u001b[0m\u001b[37;40mnpm\u001b[0m \u001b[0m\u001b[30;43mWARN\u001b[0m\u001b[35m\u001b[0m content No license field.\n", - "\u001b[0m\n", - "\u001b[K\u001b[?25h+ localtunnel@2.0.2\n", - "added 22 packages from 22 contributors and audited 22 packages in 5.903s\n", - "\n", - "3 packages are looking for funding\n", - " run `npm fund` for details\n", - "\n", - "found \u001b[92m0\u001b[0m vulnerabilities\n", - "\n", - "\u001b[K\u001b[?25h" - ] - } + "outputs": [], + "source": [ + "!npm install localtunnel" ] }, { "cell_type": "markdown", - "source": [ - "### Create application" - ], "metadata": { "id": "KUAfucnYITka" - } + }, + "source": [ + "### Create application" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "9Wb7GOWMXFnF", + "outputId": "6db23ef3-b25e-4f80-a3cb-6d08c1c78c16" + }, + "outputs": [], "source": [ "%%writefile app.py\n", "\n", @@ -1176,109 +609,107 @@ "\n", "\n", "if __name__ == \"__main__\":\n", - " main()\n", - "\n" - ], - "metadata": { - "id": "9Wb7GOWMXFnF", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "6db23ef3-b25e-4f80-a3cb-6d08c1c78c16" - }, - "execution_count": 36, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Overwriting app.py\n" - ] - } + " main()" ] }, { "cell_type": "markdown", + "metadata": { + "id": "CjDhvbGhHuiz" + }, 
"source": [ "### Run app\n", "Run the application and check your IP for the tunneling" - ], - "metadata": { - "id": "CjDhvbGhHuiz" - } + ] }, { "cell_type": "code", - "source": [ - "!streamlit run app.py &>/content/logs.txt & curl ipv4.icanhazip.com" - ], + "execution_count": null, "metadata": { - "id": "851CeYi8jvuF", "colab": { "base_uri": "https://localhost:8080/" }, + "id": "851CeYi8jvuF", "outputId": "46a64023-e990-4900-f482-5558237f08cc" }, - "execution_count": 37, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "34.138.156.22\n" - ] - } + "outputs": [], + "source": [ + "!streamlit run app.py &>/content/logs.txt & curl ipv4.icanhazip.com" ] }, { "cell_type": "markdown", + "metadata": { + "id": "4OuSLFHyHy5M" + }, "source": [ "### Create the tunnel\n", "Run the tunnel and use the link below to connect to the tunnel.\n", "\n", "Use the IP from the previous step to connect to the application" - ], - "metadata": { - "id": "4OuSLFHyHy5M" - } + ] }, { "cell_type": "code", - "source": [ - "!npx localtunnel --port 8501" - ], + "execution_count": 38, "metadata": { - "id": "inF7ceBmjyE3", "colab": { "base_uri": "https://localhost:8080/" }, + "id": "inF7ceBmjyE3", "outputId": "559ce180-3f0f-4475-c9a9-46dc91389276" }, - "execution_count": 38, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "\u001b[K\u001b[?25hnpx: installed 22 in 2.186s\n", "your url is: https://nine-facts-act.loca.lt\n", "^C\n" ] } + ], + "source": [ + "!npx localtunnel --port 8501" ] }, { "cell_type": "markdown", + "metadata": { + "id": "SbxbVzvQ7caR" + }, "source": [ "# Resources\n", "\n", "Blog: https://www.elastic.co/blog/implement-image-similarity-search-elastic\n", "\n", "GH : https://github.com/radoondas/flask-elastic-image-search\n" - ], - "metadata": { - "id": "SbxbVzvQ7caR" - } + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + 
"language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" } - ] -} \ No newline at end of file + }, + "nbformat": 4, + "nbformat_minor": 4 +}