From 4b426540e7665a9cfe4919c12155a9d5f475e5cc Mon Sep 17 00:00:00 2001 From: "Chaunte W. Lacewell" Date: Mon, 27 Jan 2025 12:21:23 -0500 Subject: [PATCH] Update faiss version in both Dockerfiles and any required API updates (#251) * Update faiss version in both Dockerfiles * update Faiss version to latest and add IP metric to Flat HNSW * Automated updates: Format and/or coverage --------- Co-authored-by: s-gobriel Co-authored-by: sys_vdms --- .../coverage/cpp.develop.coverage_report.txt | 4 +- .../coverage/cpp.develop.coverage_value.txt | 2 +- .github/scripts/Dockerfile.checkin | 2 +- docker/base/Dockerfile | 4 +- src/vcl/FaissDescriptorSet.cc | 3 ++ tests/unit_tests/DescriptorSetAdd_test.cc | 45 +++++++++++++++++++ tests/unit_tests/helpers.cc | 31 +++++++++++++ tests/unit_tests/helpers.h | 5 +++ 8 files changed, 90 insertions(+), 6 deletions(-) diff --git a/.github/coverage/cpp.develop.coverage_report.txt b/.github/coverage/cpp.develop.coverage_report.txt index 689716ee..da620eba 100644 --- a/.github/coverage/cpp.develop.coverage_report.txt +++ b/.github/coverage/cpp.develop.coverage_report.txt @@ -34,7 +34,7 @@ src/vcl/CustomVCL.cc 51 22 43% 55,57-58,60-63 src/vcl/DescriptorSet.cc 209 155 74% 65,69-70,93-94,115-116,133,135,137,190-193,196,221-222,224-225,228-231,240-244,256,268,319-320,323,325-328,331,345-346,348-350,354-356,358,365-367,369-370,373-374 src/vcl/DescriptorSetData.cc 55 47 85% 48,58,64,67,114,116-118 src/vcl/Exception.cc 7 6 85% 38 -src/vcl/FaissDescriptorSet.cc 206 177 85% 83,115-116,132,167,187-188,204-205,224-225,238-239,245,258-259,261,272-273,279,303-304,306-307,309,372-373,379,397 +src/vcl/FaissDescriptorSet.cc 209 177 84% 83,115-116,132,167,187-188,204-205,224-225,238-239,245,258-259,261,272-273,279,303-304,306-307,309,372-373,379,394-396,400 src/vcl/FlinngDescriptorSet.cc 149 109 73% 60-66,89,109-111,113-114,118-121,124,126,128,130,132,134-137,140-141,143-144,170-171,176-177,182,206,208,228,248,279 src/vcl/Image.cc 910 689 75% 
62,73-74,76-78,81-84,86,92,101,122-123,125,132-133,135,147,165,170,193,196-199,223,246,249-252,264,273,276-279,291,323,326-329,341,347,349-352,360-362,369,393-396,415,417,425,427,432,436,441,445,459,462,467-468,471-472,474,490,500,513,531,553-556,594,605-606,608,615,619,624,627-630,658-660,712,757-758,809,838-842,844-850,852,854-855,896,899-900,939-940,944-945,966,985-986,988,1028-1030,1032-1036,1038-1042,1044-1048,1050-1054,1056-1060,1062-1065,1088,1109,1128-1136,1147-1148,1167-1186,1198-1199,1207,1218,1220-1222,1224-1226,1228,1242,1246-1247,1249,1254-1255,1257,1278,1282,1285,1292,1307,1313,1322,1336,1361,1379,1462,1481 src/vcl/KeyFrame.cc 303 244 80% 58,62,86,90,95,97,102,105-107,109-111,113,119,139,148,154,172,186,190,216,220,224,235,239,249,255,274,284,288,307,315,341,345,347,359,367,369,394,396,405,430,442,449,465,469,478,483,495,500,507,514,518,525,541,547,557,563 @@ -57,5 +57,5 @@ utils/src/comm/Exception.cc 6 0 0% 35-40 utils/src/stats/SystemStats.cc 250 249 99% 453 utils/src/timers/TimerMap.cc 82 75 91% 126,151,153,155-158 ------------------------------------------------------------------------------ -TOTAL 10243 6572 64% +TOTAL 10246 6572 64% ------------------------------------------------------------------------------ diff --git a/.github/coverage/cpp.develop.coverage_value.txt b/.github/coverage/cpp.develop.coverage_value.txt index 2b9c7c96..34d8017c 100644 --- a/.github/coverage/cpp.develop.coverage_value.txt +++ b/.github/coverage/cpp.develop.coverage_value.txt @@ -1 +1 @@ -64.1609 +64.1421 diff --git a/.github/scripts/Dockerfile.checkin b/.github/scripts/Dockerfile.checkin index 6240755c..dd508f04 100644 --- a/.github/scripts/Dockerfile.checkin +++ b/.github/scripts/Dockerfile.checkin @@ -69,7 +69,7 @@ WORKDIR /dependencies ENV AUTOCONF_VERSION="2.71" \ AWS_SDK_VERSION="1.11.336" \ CMAKE_VERSION="v3.28.5" \ - FAISS_VERSION="v1.7.4" \ + FAISS_VERSION="v1.9.0" \ LIBEDIT_VERSION="20230828-3.1" \ OPENCV_VERSION="4.9.0" \ PEG_VERSION="0.1.19" \ diff 
--git a/docker/base/Dockerfile b/docker/base/Dockerfile index 30b5c796..aa83577d 100644 --- a/docker/base/Dockerfile +++ b/docker/base/Dockerfile @@ -69,7 +69,7 @@ WORKDIR /dependencies ENV AUTOCONF_VERSION="2.71" \ AWS_SDK_VERSION="1.11.336" \ CMAKE_VERSION="v3.28.5" \ - FAISS_VERSION="v1.7.4" \ + FAISS_VERSION="v1.9.0" \ LIBEDIT_VERSION="20230828-3.1" \ OPENCV_VERSION="4.9.0" \ PEG_VERSION="0.1.19" \ @@ -188,7 +188,7 @@ RUN apt-get update -y && apt-get upgrade -y && \ libssl-dev libswscale-dev libtbb-dev libtbbmalloc2 libtiff5-dev libzip-dev openjdk-17-jdk-headless \ procps && \ apt-get --purge remove -y python3.11 && apt-get autoremove -y && \ - apt-get clean && rm -rf /var/lib/apt/lists/* && \ + apt-get clean && rm -rf /var/lib/apt/lists/* && \ echo "/usr/local/lib" >> /etc/ld.so.conf.d/all-libs.conf && ldconfig && \ python3 -m pip install --no-cache-dir "numpy>=${NUMPY_MIN_VERSION},<2.0.0" "protobuf==4.${PROTOBUF_VERSION}" \ "coverage>=7.3.1" "cryptography>=42.0.7" diff --git a/src/vcl/FaissDescriptorSet.cc b/src/vcl/FaissDescriptorSet.cc index 4adb5e52..257b0d45 100644 --- a/src/vcl/FaissDescriptorSet.cc +++ b/src/vcl/FaissDescriptorSet.cc @@ -391,6 +391,9 @@ FaissHNSWFlatDescriptorSet::FaissHNSWFlatDescriptorSet( if (metric == L2) { _index = new faiss::IndexHNSWFlat(dim, hnsw_M, faiss::METRIC_L2); ((faiss::IndexHNSWFlat *)_index)->hnsw.efConstruction = 96; + } else if (metric == IP) { + _index = new faiss::IndexHNSWFlat(dim, hnsw_M, faiss::METRIC_INNER_PRODUCT); + ((faiss::IndexHNSWFlat *)_index)->hnsw.efConstruction = 96; } else { // only metric L2 is supported for HNSWFLAT for FAISS v1.7.4 // newer version of Faiss e.g. V1.8.0 supports I.P. 
metric for HNSW diff --git a/tests/unit_tests/DescriptorSetAdd_test.cc b/tests/unit_tests/DescriptorSetAdd_test.cc index a90ebac5..45e5aa55 100644 --- a/tests/unit_tests/DescriptorSetAdd_test.cc +++ b/tests/unit_tests/DescriptorSetAdd_test.cc @@ -343,6 +343,51 @@ TEST(Descriptors_Add, add_hnswflatl2_100d_2add) { delete[] xb; } +TEST(Descriptors_Add, add_hnswflatip_100d) { + + // test to add 100 descriptors of 100D each + // descriptors are created by varying an init with a cyclic value + // init init ... init (D times) + // init.11 init.11 ... init.11 (D times) + // ... + // init.nb-1 init.nb-1 ... init.nb-1 (D times) + // hence, the nearest neighbors of any query descriptor are the IDs next to + // the query ID + + int d = 100; + int nb = 100; // we are using 2 decimal points for I.P. nb is maximum 100 + float *xb = generate_desc_inner_product_increase(d, nb); + + std::string index_filename = "dbs/add_hnswflatip_100d"; + VCL::DescriptorSet index(index_filename, unsigned(d), VCL::FaissHNSWFlat); + + std::vector classes(nb); + + for (auto &str : classes) { + str = 1; + } + + index.add(xb, nb, classes); + + std::vector distances; + std::vector desc_ids; + index.search(xb, 1, 4, desc_ids, distances); + + int exp = 0; + for (auto &desc : desc_ids) { + EXPECT_EQ(desc, exp++); + } + + // Check that the distance of the k-th neighbor is always less than that of the (k+1)-th neighbor + for (int i = 0; i < distances.size() - 1; ++i) { + EXPECT_LT(distances[i], distances[i + 1]); + } + + index.store(); + + delete[] xb; +} + // Flinng Tests TEST(Descriptors_Add, add_flinngIP_100d) { diff --git a/tests/unit_tests/helpers.cc b/tests/unit_tests/helpers.cc index bb4ffdb3..0046207f 100644 --- a/tests/unit_tests/helpers.cc +++ b/tests/unit_tests/helpers.cc @@ -213,6 +213,37 @@ float *generate_desc_linear_increase(int d, int nb, float init) { return xb; } +// Functions to generate descriptors close in inner product metric +// for example for d=3, nb=5 and init=1.0f +// the generated vectors will be +// 
Vector 0: [1.00, 1.00, 1.00] +// Vector 1: [1.10, 1.10, 1.10] +// Vector 2: [1.21, 1.21, 1.21] +// Vector 3: [1.33, 1.33, 1.33] +// Vector 4: [1.46, 1.46, 1.46] +// in the IP domain, closest vector to descriptor K is (K+1) + +void generate_desc_inner_product_increase(int d, int nb, float *xb, + float init) { + float val = init; + for (int i = 0; i < nb; ++i) { + for (int j = 0; j < d; ++j) { + xb[i * d + j] = val; + } + val += 0.1f + (i * 0.01f); // Increase increment as index increases + } +} + +float *generate_desc_inner_product_increase(int d, int nb, float init) { + float *xb = new float[d * nb]; + generate_desc_inner_product_increase(d, nb, xb, init); + return xb; +} + +// Functions to create a distribution of descriptors +// resulting descriptors are clustered around cluster heads +// nearest neighbors when the cluster head is used for query should be +// the remaining descriptors within the cluster (with no strict order) void generate_desc_normal_cluster(int d, int nb, float *xb, float init, int cluster_size, float clusterhead_std, float cluster_std) { diff --git a/tests/unit_tests/helpers.h b/tests/unit_tests/helpers.h index 2dc55563..5dbbcc50 100644 --- a/tests/unit_tests/helpers.h +++ b/tests/unit_tests/helpers.h @@ -60,6 +60,11 @@ void generate_desc_linear_increase(int d, int nb, float *xb, float init = 0); float *generate_desc_linear_increase(int d, int nb, float init = 0); +void generate_desc_inner_product_increase(int d, int nb, float *xb, + float init = 1.0); + +float *generate_desc_inner_product_increase(int d, int nb, float init = 1.0); + void generate_desc_normal_cluster(int d, int nb, float *xb, float init = 0, int cluster_size = 5, float clusterhead_std = 1.0,