From faf95c988ad1fc80a838f039f10be6ee802336d9 Mon Sep 17 00:00:00 2001 From: Likith B Date: Fri, 2 Feb 2024 10:45:10 +0530 Subject: [PATCH] MB-59616: Adding vector_base64 field - Changed the quick editor, regular editor and import to incorporate a new field type called vector_base64. - It acts the same as vector field at this layer. - vectorFieldsExistWithinIndexMapping(...) now also detects vector_base64 - Added feature check for base64 by attaching it to xattrs Change-Id: I3c7a9839e0c09a8e2a06fc92dff076f6a88e2ed5 Reviewed-on: https://review.couchbase.org/c/cbft/+/204899 Reviewed-by: Abhi Dangeti Tested-by: Abhi Dangeti Well-Formed: Build Bot Well-Formed: Restriction Checker --- cmd/cbft/main.go | 2 +- ns_server_static/fts/fts.js | 12 +- ns_server_static/fts/fts_easy_field.js | 2 +- ns_server_static/fts/fts_easy_mapping.js | 10 ++ ns_server_static/fts/fts_easy_parse.js | 18 ++ ns_server_static/fts/fts_new_easy.html | 8 +- .../js/mapping/type-mapping.js | 2 +- .../partials/mapping/type-mapping-tree.html | 10 +- pindex_bleve.go | 76 +++++--- pindex_bleve_test.go | 162 ++++++++++++++++++ 10 files changed, 264 insertions(+), 38 deletions(-) diff --git a/cmd/cbft/main.go b/cmd/cbft/main.go index eec3c8b6..3ec4fb7a 100644 --- a/cmd/cbft/main.go +++ b/cmd/cbft/main.go @@ -343,7 +343,7 @@ func mainStart(cfg cbgt.Cfg, uuid string, tags []string, container string, "," + cbft.FeatureFileTransferRebalance + "," + cbft.FeatureGeoSpatial + cbft.FeatureVectorSearchSupport() + - "," + cbft.FeatureXattrs + "," + cbft.FeatureXattrsAndBase64 extrasMap["version-cbft.app"] = version extrasMap["version-cbft.lib"] = cbft.VERSION diff --git a/ns_server_static/fts/fts.js b/ns_server_static/fts/fts.js index ac7dbc93..b08bc7f8 100644 --- a/ns_server_static/fts/fts.js +++ b/ns_server_static/fts/fts.js @@ -1732,7 +1732,7 @@ function IndexNewCtrlFT_NS($scope, $http, $state, $stateParams, mapping.docvalues = value.fields[i].docvalues } - if (mapping.type == "vector") { + if (mapping.type == "vector" || mapping.type == "vector_base64") { if ("dims" in value.fields[i]) { mapping.dims = value.fields[i].dims } @@ -2522,6 +2522,16 @@ function IndexNewCtrlFTEasy_NS($scope, $http, $state, $stateParams, $scope.editField.similarity = "dot_product"; } $scope.editField.vector_index_optimized_for = "recall"; + } else if (valType === "vector_base64") { + $scope.editField.type = "vector_base64"; + var dims = parsedDoc.getDims(newRow); + $scope.editField.dims = dims; + if (dims && dims <= 4) { + $scope.editField.similarity = "l2_norm"; + } else { + $scope.editField.similarity = "dot_product"; + } + $scope.editField.vector_index_optimized_for = "recall"; } else { // default to text if we aren't sure $scope.editField.type = "text"; diff --git a/ns_server_static/fts/fts_easy_field.js b/ns_server_static/fts/fts_easy_field.js index 40972512..8311ac9e 100644 --- a/ns_server_static/fts/fts_easy_field.js +++ b/ns_server_static/fts/fts_easy_field.js @@ -56,7 +56,7 @@ function newEditField() { rv = "boolean "; } else if (this.type == "IP") { rv = "IP "; - } else if (this.type == "vector") { + } else if (this.type == "vector" || this.type == "vector_base64") { rv = "vector (dims: " + this.dims + "; metric: " + this.similarity + "; optimized for: " + this.vector_index_optimized_for + ")"; } diff --git a/ns_server_static/fts/fts_easy_mapping.js b/ns_server_static/fts/fts_easy_mapping.js index 4c1057c2..6a27addf 100644 --- a/ns_server_static/fts/fts_easy_mapping.js +++ b/ns_server_static/fts/fts_easy_mapping.js @@ -288,6 +288,12 @@ function newEasyMapping() { return fieldMapping; }; + var newVectorBase64Field = function(field) { + var fieldMapping = newVectorField(field) + fieldMapping.type = "vector_base64"; + return fieldMapping; + }; + var addDocumentMappingFromPathField = function(mapping, path, field) { // split dotted-path into path elements var pathElements = path.split('.'); @@ -332,6 +338,8 @@ function newEasyMapping() { mapping.fields.push(newIPField(field)); } else if (field.type == "vector") { mapping.fields.push(newVectorField(field)); + } else if (field.type == "vector_base64") { + mapping.fields.push(newVectorBase64Field(field)) } }; @@ -406,6 +414,8 @@ function newEasyMapping() { editField.type = "IP"; } else if (field.type == "vector") { editField.type = "vector"; + } else if (field.type == "vector_base64") { + editField.type = "vector_base64"; } // finish some common settings diff --git a/ns_server_static/fts/fts_easy_parse.js b/ns_server_static/fts/fts_easy_parse.js index 01fe44b3..aa4c29e5 100644 --- a/ns_server_static/fts/fts_easy_parse.js +++ b/ns_server_static/fts/fts_easy_parse.js @@ -145,6 +145,15 @@ function parseDocument(doc) { } } + // check whether the object is a vector_base64 + if (rowTypes[col] === "string") { + var vecLen = parseBase64Length(parsedObj[rowPaths[col]]) + if (vecLen > 2) { + dims[col] = vecLen + return "vector_base64" + } + } + return rowTypes[col]; }, getDocument: function () { @@ -164,4 +173,13 @@ function parseDocument(doc) { }; } +function parseBase64Length(str) { + try { + var vecStr = atob(str) + return vecStr.length / 4 + } catch { + return -1 + } +} + export { newParsedDocs }; diff --git a/ns_server_static/fts/fts_new_easy.html b/ns_server_static/fts/fts_new_easy.html index 90ad3277..a1d2200c 100644 --- a/ns_server_static/fts/fts_new_easy.html +++ b/ns_server_static/fts/fts_new_easy.html @@ -252,7 +252,7 @@

Edit Fields

-
+
Edit Fields
-
+
Edit Fields
-
Edit Fields
-
+
diff --git a/ns_server_static/fts/static-bleve-mapping/js/mapping/type-mapping.js b/ns_server_static/fts/static-bleve-mapping/js/mapping/type-mapping.js index 5d025fa7..9418ff41 100644 --- a/ns_server_static/fts/static-bleve-mapping/js/mapping/type-mapping.js +++ b/ns_server_static/fts/static-bleve-mapping/js/mapping/type-mapping.js @@ -51,7 +51,7 @@ function initBleveTypeMappingController($scope, typeMappingIn, options) { var mappings = bleveConvertFromTypeMapping(typeMappingIn); - $scope.fieldTypes = ['text', 'number', 'datetime', 'boolean', 'disabled', 'geopoint', 'geoshape', 'IP', 'vector']; + $scope.fieldTypes = ['text', 'number', 'datetime', 'boolean', 'disabled', 'geopoint', 'geoshape', 'IP', 'vector', 'vector_base64']; var kindAttrs = { "field": { diff --git a/ns_server_static/fts/static-bleve-mapping/partials/mapping/type-mapping-tree.html b/ns_server_static/fts/static-bleve-mapping/partials/mapping/type-mapping-tree.html index d5238df0..5c6af6e0 100644 --- a/ns_server_static/fts/static-bleve-mapping/partials/mapping/type-mapping-tree.html +++ b/ns_server_static/fts/static-bleve-mapping/partials/mapping/type-mapping-tree.html @@ -246,7 +246,7 @@
-
+
-
+