From cecfe3d953979f456da7cb09f3fd21ecfb9bc0cb Mon Sep 17 00:00:00 2001
From: Sebastian Benjamin
Date: Thu, 8 Feb 2024 15:30:11 -0800
Subject: [PATCH] Basic cursor search, WIP

---
 .../components/VariantTableWidget.tsx         | 16 +++--
 jbrowse/src/client/JBrowse/utils.ts           | 15 ++--
 .../org/labkey/jbrowse/JBrowseController.java | 14 +---
 .../labkey/jbrowse/JBrowseLuceneSearch.java   | 68 ++++++-------------
 4 files changed, 40 insertions(+), 73 deletions(-)

diff --git a/jbrowse/src/client/JBrowse/VariantSearch/components/VariantTableWidget.tsx b/jbrowse/src/client/JBrowse/VariantSearch/components/VariantTableWidget.tsx
index afe99496c..577480508 100644
--- a/jbrowse/src/client/JBrowse/VariantSearch/components/VariantTableWidget.tsx
+++ b/jbrowse/src/client/JBrowse/VariantSearch/components/VariantTableWidget.tsx
@@ -60,6 +60,8 @@ const VariantTableWidget = observer(props => {
       return obj
     }))
     setTotalHits(data.totalHits)
+    setLastDoc(data.lastDoc)
+    setLastScore(data.lastScore)
     setDataLoaded(true)
   }
 
@@ -67,8 +69,8 @@ const VariantTableWidget = observer(props => {
     session.hideWidget(widget)
   }
 
-  function handleQuery(passedFilters, pushToHistory, pageQueryModel = pageSizeModel) {
-    const { page = pageSizeModel.page, pageSize = pageSizeModel.pageSize } = pageQueryModel;
+  function handleQuery(passedFilters, pushToHistory, paginate, pageQueryModel = pageSizeModel) {
+    const { pageSize = pageSizeModel.pageSize } = pageQueryModel;
 
     const encodedSearchString = createEncodedFilterString(passedFilters, false);
     const currentUrl = new URL(window.location.href);
@@ -82,7 +84,7 @@ const VariantTableWidget = observer(props => {
 
     setFilters(passedFilters);
     setDataLoaded(false)
-    fetchLuceneQuery(passedFilters, sessionId, trackGUID, page, pageSize, (json)=>{handleSearch(json)}, (error) => {setDataLoaded(true); setError(error)});
+    fetchLuceneQuery(passedFilters, sessionId, trackGUID, pageSize, paginate ? lastDoc : -1, paginate ? lastScore : -1, (json)=>{handleSearch(json)}, (error) => {setDataLoaded(true); setError(error)});
   }
 
   const TableCellWithPopover = (props: { value: any }) => {
@@ -223,6 +225,8 @@ const VariantTableWidget = observer(props => {
   const [filterModalOpen, setFilterModalOpen] = useState(false);
   const [filters, setFilters] = useState([]);
   const [totalHits, setTotalHits] = useState(0);
+  const [lastDoc, setLastDoc] = useState(-1);
+  const [lastScore, setLastScore] = useState(-1);
   const [fieldTypeInfo, setFieldTypeInfo] = useState([]);
   const [allowedGroupNames, setAllowedGroupNames] = useState([]);
   const [promotedFilters, setPromotedFilters] = useState>(null);
@@ -281,7 +285,7 @@ const VariantTableWidget = observer(props => {
         setAllowedGroupNames(groups)
         setPromotedFilters(promotedFilters)
 
-        handleQuery(searchStringToInitialFilters(fields.map((x) => x.name)), false)
+        handleQuery(searchStringToInitialFilters(fields.map((x) => x.name)), false, false)
       },
       (error) => {
         setError(error)
@@ -403,7 +407,7 @@ const VariantTableWidget = observer(props => {
           paginationMode="server"
           onPaginationModelChange = {(newModel) => {
             setPageSizeModel(newModel)
-            handleQuery(filters, true, newModel)
+            handleQuery(filters, true, true, newModel)
           }}
           onColumnVisibilityModelChange={(model) => {
             setColumnVisibilityModel(model)
@@ -440,7 +444,7 @@ const VariantTableWidget = observer(props => {
             fieldTypeInfo: fieldTypeInfo,
             allowedGroupNames: allowedGroupNames,
             promotedFilters: promotedFilters,
-            handleQuery: (filters) => handleQuery(filters, true)
+            handleQuery: (filters) => handleQuery(filters, true, false)
           }}
         />
       );
diff --git a/jbrowse/src/client/JBrowse/utils.ts b/jbrowse/src/client/JBrowse/utils.ts
index 0502b0bc7..e7946d3a6 100644
--- a/jbrowse/src/client/JBrowse/utils.ts
+++ b/jbrowse/src/client/JBrowse/utils.ts
@@ -414,11 +414,7 @@ function generateLuceneString(field, operator, value) {
   return luceneQueryString;
 }
 
-export async function fetchLuceneQuery(filters, sessionId, trackGUID, offset, pageSize, successCallback, failureCallback) {
-  if (!offset) {
-    offset = 0
-  }
-
+export async function fetchLuceneQuery(filters, sessionId, trackGUID, pageSize, lastDoc, lastScore, successCallback, failureCallback) {
   if (!sessionId) {
     failureCallback("There was an error: " + "Lucene query: no session ID")
     return
@@ -444,7 +440,14 @@ export async function fetchLuceneQuery(filters, sessionId, trackGUID, offset, pa
     failure: function(res) {
       failureCallback("There was an error: " + res.status + "\n Status Body: " + res.responseText + "\n Session ID:" + sessionId)
     },
-    params: {"searchString": createEncodedFilterString(filters, true), "sessionId": sessionId, "trackId": trackGUID, "offset": offset, "pageSize": pageSize},
+    params: {
+      "searchString": createEncodedFilterString(filters, true),
+      "sessionId": sessionId,
+      "trackId": trackGUID,
+      "pageSize": pageSize,
+      "lastDoc": lastDoc || -1,
+      "lastScore": lastScore || -1
+    },
   });
 }
 
diff --git a/jbrowse/src/org/labkey/jbrowse/JBrowseController.java b/jbrowse/src/org/labkey/jbrowse/JBrowseController.java
index e363dc8eb..ec3c33b40 100644
--- a/jbrowse/src/org/labkey/jbrowse/JBrowseController.java
+++ b/jbrowse/src/org/labkey/jbrowse/JBrowseController.java
@@ -910,7 +910,7 @@ public ApiResponse execute(LuceneQueryForm form, BindException errors)
 
             try
             {
-                return new ApiSimpleResponse(searcher.doSearch(getUser(), PageFlowUtil.decode(form.getSearchString()), form.getPageSize(), form.getOffset(), form.getLastDoc(), form.getLastScore()));
+                return new ApiSimpleResponse(searcher.doSearch(getUser(), PageFlowUtil.decode(form.getSearchString()), form.getPageSize(), form.getLastDoc(), form.getLastScore()));
             }
             catch (Exception e)
             {
@@ -945,8 +945,6 @@ public static class LuceneQueryForm
 
         private int _pageSize = 100;
 
-        private int _offset = 0;
-
         private int _lastDoc = -1;
 
         private int _lastScore = -1;
@@ -981,16 +979,6 @@ public void setPageSize(int pageSize)
             _pageSize = pageSize;
         }
 
-        public int getOffset()
-        {
-            return _offset;
-        }
-
-        public void setOffset(int offset)
-        {
-            _offset = offset;
-        }
-
         public String getTrackId()
         {
             return _trackId;
diff --git a/jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java b/jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java
index edffaea1d..3897ce426 100644
--- a/jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java
+++ b/jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java
@@ -134,10 +134,10 @@ public String extractFieldName(String queryString) {
         return parts.length > 0 ? parts[0].trim() : null;
     }
 
-    public JSONObject doSearch(User u, String searchString, final int pageSize, final int offset, final int lastDoc, final int lastScore) throws IOException, ParseException
+    public JSONObject doSearch(User u, String searchString, final int pageSize, final int lastDoc, final int lastScore) throws IOException, ParseException
     {
         searchString = tryUrlDecode(searchString);
         File indexPath = _jsonFile.getExpectedLocationOfLuceneIndex(true);
         Map fields = JBrowseFieldUtils.getIndexedFields(_jsonFile, u, getContainer());
 
         // Open directory of lucene path, get a directory reader, and create the index search manager
@@ -230,60 +230,26 @@ else if (numericQueryParserFields.contains(fieldName))
 
         BooleanQuery query = booleanQueryBuilder.build();
 
-        // TODO: if the index is generated with a sort on genomicPosition, maybe we just use Sort.INDEXORDER?
-        // Sort sort = Sort.INDEXORDER;
-        Sort sort = new Sort(new SortedNumericSortField("genomicPosition", SortField.Type.INT, false));
-
-        // Get chunks of size {pageSize}. Default to 1 chunk -- add to the offset to get more.
-        // We then iterate over the range of documents we want based on the offset. This does grow in memory
-        // linearly with the number of documents, but my understanding is that these are just score,id pairs
-        // Get chunks of size {pageSize}. Default to 1 chunk -- add to the offset to get more.
-        // We then iterate over the range of documents we want based on the offset. This does grow in memory
-        // linearly with the number of documents, but my understanding is that these are just score,id pairs
-
-        // TODO: rather than a simple offset, the client should be able to provide the max(genomicPosition).
-        // We could add this as a lucene filter (i.e., get the first pageSize docs above that value),
-        // which might really reduce what we need to scan though.
-        // Note that filter is different than query string (https://javaranch.com/journal/2009/02/filtering-a-lucene-search.html)
-        // A second idea is to use searchAfter(). If we knew the max(genomicPosition) of the last query, we could directly add it to that method
+        Sort sort = Sort.INDEXORDER;
+
         TopDocs topDocs;
-        //if (searchString.equals(ALL_DOCS))
-        //{
-        //    // TODO: since this is loaded on each page, consider special casing it to grad the first 100 records as fast as possible.
-        //    // this will return the index size
-        //    indexSearcher.getIndexReader().numDocs()
-        //}
-
-        if (lastDoc > -1)
-        {
-            topDocs = indexSearcher.searchAfter(new ScoreDoc(lastDoc, lastScore), query, pageSize, sort);
-        }
-        else
-        {
-            topDocs = indexSearcher.search(query, pageSize * (offset + 1), sort);
+        if (lastDoc > -1) {
+            ScoreDoc lastScoreDoc = new ScoreDoc(lastDoc, lastScore);
+            topDocs = indexSearcher.searchAfter(lastScoreDoc, query, pageSize, sort);
+        } else {
+            topDocs = indexSearcher.search(query, pageSize, sort);
         }
 
         JSONObject results = new JSONObject();
-
-        // Iterate over the doc list, (either to the total end or until the page ends) grab the requested docs,
-        // and add to returned results
         List data = new ArrayList<>();
 
-        for (int i = pageSize * offset; i < Math.min(pageSize * (offset + 1), topDocs.scoreDocs.length); i++)
-        {
+        for (ScoreDoc sd : topDocs.scoreDocs) {
             JSONObject elem = new JSONObject();
-            ScoreDoc sd = topDocs.scoreDocs[i];
             Document doc = indexSearcher.doc(sd.doc);
             for (IndexableField field : doc.getFields()) {
                 String fieldName = field.name();
                 String[] fieldValues = doc.getValues(fieldName);
-                if (fieldValues.length > 1) {
-                    // If there is more than one value, put the array of values into the JSON object.
-                    elem.put(fieldName, fieldValues);
-                } else {
-                    // If there is only one value, just put this single value into the JSON object.
-                    elem.put(fieldName, fieldValues[0]);
-                }
+                elem.put(fieldName, fieldValues.length > 1 ? fieldValues : fieldValues[0]);
             }
 
             data.add(elem);
@@ -291,10 +257,16 @@ else if (numericQueryParserFields.contains(fieldName))
 
         results.put("data", data);
         results.put("totalHits", topDocs.totalHits.value);
-        results.put("lastDoc", topDocs.scoreDocs[topDocs.scoreDocs.length - 1].doc);
-        results.put("lastScore", topDocs.scoreDocs[topDocs.scoreDocs.length - 1].score);
 
-        //TODO: we should probably stream this
+        if (topDocs.scoreDocs.length > 0) {
+            ScoreDoc sortedLastDoc = topDocs.scoreDocs[topDocs.scoreDocs.length - 1];
+            results.put("lastDoc", sortedLastDoc.doc);
+            results.put("lastScore", Float.isNaN(sortedLastDoc.score) ? -1 : sortedLastDoc.score);
+        } else {
+            results.put("lastDoc", -1);
+            results.put("lastScore", -1);
+        }
+
         return results;
     }
 }
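Below is a minimal sketch, not part of the diff above, of the cursor shape Lucene's searchAfter() expects when a Sort is supplied. It assumes a Lucene 8/9-style API; LucenePager, nextPage, and cursorFor are illustrative names, not repository code. With a Sort, the resume point must be a FieldDoc carrying the previous page's sort values (with Sort.INDEXORDER that value is the doc id), rather than a ScoreDoc built from a doc id and score:

// Sketch only (assumed Lucene 8/9 API); class and method names are illustrative.
import java.io.IOException;

import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;

public class LucenePager
{
    // Returns one page of hits. Pass null for the first page; for later pages pass the cursor
    // returned by cursorFor(), so Lucene skips everything up to and including that hit.
    public static TopDocs nextPage(IndexSearcher searcher, Query query, Sort sort, int pageSize, FieldDoc after) throws IOException
    {
        // searchAfter() with a Sort requires a FieldDoc whose fields hold the previous page's
        // sort values; a plain ScoreDoc(docId, score) cursor only applies to relevance-ordered searches.
        return after == null
                ? searcher.search(query, pageSize, sort)
                : searcher.searchAfter(after, query, pageSize, sort);
    }

    // Cursor to echo back to the client alongside the page; null means the page was empty.
    public static FieldDoc cursorFor(TopDocs page)
    {
        // When a Sort is used, each entry in scoreDocs is a FieldDoc with its sort values filled in.
        ScoreDoc[] hits = page.scoreDocs;
        return hits.length == 0 ? null : (FieldDoc) hits[hits.length - 1];
    }
}

On the wire, this would mean the client round-trips the last hit's sort values (the last doc id under INDEXORDER, or the last genomicPosition under a numeric sort) in addition to, or instead of, the lastDoc/lastScore pair used above.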