Ideas for lucene query perf #263

Closed · wants to merge 3 commits
@@ -60,15 +60,17 @@ const VariantTableWidget = observer(props => {
return obj
}))
setTotalHits(data.totalHits)
setLastDoc(data.lastDoc)
setLastScore(data.lastScore)
setDataLoaded(true)
}

function handleModalClose(widget) {
session.hideWidget(widget)
}

function handleQuery(passedFilters, pushToHistory, pageQueryModel = pageSizeModel) {
const { page = pageSizeModel.page, pageSize = pageSizeModel.pageSize } = pageQueryModel;
function handleQuery(passedFilters, pushToHistory, paginate, pageQueryModel = pageSizeModel) {
const { pageSize = pageSizeModel.pageSize } = pageQueryModel;

const encodedSearchString = createEncodedFilterString(passedFilters, false);
const currentUrl = new URL(window.location.href);
@@ -82,7 +84,7 @@ const VariantTableWidget = observer(props => {

setFilters(passedFilters);
setDataLoaded(false)
fetchLuceneQuery(passedFilters, sessionId, trackGUID, page, pageSize, (json)=>{handleSearch(json)}, (error) => {setDataLoaded(true); setError(error)});
fetchLuceneQuery(passedFilters, sessionId, trackGUID, pageSize, paginate ? lastDoc : -1, paginate ? lastScore : -1, (json)=>{handleSearch(json)}, (error) => {setDataLoaded(true); setError(error)});
}

const TableCellWithPopover = (props: { value: any }) => {
@@ -223,6 +225,8 @@ const VariantTableWidget = observer(props => {
const [filterModalOpen, setFilterModalOpen] = useState(false);
const [filters, setFilters] = useState([]);
const [totalHits, setTotalHits] = useState(0);
const [lastDoc, setLastDoc] = useState(-1);
const [lastScore, setLastScore] = useState(-1);
const [fieldTypeInfo, setFieldTypeInfo] = useState<FieldModel[]>([]);
const [allowedGroupNames, setAllowedGroupNames] = useState<string[]>([]);
const [promotedFilters, setPromotedFilters] = useState<Map<string, Filter[]>>(null);
@@ -281,7 +285,7 @@ const VariantTableWidget = observer(props => {
setAllowedGroupNames(groups)
setPromotedFilters(promotedFilters)

handleQuery(searchStringToInitialFilters(fields.map((x) => x.name)), false)
handleQuery(searchStringToInitialFilters(fields.map((x) => x.name)), false, false)
},
(error) => {
setError(error)
@@ -403,7 +407,7 @@ const VariantTableWidget = observer(props => {
paginationMode="server"
onPaginationModelChange = {(newModel) => {
setPageSizeModel(newModel)
handleQuery(filters, true, newModel)
handleQuery(filters, true, true, newModel)
}}
onColumnVisibilityModelChange={(model) => {
setColumnVisibilityModel(model)
@@ -440,7 +444,7 @@ const VariantTableWidget = observer(props => {
fieldTypeInfo: fieldTypeInfo,
allowedGroupNames: allowedGroupNames,
promotedFilters: promotedFilters,
handleQuery: (filters) => handleQuery(filters, true)
handleQuery: (filters) => handleQuery(filters, true, false)
}}
/>
);
15 changes: 9 additions & 6 deletions jbrowse/src/client/JBrowse/utils.ts
@@ -414,11 +414,7 @@ function generateLuceneString(field, operator, value) {
return luceneQueryString;
}

export async function fetchLuceneQuery(filters, sessionId, trackGUID, offset, pageSize, successCallback, failureCallback) {
if (!offset) {
offset = 0
}

export async function fetchLuceneQuery(filters, sessionId, trackGUID, pageSize, lastDoc, lastScore, successCallback, failureCallback) {
if (!sessionId) {
failureCallback("There was an error: " + "Lucene query: no session ID")
return
@@ -444,7 +440,14 @@ export async function fetchLuceneQuery(filters, sessionId, trackGUID, offset, pa
failure: function(res) {
failureCallback("There was an error: " + res.status + "\n Status Body: " + res.responseText + "\n Session ID:" + sessionId)
},
params: {"searchString": createEncodedFilterString(filters, true), "sessionId": sessionId, "trackId": trackGUID, "offset": offset, "pageSize": pageSize},
params: {
"searchString": createEncodedFilterString(filters, true),
"sessionId": sessionId,
"trackId": trackGUID,
"pageSize": pageSize,
"lastDoc": lastDoc || -1,
"lastScore": lastScore || -1
},
});
}

32 changes: 22 additions & 10 deletions jbrowse/src/org/labkey/jbrowse/JBrowseController.java
@@ -910,7 +910,7 @@ public ApiResponse execute(LuceneQueryForm form, BindException errors)

try
{
return new ApiSimpleResponse(searcher.doSearch(getUser(), PageFlowUtil.decode(form.getSearchString()), form.getPageSize(), form.getOffset()));
return new ApiSimpleResponse(searcher.doSearch(getUser(), PageFlowUtil.decode(form.getSearchString()), form.getPageSize(), form.getLastDoc(), form.getLastScore()));
}
catch (Exception e)
{
@@ -945,7 +945,9 @@ public static class LuceneQueryForm

private int _pageSize = 100;

private int _offset = 0;
private int _lastDoc = -1;

private int _lastScore = -1;

public String getSearchString()
{
@@ -977,24 +979,34 @@ public void setPageSize(int pageSize)
_pageSize = pageSize;
}

public int getOffset()
public String getTrackId()
{
return _offset;
return _trackId;
}

public void setOffset(int offset)
public void setTrackId(String trackId)
{
_offset = offset;
_trackId = trackId;
}

public String getTrackId()
public int getLastDoc()
{
return _trackId;
return _lastDoc;
}

public void setTrackId(String trackId)
public void setLastDoc(int lastDoc)
{
_trackId = trackId;
_lastDoc = lastDoc;
}

public int getLastScore()
{
return _lastScore;
}

public void setLastScore(int lastScore)
{
_lastScore = lastScore;
}
}

59 changes: 29 additions & 30 deletions jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java
@@ -17,9 +17,11 @@
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.SortedNumericSortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.jetbrains.annotations.Nullable;
@@ -57,6 +59,8 @@

public class JBrowseLuceneSearch
{
private static final String ALL_DOCS = "all";

private final JBrowseSession _session;
private final JsonFile _jsonFile;
private final User _user;
@@ -130,10 +134,10 @@ public String extractFieldName(String queryString) {
return parts.length > 0 ? parts[0].trim() : null;
}

public JSONObject doSearch(User u, String searchString, final int pageSize, final int offset) throws IOException, ParseException
public JSONObject doSearch(User u, String searchString, final int pageSize, final int lastDoc, final int lastScore) throws IOException, ParseException
{
searchString = tryUrlDecode(searchString);
File indexPath = _jsonFile.getExpectedLocationOfLuceneIndex(true);
File indexPath = new File("C:\\Users\\sebas\\Desktop\\lucene");//_jsonFile.getExpectedLocationOfLuceneIndex(true);
Map<String, JBrowseFieldDescriptor> fields = JBrowseFieldUtils.getIndexedFields(_jsonFile, u, getContainer());

// Open directory of lucene path, get a directory reader, and create the index search manager
@@ -182,14 +186,14 @@ public JSONObject doSearch(User u, String searchString, final int pageSize, fina

BooleanQuery.Builder booleanQueryBuilder = new BooleanQuery.Builder();

if (searchString.equals("all")) {
if (searchString.equals(ALL_DOCS)) {
booleanQueryBuilder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
}

// Split input into tokens, 1 token per query separated by &
StringTokenizer tokenizer = new StringTokenizer(searchString, "&");

while (tokenizer.hasMoreTokens() && !searchString.equals("all"))
while (tokenizer.hasMoreTokens() && !searchString.equals(ALL_DOCS))
{
String queryString = tokenizer.nextToken();
Query query = null;
Expand Down Expand Up @@ -226,39 +230,26 @@ else if (numericQueryParserFields.contains(fieldName))

BooleanQuery query = booleanQueryBuilder.build();

// Get chunks of size {pageSize}. Default to 1 chunk -- add to the offset to get more.
// We then iterate over the range of documents we want based on the offset. This does grow in memory
// linearly with the number of documents, but my understanding is that these are just score,id pairs
// rather than full documents, so mem usage *should* still be pretty low.
//TopDocs topDocs = indexSearcher.search(query, pageSize * (offset + 1));

// Define sort field
SortField sortField = new SortField("pos", SortField.Type.INT, false);
Sort sort = new Sort(sortField);
Sort sort = Sort.INDEXORDER;

// Perform the search with sorting
TopFieldDocs topDocs = indexSearcher.search(query, pageSize * (offset + 1), sort);
TopDocs topDocs;
if (lastDoc > -1) {
ScoreDoc lastScoreDoc = new ScoreDoc(lastDoc, lastScore);
topDocs = indexSearcher.searchAfter(lastScoreDoc, query, pageSize, sort);
} else {
topDocs = indexSearcher.search(query, pageSize, sort);
}

JSONObject results = new JSONObject();

// Iterate over the doc list, (either to the total end or until the page ends) grab the requested docs,
// and add to returned results
List<JSONObject> data = new ArrayList<>();
for (int i = pageSize * offset; i < Math.min(pageSize * (offset + 1), topDocs.scoreDocs.length); i++)
{
for (ScoreDoc sd : topDocs.scoreDocs) {
JSONObject elem = new JSONObject();
Document doc = indexSearcher.doc(topDocs.scoreDocs[i].doc);
Document doc = indexSearcher.doc(sd.doc);

for (IndexableField field : doc.getFields()) {
String fieldName = field.name();
String[] fieldValues = doc.getValues(fieldName);
if (fieldValues.length > 1) {
// If there is more than one value, put the array of values into the JSON object.
elem.put(fieldName, fieldValues);
} else {
// If there is only one value, just put this single value into the JSON object.
elem.put(fieldName, fieldValues[0]);
}
elem.put(fieldName, fieldValues.length > 1 ? fieldValues : fieldValues[0]);
}

data.add(elem);
@@ -267,7 +258,15 @@ else if (numericQueryParserFields.contains(fieldName))
results.put("data", data);
results.put("totalHits", topDocs.totalHits.value);

//TODO: we should probably stream this
if(topDocs.scoreDocs.length > 0) {
ScoreDoc sortedLastDoc = topDocs.scoreDocs[topDocs.scoreDocs.length - 1];
results.put("lastDoc", sortedLastDoc.doc);
results.put("lastScore", Float.isNaN(sortedLastDoc.score) ? -1 : sortedLastDoc.score);
} else {
results.put("lastDoc", -1);
results.put("lastScore", -1);
}

return results;
}
}
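
For context on the change above, here is a minimal sketch of Lucene's searchAfter ("deep paging") pattern that the diff moves to: instead of collecting pageSize * (offset + 1) hits and discarding everything before the requested page, each request passes in the last ScoreDoc of the previous page and Lucene resumes from there. The index path, class name, and loop structure below are illustrative assumptions, not code from this PR.

import java.nio.file.Paths;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;

public class SearchAfterSketch
{
    public static void main(String[] args) throws Exception
    {
        // Hypothetical index location; the real code resolves this from the JsonFile
        try (DirectoryReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/tmp/lucene-index"))))
        {
            IndexSearcher searcher = new IndexSearcher(reader);
            Query query = new MatchAllDocsQuery();
            int pageSize = 100;

            ScoreDoc after = null; // null means "first page"
            while (true)
            {
                // searchAfter skips everything up to and including `after`, so the
                // collector only ever holds pageSize hits, however deep the paging goes
                TopDocs page = (after == null)
                        ? searcher.search(query, pageSize)
                        : searcher.searchAfter(after, query, pageSize);

                if (page.scoreDocs.length == 0)
                {
                    break; // no more hits
                }

                for (ScoreDoc sd : page.scoreDocs)
                {
                    Document doc = searcher.doc(sd.doc);
                    // ... convert doc to JSON, as doSearch() does ...
                }

                // Remember the last hit of this page; in the PR the client echoes its
                // doc id and score back as the lastDoc/lastScore request parameters
                after = page.scoreDocs[page.scoreDocs.length - 1];
            }
        }
    }
}

This sketch pages in relevance order; note that the searchAfter overload that also takes a Sort (as the diff does with Sort.INDEXORDER) expects the `after` argument to be a FieldDoc carrying the sort values, which is worth checking against the simplified version above.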