Skip to content

Commit 9d5d688

Browse files
Merge pull request #58 from uc-cdis/improve_query_performance_and_use_dialect
Improve query performance and use dialect
2 parents 3649283 + 7b8d63e commit 9d5d688

File tree

5 files changed

+132
-46
lines changed

5 files changed

+132
-46
lines changed

models/cohortdata.go

+33-8
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ func (h CohortData) RetrieveDataBySourceIdAndCohortIdAndConceptIdsOrderedByPerso
6868

6969
// get the observations for the subjects and the concepts, to build up the data rows to return:
7070
var cohortData []*PersonConceptAndValue
71-
meta_result := omopDataSource.Db.Table(omopDataSource.Schema+".observation_continuous as observation").
71+
meta_result := omopDataSource.Db.Table(omopDataSource.Schema+".observation_continuous as observation"+omopDataSource.GetViewDirective()).
7272
Select("observation.person_id, observation.observation_concept_id as concept_id, observation.value_as_string as concept_value_as_string, observation.value_as_number as concept_value_as_number, observation.value_as_concept_id as concept_value_as_concept_id").
7373
Joins("INNER JOIN "+resultsDataSource.Schema+".cohort as cohort ON cohort.subject_id = observation.person_id").
7474
Where("cohort.cohort_definition_id = ?", cohortDefinitionId).
@@ -87,14 +87,14 @@ func (h CohortData) RetrieveHistogramDataBySourceIdAndCohortIdAndConceptIdsAndCo
8787

8888
// get the observations for the subjects and the concepts, to build up the data rows to return:
8989
var cohortData []*PersonConceptAndValue
90-
query := omopDataSource.Db.Table(omopDataSource.Schema+".observation_continuous as observation").
90+
query := omopDataSource.Db.Table(omopDataSource.Schema+".observation_continuous as observation"+omopDataSource.GetViewDirective()).
9191
Select("distinct(observation.person_id), observation.observation_concept_id as concept_id, observation.value_as_number as concept_value_as_number").
9292
Joins("INNER JOIN "+resultsDataSource.Schema+".cohort as cohort ON cohort.subject_id = observation.person_id").
9393
Where("cohort.cohort_definition_id = ?", cohortDefinitionId).
9494
Where("observation.observation_concept_id = ?", histogramConceptId).
9595
Where("observation.value_as_number is not null")
9696

97-
query = QueryFilterByConceptIdsAndCohortPairsHelper(query, filterConceptIds, filterCohortPairs, omopDataSource.Schema, resultsDataSource.Schema)
97+
query = QueryFilterByConceptIdsAndCohortPairsHelper(query, filterConceptIds, filterCohortPairs, omopDataSource, resultsDataSource.Schema, "observation")
9898

9999
meta_result := query.Scan(&cohortData)
100100
return cohortData, meta_result.Error
@@ -111,7 +111,7 @@ func (h CohortData) RetrieveCohortOverlapStats(sourceId int, caseCohortId int, c
111111

112112
// count persons that are in the intersection of both case and control cohorts, filtering on filterConceptValue:
113113
var cohortOverlapStats CohortOverlapStats
114-
query := omopDataSource.Db.Table(omopDataSource.Schema+".observation_continuous as observation").
114+
query := omopDataSource.Db.Table(omopDataSource.Schema+".observation_continuous as observation"+omopDataSource.GetViewDirective()).
115115
Select("count(distinct(observation.person_id)) as case_control_overlap").
116116
Joins("INNER JOIN "+resultsDataSource.Schema+".cohort as case_cohort ON case_cohort.subject_id = observation.person_id").
117117
Joins("INNER JOIN "+resultsDataSource.Schema+".cohort as control_cohort ON control_cohort.subject_id = case_cohort.subject_id"). // this one allows for the intersection between case and control and the assessment of the overlap
@@ -120,7 +120,7 @@ func (h CohortData) RetrieveCohortOverlapStats(sourceId int, caseCohortId int, c
120120
Where("observation.observation_concept_id = ?", filterConceptId).
121121
Where("observation.value_as_concept_id = ?", filterConceptValue)
122122

123-
query = QueryFilterByConceptIdsAndCohortPairsHelper(query, otherFilterConceptIds, filterCohortPairs, omopDataSource.Schema, resultsDataSource.Schema)
123+
query = QueryFilterByConceptIdsAndCohortPairsHelper(query, otherFilterConceptIds, filterCohortPairs, omopDataSource, resultsDataSource.Schema, "observation")
124124

125125
meta_result := query.Scan(&cohortOverlapStats)
126126
return cohortOverlapStats, meta_result.Error
@@ -130,20 +130,45 @@ func (h CohortData) RetrieveCohortOverlapStats(sourceId int, caseCohortId int, c
130130
func (h CohortData) RetrieveCohortOverlapStatsWithoutFilteringOnConceptValue(sourceId int, caseCohortId int, controlCohortId int,
131131
otherFilterConceptIds []int64, filterCohortPairs []utils.CustomDichotomousVariableDef) (CohortOverlapStats, error) {
132132

133+
// special case for when filter lists are empty:
134+
if len(otherFilterConceptIds) == 0 && len(filterCohortPairs) == 0 {
135+
// call the faster version of overlap check:
136+
return h.RetrieveCohortOverlapStatsWithoutFiltering(sourceId, caseCohortId, controlCohortId)
137+
}
138+
133139
var dataSourceModel = new(Source)
134140
omopDataSource := dataSourceModel.GetDataSource(sourceId, Omop)
135141
resultsDataSource := dataSourceModel.GetDataSource(sourceId, Results)
136142

137143
// count persons that are in the intersection of both case and control cohorts, filtering on filterConceptValue:
138144
var cohortOverlapStats CohortOverlapStats
139-
query := omopDataSource.Db.Table(omopDataSource.Schema+".observation_continuous as observation").
145+
query := omopDataSource.Db.Table(omopDataSource.Schema+".observation_continuous as observation"+omopDataSource.GetViewDirective()).
140146
Select("count(distinct(observation.person_id)) as case_control_overlap").
141147
Joins("INNER JOIN "+resultsDataSource.Schema+".cohort as case_cohort ON case_cohort.subject_id = observation.person_id").
142148
Joins("INNER JOIN "+resultsDataSource.Schema+".cohort as control_cohort ON control_cohort.subject_id = case_cohort.subject_id"). // this one allows for the intersection between case and control and the assessment of the overlap
143149
Where("case_cohort.cohort_definition_id = ?", caseCohortId).
144150
Where("control_cohort.cohort_definition_id = ?", controlCohortId)
145151

146-
query = QueryFilterByConceptIdsAndCohortPairsHelper(query, otherFilterConceptIds, filterCohortPairs, omopDataSource.Schema, resultsDataSource.Schema)
152+
query = QueryFilterByConceptIdsAndCohortPairsHelper(query, otherFilterConceptIds, filterCohortPairs, omopDataSource, resultsDataSource.Schema, "observation")
153+
154+
meta_result := query.Scan(&cohortOverlapStats)
155+
return cohortOverlapStats, meta_result.Error
156+
}
157+
158+
// Basically the same as the method above, but without any filtering on any concepts or on any CustomDichotomousVariableDef:
159+
func (h CohortData) RetrieveCohortOverlapStatsWithoutFiltering(sourceId int, caseCohortId int, controlCohortId int) (CohortOverlapStats, error) {
160+
161+
var dataSourceModel = new(Source)
162+
omopDataSource := dataSourceModel.GetDataSource(sourceId, Omop)
163+
resultsDataSource := dataSourceModel.GetDataSource(sourceId, Results)
164+
165+
// count persons that are in the intersection of both case and control cohorts (no concept or cohort-pair filtering is applied here):
166+
var cohortOverlapStats CohortOverlapStats
167+
query := omopDataSource.Db.Table(resultsDataSource.Schema+".cohort as case_cohort").
168+
Select("count(distinct(case_cohort.subject_id)) as case_control_overlap").
169+
Joins("INNER JOIN "+resultsDataSource.Schema+".cohort as control_cohort ON control_cohort.subject_id = case_cohort.subject_id"). // this one allows for the intersection between case and control and the assessment of the overlap
170+
Where("case_cohort.cohort_definition_id = ?", caseCohortId).
171+
Where("control_cohort.cohort_definition_id = ?", controlCohortId)
147172

148173
meta_result := query.Scan(&cohortOverlapStats)
149174
return cohortOverlapStats, meta_result.Error
@@ -173,7 +198,7 @@ func (h CohortData) ValidateObservationData(observationConceptIdsToCheck []int64
173198
log.Printf("INFO: checking if no duplicate data is found for concept ids %v in `observation` table of data source %d...",
174199
observationConceptIdsToCheck, source.SourceId)
175200
var personConceptAndCount []*PersonConceptAndCount
176-
query := omopDataSource.Db.Table(omopDataSource.Schema+".observation_continuous as observation").
201+
query := omopDataSource.Db.Table(omopDataSource.Schema+".observation_continuous as observation"+omopDataSource.GetViewDirective()).
177202
Select("observation.person_id, observation.observation_concept_id as concept_id, count(*)").
178203
Where("observation.observation_concept_id in (?)", observationConceptIdsToCheck).
179204
Group("observation.person_id, observation.observation_concept_id").

models/concept.go

+6-5
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ func (h Concept) RetrieveStatsBySourceIdAndCohortIdAndConceptIds(sourceId int, c
161161
// no value for this concept by first finding the ones that do have some value and
162162
// then subtracting them from cohort size before dividing:
163163
var conceptsAndPersonsWithData []*ConceptAndPersonsWithDataStats
164-
meta_result = omopDataSource.Db.Table(omopDataSource.Schema+".observation_continuous as observation").
164+
meta_result = omopDataSource.Db.Table(omopDataSource.Schema+".observation_continuous as observation"+omopDataSource.GetViewDirective()).
165165
Select("observation_concept_id as concept_id, count(distinct(person_id)) as nperson_ids").
166166
Joins("INNER JOIN "+resultsDataSource.Schema+".cohort as cohort ON cohort.subject_id = observation.person_id").
167167
Where("cohort.cohort_definition_id = ?", cohortDefinitionId).
@@ -220,16 +220,17 @@ func (h Concept) RetrieveBreakdownStatsBySourceIdAndCohortIdAndConceptIdsAndCoho
220220
// count persons, grouping by concept value:
221221
var breakdownValueFieldName = "observation.value_as_" + getConceptValueType(breakdownConceptId)
222222
var conceptBreakdownList []*ConceptBreakdown
223-
query := omopDataSource.Db.Table(omopDataSource.Schema+".observation_continuous as observation").
223+
query := QueryFilterByCohortPairsHelper(filterCohortPairs, resultsDataSource, cohortDefinitionId, "unionAndIntersect").
224224
Select("observation.value_as_concept_id, count(distinct(observation.person_id)) as npersons_in_cohort_with_value").
225-
Joins("INNER JOIN "+resultsDataSource.Schema+".cohort as cohort ON cohort.subject_id = observation.person_id").
226-
Where("cohort.cohort_definition_id = ?", cohortDefinitionId).
225+
Joins("INNER JOIN "+omopDataSource.Schema+".observation_continuous as observation"+omopDataSource.GetViewDirective()+" ON unionAndIntersect.subject_id = observation.person_id").
227226
Where("observation.observation_concept_id = ?", breakdownConceptId).
228227
Where(breakdownValueFieldName + " is not null"). // this one seems like a bit of a random constraint...but was a request from the business side: skip records where this field is null
229228
Where("observation.value_as_concept_id is not null"). // this is assuming that the breakdownConceptId has its values nicely stored as concepts as well and correctly used in observation table...
230229
Where("observation.value_as_concept_id != 0")
231230

232-
query = QueryFilterByConceptIdsAndCohortPairsHelper(query, filterConceptIds, filterCohortPairs, omopDataSource.Schema, resultsDataSource.Schema)
231+
// note: here we pass empty []utils.CustomDichotomousVariableDef{} instead of filterCohortPairs, since we already use the SQL generated by QueryFilterByCohortPairsHelper above,
232+
// which is a better performing SQL in this particular scenario:
233+
query = QueryFilterByConceptIdsAndCohortPairsHelper(query, filterConceptIds, []utils.CustomDichotomousVariableDef{}, omopDataSource, resultsDataSource.Schema, "observation")
233234

234235
meta_result := query.Group("observation.value_as_concept_id").
235236
Scan(&conceptBreakdownList)

models/helper.go

+43-8
Original file line numberDiff line numberDiff line change
@@ -13,18 +13,14 @@ import (
1313
// a list of filters in the form of concept ids and cohort pairs. The one assumption it makes is that the given `query` object already contains
1414
// a basic query on a table or view that has been named or aliased with the value passed in `mainObservationTableAlias`. This assumption is
1515
// NOT checked here: callers must pass the alias that their query actually uses.
16-
func QueryFilterByConceptIdsAndCohortPairsHelper(query *gorm.DB, filterConceptIds []int64, filterCohortPairs []utils.CustomDichotomousVariableDef, omopSchemaName string, resultSchemaName string) *gorm.DB {
17-
// Validate assumption of a table or view aliased as "observation":
18-
if query.Statement.Table != "observation" {
19-
panic("Error: this QueryFilterByConceptIdsAndCohortPairsHelper is meant for adding extra filters to a query on a table or view with the alias name `observation`")
20-
}
21-
16+
func QueryFilterByConceptIdsAndCohortPairsHelper(query *gorm.DB, filterConceptIds []int64, filterCohortPairs []utils.CustomDichotomousVariableDef,
17+
omopDataSource *utils.DbAndSchema, resultSchemaName string, mainObservationTableAlias string) *gorm.DB {
2218
// iterate over the filterConceptIds, adding a new INNER JOIN and filters for each, so that the resulting set is the
2319
// set of persons that have a non-null value for each and every one of the concepts:
2420
for i, filterConceptId := range filterConceptIds {
2521
observationTableAlias := fmt.Sprintf("observation_filter_%d", i)
2622
log.Printf("Adding extra INNER JOIN with alias %s", observationTableAlias)
27-
query = query.Joins("INNER JOIN "+omopSchemaName+".observation_continuous as "+observationTableAlias+" ON "+observationTableAlias+".person_id = observation.person_id"). // assumption: there is a table or view named or aliased as "observation"
23+
query = query.Joins("INNER JOIN "+omopDataSource.Schema+".observation_continuous as "+observationTableAlias+omopDataSource.GetViewDirective()+" ON "+observationTableAlias+".person_id = "+mainObservationTableAlias+".person_id").
2824
Where(observationTableAlias+".observation_concept_id = ?", filterConceptId).
2925
Where("(" + observationTableAlias + ".value_as_string is not null or " + observationTableAlias + ".value_as_number is not null)") // TODO - improve performance by only filtering on type according to getConceptValueType()
3026
}
@@ -43,9 +39,48 @@ func QueryFilterByConceptIdsAndCohortPairsHelper(query *gorm.DB, filterConceptId
4339
" EXCEPT "+ //now use EXCEPT to exclude the part where both cohorts INTERSECT
4440
" Select "+cohortTableAlias2+".subject_id FROM "+resultSchemaName+".cohort as "+cohortTableAlias2+
4541
" INNER JOIN "+resultSchemaName+".cohort as "+cohortTableAlias3+" ON "+cohortTableAlias3+".subject_id = "+cohortTableAlias2+".subject_id "+
46-
" where "+cohortTableAlias2+".cohort_definition_id = ? AND "+cohortTableAlias3+".cohort_definition_id =? ) AS "+unionExceptAlias+" ON "+unionExceptAlias+".subject_id = observation.person_id", // assumption: there is a table or view named or aliased as "observation"
42+
" where "+cohortTableAlias2+".cohort_definition_id = ? AND "+cohortTableAlias3+".cohort_definition_id =? ) AS "+unionExceptAlias+" ON "+unionExceptAlias+".subject_id = "+mainObservationTableAlias+".person_id",
4743
filterCohortPair.CohortId1, filterCohortPair.CohortId2, filterCohortPair.CohortId1, filterCohortPair.CohortId2)
4844
}
45+
return query
46+
}
4947

48+
// Helper function that adds extra filter clauses to the query, for the given filterCohortPairs, intersecting on the
49+
// right set of tables, excluding data where necessary, etc.
50+
// It basically iterates over the list of filterCohortPairs, adding relevant INTERSECT and EXCEPT clauses, so that the resulting set is the
51+
// set of persons that are part of the intersections of cohortDefinitionId and of one of the cohorts in the filterCohortPairs. The EXCEPT
52+
// clauses exclude the persons that are found in both cohorts of a filterCohortPair.
53+
func QueryFilterByCohortPairsHelper(filterCohortPairs []utils.CustomDichotomousVariableDef, resultsDataSource *utils.DbAndSchema, cohortDefinitionId int, unionAndIntersectSQLAlias string) *gorm.DB {
54+
unionAndIntersectSQL := "(" +
55+
"SELECT subject_id FROM " + resultsDataSource.Schema + ".cohort WHERE cohort_definition_id=? "
56+
var idsList []interface{}
57+
idsList = append(idsList, cohortDefinitionId)
58+
if len(filterCohortPairs) > 0 {
59+
// INTERSECT UNIONs section:
60+
unionAndIntersectSQL = unionAndIntersectSQL + "INTERSECT ("
61+
for i, filterCohortPair := range filterCohortPairs {
62+
unionAndIntersectSQL = unionAndIntersectSQL +
63+
"SELECT subject_id FROM " + resultsDataSource.Schema + ".cohort WHERE cohort_definition_id=? UNION " +
64+
"SELECT subject_id FROM " + resultsDataSource.Schema + ".cohort WHERE cohort_definition_id=? "
65+
if i+1 < len(filterCohortPairs) {
66+
unionAndIntersectSQL = unionAndIntersectSQL + " UNION "
67+
}
68+
idsList = append(idsList, filterCohortPair.CohortId1, filterCohortPair.CohortId2)
69+
}
70+
unionAndIntersectSQL = unionAndIntersectSQL + ") "
71+
// EXCEPTs section:
72+
for _, filterCohortPair := range filterCohortPairs {
73+
unionAndIntersectSQL = unionAndIntersectSQL +
74+
"EXCEPT ( " +
75+
"SELECT subject_id FROM " + resultsDataSource.Schema + ".cohort WHERE cohort_definition_id=? " +
76+
"INTERSECT " +
77+
"SELECT subject_id FROM " + resultsDataSource.Schema + ".cohort WHERE cohort_definition_id=? " +
78+
")"
79+
idsList = append(idsList, filterCohortPair.CohortId1, filterCohortPair.CohortId2)
80+
}
81+
}
82+
unionAndIntersectSQL = unionAndIntersectSQL +
83+
") "
84+
query := resultsDataSource.Db.Table(unionAndIntersectSQL+" as "+unionAndIntersectSQLAlias+" ", idsList...)
5085
return query
5186
}

0 commit comments

Comments
 (0)