Skip to content

Commit 5a20d45

Browse files
Merge pull request #91 from uc-cdis/fix/remove_unnecessary_joins_on_observation
Feat: remove unnecessary joins on observation
2 parents f439c23 + 6b46606 commit 5a20d45

File tree

8 files changed

+64
-41
lines changed

8 files changed

+64
-41
lines changed

controllers/cohortdata.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@ func populateConceptValue(row []string, cohortItem models.PersonConceptAndValue,
241241
return row
242242
}
243243

244-
func (u CohortDataController) RetrieveCohortOverlapStatsWithoutFilteringOnConceptValue(c *gin.Context) {
244+
func (u CohortDataController) RetrieveCohortOverlapStats(c *gin.Context) {
245245
errors := make([]error, 4)
246246
var sourceId, caseCohortId, controlCohortId int
247247
var conceptIds []int64
@@ -264,7 +264,7 @@ func (u CohortDataController) RetrieveCohortOverlapStatsWithoutFilteringOnConcep
264264
c.Abort()
265265
return
266266
}
267-
overlapStats, err := u.cohortDataModel.RetrieveCohortOverlapStatsWithoutFilteringOnConceptValue(sourceId, caseCohortId,
267+
overlapStats, err := u.cohortDataModel.RetrieveCohortOverlapStats(sourceId, caseCohortId,
268268
controlCohortId, conceptIds, cohortPairs)
269269
if err != nil {
270270
c.JSON(http.StatusInternalServerError, gin.H{"message": "Error retrieving stats", "error": err.Error()})

models/cohortdata.go

+6-8
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ import (
99

1010
type CohortDataI interface {
1111
RetrieveDataBySourceIdAndCohortIdAndConceptIdsOrderedByPersonId(sourceId int, cohortDefinitionId int, conceptIds []int64) ([]*PersonConceptAndValue, error)
12-
RetrieveCohortOverlapStatsWithoutFilteringOnConceptValue(sourceId int, caseCohortId int, controlCohortId int, otherFilterConceptIds []int64, filterCohortPairs []utils.CustomDichotomousVariableDef) (CohortOverlapStats, error)
12+
RetrieveCohortOverlapStats(sourceId int, caseCohortId int, controlCohortId int, otherFilterConceptIds []int64, filterCohortPairs []utils.CustomDichotomousVariableDef) (CohortOverlapStats, error)
1313
RetrieveDataByOriginalCohortAndNewCohort(sourceId int, originalCohortDefinitionId int, cohortDefinitionId int) ([]*PersonIdAndCohort, error)
1414
RetrieveHistogramDataBySourceIdAndCohortIdAndConceptIdsAndCohortPairs(sourceId int, cohortDefinitionId int, histogramConceptId int64, filterConceptIds []int64, filterCohortPairs []utils.CustomDichotomousVariableDef) ([]*PersonConceptAndValue, error)
1515
}
@@ -101,7 +101,7 @@ func (h CohortData) RetrieveHistogramDataBySourceIdAndCohortIdAndConceptIdsAndCo
101101
Where("observation.observation_concept_id = ?", histogramConceptId).
102102
Where("observation.value_as_number is not null")
103103

104-
query = QueryFilterByConceptIdsHelper(query, sourceId, filterConceptIds, omopDataSource, resultsDataSource.Schema, "observation")
104+
query = QueryFilterByConceptIdsHelper(query, sourceId, filterConceptIds, omopDataSource, resultsDataSource.Schema, "unionAndIntersect.subject_id")
105105

106106
query, cancel := utils.AddTimeoutToQuery(query)
107107
defer cancel()
@@ -110,22 +110,20 @@ func (h CohortData) RetrieveHistogramDataBySourceIdAndCohortIdAndConceptIdsAndCo
110110
}
111111

112112
// Basically the same as the method above, but without the extra filtering on filterConceptId and filterConceptValue:
113-
func (h CohortData) RetrieveCohortOverlapStatsWithoutFilteringOnConceptValue(sourceId int, caseCohortId int, controlCohortId int,
114-
otherFilterConceptIds []int64, filterCohortPairs []utils.CustomDichotomousVariableDef) (CohortOverlapStats, error) {
113+
func (h CohortData) RetrieveCohortOverlapStats(sourceId int, caseCohortId int, controlCohortId int,
114+
filterConceptIds []int64, filterCohortPairs []utils.CustomDichotomousVariableDef) (CohortOverlapStats, error) {
115115

116116
var dataSourceModel = new(Source)
117117
omopDataSource := dataSourceModel.GetDataSource(sourceId, Omop)
118118
resultsDataSource := dataSourceModel.GetDataSource(sourceId, Results)
119119

120-
// count persons that are in the intersection of both case and control cohorts, filtering on filterConceptValue:
121120
var cohortOverlapStats CohortOverlapStats
122121
query := QueryFilterByCohortPairsHelper(filterCohortPairs, resultsDataSource, caseCohortId, "case_cohort_unionedAndIntersectedWithFilters").
123122
Select("count(distinct(case_cohort_unionedAndIntersectedWithFilters.subject_id)) as case_control_overlap").
124123
Joins("INNER JOIN " + resultsDataSource.Schema + ".cohort as control_cohort ON control_cohort.subject_id = case_cohort_unionedAndIntersectedWithFilters.subject_id") // this one allows for the intersection between case and control and the assessment of the overlap
125124

126-
if len(otherFilterConceptIds) > 0 {
127-
query = query.Joins("INNER JOIN " + omopDataSource.Schema + ".observation_continuous as observation" + omopDataSource.GetViewDirective() + " ON control_cohort.subject_id = observation.person_id")
128-
query = QueryFilterByConceptIdsHelper(query, sourceId, otherFilterConceptIds, omopDataSource, resultsDataSource.Schema, "observation")
125+
if len(filterConceptIds) > 0 {
126+
query = QueryFilterByConceptIdsHelper(query, sourceId, filterConceptIds, omopDataSource, resultsDataSource.Schema, "control_cohort.subject_id")
129127
}
130128
query = query.Where("control_cohort.cohort_definition_id = ?", controlCohortId)
131129
query, cancel := utils.AddTimeoutToQuery(query)

models/concept.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ func (h Concept) RetrieveBreakdownStatsBySourceIdAndCohortIdAndConceptIdsAndCoho
151151
Where("observation.observation_concept_id = ?", breakdownConceptId).
152152
Where(GetConceptValueNotNullCheckBasedOnConceptType("observation", sourceId, breakdownConceptId))
153153

154-
query = QueryFilterByConceptIdsHelper(query, sourceId, filterConceptIds, omopDataSource, resultsDataSource.Schema, "observation")
154+
query = QueryFilterByConceptIdsHelper(query, sourceId, filterConceptIds, omopDataSource, resultsDataSource.Schema, "unionAndIntersect.subject_id")
155155

156156
query, cancel := utils.AddTimeoutToQuery(query)
157157
defer cancel()

models/helper.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,13 @@ import (
1212
// * It was added here to make it reusable, given these filters need to be added to many of the queries that take in
1313
// a list of filters in the form of concept ids.
1414
func QueryFilterByConceptIdsHelper(query *gorm.DB, sourceId int, filterConceptIds []int64,
15-
omopDataSource *utils.DbAndSchema, resultSchemaName string, mainObservationTableAlias string) *gorm.DB {
15+
omopDataSource *utils.DbAndSchema, resultSchemaName string, personIdFieldForObservationJoin string) *gorm.DB {
1616
// iterate over the filterConceptIds, adding a new INNER JOIN and filters for each, so that the resulting set is the
1717
// set of persons that have a non-null value for each and every one of the concepts:
1818
for i, filterConceptId := range filterConceptIds {
1919
observationTableAlias := fmt.Sprintf("observation_filter_%d", i)
2020
log.Printf("Adding extra INNER JOIN with alias %s", observationTableAlias)
21-
query = query.Joins("INNER JOIN "+omopDataSource.Schema+".observation_continuous as "+observationTableAlias+omopDataSource.GetViewDirective()+" ON "+observationTableAlias+".person_id = "+mainObservationTableAlias+".person_id").
21+
query = query.Joins("INNER JOIN "+omopDataSource.Schema+".observation_continuous as "+observationTableAlias+omopDataSource.GetViewDirective()+" ON "+observationTableAlias+".person_id = "+personIdFieldForObservationJoin).
2222
Where(observationTableAlias+".observation_concept_id = ?", filterConceptId).
2323
Where(GetConceptValueNotNullCheckBasedOnConceptType(observationTableAlias, sourceId, filterConceptId))
2424
}

server/router.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ func NewRouter() *gin.Engine {
4848
// cohort stats and checks:
4949
cohortData := controllers.NewCohortDataController(*new(models.CohortData), middlewares.NewTeamProjectAuthz(*new(models.CohortDefinition), &http.Client{}))
5050
// :casecohortid/:controlcohortid are just labels here and have no special meaning. Could also just be :cohortAId/:cohortBId here:
51-
authorized.POST("/cohort-stats/check-overlap/by-source-id/:sourceid/by-cohort-definition-ids/:casecohortid/:controlcohortid", cohortData.RetrieveCohortOverlapStatsWithoutFilteringOnConceptValue)
51+
authorized.POST("/cohort-stats/check-overlap/by-source-id/:sourceid/by-cohort-definition-ids/:casecohortid/:controlcohortid", cohortData.RetrieveCohortOverlapStats)
5252

5353
// full data endpoints:
5454
authorized.POST("/cohort-data/by-source-id/:sourceid/by-cohort-definition-id/:cohortid", cohortData.RetrieveDataBySourceIdAndCohortIdAndVariables)

tests/controllers_tests/controllers_test.go

+6-6
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ func (h dummyCohortDataModel) RetrieveHistogramDataBySourceIdAndCohortIdAndConce
7676
return cohortData, nil
7777
}
7878

79-
func (h dummyCohortDataModel) RetrieveCohortOverlapStatsWithoutFilteringOnConceptValue(sourceId int, caseCohortId int, controlCohortId int,
79+
func (h dummyCohortDataModel) RetrieveCohortOverlapStats(sourceId int, caseCohortId int, controlCohortId int,
8080
otherFilterConceptIds []int64, filterCohortPairs []utils.CustomDichotomousVariableDef) (models.CohortOverlapStats, error) {
8181
var zeroOverlap models.CohortOverlapStats
8282
return zeroOverlap, nil
@@ -335,7 +335,7 @@ func TestRetrieveDataBySourceIdAndCohortIdAndVariablesCorrectParams(t *testing.T
335335
}
336336
}
337337

338-
func TestRetrieveCohortOverlapStatsWithoutFilteringOnConceptValue(t *testing.T) {
338+
func TestRetrieveCohortOverlapStats(t *testing.T) {
339339
setUp(t)
340340
requestContext := new(gin.Context)
341341
requestContext.Params = append(requestContext.Params, gin.Param{Key: "sourceid", Value: strconv.Itoa(tests.GetTestSourceId())})
@@ -346,7 +346,7 @@ func TestRetrieveCohortOverlapStatsWithoutFilteringOnConceptValue(t *testing.T)
346346
requestBody := "{\"variables\":[{\"variable_type\": \"concept\", \"concept_id\": 2000000324},{\"variable_type\": \"concept\", \"concept_id\": 2000006885}]}"
347347
requestContext.Request.Body = io.NopCloser(strings.NewReader(requestBody))
348348

349-
cohortDataController.RetrieveCohortOverlapStatsWithoutFilteringOnConceptValue(requestContext)
349+
cohortDataController.RetrieveCohortOverlapStats(requestContext)
350350
// Params above are correct, so request should NOT abort:
351351
if requestContext.IsAborted() {
352352
t.Errorf("Did not expect this request to abort")
@@ -358,7 +358,7 @@ func TestRetrieveCohortOverlapStatsWithoutFilteringOnConceptValue(t *testing.T)
358358

359359
// the same request should fail if the teamProject authorization fails:
360360
requestContext.Request.Body = io.NopCloser(strings.NewReader(requestBody))
361-
cohortDataControllerWithFailingTeamProjectAuthz.RetrieveCohortOverlapStatsWithoutFilteringOnConceptValue(requestContext)
361+
cohortDataControllerWithFailingTeamProjectAuthz.RetrieveCohortOverlapStats(requestContext)
362362
result = requestContext.Writer.(*tests.CustomResponseWriter)
363363
// expect error:
364364
if !strings.Contains(result.CustomResponseWriterOut, "access denied") {
@@ -369,13 +369,13 @@ func TestRetrieveCohortOverlapStatsWithoutFilteringOnConceptValue(t *testing.T)
369369
}
370370
}
371371

372-
func TestRetrieveCohortOverlapStatsWithoutFilteringOnConceptValueBadRequest(t *testing.T) {
372+
func TestRetrieveCohortOverlapStatsBadRequest(t *testing.T) {
373373
setUp(t)
374374
requestContext := new(gin.Context)
375375
requestContext.Params = append(requestContext.Params, gin.Param{Key: "sourceid", Value: strconv.Itoa(tests.GetTestSourceId())})
376376
requestContext.Writer = new(tests.CustomResponseWriter)
377377

378-
cohortDataController.RetrieveCohortOverlapStatsWithoutFilteringOnConceptValue(requestContext)
378+
cohortDataController.RetrieveCohortOverlapStats(requestContext)
379379
// Params above are incorrect, so request should abort:
380380
if !requestContext.IsAborted() {
381381
t.Errorf("Expected this request to abort")

tests/models_tests/models_test.go

+44-21
Original file line numberDiff line numberDiff line change
@@ -281,8 +281,8 @@ func TestQueryFilterByCohortPairsHelper(t *testing.T) {
281281
query = models.QueryFilterByCohortPairsHelper(filterCohortPairs, resultsDataSource, population.Id, "unionAndIntersect").
282282
Select("subject_id")
283283
_ = query.Scan(&subjectIds)
284-
// in this case we expect overlap the size of the largestCohort-5:
285-
if len(subjectIds) != (largestCohort.CohortSize - 5) {
284+
// in this case we expect overlap the size of the largestCohort-6 (where 6 is the size of the overlap between extendedCopyOfSecondLargestCohort and largestCohort):
285+
if len(subjectIds) != (largestCohort.CohortSize - 6) {
286286
t.Errorf("Expected %d overlap, found %d", largestCohort.CohortSize-5, len(subjectIds))
287287
}
288288

@@ -304,7 +304,7 @@ func TestQueryFilterByCohortPairsHelper(t *testing.T) {
304304
Select("subject_id")
305305
_ = query.Scan(&subjectIds)
306306
// in this case we expect same as previous test above:
307-
if len(subjectIds) != (largestCohort.CohortSize - 5) {
307+
if len(subjectIds) != (largestCohort.CohortSize - 6) {
308308
t.Errorf("Expected %d overlap, found %d", largestCohort.CohortSize-5, len(subjectIds))
309309
}
310310

@@ -495,8 +495,10 @@ func TestRetrieveBreakdownStatsBySourceIdAndCohortIdAndConceptIdsAndCohortPairsW
495495
if len(stats) > len(stats2) {
496496
t.Errorf("First query is more restrictive, so its stats should not be larger than stats2 of second query. Got %d and %d", len(stats), len(stats2))
497497
}
498-
// test filtering with smallest cohort, lenght should be 1, since that's the size of the smallest cohort:
499-
// setting the same cohort id here (artificial...normally it should be two different ids):
498+
499+
// test filtering with secondLargestCohort, smallest and largestCohort.
500+
// Lenght of result set should be 2 persons (one HIS, one ASN), since there is a overlap of 1 between secondLargestCohort and smallest cohort,
501+
// and overlap of 2 between secondLargestCohort and largestCohort, BUT only 1 has a HARE value:
500502
filterCohortPairs = []utils.CustomDichotomousVariableDef{
501503
{
502504
CohortDefinitionId1: smallestCohort.Id,
@@ -506,7 +508,14 @@ func TestRetrieveBreakdownStatsBySourceIdAndCohortIdAndConceptIdsAndCohortPairsW
506508
stats3, _ := conceptModel.RetrieveBreakdownStatsBySourceIdAndCohortIdAndConceptIdsAndCohortPairs(testSourceId,
507509
secondLargestCohort.Id, filterIds, filterCohortPairs, breakdownConceptId)
508510
if len(stats3) != 2 {
509-
t.Errorf("Expected only two items in resultset, found %d", len(stats))
511+
t.Errorf("Expected only two items in resultset, found %d", len(stats3))
512+
}
513+
countPersons := 0
514+
for _, stat := range stats3 {
515+
countPersons = countPersons + stat.NpersonsInCohortWithValue
516+
}
517+
if countPersons != 2 {
518+
t.Errorf("Expected only two persons in resultset, found %d", countPersons)
510519
}
511520
}
512521

@@ -731,9 +740,9 @@ func TestRetrieveHistogramDataBySourceIdAndCohortIdAndConceptIdsAndCohortPairs(t
731740
filterConceptIds := []int64{}
732741
filterCohortPairs := []utils.CustomDichotomousVariableDef{}
733742
data, _ := cohortDataModel.RetrieveHistogramDataBySourceIdAndCohortIdAndConceptIdsAndCohortPairs(testSourceId, largestCohort.Id, histogramConceptId, filterConceptIds, filterCohortPairs)
734-
// everyone in the largestCohort has the histogramConceptId:
735-
if len(data) != largestCohort.CohortSize {
736-
t.Errorf("expected 10 or more histogram data but got %d", len(data))
743+
// everyone in the largestCohort has the histogramConceptId, but one person has NULL in the value_as_number:
744+
if len(data) != largestCohort.CohortSize-1 {
745+
t.Errorf("expected %d histogram data but got %d", largestCohort.CohortSize, len(data))
737746
}
738747

739748
// now filter on the extendedCopyOfSecondLargestCohort
@@ -766,15 +775,15 @@ func TestQueryFilterByConceptIdsHelper(t *testing.T) {
766775
// Subtest1: correct alias "observation":
767776
query := omopDataSource.Db.Table(omopDataSource.Schema + ".observation_continuous as observation" + omopDataSource.GetViewDirective()).
768777
Select("observation.person_id")
769-
query = models.QueryFilterByConceptIdsHelper(query, testSourceId, filterConceptIds, omopDataSource, "", "observation")
778+
query = models.QueryFilterByConceptIdsHelper(query, testSourceId, filterConceptIds, omopDataSource, "", "observation.person_id")
770779
meta_result := query.Scan(&personIds)
771780
if meta_result.Error != nil {
772781
t.Errorf("Did NOT expect an error")
773782
}
774783
// Subtest2: incorrect alias "observation"...should fail:
775784
query = omopDataSource.Db.Table(omopDataSource.Schema + ".observation_continuous as observationWRONG").
776785
Select("*")
777-
query = models.QueryFilterByConceptIdsHelper(query, testSourceId, filterConceptIds, omopDataSource, "", "observation")
786+
query = models.QueryFilterByConceptIdsHelper(query, testSourceId, filterConceptIds, omopDataSource, "", "observation.person_id")
778787
meta_result = query.Scan(&personIds)
779788
if meta_result.Error == nil {
780789
t.Errorf("Expected an error")
@@ -850,14 +859,14 @@ func TestErrorForRetrieveDataBySourceIdAndCohortIdAndConceptIdsOrderedByPersonId
850859
tests.FixSomething(models.Results, "cohort", "cohort_definition_id")
851860
}
852861

853-
func TestRetrieveCohortOverlapStatsWithoutFilteringOnConceptValue(t *testing.T) {
862+
func TestRetrieveCohortOverlapStats(t *testing.T) {
854863
// Tests if we get the expected overlap
855864
setUp(t)
856865
caseCohortId := secondLargestCohort.Id
857866
controlCohortId := secondLargestCohort.Id // to ensure we get some overlap, just repeat the same here...
858867
otherFilterConceptIds := []int64{}
859868
filterCohortPairs := []utils.CustomDichotomousVariableDef{}
860-
stats, _ := cohortDataModel.RetrieveCohortOverlapStatsWithoutFilteringOnConceptValue(testSourceId, caseCohortId, controlCohortId,
869+
stats, _ := cohortDataModel.RetrieveCohortOverlapStats(testSourceId, caseCohortId, controlCohortId,
861870
otherFilterConceptIds, filterCohortPairs)
862871
// basic test:
863872
if stats.CaseControlOverlap != int64(secondLargestCohort.CohortSize) {
@@ -873,11 +882,11 @@ func TestRetrieveCohortOverlapStatsWithoutFilteringOnConceptValue(t *testing.T)
873882
CohortDefinitionId2: extendedCopyOfSecondLargestCohort.Id,
874883
ProvidedName: "test"},
875884
}
876-
// then we expect overlap of 5 for extendedCopyOfSecondLargestCohort and largestCohort:
877-
stats, _ = cohortDataModel.RetrieveCohortOverlapStatsWithoutFilteringOnConceptValue(testSourceId, caseCohortId, controlCohortId,
885+
// then we expect overlap of 6 for extendedCopyOfSecondLargestCohort and largestCohort:
886+
stats, _ = cohortDataModel.RetrieveCohortOverlapStats(testSourceId, caseCohortId, controlCohortId,
878887
otherFilterConceptIds, filterCohortPairs)
879-
if stats.CaseControlOverlap != 5 {
880-
t.Errorf("Expected nr persons to be %d, found %d", 5, stats.CaseControlOverlap)
888+
if stats.CaseControlOverlap != 6 {
889+
t.Errorf("Expected nr persons to be %d, found %d", 6, stats.CaseControlOverlap)
881890
}
882891

883892
// extra test: different parameters that should return the same as above ^:
@@ -886,10 +895,10 @@ func TestRetrieveCohortOverlapStatsWithoutFilteringOnConceptValue(t *testing.T)
886895
filterCohortPairs = []utils.CustomDichotomousVariableDef{}
887896
otherFilterConceptIds = []int64{histogramConceptId} // extra filter, to cover this part of the code...
888897
// then we expect overlap of 5 for extendedCopyOfSecondLargestCohort and largestCohort (the filter on histogramConceptId should not matter
889-
// since all in largestCohort have an observation for this concept id):
890-
stats2, _ := cohortDataModel.RetrieveCohortOverlapStatsWithoutFilteringOnConceptValue(testSourceId, caseCohortId, controlCohortId,
898+
// since all in largestCohort have an observation for this concept id except one person who has it but has value_as_number as NULL):
899+
stats2, _ := cohortDataModel.RetrieveCohortOverlapStats(testSourceId, caseCohortId, controlCohortId,
891900
otherFilterConceptIds, filterCohortPairs)
892-
if stats2.CaseControlOverlap != stats.CaseControlOverlap {
901+
if stats2.CaseControlOverlap != stats.CaseControlOverlap-1 {
893902
t.Errorf("Expected nr persons to be %d, found %d", stats.CaseControlOverlap, stats2.CaseControlOverlap)
894903
}
895904

@@ -898,7 +907,7 @@ func TestRetrieveCohortOverlapStatsWithoutFilteringOnConceptValue(t *testing.T)
898907
otherFilterConceptIds = []int64{histogramConceptId, dummyContinuousConceptId}
899908
// all other arguments are the same as test above, and we expect overlap of 0, showing the otherFilterConceptIds
900909
// had the expected effect:
901-
stats3, _ := cohortDataModel.RetrieveCohortOverlapStatsWithoutFilteringOnConceptValue(testSourceId, caseCohortId, controlCohortId,
910+
stats3, _ := cohortDataModel.RetrieveCohortOverlapStats(testSourceId, caseCohortId, controlCohortId,
902911
otherFilterConceptIds, filterCohortPairs)
903912
if stats3.CaseControlOverlap != 0 {
904913
t.Errorf("Expected nr persons to be 0, found %d", stats3.CaseControlOverlap)
@@ -1014,3 +1023,17 @@ func TestAddTimeoutToQuery(t *testing.T) {
10141023
t.Errorf("Expected result and NO error")
10151024
}
10161025
}
1026+
1027+
func TestPersonConceptAndCountString(t *testing.T) {
1028+
a := models.PersonConceptAndCount{
1029+
PersonId: 1,
1030+
ConceptId: 2,
1031+
Count: 3,
1032+
}
1033+
1034+
expected := "(person_id=1, concept_id=2, count=3)"
1035+
if a.String() != expected {
1036+
t.Errorf("Expected %s, found %s", expected, a.String())
1037+
}
1038+
1039+
}

0 commit comments

Comments
 (0)