Skip to content

Commit 3649283

Browse files
Merge pull request #57 from uc-cdis/use_separate_view_for_observation_queries
Feat: use new observation_continuous view to speed up queries
2 parents 4543f45 + 657208d commit 3649283

File tree

7 files changed

+82
-23
lines changed

7 files changed

+82
-23
lines changed

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ TABLE results.COHORT
101101
TABLE omop.person
102102
TABLE omop.observation
103103
TABLE omop.concept
104+
VIEW omop.observation_continuous
104105
```
105106

106107

models/cohortdata.go

+5-5
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ func (h CohortData) RetrieveDataBySourceIdAndCohortIdAndConceptIdsOrderedByPerso
6868

6969
// get the observations for the subjects and the concepts, to build up the data rows to return:
7070
var cohortData []*PersonConceptAndValue
71-
meta_result := omopDataSource.Db.Model(&Observation{}).
71+
meta_result := omopDataSource.Db.Table(omopDataSource.Schema+".observation_continuous as observation").
7272
Select("observation.person_id, observation.observation_concept_id as concept_id, observation.value_as_string as concept_value_as_string, observation.value_as_number as concept_value_as_number, observation.value_as_concept_id as concept_value_as_concept_id").
7373
Joins("INNER JOIN "+resultsDataSource.Schema+".cohort as cohort ON cohort.subject_id = observation.person_id").
7474
Where("cohort.cohort_definition_id = ?", cohortDefinitionId).
@@ -87,7 +87,7 @@ func (h CohortData) RetrieveHistogramDataBySourceIdAndCohortIdAndConceptIdsAndCo
8787

8888
// get the observations for the subjects and the concepts, to build up the data rows to return:
8989
var cohortData []*PersonConceptAndValue
90-
query := omopDataSource.Db.Model(&Observation{}).
90+
query := omopDataSource.Db.Table(omopDataSource.Schema+".observation_continuous as observation").
9191
Select("distinct(observation.person_id), observation.observation_concept_id as concept_id, observation.value_as_number as concept_value_as_number").
9292
Joins("INNER JOIN "+resultsDataSource.Schema+".cohort as cohort ON cohort.subject_id = observation.person_id").
9393
Where("cohort.cohort_definition_id = ?", cohortDefinitionId).
@@ -111,7 +111,7 @@ func (h CohortData) RetrieveCohortOverlapStats(sourceId int, caseCohortId int, c
111111

112112
// count persons that are in the intersection of both case and control cohorts, filtering on filterConceptValue:
113113
var cohortOverlapStats CohortOverlapStats
114-
query := omopDataSource.Db.Model(&Observation{}).
114+
query := omopDataSource.Db.Table(omopDataSource.Schema+".observation_continuous as observation").
115115
Select("count(distinct(observation.person_id)) as case_control_overlap").
116116
Joins("INNER JOIN "+resultsDataSource.Schema+".cohort as case_cohort ON case_cohort.subject_id = observation.person_id").
117117
Joins("INNER JOIN "+resultsDataSource.Schema+".cohort as control_cohort ON control_cohort.subject_id = case_cohort.subject_id"). // this one allows for the intersection between case and control and the assessment of the overlap
@@ -136,7 +136,7 @@ func (h CohortData) RetrieveCohortOverlapStatsWithoutFilteringOnConceptValue(sou
136136

137137
// count persons that are in the intersection of both case and control cohorts, filtering on filterConceptValue:
138138
var cohortOverlapStats CohortOverlapStats
139-
query := omopDataSource.Db.Model(&Observation{}).
139+
query := omopDataSource.Db.Table(omopDataSource.Schema+".observation_continuous as observation").
140140
Select("count(distinct(observation.person_id)) as case_control_overlap").
141141
Joins("INNER JOIN "+resultsDataSource.Schema+".cohort as case_cohort ON case_cohort.subject_id = observation.person_id").
142142
Joins("INNER JOIN "+resultsDataSource.Schema+".cohort as control_cohort ON control_cohort.subject_id = case_cohort.subject_id"). // this one allows for the intersection between case and control and the assessment of the overlap
@@ -173,7 +173,7 @@ func (h CohortData) ValidateObservationData(observationConceptIdsToCheck []int64
173173
log.Printf("INFO: checking if no duplicate data is found for concept ids %v in `observation` table of data source %d...",
174174
observationConceptIdsToCheck, source.SourceId)
175175
var personConceptAndCount []*PersonConceptAndCount
176-
query := omopDataSource.Db.Model(&Observation{}).
176+
query := omopDataSource.Db.Table(omopDataSource.Schema+".observation_continuous as observation").
177177
Select("observation.person_id, observation.observation_concept_id as concept_id, count(*)").
178178
Where("observation.observation_concept_id in (?)", observationConceptIdsToCheck).
179179
Group("observation.person_id, observation.observation_concept_id").

models/concept.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ func (h Concept) RetrieveStatsBySourceIdAndCohortIdAndConceptIds(sourceId int, c
161161
// no value for this concept by first finding the ones that do have some value and
162162
// then subtracting them from cohort size before dividing:
163163
var conceptsAndPersonsWithData []*ConceptAndPersonsWithDataStats
164-
meta_result = omopDataSource.Db.Model(&Observation{}).
164+
meta_result = omopDataSource.Db.Table(omopDataSource.Schema+".observation_continuous as observation").
165165
Select("observation_concept_id as concept_id, count(distinct(person_id)) as nperson_ids").
166166
Joins("INNER JOIN "+resultsDataSource.Schema+".cohort as cohort ON cohort.subject_id = observation.person_id").
167167
Where("cohort.cohort_definition_id = ?", cohortDefinitionId).
@@ -220,7 +220,7 @@ func (h Concept) RetrieveBreakdownStatsBySourceIdAndCohortIdAndConceptIdsAndCoho
220220
// count persons, grouping by concept value:
221221
var breakdownValueFieldName = "observation.value_as_" + getConceptValueType(breakdownConceptId)
222222
var conceptBreakdownList []*ConceptBreakdown
223-
query := omopDataSource.Db.Model(&Observation{}).
223+
query := omopDataSource.Db.Table(omopDataSource.Schema+".observation_continuous as observation").
224224
Select("observation.value_as_concept_id, count(distinct(observation.person_id)) as npersons_in_cohort_with_value").
225225
Joins("INNER JOIN "+resultsDataSource.Schema+".cohort as cohort ON cohort.subject_id = observation.person_id").
226226
Where("cohort.cohort_definition_id = ?", cohortDefinitionId).

models/helper.go

+12-2
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,23 @@ import (
88
"gorm.io/gorm"
99
)
1010

11+
// Helper function that adds extra filter clauses to the query, joining on the right set of tables, excluding data where necessary, etc.
12+
// * It was added here to make it reusable, given these filters need to be added to many of the queries that take in
13+
// a list of filters in the form of concept ids and cohort pairs. The one assumption it makes is that the given `query` object already contains
14+
// a basic query on a table or view that have been named or aliased as "observation" (see comments in code). This assumption is
15+
// checked at the start.
1116
func QueryFilterByConceptIdsAndCohortPairsHelper(query *gorm.DB, filterConceptIds []int64, filterCohortPairs []utils.CustomDichotomousVariableDef, omopSchemaName string, resultSchemaName string) *gorm.DB {
17+
// Validate assumption of a table or view aliased as "observation":
18+
if query.Statement.Table != "observation" {
19+
panic("Error: this QueryFilterByConceptIdsAndCohortPairsHelper is meant for adding extra filters to a query on a table or view with the alias name `observation`")
20+
}
21+
1222
// iterate over the filterConceptIds, adding a new INNER JOIN and filters for each, so that the resulting set is the
1323
// set of persons that have a non-null value for each and every one of the concepts:
1424
for i, filterConceptId := range filterConceptIds {
1525
observationTableAlias := fmt.Sprintf("observation_filter_%d", i)
1626
log.Printf("Adding extra INNER JOIN with alias %s", observationTableAlias)
17-
query = query.Joins("INNER JOIN "+omopSchemaName+".observation as "+observationTableAlias+" ON "+observationTableAlias+".person_id = observation.person_id").
27+
query = query.Joins("INNER JOIN "+omopSchemaName+".observation_continuous as "+observationTableAlias+" ON "+observationTableAlias+".person_id = observation.person_id"). // assumption: there is a table or view named or aliased as "observation"
1828
Where(observationTableAlias+".observation_concept_id = ?", filterConceptId).
1929
Where("(" + observationTableAlias + ".value_as_string is not null or " + observationTableAlias + ".value_as_number is not null)") // TODO - improve performance by only filtering on type according to getConceptValueType()
2030
}
@@ -33,7 +43,7 @@ func QueryFilterByConceptIdsAndCohortPairsHelper(query *gorm.DB, filterConceptId
3343
" EXCEPT "+ //now use EXCEPT to exclude the part where both cohorts INTERSECT
3444
" Select "+cohortTableAlias2+".subject_id FROM "+resultSchemaName+".cohort as "+cohortTableAlias2+
3545
" INNER JOIN "+resultSchemaName+".cohort as "+cohortTableAlias3+" ON "+cohortTableAlias3+".subject_id = "+cohortTableAlias2+".subject_id "+
36-
" where "+cohortTableAlias2+".cohort_definition_id = ? AND "+cohortTableAlias3+".cohort_definition_id =? ) AS "+unionExceptAlias+" ON "+unionExceptAlias+".subject_id = observation.person_id",
46+
" where "+cohortTableAlias2+".cohort_definition_id = ? AND "+cohortTableAlias3+".cohort_definition_id =? ) AS "+unionExceptAlias+" ON "+unionExceptAlias+".subject_id = observation.person_id", // assumption: there is a table or view named or aliased as "observation"
3747
filterCohortPair.CohortId1, filterCohortPair.CohortId2, filterCohortPair.CohortId1, filterCohortPair.CohortId2)
3848
}
3949

tests/models_tests/models_test.go

+46-4
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package models_tests
33
import (
44
"log"
55
"os"
6+
"strings"
67
"testing"
78

89
"github.com/uc-cdis/cohort-middleware/config"
@@ -366,6 +367,46 @@ func TestRetrieveHistogramDataBySourceIdAndCohortIdAndConceptIdsAndCohortPairs(t
366367
}
367368
}
368369

370+
func TestQueryFilterByConceptIdsAndCohortPairsHelper(t *testing.T) {
371+
// This test checks whether the query succeeds when there is a table or
372+
// view aliased as "observation" and whether it fails otherwise.
373+
374+
setUp(t)
375+
omopDataSource := tests.GetOmopDataSource()
376+
filterConceptIds := []int64{1, 2, 3}
377+
filterCohortPairs := []utils.CustomDichotomousVariableDef{} // empty / not really needed for test
378+
var personIds []struct {
379+
PersonId int64
380+
}
381+
382+
// Subtest1: correct alias "observation":
383+
query := omopDataSource.Db.Table(omopDataSource.Schema + ".observation_continuous as observation").
384+
Select("observation.person_id")
385+
query = models.QueryFilterByConceptIdsAndCohortPairsHelper(query, filterConceptIds, filterCohortPairs, omopDataSource.Schema, "")
386+
meta_result := query.Scan(&personIds)
387+
if meta_result.Error != nil {
388+
t.Errorf("Did NOT expect an error")
389+
}
390+
// Subtest2: incorrect alias "observationWRONG"...should fail/panic:
391+
defer func() {
392+
panicMessage := recover()
393+
394+
if panicMessage == nil {
395+
t.Errorf("The code did not panic")
396+
}
397+
panicMessageStr, isString := panicMessage.(string)
398+
399+
if !isString || !strings.HasPrefix(panicMessageStr, "Error: this QueryFilterByConceptIdsAndCohortPairsHelper is meant for ") {
400+
t.Errorf("The code did not panic with expected error message")
401+
}
402+
403+
}()
404+
query = omopDataSource.Db.Table(omopDataSource.Schema + ".observation_continuous as observationWRONG").
405+
Select("*")
406+
query = models.QueryFilterByConceptIdsAndCohortPairsHelper(query, filterConceptIds, filterCohortPairs, omopDataSource.Schema, "")
407+
query.Scan(&personIds)
408+
}
409+
369410
func TestRetrieveDataBySourceIdAndCohortIdAndConceptIdsOrderedByPersonId(t *testing.T) {
370411
setUp(t)
371412
cohortDefinitions, _ := cohortDefinitionModel.GetAllCohortDefinitionsAndStatsOrderBySizeDesc(testSourceId)
@@ -404,18 +445,19 @@ func TestRetrieveDataBySourceIdAndCohortIdAndConceptIdsOrderedByPersonId(t *test
404445
func TestErrorForRetrieveDataBySourceIdAndCohortIdAndConceptIdsOrderedByPersonId(t *testing.T) {
405446
// Tests if the method returns an error when query fails.
406447

407-
// break something in the omop schema to cause a query failure in the next method:
408-
tests.BreakSomething(models.Omop, "observation", "person_id")
448+
cohortDefinitions, _ := cohortDefinitionModel.GetAllCohortDefinitionsAndStatsOrderBySizeDesc(testSourceId)
449+
450+
// break something in the Results schema to cause a query failure in the next method:
451+
tests.BreakSomething(models.Results, "cohort", "cohort_definition_id")
409452
// set last action to restore back:
410453
// run test:
411-
cohortDefinitions, _ := cohortDefinitionModel.GetAllCohortDefinitionsAndStatsOrderBySizeDesc(testSourceId)
412454
_, error := cohortDataModel.RetrieveDataBySourceIdAndCohortIdAndConceptIdsOrderedByPersonId(
413455
testSourceId, cohortDefinitions[0].Id, allConceptIds)
414456
if error == nil {
415457
t.Errorf("Expected error")
416458
}
417459
// revert the broken part:
418-
tests.FixSomething(models.Omop, "observation", "person_id")
460+
tests.FixSomething(models.Results, "cohort", "cohort_definition_id")
419461
}
420462

421463
// for given source and cohort, counts how many persons have the given HARE value

tests/setup_local_db/ddl_results_and_cdm.sql

+6
Original file line numberDiff line numberDiff line change
@@ -94,3 +94,9 @@ CREATE TABLE omop.concept
9494
valid_end_date date NOT NULL DEFAULT DATE('2099-01-01'),
9595
invalid_reason character varying(1) COLLATE pg_catalog."default"
9696
);
97+
98+
CREATE VIEW omop.OBSERVATION_CONTINUOUS AS
99+
SELECT ob.person_id, ob.observation_concept_id, ob.value_as_string, ob.value_as_number, ob.value_as_concept_id
100+
FROM omop.observation ob
101+
INNER JOIN omop.concept concept ON concept.CONCEPT_ID=ob.OBSERVATION_CONCEPT_ID
102+
WHERE concept.CONCEPT_CLASS_ID='MVP Continuous' or concept.CONCEPT_ID=2000007027

tests/setup_local_db/test_data_results_and_cdm.sql

+10-10
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,10 @@
55
insert into omop.concept
66
(concept_id,concept_name,domain_id,vocabulary_id,concept_class_id,standard_concept,concept_code,valid_start_date,valid_end_date,invalid_reason)
77
values
8-
(2000006885,'Average height ','Measurement','Measurement','Measurement','S','F','1970-01-01','2099-12-31',NULL),
9-
(2000000323,'MVP Age Group','Person','Person','Person','S','F','1970-01-01','2099-12-31',NULL),
10-
(2000000324,'Sex, indicated by the subject','Person','Person','Observation Type',NULL,'OMOP4822310','1970-01-01','2099-12-31',NULL),
11-
(2000000280,'BMI at enrollment','Measurement','Measurement','Measurement','S','2','1970-01-01','2099-12-31',NULL)
8+
(2000006885,'Average height ','Measurement','Measurement','MVP Continuous','S','F','1970-01-01','2099-12-31',NULL),
9+
(2000000323,'MVP Age Group','Person','Person','MVP Continuous','S','F','1970-01-01','2099-12-31',NULL),
10+
(2000000324,'Some continuous value, indicated by the subject','Person','Person','MVP Continuous',NULL,'OMOP4822310','1970-01-01','2099-12-31',NULL),
11+
(2000000280,'BMI at enrollment','Measurement','Measurement','MVP Continuous','S','2','1970-01-01','2099-12-31',NULL)
1212
;
1313

1414
-- TODO - add concept_type_class "concept_class_id" "MVP Continuous" to better reflect real queries?
@@ -99,12 +99,12 @@ For HARE info, see https://pubmed.ncbi.nlm.nih.gov/31564439/.
9999
insert into omop.concept
100100
(concept_id,concept_code,concept_name,domain_id,vocabulary_id,concept_class_id,standard_concept,valid_start_date,valid_end_date,invalid_reason)
101101
values
102-
(2000007027,'HARE_CODE','HARE', 'Person','Person','Observation Type','S','1970-01-01','2099-12-31',NULL),
103-
(2000007028,'HIS', 'Hispanic', 'Person','Person','Observation Type','S','1970-01-01','2099-12-31',NULL),
104-
(2000007029,'ASN','non-Hispanic Asian','Person','Person','Observation Type','S','1970-01-01','2099-12-31',NULL),
105-
(2000007030,'AFR','non-Hispanic Black','Person','Person','Observation Type','S','1970-01-01','2099-12-31',NULL),
106-
(2000007031,'EUR','non-Hispanic White','Person','Person','Observation Type','S','1970-01-01','2099-12-31',NULL),
107-
(2000007032,'OTH','Other', 'Person','Person','Observation Type','S','1970-01-01','2099-12-31',NULL)
102+
(2000007027,'HARE_CODE','HARE', 'Person','Person','MVP Ordinal','S','1970-01-01','2099-12-31',NULL),
103+
(2000007028,'HIS', 'Hispanic', 'Person','Person','MVP Ordinal','S','1970-01-01','2099-12-31',NULL),
104+
(2000007029,'ASN','non-Hispanic Asian','Person','Person','MVP Ordinal','S','1970-01-01','2099-12-31',NULL),
105+
(2000007030,'AFR','non-Hispanic Black','Person','Person','MVP Ordinal','S','1970-01-01','2099-12-31',NULL),
106+
(2000007031,'EUR','non-Hispanic White','Person','Person','MVP Ordinal','S','1970-01-01','2099-12-31',NULL),
107+
(2000007032,'OTH','Other', 'Person','Person','MVP Ordinal','S','1970-01-01','2099-12-31',NULL)
108108
;
109109

110110
-- insert `observation` records:

0 commit comments

Comments
 (0)