Skip to content

Commit 7e71f68

Browse files
Merge pull request #60 from uc-cdis/fix_align_breakdown_and_other_queries
Fix: make same null checks for breakdown and other queries
2 parents 33d992f + 53f7503 commit 7e71f68

File tree

6 files changed

+111
-32
lines changed

6 files changed

+111
-32
lines changed

models/cohortdata.go

+3-3
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ func (h CohortData) RetrieveHistogramDataBySourceIdAndCohortIdAndConceptIdsAndCo
9494
Where("observation.observation_concept_id = ?", histogramConceptId).
9595
Where("observation.value_as_number is not null")
9696

97-
query = QueryFilterByConceptIdsAndCohortPairsHelper(query, filterConceptIds, filterCohortPairs, omopDataSource, resultsDataSource.Schema, "observation")
97+
query = QueryFilterByConceptIdsAndCohortPairsHelper(query, sourceId, filterConceptIds, filterCohortPairs, omopDataSource, resultsDataSource.Schema, "observation")
9898

9999
meta_result := query.Scan(&cohortData)
100100
return cohortData, meta_result.Error
@@ -120,7 +120,7 @@ func (h CohortData) RetrieveCohortOverlapStats(sourceId int, caseCohortId int, c
120120
Where("observation.observation_concept_id = ?", filterConceptId).
121121
Where("observation.value_as_concept_id = ?", filterConceptValue)
122122

123-
query = QueryFilterByConceptIdsAndCohortPairsHelper(query, otherFilterConceptIds, filterCohortPairs, omopDataSource, resultsDataSource.Schema, "observation")
123+
query = QueryFilterByConceptIdsAndCohortPairsHelper(query, sourceId, otherFilterConceptIds, filterCohortPairs, omopDataSource, resultsDataSource.Schema, "observation")
124124

125125
meta_result := query.Scan(&cohortOverlapStats)
126126
return cohortOverlapStats, meta_result.Error
@@ -149,7 +149,7 @@ func (h CohortData) RetrieveCohortOverlapStatsWithoutFilteringOnConceptValue(sou
149149
Where("case_cohort.cohort_definition_id = ?", caseCohortId).
150150
Where("control_cohort.cohort_definition_id = ?", controlCohortId)
151151

152-
query = QueryFilterByConceptIdsAndCohortPairsHelper(query, otherFilterConceptIds, filterCohortPairs, omopDataSource, resultsDataSource.Schema, "observation")
152+
query = QueryFilterByConceptIdsAndCohortPairsHelper(query, sourceId, otherFilterConceptIds, filterCohortPairs, omopDataSource, resultsDataSource.Schema, "observation")
153153

154154
meta_result := query.Scan(&cohortOverlapStats)
155155
return cohortOverlapStats, meta_result.Error

models/concept.go

+8-13
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ type ConceptI interface {
1616
RetrieveBreakdownStatsBySourceIdAndCohortIdAndConceptIdsAndCohortPairs(sourceId int, cohortDefinitionId int, filterConceptIds []int64, filterCohortPairs []utils.CustomDichotomousVariableDef, breakdownConceptId int64) ([]*ConceptBreakdown, error)
1717
}
1818
type Concept struct {
19-
ConceptId int `json:"concept_id"`
19+
ConceptId int64 `json:"concept_id"`
2020
ConceptName string `json:"concept_name"`
2121
ConceptType string `json:"concept_type"`
2222
}
@@ -75,19 +75,21 @@ func getNrPersonsWithData(conceptId int64, conceptsAndPersonsWithData []*Concept
7575
}
7676

7777
// Retrieve just a simple concept info for a given conceptId.
78+
// Raises an error if concept is not found.
7879
func (h Concept) RetrieveInfoBySourceIdAndConceptId(sourceId int, conceptId int64) (*ConceptSimple, error) {
7980
conceptIds := []int64{conceptId}
8081
result, err := h.RetrieveInfoBySourceIdAndConceptIds(sourceId, conceptIds)
8182
if err != nil {
8283
return nil, err
8384
} else if len(result) == 0 {
84-
// given concept_id not found, just return nil:
85-
return nil, nil
85+
// given concept_id not found, return error:
86+
return nil, fmt.Errorf("unexpected error: did not find concept")
8687
}
8788
return result[0], err
8889
}
8990

9091
// Retrieve just a simple list of concept names and type info for given list of conceptIds.
92+
// Raises an error if any of the concepts is not found.
9193
func (h Concept) RetrieveInfoBySourceIdAndConceptIds(sourceId int, conceptIds []int64) ([]*ConceptSimple, error) {
9294
var dataSourceModel = new(Source)
9395
omopDataSource := dataSourceModel.GetDataSource(sourceId, Omop)
@@ -106,7 +108,7 @@ func (h Concept) RetrieveInfoBySourceIdAndConceptIds(sourceId int, conceptIds []
106108
conceptItem.PrefixedConceptId = GetPrefixedConceptId(conceptItem.ConceptId)
107109
}
108110
if len(conceptItems) != len(conceptIds) {
109-
return nil, fmt.Errorf("unexpected error: did not find all concepts")
111+
return nil, fmt.Errorf("unexpected error: did not find the expected number of concepts")
110112
}
111113
return conceptItems, nil
112114
}
@@ -188,10 +190,6 @@ func (h Concept) RetrieveStatsBySourceIdAndCohortIdAndConceptIds(sourceId int, c
188190
return conceptStats, meta_result.Error
189191
}
190192

191-
func getConceptValueType(conceptId int64) string {
192-
return "string" // TODO - add logic to return "string" or "number" depending on concept type
193-
}
194-
195193
// This function will return cohort size broken down over the different values
196194
// of the given "breakdown concept" by querying, for each distinct concept value,
197195
// how many persons in the cohort have that value in their observation records.
@@ -218,19 +216,16 @@ func (h Concept) RetrieveBreakdownStatsBySourceIdAndCohortIdAndConceptIdsAndCoho
218216
resultsDataSource := dataSourceModel.GetDataSource(sourceId, Results)
219217

220218
// count persons, grouping by concept value:
221-
var breakdownValueFieldName = "observation.value_as_" + getConceptValueType(breakdownConceptId)
222219
var conceptBreakdownList []*ConceptBreakdown
223220
query := QueryFilterByCohortPairsHelper(filterCohortPairs, resultsDataSource, cohortDefinitionId, "unionAndIntersect").
224221
Select("observation.value_as_concept_id, count(distinct(observation.person_id)) as npersons_in_cohort_with_value").
225222
Joins("INNER JOIN "+omopDataSource.Schema+".observation_continuous as observation"+omopDataSource.GetViewDirective()+" ON unionAndIntersect.subject_id = observation.person_id").
226223
Where("observation.observation_concept_id = ?", breakdownConceptId).
227-
Where(breakdownValueFieldName + " is not null"). // this one seems like a bit of a random constraint...but was a request from the business side: skip records where this field is null
228-
Where("observation.value_as_concept_id is not null"). // this is assuming that the breakdownConceptId has its values nicely stored as concepts as well and correctly used in observation table...
229-
Where("observation.value_as_concept_id != 0")
224+
Where(GetConceptValueNotNullCheckBasedOnConceptType("observation", sourceId, breakdownConceptId))
230225

231226
// note: here we pass empty []utils.CustomDichotomousVariableDef{} instead of filterCohortPairs, since we already use the SQL generated by QueryFilterByCohortPairsHelper above,
232227
// which is a better performing SQL in this particular scenario:
233-
query = QueryFilterByConceptIdsAndCohortPairsHelper(query, filterConceptIds, []utils.CustomDichotomousVariableDef{}, omopDataSource, resultsDataSource.Schema, "observation")
228+
query = QueryFilterByConceptIdsAndCohortPairsHelper(query, sourceId, filterConceptIds, []utils.CustomDichotomousVariableDef{}, omopDataSource, resultsDataSource.Schema, "observation")
234229

235230
meta_result := query.Group("observation.value_as_concept_id").
236231
Scan(&conceptBreakdownList)

models/helper.go

+19-2
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ import (
1313
// a list of filters in the form of concept ids and cohort pairs. The one assumption it makes is that the given `query` object already contains
1414
// a basic query on a table or view that have been named or aliased as "observation" (see comments in code). This assumption is
1515
// checked at the start.
16-
func QueryFilterByConceptIdsAndCohortPairsHelper(query *gorm.DB, filterConceptIds []int64, filterCohortPairs []utils.CustomDichotomousVariableDef,
16+
func QueryFilterByConceptIdsAndCohortPairsHelper(query *gorm.DB, sourceId int, filterConceptIds []int64, filterCohortPairs []utils.CustomDichotomousVariableDef,
1717
omopDataSource *utils.DbAndSchema, resultSchemaName string, mainObservationTableAlias string) *gorm.DB {
1818
// iterate over the filterConceptIds, adding a new INNER JOIN and filters for each, so that the resulting set is the
1919
// set of persons that have a non-null value for each and every one of the concepts:
@@ -22,7 +22,7 @@ func QueryFilterByConceptIdsAndCohortPairsHelper(query *gorm.DB, filterConceptId
2222
log.Printf("Adding extra INNER JOIN with alias %s", observationTableAlias)
2323
query = query.Joins("INNER JOIN "+omopDataSource.Schema+".observation_continuous as "+observationTableAlias+omopDataSource.GetViewDirective()+" ON "+observationTableAlias+".person_id = "+mainObservationTableAlias+".person_id").
2424
Where(observationTableAlias+".observation_concept_id = ?", filterConceptId).
25-
Where("(" + observationTableAlias + ".value_as_string is not null or " + observationTableAlias + ".value_as_number is not null)") // TODO - improve performance by only filtering on type according to getConceptValueType()
25+
Where(GetConceptValueNotNullCheckBasedOnConceptType(observationTableAlias, sourceId, filterConceptId))
2626
}
2727
// iterate over the list of filterCohortPairs, adding a new INNER JOIN to the UNION of each pair, so that the resulting set is the
2828
// set of persons that are part of the intersections above and of one of the cohorts in the filterCohortPairs:
@@ -82,3 +82,20 @@ func QueryFilterByCohortPairsHelper(filterCohortPairs []utils.CustomDichotomousV
8282
query := resultsDataSource.Db.Table(unionAndIntersectSQL+" as "+unionAndIntersectSQLAlias+" ", idsList...)
8383
return query
8484
}
85+
86+
// This function will get the concept information for given conceptId, and
87+
// return the best SQL to use for doing a "not null" check on its value in the
88+
// observation table.
89+
func GetConceptValueNotNullCheckBasedOnConceptType(observationTableAlias string, sourceId int, conceptId int64) string {
90+
conceptModel := *new(Concept)
91+
conceptInfo, error := conceptModel.RetrieveInfoBySourceIdAndConceptId(sourceId, conceptId)
92+
if error != nil {
93+
panic("error while trying to get information for conceptId, or conceptId not found")
94+
} else if conceptInfo.ConceptType == "MVP Continuous" {
95+
return observationTableAlias + ".value_as_number is not null"
96+
} else if conceptInfo.ConceptType == "MVP Ordinal" {
97+
return observationTableAlias + ".value_as_concept_id is not null and " + observationTableAlias + ".value_as_concept_id != 0"
98+
} else {
99+
panic(fmt.Sprintf("error: concept type not supported [%s]", conceptInfo.ConceptType))
100+
}
101+
}

tests/models_tests/models_test.go

+63-13
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package models_tests
33
import (
44
"log"
55
"os"
6+
"strings"
67
"testing"
78

89
"github.com/uc-cdis/cohort-middleware/config"
@@ -107,6 +108,47 @@ func TestGetPrefixedConceptId(t *testing.T) {
107108
}
108109
}
109110

111+
func TestGetConceptValueNotNullCheckBasedOnConceptTypeError(t *testing.T) {
112+
setUp(t)
113+
// the call below should result in panic/error:
114+
defer func() {
115+
if r := recover(); r == nil {
116+
t.Errorf("The code did not panic")
117+
}
118+
}()
119+
models.GetConceptValueNotNullCheckBasedOnConceptType("observation", testSourceId, -1)
120+
}
121+
122+
func TestGetConceptValueNotNullCheckBasedOnConceptTypeError2(t *testing.T) {
123+
setUp(t)
124+
// add dummy concept:
125+
conceptId := tests.AddInvalidTypeConcept(models.Omop)
126+
127+
// the call below should result in a specific panic/error on the concept type not being supported:
128+
defer func() {
129+
r := recover()
130+
if r == nil || !strings.HasPrefix(r.(string), "error: concept type not supported") {
131+
t.Errorf("The code did not panic with expected error")
132+
}
133+
// cleanup:
134+
tests.RemoveConcept(models.Omop, conceptId)
135+
}()
136+
models.GetConceptValueNotNullCheckBasedOnConceptType("observation", testSourceId, conceptId)
137+
}
138+
139+
func TestGetConceptValueNotNullCheckBasedOnConceptTypeSuccess(t *testing.T) {
140+
setUp(t)
141+
// check success scenarios:
142+
result := models.GetConceptValueNotNullCheckBasedOnConceptType("observation", testSourceId, hareConceptId)
143+
if result != "observation.value_as_concept_id is not null and observation.value_as_concept_id != 0" {
144+
t.Errorf("Unexpected result. Found %s", result)
145+
}
146+
result = models.GetConceptValueNotNullCheckBasedOnConceptType("observation", testSourceId, histogramConceptId)
147+
if result != "observation.value_as_number is not null" {
148+
t.Errorf("Unexpected result. Found %s", result)
149+
}
150+
}
151+
110152
func TestRetriveAllBySourceId(t *testing.T) {
111153
setUp(t)
112154
concepts, _ := conceptModel.RetriveAllBySourceId(testSourceId)
@@ -173,10 +215,13 @@ func TestRetrieveInfoBySourceIdAndConceptTypes(t *testing.T) {
173215
func TestRetrieveInfoBySourceIdAndConceptIdNotFound(t *testing.T) {
174216
setUp(t)
175217
// get all concepts:
176-
conceptInfo, _ := conceptModel.RetrieveInfoBySourceIdAndConceptId(testSourceId,
218+
conceptInfo, error := conceptModel.RetrieveInfoBySourceIdAndConceptId(testSourceId,
177219
-1)
178220
if conceptInfo != nil {
179-
t.Errorf("Did not expected to find data")
221+
t.Errorf("Did not expect to find data")
222+
}
223+
if error == nil {
224+
t.Errorf("Expected error")
180225
}
181226
}
182227

@@ -298,8 +343,8 @@ func TestRetrieveBreakdownStatsBySourceIdAndCohortIdAndConceptIdsAndCohortPairsW
298343
stats2, _ := conceptModel.RetrieveBreakdownStatsBySourceIdAndCohortIdAndConceptIdsAndCohortPairs(testSourceId,
299344
extendedCopyOfSecondLargestCohort.Id, filterIds, filterCohortPairs, breakdownConceptId)
300345
// very rough check (ideally we would check the individual stats as well...TODO?):
301-
if len(stats) != len(stats2) {
302-
t.Errorf("Expected same result, got %d and %d", len(stats), len(stats2))
346+
if len(stats) > len(stats2) {
347+
t.Errorf("First query is more restrictive, so its stats should not be larger than stats2 of second query. Got %d and %d", len(stats), len(stats2))
303348
}
304349
// test filtering with smallest cohort, lenght should be 1, since that's the size of the smallest cohort:
305350
// setting the same cohort id here (artificial...normally it should be two different ids):
@@ -418,13 +463,13 @@ func TestRetrieveHistogramDataBySourceIdAndCohortIdAndConceptIdsAndCohortPairs(t
418463
}
419464

420465
func TestQueryFilterByConceptIdsAndCohortPairsHelper(t *testing.T) {
421-
// This test checks whether the query succeeds when there mainObservationTableAlias
466+
// This test checks whether the query succeeds when the mainObservationTableAlias
422467
// argument passed to QueryFilterByConceptIdsAndCohortPairsHelper (last argument)
423468
// matches the alias used in the main query, and whether it fails otherwise.
424469

425470
setUp(t)
426471
omopDataSource := tests.GetOmopDataSource()
427-
filterConceptIds := []int64{1, 2, 3}
472+
filterConceptIds := []int64{allConceptIds[0], allConceptIds[1], allConceptIds[2]}
428473
filterCohortPairs := []utils.CustomDichotomousVariableDef{} // empty / not really needed for test
429474
var personIds []struct {
430475
PersonId int64
@@ -433,15 +478,15 @@ func TestQueryFilterByConceptIdsAndCohortPairsHelper(t *testing.T) {
433478
// Subtest1: correct alias "observation":
434479
query := omopDataSource.Db.Table(omopDataSource.Schema + ".observation_continuous as observation" + omopDataSource.GetViewDirective()).
435480
Select("observation.person_id")
436-
query = models.QueryFilterByConceptIdsAndCohortPairsHelper(query, filterConceptIds, filterCohortPairs, omopDataSource, "", "observation")
481+
query = models.QueryFilterByConceptIdsAndCohortPairsHelper(query, testSourceId, filterConceptIds, filterCohortPairs, omopDataSource, "", "observation")
437482
meta_result := query.Scan(&personIds)
438483
if meta_result.Error != nil {
439484
t.Errorf("Did NOT expect an error")
440485
}
441486
// Subtest2: incorrect alias "observation"...should fail:
442487
query = omopDataSource.Db.Table(omopDataSource.Schema + ".observation_continuous as observationWRONG").
443488
Select("*")
444-
query = models.QueryFilterByConceptIdsAndCohortPairsHelper(query, filterConceptIds, filterCohortPairs, omopDataSource, "", "observation")
489+
query = models.QueryFilterByConceptIdsAndCohortPairsHelper(query, testSourceId, filterConceptIds, filterCohortPairs, omopDataSource, "", "observation")
445490
meta_result = query.Scan(&personIds)
446491
if meta_result.Error == nil {
447492
t.Errorf("Expected an error")
@@ -596,7 +641,7 @@ func TestRetrieveCohortOverlapStatsZeroOverlap(t *testing.T) {
596641
}
597642

598643
func TestRetrieveCohortOverlapStatsZeroOverlapScenario2(t *testing.T) {
599-
// Tests if a scenario where NO overlap is expected indeed results in 0
644+
// Tests if a scenario where NO overlap ends in the expected error/panic
600645
setUp(t)
601646
caseCohortId := secondLargestCohort.Id
602647
controlCohortId := secondLargestCohort.Id // to ensure THIS part does not cause the 0 overlap, just repeat the same...
@@ -605,11 +650,16 @@ func TestRetrieveCohortOverlapStatsZeroOverlapScenario2(t *testing.T) {
605650
// set this list to some dummy non-existing ids:
606651
otherFilterConceptIds := []int64{-1, -2}
607652
filterCohortPairs := []utils.CustomDichotomousVariableDef{}
608-
stats, _ := cohortDataModel.RetrieveCohortOverlapStats(testSourceId, caseCohortId, controlCohortId,
653+
654+
// the RetrieveCohortOverlapStats below should result in panic/error:
655+
defer func() {
656+
if r := recover(); r == nil {
657+
t.Errorf("The code did not panic")
658+
}
659+
}()
660+
661+
_, _ = cohortDataModel.RetrieveCohortOverlapStats(testSourceId, caseCohortId, controlCohortId,
609662
filterConceptId, filterConceptValue, otherFilterConceptIds, filterCohortPairs)
610-
if stats.CaseControlOverlap != 0 {
611-
t.Errorf("Expected overlap of 0, but found %d", stats.CaseControlOverlap)
612-
}
613663
}
614664

615665
func TestRetrieveCohortOverlapStatsZeroOverlapScenario3(t *testing.T) {

tests/setup_local_db/test_data_results_and_cdm.sql

+1-1
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ values
115115
(nextval('observation_id_seq'), 2, 2000007027, 2000007029, 'ASN', 'ASN', 38000276),
116116
(nextval('observation_id_seq'), 3, 2000007027, 2000007031, 'EUR', 'EUR', 38000276),
117117
(nextval('observation_id_seq'), 4, 2000007027, 2000007030, 'AFR', 'AFR', 38000276),
118-
(nextval('observation_id_seq'), 5, 2000007027, 2000007032, NULL, NULL, 38000276),
118+
(nextval('observation_id_seq'), 5, 2000007027, NULL, NULL, NULL, 38000276),
119119
(nextval('observation_id_seq'), 6, 2000007027, 2000007029, 'ASN', 'ASN', 38000276),
120120
(nextval('observation_id_seq'), 2, 2000007027, 2000007030, 'AFR', 'AFR', 38000276),
121121
(nextval('observation_id_seq'), 7, 2000007027, 2000007028, 'HIS', 'HIS', 38000276),

tests/testutils.go

+17
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package tests
22

33
import (
44
"bufio"
5+
"fmt"
56
"io/ioutil"
67
"log"
78
"net"
@@ -96,6 +97,22 @@ func FixSomething(sourceType models.SourceType, tableName string, fieldName stri
9697
" RENAME "+fieldName+"_broken TO "+fieldName, GetTestSourceId())
9798
}
9899

100+
// utility function that adds a new concept item with some invalid concept_class_id on the fly
101+
func AddInvalidTypeConcept(sourceType models.SourceType) int64 {
102+
omopDataSource := GetOmopDataSource()
103+
var lastConcept models.Concept
104+
omopDataSource.Db.Last(&lastConcept)
105+
conceptId := lastConcept.ConceptId + 1
106+
ExecSQLString(fmt.Sprintf("INSERT into "+GetSchemaNameForType(sourceType)+".concept (concept_id,concept_name,concept_class_id,domain_id,concept_code) "+
107+
" values (%v, 'dummy concept', 'Invalid type class', 1, 'dummy')", conceptId), GetTestSourceId())
108+
return conceptId
109+
}
110+
111+
func RemoveConcept(sourceType models.SourceType, conceptId int64) {
112+
ExecSQLString(fmt.Sprintf("DELETE FROM "+GetSchemaNameForType(sourceType)+".concept "+
113+
" where concept_id =%v", conceptId), GetTestSourceId())
114+
}
115+
99116
func GetInt64AttributeValue[T any](item T, attributeName string) int64 {
100117
r := reflect.ValueOf(item)
101118
f := reflect.Indirect(r).FieldByName(attributeName)

0 commit comments

Comments
 (0)