Skip to content

Commit 373e54e

Browse files
Merge pull request #63 from uc-cdis/fix/align_all_cohort_pair_filter_queries
Fix/align all cohort pair filter queries
2 parents ee36e24 + 1c84ecb commit 373e54e

File tree

10 files changed

+85
-449
lines changed

10 files changed

+85
-449
lines changed

README.md

-1
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,6 @@ curl -d '{"ConceptIds":[2000000324,2000006885]}' -H "Content-Type: application/j
127127
curl http://localhost:8080/concept-stats/by-source-id/1/by-cohort-definition-id/3/breakdown-by-concept-id/2000007027 | python3 -m json.tool
128128

129129
curl -d '{"variables": [{"variable_type": "concept", "concept_id": 2000006885}]}' -H "Content-Type: application/json" -X POST http://localhost:8080/concept-stats/by-source-id/1/by-cohort-definition-id/3/breakdown-by-concept-id/2000007027 | python3 -m json.tool
130-
curl -d '{"variables": [{"variable_type": "concept", "concept_id": 2000006885}]}' -H "Content-Type: application/json" -X POST http://localhost:8080/cohort-stats/check-overlap/by-source-id/1/by-case-control-cohort-definition-ids/2/3/filter-by-concept-id-and-value/2000007027/2000007029 | python3 -m json.tool
131130
```
132131

133132
CSV data endpoints:

controllers/cohortdata.go

-28
Original file line numberDiff line numberDiff line change
@@ -215,34 +215,6 @@ func populateConceptValue(row []string, cohortItem models.PersonConceptAndValue,
215215
return row
216216
}
217217

218-
func (u CohortDataController) RetrieveCohortOverlapStats(c *gin.Context) {
219-
errors := make([]error, 6)
220-
var sourceId, caseCohortId, controlCohortId int
221-
var filterConceptId int64
222-
var filterConceptValue int64
223-
var conceptIds []int64
224-
var cohortPairs []utils.CustomDichotomousVariableDef
225-
sourceId, errors[0] = utils.ParseNumericArg(c, "sourceid")
226-
filterConceptId, errors[1] = utils.ParseBigNumericArg(c, "filterconceptid")
227-
filterConceptValue, errors[2] = utils.ParseBigNumericArg(c, "filtervalue")
228-
caseCohortId, errors[3] = utils.ParseNumericArg(c, "casecohortid")
229-
controlCohortId, errors[4] = utils.ParseNumericArg(c, "controlcohortid")
230-
conceptIds, cohortPairs, errors[5] = utils.ParseConceptIdsAndDichotomousDefs(c)
231-
if utils.ContainsNonNil(errors) {
232-
c.JSON(http.StatusBadRequest, gin.H{"message": "bad request"})
233-
c.Abort()
234-
return
235-
}
236-
overlapStats, err := u.cohortDataModel.RetrieveCohortOverlapStats(sourceId, caseCohortId, controlCohortId,
237-
filterConceptId, filterConceptValue, conceptIds, cohortPairs)
238-
if err != nil {
239-
c.JSON(http.StatusInternalServerError, gin.H{"message": "Error retrieving stats", "error": err.Error()})
240-
c.Abort()
241-
return
242-
}
243-
c.JSON(http.StatusOK, gin.H{"cohort_overlap": overlapStats})
244-
}
245-
246218
func (u CohortDataController) RetrieveCohortOverlapStatsWithoutFilteringOnConceptValue(c *gin.Context) {
247219
errors := make([]error, 4)
248220
var sourceId, caseCohortId, controlCohortId int

models/cohortdata.go

+11-65
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ import (
99

1010
type CohortDataI interface {
1111
RetrieveDataBySourceIdAndCohortIdAndConceptIdsOrderedByPersonId(sourceId int, cohortDefinitionId int, conceptIds []int64) ([]*PersonConceptAndValue, error)
12-
RetrieveCohortOverlapStats(sourceId int, caseCohortId int, controlCohortId int, filterConceptId int64, filterConceptValue int64, otherFilterConceptIds []int64, filterCohortPairs []utils.CustomDichotomousVariableDef) (CohortOverlapStats, error)
1312
RetrieveCohortOverlapStatsWithoutFilteringOnConceptValue(sourceId int, caseCohortId int, controlCohortId int, otherFilterConceptIds []int64, filterCohortPairs []utils.CustomDichotomousVariableDef) (CohortOverlapStats, error)
1413
RetrieveDataByOriginalCohortAndNewCohort(sourceId int, originalCohortDefinitionId int, cohortDefinitionId int) ([]*PersonIdAndCohort, error)
1514
RetrieveHistogramDataBySourceIdAndCohortIdAndConceptIdsAndCohortPairs(sourceId int, cohortDefinitionId int, histogramConceptId int64, filterConceptIds []int64, filterCohortPairs []utils.CustomDichotomousVariableDef) ([]*PersonConceptAndValue, error)
@@ -79,97 +78,44 @@ func (h CohortData) RetrieveDataBySourceIdAndCohortIdAndConceptIdsOrderedByPerso
7978
}
8079

8180
func (h CohortData) RetrieveHistogramDataBySourceIdAndCohortIdAndConceptIdsAndCohortPairs(sourceId int, cohortDefinitionId int, histogramConceptId int64, filterConceptIds []int64, filterCohortPairs []utils.CustomDichotomousVariableDef) ([]*PersonConceptAndValue, error) {
82-
log.Printf(">> Using inner join impl. for large cohorts")
8381
var dataSourceModel = new(Source)
8482
omopDataSource := dataSourceModel.GetDataSource(sourceId, Omop)
8583

8684
resultsDataSource := dataSourceModel.GetDataSource(sourceId, Results)
8785

8886
// get the observations for the subjects and the concepts, to build up the data rows to return:
8987
var cohortData []*PersonConceptAndValue
90-
query := omopDataSource.Db.Table(omopDataSource.Schema+".observation_continuous as observation"+omopDataSource.GetViewDirective()).
88+
query := QueryFilterByCohortPairsHelper(filterCohortPairs, resultsDataSource, cohortDefinitionId, "unionAndIntersect").
9189
Select("distinct(observation.person_id), observation.observation_concept_id as concept_id, observation.value_as_number as concept_value_as_number").
92-
Joins("INNER JOIN "+resultsDataSource.Schema+".cohort as cohort ON cohort.subject_id = observation.person_id").
93-
Where("cohort.cohort_definition_id = ?", cohortDefinitionId).
90+
Joins("INNER JOIN "+omopDataSource.Schema+".observation_continuous as observation"+omopDataSource.GetViewDirective()+" ON unionAndIntersect.subject_id = observation.person_id").
9491
Where("observation.observation_concept_id = ?", histogramConceptId).
9592
Where("observation.value_as_number is not null")
9693

97-
query = QueryFilterByConceptIdsAndCohortPairsHelper(query, sourceId, filterConceptIds, filterCohortPairs, omopDataSource, resultsDataSource.Schema, "observation")
94+
query = QueryFilterByConceptIdsHelper(query, sourceId, filterConceptIds, omopDataSource, resultsDataSource.Schema, "observation")
9895

9996
meta_result := query.Scan(&cohortData)
10097
return cohortData, meta_result.Error
10198
}
10299

103-
// Assesses the overlap between case and control cohorts. It does this after filtering the cohorts and keeping only
104-
// the persons that have data for each of the selected conceptIds and match the filterConceptId/filterConceptValue criteria.
105-
func (h CohortData) RetrieveCohortOverlapStats(sourceId int, caseCohortId int, controlCohortId int,
106-
filterConceptId int64, filterConceptValue int64, otherFilterConceptIds []int64, filterCohortPairs []utils.CustomDichotomousVariableDef) (CohortOverlapStats, error) {
107-
108-
var dataSourceModel = new(Source)
109-
omopDataSource := dataSourceModel.GetDataSource(sourceId, Omop)
110-
resultsDataSource := dataSourceModel.GetDataSource(sourceId, Results)
111-
112-
// count persons that are in the intersection of both case and control cohorts, filtering on filterConceptValue:
113-
var cohortOverlapStats CohortOverlapStats
114-
query := omopDataSource.Db.Table(omopDataSource.Schema+".observation_continuous as observation"+omopDataSource.GetViewDirective()).
115-
Select("count(distinct(observation.person_id)) as case_control_overlap").
116-
Joins("INNER JOIN "+resultsDataSource.Schema+".cohort as case_cohort ON case_cohort.subject_id = observation.person_id").
117-
Joins("INNER JOIN "+resultsDataSource.Schema+".cohort as control_cohort ON control_cohort.subject_id = case_cohort.subject_id"). // this one allows for the intersection between case and control and the assessment of the overlap
118-
Where("case_cohort.cohort_definition_id = ?", caseCohortId).
119-
Where("control_cohort.cohort_definition_id = ?", controlCohortId).
120-
Where("observation.observation_concept_id = ?", filterConceptId).
121-
Where("observation.value_as_concept_id = ?", filterConceptValue)
122-
123-
query = QueryFilterByConceptIdsAndCohortPairsHelper(query, sourceId, otherFilterConceptIds, filterCohortPairs, omopDataSource, resultsDataSource.Schema, "observation")
124-
125-
meta_result := query.Scan(&cohortOverlapStats)
126-
return cohortOverlapStats, meta_result.Error
127-
}
128-
129100
// Assesses the overlap between case and control cohorts, after filtering them on otherFilterConceptIds and filterCohortPairs (there is no extra filtering on a filterConceptId/filterConceptValue):
130101
func (h CohortData) RetrieveCohortOverlapStatsWithoutFilteringOnConceptValue(sourceId int, caseCohortId int, controlCohortId int,
131102
otherFilterConceptIds []int64, filterCohortPairs []utils.CustomDichotomousVariableDef) (CohortOverlapStats, error) {
132103

133-
// special case for when filter lists are empty:
134-
if len(otherFilterConceptIds) == 0 && len(filterCohortPairs) == 0 {
135-
// call the faster version of overlap check:
136-
return h.RetrieveCohortOverlapStatsWithoutFiltering(sourceId, caseCohortId, controlCohortId)
137-
}
138-
139-
var dataSourceModel = new(Source)
140-
omopDataSource := dataSourceModel.GetDataSource(sourceId, Omop)
141-
resultsDataSource := dataSourceModel.GetDataSource(sourceId, Results)
142-
143-
// count persons that are in the intersection of both case and control cohorts, filtering on filterConceptValue:
144-
var cohortOverlapStats CohortOverlapStats
145-
query := omopDataSource.Db.Table(omopDataSource.Schema+".observation_continuous as observation"+omopDataSource.GetViewDirective()).
146-
Select("count(distinct(observation.person_id)) as case_control_overlap").
147-
Joins("INNER JOIN "+resultsDataSource.Schema+".cohort as case_cohort ON case_cohort.subject_id = observation.person_id").
148-
Joins("INNER JOIN "+resultsDataSource.Schema+".cohort as control_cohort ON control_cohort.subject_id = case_cohort.subject_id"). // this one allows for the intersection between case and control and the assessment of the overlap
149-
Where("case_cohort.cohort_definition_id = ?", caseCohortId).
150-
Where("control_cohort.cohort_definition_id = ?", controlCohortId)
151-
152-
query = QueryFilterByConceptIdsAndCohortPairsHelper(query, sourceId, otherFilterConceptIds, filterCohortPairs, omopDataSource, resultsDataSource.Schema, "observation")
153-
154-
meta_result := query.Scan(&cohortOverlapStats)
155-
return cohortOverlapStats, meta_result.Error
156-
}
157-
158-
// Basically the same as the method above, but without any filtering on any concepts or on any CustomDichotomousVariableDef:
159-
func (h CohortData) RetrieveCohortOverlapStatsWithoutFiltering(sourceId int, caseCohortId int, controlCohortId int) (CohortOverlapStats, error) {
160-
161104
var dataSourceModel = new(Source)
162105
omopDataSource := dataSourceModel.GetDataSource(sourceId, Omop)
163106
resultsDataSource := dataSourceModel.GetDataSource(sourceId, Results)
164107

165108
// count persons that are in the intersection of both case and control cohorts, after applying the cohort pair and concept id filters:
166109
var cohortOverlapStats CohortOverlapStats
167-
query := omopDataSource.Db.Table(resultsDataSource.Schema+".cohort as case_cohort").
168-
Select("count(distinct(case_cohort.subject_id)) as case_control_overlap").
169-
Joins("INNER JOIN "+resultsDataSource.Schema+".cohort as control_cohort ON control_cohort.subject_id = case_cohort.subject_id"). // this one allows for the intersection between case and control and the assessment of the overlap
170-
Where("case_cohort.cohort_definition_id = ?", caseCohortId).
171-
Where("control_cohort.cohort_definition_id = ?", controlCohortId)
110+
query := QueryFilterByCohortPairsHelper(filterCohortPairs, resultsDataSource, caseCohortId, "case_cohort_unionedAndIntersectedWithFilters").
111+
Select("count(distinct(case_cohort_unionedAndIntersectedWithFilters.subject_id)) as case_control_overlap").
112+
Joins("INNER JOIN " + resultsDataSource.Schema + ".cohort as control_cohort ON control_cohort.subject_id = case_cohort_unionedAndIntersectedWithFilters.subject_id") // this one allows for the intersection between case and control and the assessment of the overlap
172113

114+
if len(otherFilterConceptIds) > 0 {
115+
query = query.Joins("INNER JOIN " + omopDataSource.Schema + ".observation_continuous as observation" + omopDataSource.GetViewDirective() + " ON control_cohort.subject_id = observation.person_id")
116+
query = QueryFilterByConceptIdsHelper(query, sourceId, otherFilterConceptIds, omopDataSource, resultsDataSource.Schema, "observation")
117+
}
118+
query = query.Where("control_cohort.cohort_definition_id = ?", controlCohortId)
173119
meta_result := query.Scan(&cohortOverlapStats)
174120
return cohortOverlapStats, meta_result.Error
175121
}

models/concept.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ func (h Concept) RetrieveBreakdownStatsBySourceIdAndCohortIdAndConceptIdsAndCoho
225225

226226
// note: filterCohortPairs is not passed here, since the cohort pair filtering is already covered by the SQL generated by QueryFilterByCohortPairsHelper above,
227227
// which is a better performing SQL in this particular scenario:
228-
query = QueryFilterByConceptIdsAndCohortPairsHelper(query, sourceId, filterConceptIds, []utils.CustomDichotomousVariableDef{}, omopDataSource, resultsDataSource.Schema, "observation")
228+
query = QueryFilterByConceptIdsHelper(query, sourceId, filterConceptIds, omopDataSource, resultsDataSource.Schema, "observation")
229229

230230
meta_result := query.Group("observation.value_as_concept_id").
231231
Scan(&conceptBreakdownList)

models/helper.go

+3-23
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,10 @@ import (
88
"gorm.io/gorm"
99
)
1010

11-
// Helper function that adds extra filter clauses to the query, joining on the right set of tables, excluding data where necessary, etc.
11+
// Helper function that adds extra filter clauses to the query, joining on the right set of tables.
1212
// * It was added here to make it reusable, given these filters need to be added to many of the queries that take in
13-
// a list of filters in the form of concept ids and cohort pairs. The one assumption it makes is that the given `query` object already contains
14-
// a basic query on a table or view that have been named or aliased as "observation" (see comments in code). This assumption is
15-
// checked at the start.
16-
func QueryFilterByConceptIdsAndCohortPairsHelper(query *gorm.DB, sourceId int, filterConceptIds []int64, filterCohortPairs []utils.CustomDichotomousVariableDef,
13+
// a list of filters in the form of concept ids.
14+
func QueryFilterByConceptIdsHelper(query *gorm.DB, sourceId int, filterConceptIds []int64,
1715
omopDataSource *utils.DbAndSchema, resultSchemaName string, mainObservationTableAlias string) *gorm.DB {
1816
// iterate over the filterConceptIds, adding a new INNER JOIN and filters for each, so that the resulting set is the
1917
// set of persons that have a non-null value for each and every one of the concepts:
@@ -24,24 +22,6 @@ func QueryFilterByConceptIdsAndCohortPairsHelper(query *gorm.DB, sourceId int, f
2422
Where(observationTableAlias+".observation_concept_id = ?", filterConceptId).
2523
Where(GetConceptValueNotNullCheckBasedOnConceptType(observationTableAlias, sourceId, filterConceptId))
2624
}
27-
// iterate over the list of filterCohortPairs, adding a new INNER JOIN to the UNION of each pair, so that the resulting set is the
28-
// set of persons that are part of the intersections above and of one of the cohorts in the filterCohortPairs:
29-
for i, filterCohortPair := range filterCohortPairs {
30-
cohortTableAlias1 := fmt.Sprintf("cohort_filter_1_%d", i)
31-
cohortTableAlias2 := fmt.Sprintf("cohort_filter_2_%d", i)
32-
cohortTableAlias3 := fmt.Sprintf("cohort_filter_3_%d", i)
33-
unionExceptAlias := fmt.Sprintf("union_%d", i)
34-
log.Printf("Adding extra INNER JOIN on UNION and EXCEPT with alias %s", unionExceptAlias)
35-
query = query.Joins(
36-
"INNER JOIN "+
37-
" (Select "+cohortTableAlias1+".subject_id FROM "+resultSchemaName+".cohort as "+cohortTableAlias1+
38-
" where "+cohortTableAlias1+".cohort_definition_id in (?,?) "+ //the UNION of both cohorts
39-
" EXCEPT "+ //now use EXCEPT to exclude the part where both cohorts INTERSECT
40-
" Select "+cohortTableAlias2+".subject_id FROM "+resultSchemaName+".cohort as "+cohortTableAlias2+
41-
" INNER JOIN "+resultSchemaName+".cohort as "+cohortTableAlias3+" ON "+cohortTableAlias3+".subject_id = "+cohortTableAlias2+".subject_id "+
42-
" where "+cohortTableAlias2+".cohort_definition_id = ? AND "+cohortTableAlias3+".cohort_definition_id =? ) AS "+unionExceptAlias+" ON "+unionExceptAlias+".subject_id = "+mainObservationTableAlias+".person_id",
43-
filterCohortPair.CohortId1, filterCohortPair.CohortId2, filterCohortPair.CohortId1, filterCohortPair.CohortId2)
44-
}
4525
return query
4626
}
4727

openapis/swagger.yaml

-91
Original file line numberDiff line numberDiff line change
@@ -899,97 +899,6 @@ paths:
899899
description: 'Get statistics for size of given cohort id, broken down by concept values of the given breakdown concept id'
900900
tags:
901901
- data
902-
'/cohort-stats/check-overlap/by-source-id/{sourceid}/by-case-control-cohort-definition-ids/{casecohortid}/{controlcohortid}/filter-by-concept-id-and-value/{filterconceptid}/{filtervalue}':
903-
parameters:
904-
- $ref: '#/components/parameters/sourceid'
905-
- $ref: '#/components/parameters/casecohortid'
906-
- $ref: '#/components/parameters/controlcohortid'
907-
- $ref: '#/components/parameters/filterconceptid'
908-
- $ref: '#/components/parameters/filtervalue'
909-
post:
910-
summary: 'Post /cohort-stats/check-overlap/by-source-id/{sourceid}/by-case-control-cohort-definition-ids/{casecohortid}/{controlcohortid}/filter-by-concept-id-and-value/{filterconceptid}/{filtervalue}'
911-
operationId: post-cohort-stats-check-overlap-by-source-id-by-case-control-cohort-definition-ids-filter-by-concept-id-and-value
912-
security:
913-
- Bearer: []
914-
requestBody:
915-
content:
916-
application/json:
917-
schema:
918-
type: object
919-
properties:
920-
variables:
921-
type: array
922-
items:
923-
type: object
924-
examples:
925-
example-1:
926-
value:
927-
variables:
928-
- variable_type: concept
929-
concept_id: 2000000324
930-
- variable_type: concept
931-
concept_id: 2000006885
932-
- variable_type: custom_dichotomous
933-
cohort_ids:
934-
- 1
935-
- 2
936-
responses:
937-
'200':
938-
description: OK
939-
content:
940-
application/json:
941-
schema:
942-
description: ''
943-
type: object
944-
properties:
945-
cohort_overlap:
946-
type: object
947-
properties:
948-
case_control_overlap:
949-
type: number
950-
required:
951-
- cohort_overlap
952-
'400':
953-
description: Bad Request
954-
content:
955-
application/json:
956-
schema:
957-
description: ''
958-
type: object
959-
properties:
960-
message:
961-
type: string
962-
minLength: 1
963-
enum:
964-
- bad request - no request body
965-
- bad request
966-
example: bad request
967-
required:
968-
- message
969-
'401':
970-
description: Unauthorized
971-
'500':
972-
description: Internal Server Error
973-
content:
974-
application/json:
975-
schema:
976-
description: ''
977-
type: object
978-
properties:
979-
message:
980-
type: string
981-
minLength: 1
982-
enum:
983-
- Error retrieving concept details
984-
error:
985-
type: string
986-
minLength: 1
987-
required:
988-
- message
989-
- error
990-
description: Assesses the overlap between any two cohorts (here just labelled as "case" and "control" - but could have been "A" and "B"). It does this after filtering the cohorts and keeping only the persons that have data for each of the selected conceptIds and match the filterConceptId/filterConceptValue criterion.
991-
tags:
992-
- data
993902
'/cohort-stats/check-overlap/by-source-id/{sourceid}/by-cohort-definition-ids/{casecohortid}/{controlcohortid}':
994903
parameters:
995904
- name: sourceid

server/router.go

-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@ func NewRouter() *gin.Engine {
4949
// cohort stats and checks:
5050
cohortData := controllers.NewCohortDataController(*new(models.CohortData))
5151
// :casecohortid/:controlcohortid are just labels here and have no special meaning. Could also just be :cohortAId/:cohortBId here:
52-
authorized.POST("/cohort-stats/check-overlap/by-source-id/:sourceid/by-case-control-cohort-definition-ids/:casecohortid/:controlcohortid/filter-by-concept-id-and-value/:filterconceptid/:filtervalue", cohortData.RetrieveCohortOverlapStats)
5352
authorized.POST("/cohort-stats/check-overlap/by-source-id/:sourceid/by-cohort-definition-ids/:casecohortid/:controlcohortid", cohortData.RetrieveCohortOverlapStatsWithoutFilteringOnConceptValue)
5453

5554
// full data endpoints:

0 commit comments

Comments
 (0)