Skip to content

Commit 648f1bb

Browse files
committed
feat: refactor RetrieveDataBySourceIdAndCohortIdAndVariables
... and deprecate model method in favor of the histogram one that retrieves the same data but is already refactored.
1 parent c41437e commit 648f1bb

File tree

4 files changed

+47
-163
lines changed

4 files changed

+47
-163
lines changed

controllers/cohortdata.go

+27-26
Original file line numberDiff line numberDiff line change
@@ -109,29 +109,15 @@ func (u CohortDataController) RetrieveStatsForCohortIdAndConceptId(c *gin.Contex
109109
func (u CohortDataController) RetrieveDataBySourceIdAndCohortIdAndVariables(c *gin.Context) {
110110
// TODO - add some validation to ensure that only calls from Argo are allowed through since it outputs FULL data?
111111
// -> this concern is considered to be addressed by https://github.com/uc-cdis/cloud-automation/pull/1884
112-
113-
// parse and validate all parameters:
114-
sourceIdStr := c.Param("sourceid")
115-
log.Printf("Querying source: %s", sourceIdStr)
116-
cohortIdStr := c.Param("cohortid")
117-
log.Printf("Querying cohort for cohort definition id: %s", cohortIdStr)
118-
if sourceIdStr == "" || cohortIdStr == "" {
119-
c.JSON(http.StatusBadRequest, gin.H{"message": "bad request"})
120-
c.Abort()
121-
return
122-
}
123-
124-
conceptIdsAndValues, cohortPairs, err := utils.ParseConceptDefsAndDichotomousDefs(c)
125-
conceptIds := utils.ExtractConceptIdsFromCustomConceptVariablesDef(conceptIdsAndValues)
126-
112+
sourceId, cohortId, conceptDefsAndCohortPairs, err := utils.ParseSourceIdAndCohortIdAndVariablesAsSingleList(c)
127113
if err != nil {
128-
c.JSON(http.StatusInternalServerError, gin.H{"message": "Error parsing request body for prefixed concept ids and dichotomous Ids", "error": err.Error()})
114+
c.JSON(http.StatusBadRequest, gin.H{"message": "bad request", "error": err.Error()})
129115
c.Abort()
130116
return
131117
}
132118

133-
sourceId, _ := strconv.Atoi(sourceIdStr)
134-
cohortId, _ := strconv.Atoi(cohortIdStr)
119+
// parse cohortPairs separately as well, so we can validate permissions
120+
conceptDefs, cohortPairs := utils.GetConceptDefsAndValuesAndCohortPairsAsSeparateLists(conceptDefsAndCohortPairs)
135121

136122
validAccessRequest := u.teamProjectAuthz.TeamProjectValidation(c, []int{cohortId}, cohortPairs)
137123
if !validAccessRequest {
@@ -141,17 +127,32 @@ func (u CohortDataController) RetrieveDataBySourceIdAndCohortIdAndVariables(c *g
141127
return
142128
}
143129

144-
// call model method:
145-
cohortData, err := u.cohortDataModel.RetrieveDataBySourceIdAndCohortIdAndConceptIdsOrderedByPersonId(sourceId, cohortId, conceptIds)
146-
if err != nil {
147-
c.JSON(http.StatusInternalServerError, gin.H{"message": "Error retrieving concept details", "error": err.Error()})
148-
c.Abort()
149-
return
130+
// Iterate over conceptDefsAndCohortPairs and collect the concept values for each person:
131+
// {PersonId:1, ConceptId:1, ConceptValue: "A value with, comma!"},
132+
// {PersonId:1, ConceptId:2, ConceptValue: B},
133+
// {PersonId:2, ConceptId:1, ConceptValue: C},
134+
var variablesToQuery []interface{}
135+
var finalConceptDataset []*models.PersonConceptAndValue
136+
for _, item := range conceptDefsAndCohortPairs {
137+
variablesToQuery = append(variablesToQuery, item)
138+
// if item is of type CustomConceptVariableDef, get the data:
139+
if _, ok := item.(utils.CustomConceptVariableDef); ok {
140+
// use variablesToQuery to query an increasingly tight set (simulating the attrition table that generated this query)
141+
cohortData, err := u.cohortDataModel.RetrieveHistogramDataBySourceIdAndCohortIdAndConceptDefsPlusCohortPairs(sourceId, cohortId, variablesToQuery)
142+
if err != nil {
143+
c.JSON(http.StatusInternalServerError, gin.H{"message": "Error retrieving concept details", "error": err.Error()})
144+
c.Abort()
145+
return
146+
}
147+
// add to final concept data set:
148+
finalConceptDataset = append(finalConceptDataset, cohortData...)
149+
}
150150
}
151151

152-
partialCSV := GeneratePartialCSV(sourceId, cohortData, conceptIds)
152+
conceptIds := utils.ExtractConceptIdsFromCustomConceptVariablesDef(conceptDefs)
153+
partialCSV := GeneratePartialCSV(sourceId, finalConceptDataset, conceptIds) // use conceptdefs to improve column description? nah...no person is reading this table....just needs to be unique
153154

154-
personIdToCSVValues, err := u.RetrievePeopleIdAndCohort(sourceId, cohortId, cohortPairs, cohortData)
155+
personIdToCSVValues, err := u.RetrievePeopleIdAndCohort(sourceId, cohortId, cohortPairs, finalConceptDataset)
155156
if err != nil {
156157
c.JSON(http.StatusInternalServerError, gin.H{"message": "Error retrieving people ID to csv value map", "error": err.Error()})
157158
c.Abort()

models/cohortdata.go

-27
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ import (
99
)
1010

1111
type CohortDataI interface {
12-
RetrieveDataBySourceIdAndCohortIdAndConceptIdsOrderedByPersonId(sourceId int, cohortDefinitionId int, conceptIds []int64) ([]*PersonConceptAndValue, error)
1312
RetrieveCohortOverlapStats(sourceId int, caseCohortId int, controlCohortId int, filterConceptDefsAndCohortPairs []interface{}) (CohortOverlapStats, error)
1413
RetrieveDataByOriginalCohortAndNewCohort(sourceId int, originalCohortDefinitionId int, cohortDefinitionId int) ([]*PersonIdAndCohort, error)
1514
RetrieveHistogramDataBySourceIdAndCohortIdAndConceptDefsPlusCohortPairs(sourceId int, cohortDefinitionId int, filterConceptDefsAndCohortPairs []interface{}) ([]*PersonConceptAndValue, error)
@@ -72,32 +71,6 @@ func (h CohortData) RetrieveDataByOriginalCohortAndNewCohort(sourceId int, origi
7271
return personData, meta_result.Error
7372
}
7473

75-
// Retrieves observation data.
76-
// Assumption is that both OMOP and RESULTS schemas
77-
// are on same DB.
78-
func (h CohortData) RetrieveDataBySourceIdAndCohortIdAndConceptIdsOrderedByPersonId(sourceId int, cohortDefinitionId int, conceptIds []int64) ([]*PersonConceptAndValue, error) {
79-
log.Printf(">> Using inner join impl. for large cohorts")
80-
var dataSourceModel = new(Source)
81-
omopDataSource := dataSourceModel.GetDataSource(sourceId, Omop)
82-
83-
resultsDataSource := dataSourceModel.GetDataSource(sourceId, Results)
84-
85-
// get the observations for the subjects and the concepts, to build up the data rows to return:
86-
var cohortData []*PersonConceptAndValue
87-
query := omopDataSource.Db.Table(omopDataSource.Schema+".observation_continuous as observation"+omopDataSource.GetViewDirective()).
88-
Select("observation.person_id, observation.observation_concept_id as concept_id, concept.concept_class_id, value_as_concept.concept_name as observation_value_as_concept_name, observation.value_as_number as concept_value_as_number, observation.value_as_concept_id as concept_value_as_concept_id").
89-
Joins("INNER JOIN "+resultsDataSource.Schema+".cohort as cohort ON cohort.subject_id = observation.person_id").
90-
Joins("INNER JOIN "+omopDataSource.Schema+".concept as concept ON concept.concept_id = observation.observation_concept_id").
91-
Joins("LEFT JOIN "+omopDataSource.Schema+".concept as value_as_concept ON value_as_concept.concept_id = observation.value_as_concept_id").
92-
Where("cohort.cohort_definition_id = ?", cohortDefinitionId).
93-
Where("observation.observation_concept_id in (?)", conceptIds).
94-
Order("observation.person_id asc") // this order is important!
95-
query, cancel := utils.AddTimeoutToQuery(query)
96-
defer cancel()
97-
meta_result := query.Scan(&cohortData)
98-
return cohortData, meta_result.Error
99-
}
100-
10174
func (h CohortData) RetrieveHistogramDataBySourceIdAndCohortIdAndConceptDefsPlusCohortPairs(sourceId int, cohortDefinitionId int, filterConceptDefsAndCohortPairs []interface{}) ([]*PersonConceptAndValue, error) {
10275
var dataSourceModel = new(Source)
10376
omopDataSource := dataSourceModel.GetDataSource(sourceId, Omop)

tests/controllers_tests/controllers_test.go

+20-11
Original file line numberDiff line numberDiff line change
@@ -67,19 +67,14 @@ var cohortDefinitionControllerWithFailingTeamProjectAuthz = controllers.NewCohor
6767

6868
type dummyCohortDataModel struct{}
6969

70-
func (h dummyCohortDataModel) RetrieveDataBySourceIdAndCohortIdAndConceptIdsOrderedByPersonId(sourceId int, cohortDefinitionId int, conceptIds []int64) ([]*models.PersonConceptAndValue, error) {
71-
value := float32(0.0)
70+
func (h dummyCohortDataModel) RetrieveHistogramDataBySourceIdAndCohortIdAndConceptDefsPlusCohortPairs(sourceId int, cohortDefinitionId int, filterConceptDefsAndCohortPairs []interface{}) ([]*models.PersonConceptAndValue, error) {
71+
value := float32(5.1)
7272
cohortData := []*models.PersonConceptAndValue{
73-
{PersonId: 1, ConceptId: 10, ConceptClassId: "something", ObservationValueAsConceptName: "abc", ConceptValueAsNumber: &value},
73+
{PersonId: 1, ConceptId: 2000000324, ConceptClassId: "something", ObservationValueAsConceptName: "abc", ConceptValueAsNumber: &value},
7474
}
7575
return cohortData, nil
7676
}
7777

78-
func (h dummyCohortDataModel) RetrieveHistogramDataBySourceIdAndCohortIdAndConceptDefsPlusCohortPairs(sourceId int, cohortDefinitionId int, filterConceptDefsAndCohortPairs []interface{}) ([]*models.PersonConceptAndValue, error) {
79-
cohortData := []*models.PersonConceptAndValue{}
80-
return cohortData, nil
81-
}
82-
8378
func (h dummyCohortDataModel) RetrieveHistogramDataBySourceIdAndConceptId(sourceId int, histogramConceptId int64) ([]*models.PersonConceptAndValue, error) {
8479

8580
cohortData := []*models.PersonConceptAndValue{}
@@ -388,16 +383,30 @@ func TestRetrieveDataBySourceIdAndCohortIdAndVariablesCorrectParams(t *testing.T
388383
requestContext.Params = append(requestContext.Params, gin.Param{Key: "cohortid", Value: "1"})
389384
requestContext.Writer = new(tests.CustomResponseWriter)
390385
requestContext.Request = new(http.Request)
391-
requestBody := "{\"variables\":[{\"variable_type\": \"concept\", \"concept_id\": 2000000324},{\"variable_type\": \"custom_dichotomous\", \"cohort_ids\": [1, 3]}]}"
386+
requestBody := "{\"variables\":[{\"variable_type\": \"concept\", \"concept_id\": 2000000324},{\"variable_type\": \"custom_dichotomous\", \"cohort_ids\": [2, 3]}]}"
392387
requestContext.Request.Body = io.NopCloser(strings.NewReader(requestBody))
393388
cohortDataController.RetrieveDataBySourceIdAndCohortIdAndVariables(requestContext)
394389
// Params above are correct, so request should NOT abort:
395390
if requestContext.IsAborted() {
396391
t.Errorf("Did not expect this request to abort")
397392
}
398393
result := requestContext.Writer.(*tests.CustomResponseWriter)
399-
if !strings.Contains(result.CustomResponseWriterOut, "sample.id,") {
400-
t.Errorf("Expected output starting with 'sample.id,...'")
394+
expectedOut := "sample.id,ID_2000000324,ID_2_3\n1,abc,0\n"
395+
if !strings.Contains(result.CustomResponseWriterOut, expectedOut) {
396+
t.Errorf("Expected output to contain %s", expectedOut)
397+
}
398+
// test with different data
399+
requestBody = "{\"variables\":[{\"variable_type\": \"concept\", \"concept_id\": 1111111},{\"variable_type\": \"custom_dichotomous\", \"cohort_ids\": [1, 3]}]}"
400+
requestContext.Request.Body = io.NopCloser(strings.NewReader(requestBody))
401+
cohortDataController.RetrieveDataBySourceIdAndCohortIdAndVariables(requestContext)
402+
// Params above are correct, so request should NOT abort:
403+
if requestContext.IsAborted() {
404+
t.Errorf("Did not expect this request to abort")
405+
}
406+
result = requestContext.Writer.(*tests.CustomResponseWriter)
407+
expectedOut = "sample.id,ID_1111111,ID_1_3\n1,NA,NA\n"
408+
if !strings.Contains(result.CustomResponseWriterOut, expectedOut) {
409+
t.Errorf("Expected output to contain %s", expectedOut)
401410
}
402411

403412
// the same request should fail if the teamProject authorization fails:

tests/models_tests/models_test.go

-99
Original file line numberDiff line numberDiff line change
@@ -1239,105 +1239,6 @@ func TestQueryFilterByConceptDefsHelper(t *testing.T) {
12391239
}
12401240
}
12411241

1242-
func TestRetrieveDataBySourceIdAndCohortIdAndConceptIdsOrderedByPersonId(t *testing.T) {
1243-
setUp(t)
1244-
cohortDefinitions, _ := cohortDefinitionModel.GetAllCohortDefinitionsAndStatsOrderBySizeDesc(testSourceId, defaultTeamProject)
1245-
var sumNumeric float32 = 0
1246-
textConcat := ""
1247-
classIdConcat := ""
1248-
foundConceptValueAsNumberAsNil := false
1249-
for _, cohortDefinition := range cohortDefinitions {
1250-
1251-
cohortData, _ := cohortDataModel.RetrieveDataBySourceIdAndCohortIdAndConceptIdsOrderedByPersonId(
1252-
testSourceId, cohortDefinition.Id, allConceptIds)
1253-
1254-
// count nr observation records for cohort through an independent simpler query:
1255-
totalObservationsCohort := tests.GetCountWhere(tests.GetOmopDataSourceForSourceId(tests.GetTestSourceId()), "observation",
1256-
fmt.Sprintf("observation_concept_id > 0 and person_id in (Select b.subject_id from %s.cohort b where b.cohort_definition_id = %d)",
1257-
tests.GetResultsDataSource().Schema, cohortDefinition.Id))
1258-
1259-
// 1- the number of items in cohortsData and totalObservationsCohort should match:
1260-
if int64(len(cohortData)) != totalObservationsCohort {
1261-
t.Errorf("Expected %d observations, found %d", totalObservationsCohort, len(cohortData))
1262-
}
1263-
1264-
// 2- cohortData items > 0, assuming each cohort has a person wit at least one observation
1265-
if len(cohortData) <= 0 {
1266-
t.Errorf("Expected some cohort data")
1267-
}
1268-
// 3- check data size. Max size, if all persons have data for each concept, is cohort size x len(allConceptIds):
1269-
if len(cohortData) > cohortDefinition.CohortSize*len(allConceptIds) {
1270-
t.Errorf("Cohort data size larger than expected. Expected < %d, got %d",
1271-
cohortDefinition.CohortSize*len(allConceptIds), len(cohortData))
1272-
}
1273-
1274-
var previousPersonId int64 = -1
1275-
defaultHareValue := map[string]bool{"non-Hispanic Asian": true, "non-Hispanic Black": true, "non-Hispanic White": true, "Hispanic": true}
1276-
emptyDataCounter := 0
1277-
for _, cohortDatum := range cohortData {
1278-
// check for order: person_id is not smaller than previous person_id
1279-
if cohortDatum.PersonId < previousPersonId {
1280-
t.Errorf("Data not ordered by person_id!")
1281-
}
1282-
1283-
if cohortDatum.ConceptId == 2000007027 {
1284-
//Only one row has empty data
1285-
if !defaultHareValue[cohortDatum.ObservationValueAsConceptName] {
1286-
if emptyDataCounter == 0 && cohortDatum.ObservationValueAsConceptName == "" {
1287-
emptyDataCounter++
1288-
} else {
1289-
t.Errorf("Did not get concept value name correctly!")
1290-
}
1291-
}
1292-
}
1293-
1294-
previousPersonId = cohortDatum.PersonId
1295-
if cohortDatum.ConceptValueAsNumber != nil {
1296-
sumNumeric += *cohortDatum.ConceptValueAsNumber
1297-
} else {
1298-
foundConceptValueAsNumberAsNil = true
1299-
}
1300-
textConcat += cohortDatum.ObservationValueAsConceptName
1301-
classIdConcat += cohortDatum.ConceptClassId
1302-
}
1303-
}
1304-
// check for data: sum of all numeric values > 0
1305-
if sumNumeric == 0 {
1306-
t.Errorf("Expected some numeric cohort data")
1307-
}
1308-
// check for data: concat of all string values != ""
1309-
if textConcat == "" {
1310-
t.Errorf("Expected some string cohort data")
1311-
}
1312-
// check for data: some concepts have class id, so this should not be empty
1313-
if classIdConcat == "" {
1314-
t.Errorf("Expected query to return concept class id information")
1315-
}
1316-
// check if some numeric values were nil as expected:
1317-
if foundConceptValueAsNumberAsNil == false {
1318-
t.Errorf("Expected query to return some nil values for ConceptValueAsNumber")
1319-
}
1320-
1321-
}
1322-
1323-
func TestErrorForRetrieveDataBySourceIdAndCohortIdAndConceptIdsOrderedByPersonId(t *testing.T) {
1324-
// Tests if the method returns an error when query fails.
1325-
1326-
cohortDefinitions, _ := cohortDefinitionModel.GetAllCohortDefinitionsAndStatsOrderBySizeDesc(testSourceId, defaultTeamProject)
1327-
1328-
// break something in the Results schema to cause a query failure in the next method:
1329-
tests.BreakSomething(models.Results, "cohort", "cohort_definition_id")
1330-
// set last action to restore back:
1331-
// run test:
1332-
_, error := cohortDataModel.RetrieveDataBySourceIdAndCohortIdAndConceptIdsOrderedByPersonId(
1333-
testSourceId, cohortDefinitions[0].Id, allConceptIds)
1334-
if error == nil {
1335-
t.Errorf("Expected error")
1336-
}
1337-
// revert the broken part:
1338-
tests.FixSomething(models.Results, "cohort", "cohort_definition_id")
1339-
}
1340-
13411242
func TestRetrieveCohortOverlapStats(t *testing.T) {
13421243
// Tests if we get the expected overlap
13431244
setUp(t)

0 commit comments

Comments
 (0)