Skip to content

Commit d24648a

Browse files
authored
Add missing POC clinical data binning function (cBioPortal#10778)
* add missing poc clinical data binning function
1 parent 66b058c commit d24648a

File tree

5 files changed

+304
-96
lines changed

5 files changed

+304
-96
lines changed
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,133 @@
1+
package org.cbioportal.web.columnar;
2+
3+
import org.cbioportal.model.*;
4+
import org.cbioportal.service.StudyViewColumnarService;
5+
import org.cbioportal.web.columnar.util.NewClinicalDataBinUtil;
6+
import org.cbioportal.web.parameter.*;
7+
import org.cbioportal.web.util.DataBinner;
8+
import org.springframework.beans.factory.annotation.Autowired;
9+
import org.springframework.stereotype.Component;
10+
11+
import java.util.*;
12+
import java.util.stream.Collectors;
13+
import java.util.stream.Stream;
14+
15+
@Component
16+
public class ClinicalDataBinner {
17+
private final StudyViewColumnarService studyViewColumnarService;
18+
private final DataBinner dataBinner;
19+
20+
@Autowired
21+
public ClinicalDataBinner(
22+
StudyViewColumnarService studyViewColumnarService,
23+
DataBinner dataBinner
24+
) {
25+
this.studyViewColumnarService = studyViewColumnarService;
26+
this.dataBinner = dataBinner;
27+
}
28+
29+
public List<ClinicalDataBin> fetchClinicalDataBinCounts(
30+
DataBinMethod dataBinMethod,
31+
ClinicalDataBinCountFilter dataBinCountFilter,
32+
boolean shouldRemoveSelfFromFilter
33+
) {
34+
List<ClinicalDataBinFilter> attributes = dataBinCountFilter.getAttributes();
35+
StudyViewFilter studyViewFilter = dataBinCountFilter.getStudyViewFilter();
36+
37+
if (shouldRemoveSelfFromFilter) {
38+
studyViewFilter = NewClinicalDataBinUtil.removeSelfFromFilter(dataBinCountFilter);
39+
}
40+
41+
List<String> attributeIds = attributes.stream().map(ClinicalDataBinFilter::getAttributeId).collect(Collectors.toList());
42+
43+
// a new StudyView filter to partially filter by study and sample ids only
44+
StudyViewFilter partialFilter = new StudyViewFilter();
45+
partialFilter.setStudyIds(studyViewFilter.getStudyIds());
46+
partialFilter.setSampleIdentifiers(studyViewFilter.getSampleIdentifiers());
47+
48+
// filter only by study id and sample identifiers, ignore rest
49+
// we need this additional partial filter because we always need to know the bins generated for the initial state
50+
// which allows us to keep the number of bins and bin ranges consistent even if there are additional data filters.
51+
// we only want to update the counts for each bin, we don't want to regenerate the bins for the filtered data.
52+
// NOTE: partial filter is only needed when dataBinMethod == DataBinMethod.STATIC but that's always the case
53+
// for the frontend implementation. we can't really use dataBinMethod == DataBinMethod.DYNAMIC because of the
54+
// complication it brings to the frontend visualization and filtering
55+
List<Sample> unfilteredSamples = studyViewColumnarService.getFilteredSamples(partialFilter);
56+
List<Sample> filteredSamples = studyViewColumnarService.getFilteredSamples(studyViewFilter);
57+
58+
// TODO make sure unique sample and patient keys don't need to be distinct
59+
List<String> unfilteredUniqueSampleKeys = unfilteredSamples.stream().map(Sample::getUniqueSampleKey).collect(Collectors.toList());
60+
List<String> filteredUniqueSampleKeys = filteredSamples.stream().map(Sample::getUniqueSampleKey).collect(Collectors.toList());
61+
List<String> unfilteredUniquePatientKeys = unfilteredSamples.stream().map(Sample::getUniquePatientKey).collect(Collectors.toList());
62+
List<String> filteredUniquePatientKeys = filteredSamples.stream().map(Sample::getUniquePatientKey).collect(Collectors.toList());
63+
64+
// TODO make sure we don't need a distinction between sample vs patient attribute ids here
65+
// ideally we shouldn't because we have patient clinical data separated from sample clinical data in clickhouse
66+
67+
// we need the clinical data for the partial filter in order to generate the bins for initial state
68+
// we use the filtered data to calculate the counts for each bin, we do not regenerate bins for the filtered data
69+
List<ClinicalData> unfilteredClinicalDataForSamples = studyViewColumnarService.getSampleClinicalData(partialFilter, attributeIds);
70+
List<ClinicalData> filteredClinicalDataForSamples = studyViewColumnarService.getSampleClinicalData(studyViewFilter, attributeIds);
71+
List<ClinicalData> unfilteredClinicalDataForPatients = studyViewColumnarService.getPatientClinicalData(partialFilter, attributeIds);
72+
List<ClinicalData> filteredClinicalDataForPatients = studyViewColumnarService.getPatientClinicalData(studyViewFilter, attributeIds);
73+
74+
Map<String, ClinicalDataType> attributeDatatypeMap = NewClinicalDataBinUtil.toAttributeDatatypeMap(
75+
unfilteredClinicalDataForSamples.stream().map(ClinicalData::getAttrId).collect(Collectors.toList()),
76+
unfilteredClinicalDataForPatients.stream().map(ClinicalData::getAttrId).collect(Collectors.toList()),
77+
Collections.emptyList() // TODO ignoring conflictingPatientAttributeIds for now
78+
);
79+
80+
List<Binnable> unfilteredClinicalData = Stream.of(
81+
unfilteredClinicalDataForSamples,
82+
unfilteredClinicalDataForPatients
83+
// unfilteredClinicalDataForConflictingPatientAttributes /// TODO ignoring conflictingPatientAttributeIds for now
84+
).flatMap(Collection::stream).collect(Collectors.toList());
85+
86+
List<Binnable> filteredClinicalData = Stream.of(
87+
filteredClinicalDataForSamples,
88+
filteredClinicalDataForPatients
89+
// filteredClinicalDataForConflictingPatientAttributes // TODO ignoring conflictingPatientAttributeIds for now
90+
).flatMap(Collection::stream).collect(Collectors.toList());
91+
92+
Map<String, List<Binnable>> unfilteredClinicalDataByAttributeId =
93+
unfilteredClinicalData.stream().collect(Collectors.groupingBy(Binnable::getAttrId));
94+
95+
Map<String, List<Binnable>> filteredClinicalDataByAttributeId =
96+
filteredClinicalData.stream().collect(Collectors.groupingBy(Binnable::getAttrId));
97+
98+
List<ClinicalDataBin> clinicalDataBins = Collections.emptyList();
99+
100+
if (dataBinMethod == DataBinMethod.STATIC) {
101+
if (!unfilteredSamples.isEmpty() && !unfilteredClinicalData.isEmpty()) {
102+
clinicalDataBins = NewClinicalDataBinUtil.calculateStaticDataBins(
103+
dataBinner,
104+
attributes,
105+
attributeDatatypeMap,
106+
unfilteredClinicalDataByAttributeId,
107+
filteredClinicalDataByAttributeId,
108+
unfilteredUniqueSampleKeys,
109+
unfilteredUniquePatientKeys,
110+
filteredUniqueSampleKeys,
111+
filteredUniquePatientKeys
112+
);
113+
}
114+
}
115+
else { // dataBinMethod == DataBinMethod.DYNAMIC
116+
// TODO we should consider removing dynamic binning support
117+
// we never use dynamic binning in the frontend because number of bins and the bin ranges can change
118+
// each time there is a new filter which makes the frontend implementation complicated
119+
if (!filteredClinicalData.isEmpty()) {
120+
clinicalDataBins = NewClinicalDataBinUtil.calculateDynamicDataBins(
121+
dataBinner,
122+
attributes,
123+
attributeDatatypeMap,
124+
filteredClinicalDataByAttributeId,
125+
filteredUniqueSampleKeys,
126+
filteredUniquePatientKeys
127+
);
128+
}
129+
}
130+
131+
return clinicalDataBins;
132+
}
133+
}

src/main/java/org/cbioportal/web/columnar/StudyViewColumnStoreController.java

+4-5
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,6 @@
1515
import org.cbioportal.web.parameter.ClinicalDataFilter;
1616
import org.cbioportal.web.parameter.DataBinMethod;
1717
import org.cbioportal.web.parameter.StudyViewFilter;
18-
import org.cbioportal.web.util.ClinicalDataBinUtil;
1918
import org.springframework.beans.factory.annotation.Autowired;
2019
import org.springframework.http.HttpStatus;
2120
import org.springframework.http.MediaType;
@@ -43,13 +42,13 @@ public class StudyViewColumnStoreController {
4342

4443
private final StudyViewColumnarService studyViewColumnarService;
4544
private final StudyViewService studyViewService;
46-
private final ClinicalDataBinUtil clinicalDataBinUtil;
45+
private final ClinicalDataBinner clinicalDataBinner;
4746

4847
@Autowired
49-
public StudyViewColumnStoreController(StudyViewColumnarService studyViewColumnarService, StudyViewService studyViewService, ClinicalDataBinUtil clinicalDataBinUtil) {
48+
public StudyViewColumnStoreController(StudyViewColumnarService studyViewColumnarService, StudyViewService studyViewService, ClinicalDataBinner clinicalDataBinner) {
5049
this.studyViewColumnarService = studyViewColumnarService;
5150
this.studyViewService = studyViewService;
52-
this.clinicalDataBinUtil = clinicalDataBinUtil;
51+
this.clinicalDataBinner = clinicalDataBinner;
5352
}
5453

5554
@PreAuthorize("hasPermission(#involvedCancerStudies, 'Collection<CancerStudyId>', T(org.cbioportal.utils.security.AccessLevel).READ)")
@@ -112,7 +111,7 @@ public ResponseEntity<List<ClinicalDataBin>> fetchClinicalDataBinCounts(
112111
@RequestAttribute(required = false, value = "involvedCancerStudies") Collection<String> involvedCancerStudies,
113112
@RequestAttribute(required = false, value = "interceptedClinicalDataBinCountFilter") ClinicalDataBinCountFilter interceptedClinicalDataBinCountFilter
114113
) {
115-
List<ClinicalDataBin> clinicalDataBins = clinicalDataBinUtil.fetchClinicalDataBinCounts(
114+
List<ClinicalDataBin> clinicalDataBins = clinicalDataBinner.fetchClinicalDataBinCounts(
116115
dataBinMethod,
117116
interceptedClinicalDataBinCountFilter,
118117
true
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,141 @@
1+
package org.cbioportal.web.columnar.util;
2+
3+
import org.cbioportal.model.Binnable;
4+
import org.cbioportal.model.ClinicalDataBin;
5+
import org.cbioportal.model.DataBin;
6+
import org.cbioportal.web.parameter.ClinicalDataBinCountFilter;
7+
import org.cbioportal.web.parameter.ClinicalDataBinFilter;
8+
import org.cbioportal.web.parameter.ClinicalDataType;
9+
import org.cbioportal.web.parameter.StudyViewFilter;
10+
import org.cbioportal.web.util.DataBinner;
11+
12+
import java.util.ArrayList;
13+
import java.util.HashMap;
14+
import java.util.List;
15+
import java.util.Map;
16+
17+
import static java.util.Collections.emptyList;
18+
import static java.util.stream.Collectors.toList;
19+
20+
public class NewClinicalDataBinUtil {
21+
public static StudyViewFilter removeSelfFromFilter(ClinicalDataBinCountFilter dataBinCountFilter) {
22+
List<ClinicalDataBinFilter> attributes = dataBinCountFilter.getAttributes();
23+
StudyViewFilter studyViewFilter = dataBinCountFilter.getStudyViewFilter();
24+
25+
if (attributes.size() == 1) {
26+
NewStudyViewFilterUtil.removeSelfFromFilter(attributes.get(0).getAttributeId(), studyViewFilter);
27+
}
28+
29+
return studyViewFilter;
30+
}
31+
32+
public static ClinicalDataBin dataBinToClinicalDataBin(ClinicalDataBinFilter attribute, DataBin dataBin) {
33+
ClinicalDataBin clinicalDataBin = new ClinicalDataBin();
34+
clinicalDataBin.setAttributeId(attribute.getAttributeId());
35+
clinicalDataBin.setCount(dataBin.getCount());
36+
if (dataBin.getEnd() != null) {
37+
clinicalDataBin.setEnd(dataBin.getEnd());
38+
}
39+
if (dataBin.getSpecialValue() != null) {
40+
clinicalDataBin.setSpecialValue(dataBin.getSpecialValue());
41+
}
42+
if (dataBin.getStart() != null) {
43+
clinicalDataBin.setStart(dataBin.getStart());
44+
}
45+
return clinicalDataBin;
46+
}
47+
48+
public static Map<String, ClinicalDataType> toAttributeDatatypeMap(
49+
List<String> sampleAttributeIds,
50+
List<String> patientAttributeIds,
51+
List<String> conflictingPatientAttributeIds
52+
) {
53+
Map<String, ClinicalDataType> attributeDatatypeMap = new HashMap<>();
54+
55+
sampleAttributeIds.forEach(attribute -> {
56+
attributeDatatypeMap.put(attribute, ClinicalDataType.SAMPLE);
57+
});
58+
patientAttributeIds.forEach(attribute -> {
59+
attributeDatatypeMap.put(attribute, ClinicalDataType.PATIENT);
60+
});
61+
conflictingPatientAttributeIds.forEach(attribute -> {
62+
attributeDatatypeMap.put(attribute, ClinicalDataType.SAMPLE);
63+
});
64+
65+
return attributeDatatypeMap;
66+
}
67+
68+
public static List<ClinicalDataBin> calculateStaticDataBins(
69+
DataBinner dataBinner,
70+
List<ClinicalDataBinFilter> attributes,
71+
Map<String, ClinicalDataType> attributeDatatypeMap,
72+
Map<String, List<Binnable>> unfilteredClinicalDataByAttributeId,
73+
Map<String, List<Binnable>> filteredClinicalDataByAttributeId,
74+
List<String> unfilteredUniqueSampleKeys,
75+
List<String> unfilteredUniquePatientKeys,
76+
List<String> filteredUniqueSampleKeys,
77+
List<String> filteredUniquePatientKeys
78+
) {
79+
List<ClinicalDataBin> clinicalDataBins = new ArrayList<>();
80+
81+
for (ClinicalDataBinFilter attribute : attributes) {
82+
if (attributeDatatypeMap.containsKey(attribute.getAttributeId())) {
83+
ClinicalDataType clinicalDataType = attributeDatatypeMap.get(attribute.getAttributeId());
84+
List<String> filteredIds = clinicalDataType == ClinicalDataType.PATIENT ? filteredUniquePatientKeys
85+
: filteredUniqueSampleKeys;
86+
List<String> unfilteredIds = clinicalDataType == ClinicalDataType.PATIENT
87+
? unfilteredUniquePatientKeys
88+
: unfilteredUniqueSampleKeys;
89+
90+
List<ClinicalDataBin> dataBins = dataBinner
91+
.calculateClinicalDataBins(attribute, clinicalDataType,
92+
filteredClinicalDataByAttributeId.getOrDefault(attribute.getAttributeId(),
93+
emptyList()),
94+
unfilteredClinicalDataByAttributeId.getOrDefault(attribute.getAttributeId(),
95+
emptyList()),
96+
filteredIds, unfilteredIds)
97+
.stream()
98+
.map(dataBin -> dataBinToClinicalDataBin(attribute, dataBin))
99+
.collect(toList());
100+
101+
clinicalDataBins.addAll(dataBins);
102+
}
103+
}
104+
105+
return clinicalDataBins;
106+
}
107+
108+
public static List<ClinicalDataBin> calculateDynamicDataBins(
109+
DataBinner dataBinner,
110+
List<ClinicalDataBinFilter> attributes,
111+
Map<String, ClinicalDataType> attributeDatatypeMap,
112+
Map<String, List<Binnable>> filteredClinicalDataByAttributeId,
113+
List<String> filteredUniqueSampleKeys,
114+
List<String> filteredUniquePatientKeys
115+
) {
116+
List<ClinicalDataBin> clinicalDataBins = new ArrayList<>();
117+
118+
for (ClinicalDataBinFilter attribute : attributes) {
119+
120+
// if there is clinical data for requested attribute
121+
if (attributeDatatypeMap.containsKey(attribute.getAttributeId())) {
122+
ClinicalDataType clinicalDataType = attributeDatatypeMap.get(attribute.getAttributeId());
123+
List<String> filteredIds = clinicalDataType == ClinicalDataType.PATIENT
124+
? filteredUniquePatientKeys
125+
: filteredUniqueSampleKeys;
126+
127+
List<ClinicalDataBin> dataBins = dataBinner
128+
.calculateDataBins(attribute, clinicalDataType,
129+
filteredClinicalDataByAttributeId.getOrDefault(attribute.getAttributeId(),
130+
emptyList()),
131+
filteredIds)
132+
.stream()
133+
.map(dataBin -> dataBinToClinicalDataBin(attribute, dataBin))
134+
.collect(toList());
135+
clinicalDataBins.addAll(dataBins);
136+
}
137+
}
138+
139+
return clinicalDataBins;
140+
}
141+
}

0 commit comments

Comments
 (0)