|
| 1 | +package org.cbioportal.web.columnar; |
| 2 | + |
| 3 | +import org.cbioportal.model.*; |
| 4 | +import org.cbioportal.service.StudyViewColumnarService; |
| 5 | +import org.cbioportal.web.columnar.util.NewClinicalDataBinUtil; |
| 6 | +import org.cbioportal.web.parameter.*; |
| 7 | +import org.cbioportal.web.util.DataBinner; |
| 8 | +import org.springframework.beans.factory.annotation.Autowired; |
| 9 | +import org.springframework.stereotype.Component; |
| 10 | + |
| 11 | +import java.util.*; |
| 12 | +import java.util.stream.Collectors; |
| 13 | +import java.util.stream.Stream; |
| 14 | + |
| 15 | +@Component |
| 16 | +public class ClinicalDataBinner { |
| 17 | + private final StudyViewColumnarService studyViewColumnarService; |
| 18 | + private final DataBinner dataBinner; |
| 19 | + |
| 20 | + @Autowired |
| 21 | + public ClinicalDataBinner( |
| 22 | + StudyViewColumnarService studyViewColumnarService, |
| 23 | + DataBinner dataBinner |
| 24 | + ) { |
| 25 | + this.studyViewColumnarService = studyViewColumnarService; |
| 26 | + this.dataBinner = dataBinner; |
| 27 | + } |
| 28 | + |
| 29 | + public List<ClinicalDataBin> fetchClinicalDataBinCounts( |
| 30 | + DataBinMethod dataBinMethod, |
| 31 | + ClinicalDataBinCountFilter dataBinCountFilter, |
| 32 | + boolean shouldRemoveSelfFromFilter |
| 33 | + ) { |
| 34 | + List<ClinicalDataBinFilter> attributes = dataBinCountFilter.getAttributes(); |
| 35 | + StudyViewFilter studyViewFilter = dataBinCountFilter.getStudyViewFilter(); |
| 36 | + |
| 37 | + if (shouldRemoveSelfFromFilter) { |
| 38 | + studyViewFilter = NewClinicalDataBinUtil.removeSelfFromFilter(dataBinCountFilter); |
| 39 | + } |
| 40 | + |
| 41 | + List<String> attributeIds = attributes.stream().map(ClinicalDataBinFilter::getAttributeId).collect(Collectors.toList()); |
| 42 | + |
| 43 | + // a new StudyView filter to partially filter by study and sample ids only |
| 44 | + StudyViewFilter partialFilter = new StudyViewFilter(); |
| 45 | + partialFilter.setStudyIds(studyViewFilter.getStudyIds()); |
| 46 | + partialFilter.setSampleIdentifiers(studyViewFilter.getSampleIdentifiers()); |
| 47 | + |
| 48 | + // filter only by study id and sample identifiers, ignore rest |
| 49 | + // we need this additional partial filter because we always need to know the bins generated for the initial state |
| 50 | + // which allows us to keep the number of bins and bin ranges consistent even if there are additional data filters. |
| 51 | + // we only want to update the counts for each bin, we don't want to regenerate the bins for the filtered data. |
| 52 | + // NOTE: partial filter is only needed when dataBinMethod == DataBinMethod.STATIC but that's always the case |
| 53 | + // for the frontend implementation. we can't really use dataBinMethod == DataBinMethod.DYNAMIC because of the |
| 54 | + // complication it brings to the frontend visualization and filtering |
| 55 | + List<Sample> unfilteredSamples = studyViewColumnarService.getFilteredSamples(partialFilter); |
| 56 | + List<Sample> filteredSamples = studyViewColumnarService.getFilteredSamples(studyViewFilter); |
| 57 | + |
| 58 | + // TODO make sure unique sample and patient keys don't need to be distinct |
| 59 | + List<String> unfilteredUniqueSampleKeys = unfilteredSamples.stream().map(Sample::getUniqueSampleKey).collect(Collectors.toList()); |
| 60 | + List<String> filteredUniqueSampleKeys = filteredSamples.stream().map(Sample::getUniqueSampleKey).collect(Collectors.toList()); |
| 61 | + List<String> unfilteredUniquePatientKeys = unfilteredSamples.stream().map(Sample::getUniquePatientKey).collect(Collectors.toList()); |
| 62 | + List<String> filteredUniquePatientKeys = filteredSamples.stream().map(Sample::getUniquePatientKey).collect(Collectors.toList()); |
| 63 | + |
| 64 | + // TODO make sure we don't need a distinction between sample vs patient attribute ids here |
| 65 | + // ideally we shouldn't because we have patient clinical data separated from sample clinical data in clickhouse |
| 66 | + |
| 67 | + // we need the clinical data for the partial filter in order to generate the bins for initial state |
| 68 | + // we use the filtered data to calculate the counts for each bin, we do not regenerate bins for the filtered data |
| 69 | + List<ClinicalData> unfilteredClinicalDataForSamples = studyViewColumnarService.getSampleClinicalData(partialFilter, attributeIds); |
| 70 | + List<ClinicalData> filteredClinicalDataForSamples = studyViewColumnarService.getSampleClinicalData(studyViewFilter, attributeIds); |
| 71 | + List<ClinicalData> unfilteredClinicalDataForPatients = studyViewColumnarService.getPatientClinicalData(partialFilter, attributeIds); |
| 72 | + List<ClinicalData> filteredClinicalDataForPatients = studyViewColumnarService.getPatientClinicalData(studyViewFilter, attributeIds); |
| 73 | + |
| 74 | + Map<String, ClinicalDataType> attributeDatatypeMap = NewClinicalDataBinUtil.toAttributeDatatypeMap( |
| 75 | + unfilteredClinicalDataForSamples.stream().map(ClinicalData::getAttrId).collect(Collectors.toList()), |
| 76 | + unfilteredClinicalDataForPatients.stream().map(ClinicalData::getAttrId).collect(Collectors.toList()), |
| 77 | + Collections.emptyList() // TODO ignoring conflictingPatientAttributeIds for now |
| 78 | + ); |
| 79 | + |
| 80 | + List<Binnable> unfilteredClinicalData = Stream.of( |
| 81 | + unfilteredClinicalDataForSamples, |
| 82 | + unfilteredClinicalDataForPatients |
| 83 | + // unfilteredClinicalDataForConflictingPatientAttributes /// TODO ignoring conflictingPatientAttributeIds for now |
| 84 | + ).flatMap(Collection::stream).collect(Collectors.toList()); |
| 85 | + |
| 86 | + List<Binnable> filteredClinicalData = Stream.of( |
| 87 | + filteredClinicalDataForSamples, |
| 88 | + filteredClinicalDataForPatients |
| 89 | + // filteredClinicalDataForConflictingPatientAttributes // TODO ignoring conflictingPatientAttributeIds for now |
| 90 | + ).flatMap(Collection::stream).collect(Collectors.toList()); |
| 91 | + |
| 92 | + Map<String, List<Binnable>> unfilteredClinicalDataByAttributeId = |
| 93 | + unfilteredClinicalData.stream().collect(Collectors.groupingBy(Binnable::getAttrId)); |
| 94 | + |
| 95 | + Map<String, List<Binnable>> filteredClinicalDataByAttributeId = |
| 96 | + filteredClinicalData.stream().collect(Collectors.groupingBy(Binnable::getAttrId)); |
| 97 | + |
| 98 | + List<ClinicalDataBin> clinicalDataBins = Collections.emptyList(); |
| 99 | + |
| 100 | + if (dataBinMethod == DataBinMethod.STATIC) { |
| 101 | + if (!unfilteredSamples.isEmpty() && !unfilteredClinicalData.isEmpty()) { |
| 102 | + clinicalDataBins = NewClinicalDataBinUtil.calculateStaticDataBins( |
| 103 | + dataBinner, |
| 104 | + attributes, |
| 105 | + attributeDatatypeMap, |
| 106 | + unfilteredClinicalDataByAttributeId, |
| 107 | + filteredClinicalDataByAttributeId, |
| 108 | + unfilteredUniqueSampleKeys, |
| 109 | + unfilteredUniquePatientKeys, |
| 110 | + filteredUniqueSampleKeys, |
| 111 | + filteredUniquePatientKeys |
| 112 | + ); |
| 113 | + } |
| 114 | + } |
| 115 | + else { // dataBinMethod == DataBinMethod.DYNAMIC |
| 116 | + // TODO we should consider removing dynamic binning support |
| 117 | + // we never use dynamic binning in the frontend because number of bins and the bin ranges can change |
| 118 | + // each time there is a new filter which makes the frontend implementation complicated |
| 119 | + if (!filteredClinicalData.isEmpty()) { |
| 120 | + clinicalDataBins = NewClinicalDataBinUtil.calculateDynamicDataBins( |
| 121 | + dataBinner, |
| 122 | + attributes, |
| 123 | + attributeDatatypeMap, |
| 124 | + filteredClinicalDataByAttributeId, |
| 125 | + filteredUniqueSampleKeys, |
| 126 | + filteredUniquePatientKeys |
| 127 | + ); |
| 128 | + } |
| 129 | + } |
| 130 | + |
| 131 | + return clinicalDataBins; |
| 132 | + } |
| 133 | +} |
0 commit comments