Skip to content

Commit 15b8700

Browse files
committed
Subsample within experiments
1 parent 9ae12c2 commit 15b8700

File tree

1 file changed

+27
-8
lines changed

1 file changed

+27
-8
lines changed

reconstruction/create_subsamples.py

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -64,16 +64,24 @@ def getArgs():
6464

6565
# Construct maps from dataset coordinates
6666
organismCellMap = {} # organism -> list of indices of cells belonging to the organism
67+
experimentMap = {} # experiment -> list of organisms in experiment
6768
treatmentMap = {} # treatment -> list of organisms in treatment group
6869
cellTypeMap = {} # cell type -> list of indices of cells of that type
6970
for index, cellData in enumerate(data.cell):
7071
organism = cellData.organism.item()
7172
if organism not in organismCellMap:
7273
organismCellMap[organism] = []
74+
75+
experiment = cellData.experiment.item()
76+
if experiment not in experimentMap:
77+
experimentMap[experiment] = []
78+
experimentMap[experiment].append(organism)
79+
7380
treatment = cellData.treatment.item()
7481
if treatment not in treatmentMap:
7582
treatmentMap[treatment] = []
7683
treatmentMap[treatment].append(organism)
84+
7785
cellType = cellData.cellType.item()
7886
if cellType not in cellTypeMap:
7987
cellTypeMap[cellType] = []
@@ -102,10 +110,12 @@ def subsampleCells(organisms):
102110
organismProportion = args.proportion if args.organism else args.organismProportion
103111
for i in range(organismNSubsamples):
104112
organismSubsample = []
105-
for treatmentOrganisms in treatmentMap.values():
106-
treatmentSubsampleSize = round(organismProportion * len(treatmentOrganisms))
107-
treatmentSubsample = random.sample(treatmentOrganisms, treatmentSubsampleSize)
108-
organismSubsample.extend(treatmentSubsample)
# Stratified draw: take the same proportion of organisms from every
# (experiment, treatment) combination and pool them into organismSubsample.
for experimentOrganisms in experimentMap.values():
    # Build the experiment's organism set once; it is the same for every treatment.
    experimentOrganismSet = set(experimentOrganisms)
    for treatmentOrganisms in treatmentMap.values():
        # random.sample() needs a sequence: passing a set is deprecated since
        # Python 3.9 and raises TypeError from 3.11. Sorting also gives a
        # stable population order, so a seeded run is reproducible.
        matchingOrganisms = sorted(experimentOrganismSet.intersection(treatmentOrganisms))
        subsetSubsampleSize = round(organismProportion * len(matchingOrganisms))
        subsetSubsample = random.sample(matchingOrganisms, subsetSubsampleSize)
        organismSubsample.extend(subsetSubsample)
109119
if args.organism:
110120
subsample = list(itertools.chain(*[organismCellMap[organism] for organism in organismSubsample]))
111121
subsample.sort()
@@ -120,19 +130,28 @@ def subsampleCells(organisms):
120130
else:
121131
data = dataset.get_table("originalData")
122132

# Construct maps from dataset coordinates
experimentMap = {}  # experiment -> list of indices of organisms in experiment
treatmentMap = {}  # treatment -> list of indices of organisms in treatment group
for index, organism in enumerate(data.organism):
    # Record the positional index (not the organism object) so the entries
    # can be intersected with the indices stored in treatmentMap.
    experiment = organism.experiment.item()
    if experiment not in experimentMap:
        experimentMap[experiment] = []
    experimentMap[experiment].append(index)

    treatment = organism.treatment.item()
    if treatment not in treatmentMap:
        treatmentMap[treatment] = []
    treatmentMap[treatment].append(index)

# The (experiment, treatment) groups are identical for every subsample, so
# compute them once instead of re-intersecting inside the sampling loop.
# Each group is stored as a sorted list because random.sample() requires a
# sequence (sets raise TypeError on Python 3.11+).
matchingGroups = []
for experimentOrganismIndices in experimentMap.values():
    experimentIndexSet = set(experimentOrganismIndices)
    for treatmentOrganismIndices in treatmentMap.values():
        matchingOrganismIndices = sorted(experimentIndexSet.intersection(treatmentOrganismIndices))
        if matchingOrganismIndices:  # skip combinations with no organisms
            matchingGroups.append(matchingOrganismIndices)

# Draw args.nsubsamples subsamples, each taking args.proportion of the
# organisms from every (experiment, treatment) group.
for i in range(args.nsubsamples):
    subsample = []
    for matchingOrganismIndices in matchingGroups:
        subsetSubsampleSize = round(args.proportion * len(matchingOrganismIndices))
        subsetSubsample = random.sample(matchingOrganismIndices, subsetSubsampleSize)
        subsample.extend(subsetSubsample)
    subsample.sort()
    subsamples.append(subsample)
138157

0 commit comments

Comments
 (0)