@@ -64,16 +64,24 @@ def getArgs():
64
64
65
65
# Construct maps from dataset coordinates
66
66
organismCellMap = {} # organism -> list of indices of cells belonging to the organism
67
+ experimentMap = {} # experiment -> list of organisms in experiment
67
68
treatmentMap = {} # treatment -> list of organisms in treatment group
68
69
cellTypeMap = {} # cell type -> list of indices of cells of that type
69
70
for index , cellData in enumerate (data .cell ):
70
71
organism = cellData .organism .item ()
71
72
if organism not in organismCellMap :
72
73
organismCellMap [organism ] = []
74
+
75
# Record this cell's organism under its experiment (one entry per cell, so an
# organism can appear in the list several times).
experiment = cellData.experiment.item()
experimentMap.setdefault(experiment, []).append(organism)
79
+
73
80
treatment = cellData .treatment .item ()
74
81
if treatment not in treatmentMap :
75
82
treatmentMap [treatment ] = []
76
83
treatmentMap [treatment ].append (organism )
84
+
77
85
cellType = cellData .cellType .item ()
78
86
if cellType not in cellTypeMap :
79
87
cellTypeMap [cellType ] = []
@@ -102,10 +110,12 @@ def subsampleCells(organisms):
102
110
organismProportion = args .proportion if args .organism else args .organismProportion
103
111
for i in range (organismNSubsamples ):
104
112
organismSubsample = []
105
- for treatmentOrganisms in treatmentMap .values ():
106
- treatmentSubsampleSize = round (organismProportion * len (treatmentOrganisms ))
107
- treatmentSubsample = random .sample (treatmentOrganisms , treatmentSubsampleSize )
108
- organismSubsample .extend (treatmentSubsample )
113
# Draw the organism subsample separately from every (experiment, treatment)
# combination, taking organismProportion of the organisms in each one.
for experimentOrganisms in experimentMap.values():
    # Build the experiment's organism set once instead of once per treatment.
    experimentOrganismSet = set(experimentOrganisms)
    for treatmentOrganisms in treatmentMap.values():
        # random.sample() needs a sequence: passing a set is deprecated since
        # Python 3.9 and raises TypeError from 3.11. Sorting also gives the
        # sampler a stable population order.
        # NOTE(review): assumes organism ids are mutually orderable - confirm.
        matchingOrganisms = sorted(experimentOrganismSet.intersection(treatmentOrganisms))
        subsetSubsampleSize = round(organismProportion * len(matchingOrganisms))
        subsetSubsample = random.sample(matchingOrganisms, subsetSubsampleSize)
        organismSubsample.extend(subsetSubsample)
109
119
if args .organism :
110
120
subsample = list (itertools .chain (* [organismCellMap [organism ] for organism in organismSubsample ]))
111
121
subsample .sort ()
@@ -120,19 +130,28 @@ def subsampleCells(organisms):
120
130
else :
121
131
data = dataset .get_table ("originalData" )
122
132
133
# Construct maps from dataset coordinates
experimentMap = {}  # experiment -> list of indices of organisms in experiment
treatmentMap = {}  # treatment -> list of indices of organisms in treatment group
for index, organism in enumerate(data.organism):
    # Both maps store the organism's positional index so that the two lists
    # can be intersected with each other when subsampling.
    experiment = organism.experiment.item()
    experimentMap.setdefault(experiment, []).append(index)

    treatment = organism.treatment.item()
    treatmentMap.setdefault(treatment, []).append(index)
129
146
130
147
for i in range (args .nsubsamples ):
131
148
subsample = []
132
- for treatmentOrganismIndices in treatmentMap .values ():
133
- treatmentSubsampleSize = round (args .proportion * len (treatmentOrganismIndices ))
134
- treatmentSubsample = random .sample (treatmentOrganismIndices , treatmentSubsampleSize )
135
- subsample .extend (treatmentSubsample )
149
# Draw the subsample separately from every (experiment, treatment)
# combination, taking args.proportion of the organism indices in each one.
for experimentOrganismIndices in experimentMap.values():
    # Build the experiment's index set once instead of once per treatment.
    experimentIndexSet = set(experimentOrganismIndices)
    for treatmentOrganismIndices in treatmentMap.values():
        # random.sample() needs a sequence: passing a set is deprecated since
        # Python 3.9 and raises TypeError from 3.11, so sample from a sorted list.
        matchingOrganismIndices = sorted(experimentIndexSet.intersection(treatmentOrganismIndices))
        # Size the draw from the matching indices (the previous code referenced
        # an undefined name here).
        subsetSubsampleSize = round(args.proportion * len(matchingOrganismIndices))
        subsetSubsample = random.sample(matchingOrganismIndices, subsetSubsampleSize)
        subsample.extend(subsetSubsample)
136
155
subsample .sort ()
137
156
subsamples .append (subsample )
138
157
0 commit comments