Skip to content

Commit 5268084

Browse files
committed
Use a new subsample of organisms and cells for each nested subsample
1 parent 15b8700 commit 5268084

File tree

1 file changed

+17
-21
lines changed

1 file changed

+17
-21
lines changed

reconstruction/create_subsamples.py

Lines changed: 17 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ def getArgs():
1818
requiredArgGroup.add_argument("-n", "--nsubsamples", type=int, required=True, help="Number of subsamples to generate.")
1919
optionalArgGroup.add_argument("-s", "--singlecell", action="store_true", default=False, help="Create subsamples for single-cell data instead of aggregate data. By default, this will subsample cells rather than organisms. To override this behavior, use the -o/--organism option to subsample organisms or the -b/--both-organisms-and-cells option to subsample both.")
2020
optionalArgGroup.add_argument("-o", "--organism", action="store_true", default=False, help="Subsample over organisms, even for single-cell data (this is the default behavior for aggregate data)")
21-
optionalArgGroup.add_argument("-b", "--both-organisms-and-cells", dest="bothOrganismsAndCells", nargs=2, metavar=("ORGANISM_NSUBSAMPLES", "ORGANISM_PROPORTION"), help="Subsample over organisms, then over cells (only applicable to single-cell, -s). The additional arguments are the number of times to sample the organisms and the proportion of organisms in each treatment to use for each subsample. For each sample of organisms, the cells in each organism will be sampled within their cell type NSUBSAMPLES (-n/--nsubsamples) times with proportion PROPORTION (-p/--proportion). This will produce a total of ORGANISM_NSUBSAMPLES * NSUBSAMPLES subsamples. Proportion must satisfy 0 < p < 1.")
21+
optionalArgGroup.add_argument("-b", "--both-organisms-and-cells", dest="bothOrganismsAndCells", nargs=1, metavar="ORGANISM_PROPORTION", help="Subsample over organisms and cells (only applicable to single-cell, -s). The additional argument is the proportion of organisms in each treatment to use for each subsample and must satisfy 0 < p < 1.")
2222
optionalArgGroup.add_argument("-h", "--help", action="help", help="Show this help message and exit")
2323
args = parser.parse_args()
2424

@@ -43,8 +43,7 @@ def getArgs():
4343
raise Exception("Can only use -b/--both-organisms-and-single-cells with single-cell data (-s/--singlecell).")
4444

4545
if bothOrganismsAndCells:
46-
args.organismNSubsamples = int(args.bothOrganismsAndCells[0])
47-
args.organismProportion = float(args.bothOrganismsAndCells[1])
46+
args.organismProportion = float(args.bothOrganismsAndCells[0])
4847
if args.organismProportion <= 0 or args.organismProportion >= 1:
4948
raise Exception("Proportion of organisms out of bounds. Must satisfy 0 < p < 1.")
5049

@@ -89,26 +88,22 @@ def getArgs():
8988
organismCellMap[organism].append(index)
9089

9190
def subsampleCells(organisms):
    """Draw one cell subsample, stratified by organism and cell type.

    For each organism in ``organisms``, that organism's cells are split by
    cell type, and ``round(args.proportion * n)`` cells are drawn without
    replacement from each organism/cell-type group of size ``n``.

    Reads ``args.proportion``, ``organismCellMap`` and ``cellTypeMap`` from
    the enclosing scope.

    Args:
        organisms: Iterable of keys into ``organismCellMap``.

    Returns:
        A single sorted list of the selected cell indices.
    """
    # Build each cell type's index set once rather than once per organism.
    cellTypeIndexSets = [set(indices) for indices in cellTypeMap.values()]

    subsample = []
    for organism in organisms:
        organismCellIndices = set(organismCellMap[organism])
        for cellTypeCellIndices in cellTypeIndexSets:
            # random.sample() no longer accepts a set (deprecated in 3.9,
            # TypeError since 3.11). Sorting yields a sequence and keeps the
            # draw independent of set iteration order.
            matchingCellIndices = sorted(organismCellIndices & cellTypeCellIndices)
            subsetSubsampleSize = round(args.proportion * len(matchingCellIndices))
            subsample.extend(random.sample(matchingCellIndices, subsetSubsampleSize))
    subsample.sort()
    return subsample
106102

107103
if args.organism or args.bothOrganismsAndCells:
108104
# Take a sample of the organisms from each treatment
109-
organismNSubsamples = args.nsubsamples if args.organism else args.organismNSubsamples
110105
organismProportion = args.proportion if args.organism else args.organismProportion
111-
for i in range(organismNSubsamples):
106+
for i in range(args.nsubsamples):
112107
organismSubsample = []
113108
for experimentOrganisms in experimentMap.values():
114109
for treatmentOrganisms in treatmentMap.values():
@@ -122,11 +117,12 @@ def subsampleCells(organisms):
122117
subsamples.append(subsample)
123118
elif args.bothOrganismsAndCells:
124119
# Now that we have a sample of the organisms, take a sample of each selected organism's cells within each cell type
125-
subsamples.extend(subsampleCells(organismSubsample))
120+
subsamples.append(subsampleCells(organismSubsample))
126121
else:
127122
raise Exception("Argument state invalid.")
128123
else:
129-
subsamples.extend(subsampleCells(organismCellMap.keys()))
124+
for i in range(args.nsubsamples):
125+
subsamples.append(subsampleCells(organismCellMap.keys()))
130126
else:
131127
data = dataset.get_table("originalData")
132128

0 commit comments

Comments
 (0)