clustering_3.R

#!/usr/bin/env r

# This is Free Software - You can use and distribute it under
# the terms of the GNU General Public License, version 3 or later
# (c) Massimo Cavallaro (m.cavallaro@warwick.ac.uk)

lim<-function(f, f1) {
  Min<-min(exprs(f)[,f1])
  Max<-max(exprs(f)[,f1])
  return(c(Min, Max))
}

DNA.threshold<-500

library("flowCore")
library("flowClust")

## decomment to use with littler:
#file.name<-argv
#file.name<-'./HBB_0742016/Experiment_pA- 80_007.fcs'

writeLines('########### 3')
writeLines(file.name)
writeLines('########### 3')

ff<-read.FCS(file.name)
file.base.name<-strsplit(base.name, '.fcs')

## 1st-step: remove reads out of scale
varNames<-colnames(ff)
rectGate<-rectangleGate(filterId = "Fluorescence Region", "FSC-A" = c(0, 180000))
ff1<-Subset(ff, rectGate)

## 2nd step: remove reads corresponding to dead cells and duplets triplets
res2<-flowClust(ff1, varNames=varNames[c(2, 3, 5, 6)], K = 3, B = 1000)
level<-0.4
zc<-0.9
ruleOutliers(res2)<-list(level = level)
ruleOutliers(res2)<-list(z.cutoff = zc)

## 2.1: split the flowFrame and select the chunk
## with intermediate size (FSC-A) and granularity (SSC-A)
ff1.splitted<-split(ff1, res2, level = level, z.cutoff = zc, population = list(sc1=1, sc2=2, sc3=3))

f1<-summary(ff1)[c(4,22)]
f1.sc1<-summary(ff1.splitted$sc1)[c(4,22)]
f1.sc2<-summary(ff1.splitted$sc2)[c(4,22)]
f1.sc3<-summary(ff1.splitted$sc3)[c(4,22)]

dists<-c(dist(rbind(f1, f1.sc1)), dist(rbind(f1, f1.sc2)), dist(rbind(f1, f1.sc3)))

switch(which.min(dists),
  {ff2<-ff1.splitted$sc1;
   ff2_<-ff1.splitted$sc2;
   ff2__<-ff1.splitted$sc3;
    },
  {ff2<-ff1.splitted$sc2;
   ff2_<-ff1.splitted$sc1;
   ff2__<-ff1.splitted$sc3;
    },
  {ff2<-ff1.splitted$sc3;
   ff2_<-ff1.splitted$sc1;
   ff2__<-ff1.splitted$sc2;
    })


## 2.2: plot the clusters
sub.dir<-'G1G2__'
dir.create(file.path(dir.name, sub.dir), showWarnings=FALSE)
png(filename=file.path(dir.name, sub.dir, paste(file.base.name, "_k_3.png", sep = '')),
  width = 480*3, height = 680*2)
par(mfrow = c(4,3), mar = c(4,4,3,2) + 0.1)

plot(res2, data = ff1, subset = c(1, 3), level = level, z.cutoff = zc,
     ylim = c(0,9000), xlim = c(0,240000))
plot(res2, data = ff1, subset = c(2, 4), level = level, z.cutoff = zc,
     ylim = c(50000,140000), xlim = c(60000,160000))

plot(ff2, c("FSC-W", "SSC-W"), smooth = FALSE,
     ylim = c(50000,140000), xlim = c(60000,160000), main = 'selected')

xlim<-lim(ff2_, 'R640-670/14-A')
ylim<-lim(ff2_, 'UV355-450/50-A')
plot(ff2_, c('R640-670/14-A', 'UV355-450/50-A'), smooth = FALSE, main = 'discarted', ylim = ylim, xlim = xlim)
xlim<-lim(ff2__, 'R640-670/14-A')
ylim<_lim(ff2__, 'UV355-450/50-A')
plot(ff2__, c('R640-670/14-A', 'UV355-450/50-A'), smooth = FALSE, main = 'discarted', ylim = ylim, xlim = xlim)
xlim<-lim(ff2, 'R640-670/14-A')
ylim<-lim(ff2, 'UV355-450/50-A')
plot(ff2, c('R640-670/14-A', 'UV355-450/50-A'), smooth = FALSE, main = 'chosen', ylim = ylim, xlim = xlim)
abline(h=DNA.threshold)

xlim<-lim(ff2_, 'FSC-A')
ylim<-lim(ff2_, 'R640-670/14-A')
plot(ff2_, c('FSC-A', 'R640-670/14-A'), smooth = FALSE, main = 'discarted', ylim = ylim, xlim = xlim)
xlim<-lim(ff2__, 'FSC-A')
ylim<-lim(ff2__, 'R640-670/14-A')
plot(ff2__, c('FSC-A', 'R640-670/14-A'), smooth = FALSE, main = 'discarted', ylim = ylim, xlim = xlim)
xlim<-lim(ff2, 'FSC-A')
ylim<-lim(ff2, 'R640-670/14-A')
plot(ff2, c('FSC-A', 'R640-670/14-A'), smooth = FALSE, main = 'chosen', ylim = ylim, xlim = xlim)

xlim<-lim(ff2_, 'FSC-A')
ylim<-lim(ff2_, 'UV355-450/50-A')
plot(ff2_, c('FSC-A', 'UV355-450/50-A'), smooth = FALSE, main = 'discarted', ylim = ylim, xlim = xlim)
xlim<-lim(ff2__, 'FSC-A')
ylim<-lim(ff2__, 'UV355-450/50-A')
plot(ff2__, c('FSC-A', 'UV355-450/50-A'), smooth = FALSE, main = 'discarted', ylim = ylim, xlim = xlim)
xlim<-lim(ff2, 'FSC-A')
ylim<-lim(ff2, 'UV355-450/50-A')
plot(ff2, c('FSC-A', 'UV355-450/50-A'), smooth = FALSE, main = 'chosen', ylim = ylim, xlim = xlim)
abline(h=DNA.threshold)

dev.off()


## 3rd step: remove reads with no DNA UV355-450/50-A
rectGate<-rectangleGate(filterId="DNA.threshold",
  "UV355-450/50-A" = c(DNA.threshold, max(exprs(ff2)[,'UV355-450/50-A'])))
ff3<-Subset(ff2, rectGate)


## 4th step: export
write.csv(ff3.data.frame,
  file=file.path(dir.name, sub.dir, paste(file.base.name, "_k_3.csv", sep = ""))
  )

write.table(ff3.data.frame$"R640-670/14-A",
  file=file.path(dir.name, sub.dir, paste(file.base.name, "_k_3_mRNA.csv", sep = "")),
  row.names = FALSE, col.names = FALSE)

writeLines(length(ff3.data.frame$"R640-670/14-A"))