Skip to content

Instantly share code, notes, and snippets.

@zbjornson
Forked from mlinderm/output.R
Created October 5, 2012 03:37
Show Gist options
  • Save zbjornson/3837927 to your computer and use it in GitHub Desktop.
Save zbjornson/3837927 to your computer and use it in GitHub Desktop.
Script for up-sampling additional FCS files - v1.1
#!/usr/bin/env Rscript
# Run this script in your SPADE analysis directory. You will need
# to copy some lines from your runSPADE.R file.
# Set the panel files as the vector of all the files, with the "parent" file as the reference_file.
PANELS <- list(
list(
panel_files=c("20071001-u937.002.fcs"),
median_cols=NULL,
reference_files=NULL,
fold_cols=NULL
)
)
# Copy these lines from your runSPADE.R file
CLUSTERING_MARKERS <- c("FL1-A","FL1-H","FL2-H","FL3-H","FL4-H")
TRANSFORMS=flowCore::arcsinhTransform(a=0, b=0.006667)
# You shouldn't need to change anything below here
OUTPUT_DIR <- "output"
LAYOUT_FILE <- "layout.table"
GRAPH_FILE <- "mst.gml"
NODE_SIZE_SCALE_FACTOR <- 1.2
library(spade)
out_dir <- paste(OUTPUT_DIR,.Platform$file, sep="")
cells_file <- paste(out_dir, "clusters.fcs", sep="")
layout_table <- read.table(paste(out_dir,LAYOUT_FILE,sep=""))
graph <- read.graph(paste(out_dir,GRAPH_FILE,sep=""), format="gml")
panels <- PANELS
cluster_cols <- CLUSTERING_MARKERS
pctile_color=c(0.02,0.98)
comp=TRUE
transforms=TRANSFORMS
# Validate structure of panels
if (!is.null(panels)) {
if (!is.list(panels))
stop("Invalid panels argument, see function documentation")
lapply(panels, function(x) {
if (!is.list(x) || !all(c("panel_files", "median_cols") %in% names(x)))
stop("Invalid panel found, see function documentation for proper panel structure")
if (!is.null(x$reference_files) && !all(x$reference_files %in% x$panel_files))
stop("Panel reference files must be a subset of panel files")
});
}
files <- unique(unlist(lapply(PANELS, function(x) { x$panel_files })))
sampled_files <- c()
for (f in files) {
message("Upsampling file: ",f)
f_sampled <- paste(out_dir, f,".density.fcs.cluster.fcs",sep="")
SPADE.addClusterToFCS(f, f_sampled, cells_file, cols=cluster_cols, transforms=transforms,comp=comp)
sampled_files <- c(sampled_files, f_sampled)
}
# Track all attributes to compute global limits
attr_values <- list();
for (p in panels) {
reference_medians <- NULL
if (!is.null(p$reference_files)) {
# Note assuming the ordering of files and sampled_files are identical...
reference_files <- sapply(as.vector(p$reference_files), function(f) { sampled_files[match(f,basename(files))[1]] })
reference_medians <- SPADE.markerMedians(reference_files, igraph:::vcount(graph), cols=p$fold_cols, transforms=transforms, cluster_cols=cluster_cols, comp=comp)
}
for (f in as.vector(p$panel_files)) {
# Note assuming the ordering of files and sampled_files are identical...
f <- sampled_files[match(f, basename(files))[1]]
# Compute the median marker intensities in each node, including the overall cell frequency per node
message("Computing medians for file: ",f)
anno <- SPADE.markerMedians(f, igraph:::vcount(graph), cols=p$median_cols, transforms=transforms, cluster_cols=cluster_cols, comp=comp)
if (!is.null(reference_medians)) { # If a reference file is specified
# Compute the fold change compared to reference medians
message("Computing fold change for file: ",f)
fold_anno <- SPADE.markerMedians(f, igraph:::vcount(graph), cols=p$fold_cols, transforms=transforms, cluster_cols=cluster_cols, comp=comp)
fold <- fold_anno$medians - reference_medians$medians
pratio <- log10(fold_anno$percenttotal / reference_medians$percenttotal);
colnames(pratio) <- c("percenttotalratiolog")
is.na(pratio) <- fold_anno$count == 0 | reference_medians$count == 0
cratio <- (fold_anno$count / reference_medians$count);
colnames(cratio) <- c("countratio")
is.na(cratio) <- reference_medians$count == 0
# Merge the fold-change columns with the count, frequency, and median columns
anno <- c(anno, list(percenttotalratiolog = pratio, countratio = cratio, fold = fold))
}
SPADE.write.graph(SPADE.annotateGraph(graph, layout=layout_table, anno=anno), paste(f,".medians.gml",sep=""), format="gml")
# We save an R native version of the annotations to simpify plotting, and other downstream operations
anno <- SPADE.flattenAnnotations(anno)
for (c in colnames(anno)) { attr_values[[c]] <- c(attr_values[[c]], anno[,c]) }
save(anno, file=paste(f,"anno.Rsave",sep="."))
}
}
# Compute the global limits (cleaning up attribute names to match those in GML files)
attr_ranges <- t(sapply(attr_values, function(x) { quantile(x, probs=c(0.00, pctile_color, 1.00), na.rm=TRUE) }))
rownames(attr_ranges) <- sapply(rownames(attr_ranges), function(x) { gsub("[^A-Za-z0-9_]","",x) })
write.table(attr_ranges, paste(out_dir,"global_boundaries.table",sep=""), col.names=FALSE)
SPADE.plot.trees(graph,out_dir,file_pattern="*fcs*Rsave",layout=as.matrix(layout_table),out_dir=paste(out_dir,"pdf",sep=.Platform$file),size_scale_factor=NODE_SIZE_SCALE_FACTOR)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment