Last active
January 27, 2016 17:59
-
-
Save ejain/24197316610c70f66a60 to your computer and use it in GitHub Desktop.
Converts Second Genome data files to a uBiome taxonomy file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
dnOTU_15 | AV15-1068 | 16733 | |
---|---|---|---|
dnOTU_16 | AV15-1068 | 20403 | |
dnOTU_17 | AV15-1068 | 32939 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"use strict"; | |
var csv = require("ya-csv"); | |
// Pipeline: load the id -> taxonomy table, aggregate the sample counts read
// from stdin per taxon, convert the totals to the uBiome layout, print as JSON.
loadTaxa("stooldb_taxonomy_v1.0.tsv")
    .then(taxa => processSamples(process.openStdin(), taxa))
    .then(toUBiome)
    .then(printJson)
    .catch(err => {
        // Without a terminal handler a failure in any stage would only show
        // up as an unhandled rejection; report it and signal failure to the
        // calling shell instead.
        console.error(err);
        process.exitCode = 1;
    });
/**
 * Loads a tab-separated taxonomy table into a lookup object.
 *
 * The first line of the file supplies the column names. The column with an
 * empty name holds the row id; every other column becomes a property of the
 * taxon, keyed by the lower-cased column name. A leading "<char>__" prefix
 * (e.g. "k__") is stripped from each value, and columns whose value is
 * "unclassified" after stripping are left out.
 *
 * @param {string} path - path of the TSV file to read
 * @returns {Promise<Object>} resolves with { id: { rank: name, ... }, ... }
 *     once the reader signals "end"; rejects if the reader emits "error"
 */
function loadTaxa(path) {
    return new Promise((resolve, reject) => {
        const taxa = {};
        const reader = csv.createCsvFileReader(path, { columnsFromHeader : true, "separator" : "\t" });
        reader.on("data", (row) => {
            const id = row[""];
            const taxon = {};
            Object.keys(row).forEach((field) => {
                // The empty column name is the id column, not a rank.
                if (!field) {
                    return;
                }
                const value = row[field].replace(/^.__/, "");
                if (value !== "unclassified") {
                    taxon[field.toLowerCase()] = value;
                }
            });
            taxa[id] = taxon;
        });
        // Previously a read/parse failure left the promise pending forever.
        reader.on("error", reject);
        reader.on("end", () => {
            resolve(taxa);
        });
    });
}
/**
 * Reads tab-separated sample rows from `stream` and sums the counts per taxon.
 *
 * Column 0 of each row is the id looked up in `taxa`; column 2 is the count.
 * The count is added to one "<rank>:<name>" entry for every rank of the
 * matching taxon, so a row contributes to each level of its lineage. Rows
 * whose id has no entry in `taxa` contribute nothing.
 *
 * @param {Object} stream - readable stream passed to csv.createCsvStreamReader
 * @param {Object} taxa - lookup of id -> { rank: name, ... }
 * @returns {Promise<Map<string, number>>} resolves with the summed counts once
 *     the reader signals "end"; rejects if the reader emits "error"
 */
function processSamples(stream, taxa) {
    return new Promise((resolve, reject) => {
        const samples = new Map();
        // Read from the stream the caller supplied; the previous version
        // ignored the parameter and opened stdin a second time.
        const reader = csv.createCsvStreamReader(stream, { "separator" : "\t" });
        reader.on("data", (row) => {
            const taxon = taxa[row[0]];
            if (!taxon) {
                return;
            }
            const count = Number(row[2]);
            Object.keys(taxon).forEach((rank) => {
                const key = rank + ":" + taxon[rank];
                samples.set(key, (samples.get(key) || 0) + count);
            });
        });
        reader.on("error", reject);
        reader.on("end", () => {
            resolve(samples);
        });
    });
}
/**
 * Converts aggregated "<rank>:<name>" counts into the uBiome JSON layout.
 *
 * Each entry becomes { count, count_norm, tax_rank, tax_name }. count_norm is
 * the count scaled so that the largest count in `samples` maps to 1000000.
 *
 * @param {Map<string, number>} samples - counts keyed by "<rank>:<name>"
 * @returns {Promise<Object>} resolves with { ubiome_bacteriacounts: [...] },
 *     entries in the iteration order of `samples`
 */
function toUBiome(samples) {
    // The normalisation base is the largest aggregated count. Folded in a loop
    // rather than Math.max.apply(...) so very large maps cannot exceed the
    // engine's argument-count limit.
    let total = -Infinity;
    for (const value of samples.values()) {
        total = Math.max(total, value);
    }

    const counts = [];
    for (const [key, value] of samples) {
        // Split on the first colon only, so a name that itself contains ":"
        // is kept whole instead of being truncated.
        const cut = key.indexOf(":");
        const rank = cut === -1 ? key : key.slice(0, cut);
        const name = cut === -1 ? undefined : key.slice(cut + 1);
        counts.push({
            "count" : value,
            // A zero base would turn 0 / 0 into NaN (serialised as null).
            "count_norm" : total === 0 ? 0 : Math.round(1000000 * value / total),
            "tax_rank" : rank,
            "tax_name" : name
        });
    }
    return Promise.resolve({ "ubiome_bacteriacounts" : counts });
}
/**
 * Writes `value` to standard output as JSON indented by four spaces.
 *
 * @param {*} value - any JSON-serialisable value
 */
function printJson(value) {
    const serialized = JSON.stringify(value, null, 4);
    console.log(serialized);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Kingdom | Phylum | Class | Order | Family | Genus | Species | ||
---|---|---|---|---|---|---|---|---|
dnOTU_15 | k__Bacteria | p__Firmicutes | c__Clostridia | o__Clostridiales | f__Ruminococcaceae | g__unclassified | s__unclassified | |
dnOTU_16 | k__Bacteria | p__Firmicutes | c__Erysipelotrichi | o__Erysipelotrichales | f__Erysipelotrichaceae | g__unclassified | s__unclassified | |
dnOTU_17 | k__Bacteria | p__Firmicutes | c__Clostridia | o__Clostridiales | f__Lachnospiraceae | g__unclassified | s__unclassified |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment