Created
October 1, 2016 18:06
-
-
Save SimonGoring/24fb1228204f768f3f0020f37060db18 to your computer and use it in GitHub Desktop.
Matching Neotoma taxa
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Coding to obtain a long list of fully resolved taxonomies for taxa in the Netoma Paleoecological Database. | |
# This work is to support (generally) export to other databases, allowing researchers to match at multiple | |
# levels, and across resources, providing a clear and unambiguous reference for taxonomies within Neotoma. | |
# Coded by: Simon Goring | |
# Neotoma Paleoecological Database: http://neotomadb.org | |
library(neotoma) | |
library(taxize) | |
neotoma_taxa <- neotoma::get_table("taxa") | |
neo_runs <- seq(1, nrow(neotoma_taxa), by = 10) | |
get_class <- function(x) { | |
# This just clears up some of the "uncertainty" fields in the taxon names. | |
# This doesn't catch things like "sensu stricto" and others. | |
taxa <- gsub("(\\?|\\-type|cf\\.\\s|aff\\.|\\sundiff\\.)", "", x, perl=TRUE) | |
taxize::classification(taxa, db="itis", rows = 1) | |
} | |
all_taxa_list <- list() | |
for (i in (i-1):nrow(neotoma_taxa)) { | |
cat(paste0(neotoma_taxa$TaxaGroupID[i], ": ", neotoma_taxa$TaxonName[i], | |
' - ', round(i/nrow(neotoma_taxa) * 100, 2), '% complete . . . ')) | |
all_taxa_list[[i]] <- list(neotoma_taxa[i,], | |
suppressMessages(get_class(neotoma_taxa$TaxonName[i]))) | |
if (!is.na(all_taxa_list[[i]][[2]])) { | |
cat('Success!\n') | |
} else { | |
cat('Ugh. :(\n') | |
} | |
# Output throughout the run, it's slower, but then we don't run into issues later. | |
saveRDS(all_taxa_list, file = "../TaxonAlignment/all_taxa.RDS") | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment