SimonGoring/match_neotoma_taxa.R

## match_neotoma_taxa.R
#  Coding to obtain a long list of fully resolved taxonomies for taxa in the Netoma Paleoecological Database.
#  This work is to support (generally) export to other databases, allowing researchers to match at multiple
#  levels, and across resources, providing a clear and unambiguous reference for taxonomies within Neotoma.
#  Coded by: Simon Goring
#  Neotoma Paleoecological Database: http://neotomadb.org

library(neotoma)
library(taxize)

neotoma_taxa <- neotoma::get_table("taxa")

neo_runs <- seq(1, nrow(neotoma_taxa), by = 10)

get_class <- function(x) {

  # This just clears up some of the "uncertainty" fields in the taxon names.
  # This doesn't catch things like "sensu stricto" and others.
  taxa <- gsub("(\\?|\\-type|cf\\.\\s|aff\\.|\\sundiff\\.)", "", x, perl=TRUE)

  taxize::classification(taxa, db="itis", rows = 1)

}

all_taxa_list <- list()

for (i in (i-1):nrow(neotoma_taxa)) {

  cat(paste0(neotoma_taxa$TaxaGroupID[i], ": ", neotoma_taxa$TaxonName[i],
             ' - ', round(i/nrow(neotoma_taxa) * 100, 2), '% complete . . . '))

  all_taxa_list[[i]] <- list(neotoma_taxa[i,],
                             suppressMessages(get_class(neotoma_taxa$TaxonName[i])))

  if (!is.na(all_taxa_list[[i]][[2]])) {
    cat('Success!\n')
  } else {
    cat('Ugh. :(\n')
  }

  # Output throughout the run, it's slower, but then we don't run into issues later.
  saveRDS(all_taxa_list, file = "../TaxonAlignment/all_taxa.RDS")
}
	# Coding to obtain a long list of fully resolved taxonomies for taxa in the Netoma Paleoecological Database.
	# This work is to support (generally) export to other databases, allowing researchers to match at multiple
	# levels, and across resources, providing a clear and unambiguous reference for taxonomies within Neotoma.
	# Coded by: Simon Goring
	# Neotoma Paleoecological Database: http://neotomadb.org

	library(neotoma)
	library(taxize)

	neotoma_taxa <- neotoma::get_table("taxa")

	neo_runs <- seq(1, nrow(neotoma_taxa), by = 10)

	get_class <- function(x) {

	# This just clears up some of the "uncertainty" fields in the taxon names.
	# This doesn't catch things like "sensu stricto" and others.
	taxa <- gsub("(\\?\|\\-type\|cf\\.\\s\|aff\\.\|\\sundiff\\.)", "", x, perl=TRUE)

	taxize::classification(taxa, db="itis", rows = 1)

	}

	all_taxa_list <- list()

	for (i in (i-1):nrow(neotoma_taxa)) {

	cat(paste0(neotoma_taxa$TaxaGroupID[i], ": ", neotoma_taxa$TaxonName[i],
	' - ', round(i/nrow(neotoma_taxa) * 100, 2), '% complete . . . '))

	all_taxa_list[[i]] <- list(neotoma_taxa[i,],
	suppressMessages(get_class(neotoma_taxa$TaxonName[i])))

	if (!is.na(all_taxa_list[[i]][[2]])) {
	cat('Success!\n')
	} else {
	cat('Ugh. :(\n')
	}

	# Output throughout the run, it's slower, but then we don't run into issues later.
	saveRDS(all_taxa_list, file = "../TaxonAlignment/all_taxa.RDS")
	}