Created
June 14, 2016 09:45
-
-
Save AugustT/aeddbe54b5f70efadf7087562460e94f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
install.packages('zoon') | |
library(zoon) | |
vignette('Building_a_module') | |
############# | |
## Simples ## | |
############# | |
# I want to share my data via a ZOON module | |
# I uploaded it to figshare to make it accessible | |
# Here is the URL | |
URL <- "https://ndownloader.figshare.com/files/2519918" | |
# Here is the data | |
out <- read.csv(URL) | |
# Let's have a look | |
View(out) | |
# We don't need the date column | |
out <- out[, c("longitude", "latitude")] | |
# Our data is missing some of the required columns | |
out$value <- 1 # all our data are presences (0s would mean absence) | |
out$type <- 'presence' | |
out$fold <- 1 # we don't add any folds | |
# Now let's have a look | |
View(out) | |
# We can now write this as a function | |
Tom_Lorem_ipsum_UK <- function(){ | |
# Get data | |
URL <- "https://ndownloader.figshare.com/files/2519918" | |
out <- read.csv(URL) | |
out <- out[, c("longitude", "latitude")] | |
# Add in the columns we dont have | |
out$value <- 1 # all our data are presences | |
out$type <- 'presence' | |
out$fold <- 1 # we wont add any folds | |
return(out) | |
} | |
# We can test it in a workflow to make sure it works as | |
# we would expect | |
library(zoon) | |
workl1 <- workflow(occurrence = Tom_Lorem_ipsum_UK, | |
covariate = UKBioclim, | |
process = OneHundredBackground, | |
model = LogisticRegression, | |
output = InteractiveMap) | |
# At the moment we have a function, to convert this into | |
# a module that we can share with others we use the | |
# BuildModule function | |
BuildModule(object = Tom_Lorem_ipsum_UK, | |
type = 'occurrence', | |
title = 'A dataset of Lorem ipsum occurrences', | |
description = paste0('The module retrieves a dataset of', | |
'Lorem ipsum records from figshare. This dataset contains', | |
'precence only data and was collected between 1990 and', | |
'2000 by members of to Lorem ipsum appreciation society'), | |
details = 'This dataset is fake, Lorem ipsum does not exist', | |
author = 'A.B. Ceidi', | |
email = '[email protected]', | |
dataType = 'presence-only') | |
rm(list = 'Tom_Lorem_ipsum_UK') | |
# This is how you would use a module that a colleague has sent you | |
LoadModule(module = 'Tom_Lorem_ipsum_UK.R') | |
work2 <- workflow(occurrence = Tom_Lorem_ipsum_UK, | |
covariate = UKAir, | |
process = Background(n = 50), | |
model = LogisticRegression, | |
output = PrintMap) | |
## SLIDE ## | |
###################### | |
## More complicated ## | |
###################### | |
# I want to build a module that creates a random raster | |
# of a user defined extent and resolution | |
# Note parameters and defaults | |
NaiveRandomRaster <- function(extent = c(-10, 10, 45, 65), res = 0.5, seed = NULL){ | |
if(length(extent) != 4 | !inherits(extent, 'numeric')) stop("extent must be a numeric vector of length 4") | |
if(extent[1] >= extent[2]) stop('in extent - min x must be smaller than max x') | |
if(extent[3] >= extent[4]) stop('in extent - min y must be smaller than max y') | |
nrow <- (extent[2] - extent[1])/res | |
ncol <- (extent[4] - extent[3])/res | |
if(!is.null(seed)) set.seed(seed) | |
rasMat <- matrix(data = runif(n = nrow * ncol, | |
min = 0, | |
max = 100), | |
nrow = nrow, | |
ncol = ncol) | |
ras <- raster::raster(x = rasMat, | |
xmn = extent[1], | |
xmx = extent[2], | |
ymn = extent[3], | |
ymx = extent[4], | |
crs = "+proj=longlat +datum=WGS84") | |
return(ras) | |
} | |
# This time when we build the module we need to define the | |
# parameters | |
BuildModule(NaiveRandomRaster, | |
type = 'covariate', | |
title = 'Naive Random Raster', | |
description = 'Creates a random raster layer within the extent given', | |
author = 'ZOON Developers', | |
email = '[email protected]', | |
paras = list(extent = 'A numeric vector of length 4 giving the coordinates of the rectangular region within which to create the raaster. order: xmin, xmax, ymin, ymax. By default the extent of the UK', | |
res = 'Numeric giving the resolution of the raster to produce. 1 is one cell per degree, 0.5 means one cell per half degree.', | |
seed = 'Used with set.seed to set a seed. Default NULL, no seed is used')) | |
rm(list = 'NaiveRandomRaster') | |
LoadModule('NaiveRandomRaster.R') | |
# You can see the metadata above is used in the help file | |
ModuleHelp('NaiveRandomRaster') | |
# Here it is in action | |
work3 <- workflow(occurrence = UKAnophelesPlumbeus, | |
covariate = list(NaiveRandomRaster(extent = c(-10, 2, 49, 58), | |
res = 1), | |
NaiveRandomRaster(extent = c(-10, 2, 49, 58), | |
res = 0.5), | |
NaiveRandomRaster(extent = c(-10, 2, 49, 58), | |
res = 0.1)), | |
process = Background(n = 50), | |
model = LogisticRegression, | |
output = PrintMap) | |
par(mfrow = c(1,3)) | |
for(i in Output(work3)) plot(i) | |
## SLIDE ## | |
################## | |
## Model module ## | |
################## | |
# Model modules havea default argument .df which is a dataframe | |
# This looks similar to the occurrence dataset but with the | |
# covariate data added as extra columns | |
# Here is an example | |
.df <- Process(work3)[[1]]$df | |
View(.df) | |
# Lets write our function | |
GamGam <- function(.df){ # We need to add in the default arguments | |
# Specify the packages we need using the function | |
# GetPackage | |
zoon::GetPackage("gam") | |
# Create a data.frame of covariate data | |
covs <- .df[colnames(.df) %in% attr(.df, 'covCols')] | |
# do a bit of copy-pasting to define smooth terms for each covariate | |
f <- sprintf('.df$value ~ s(%s)', | |
paste(colnames(covs), | |
collapse = ') + s(')) | |
# Run our gam model | |
m <- gam::gam(formula = formula(f), | |
data = covs, | |
family = binomial) | |
# Create a ZoonModel object to return. | |
# this includes our model, predict method | |
# and the packages we need. | |
ZoonModel(model = m, | |
code = { | |
# create empty vector of predictions | |
p <- rep(NA, nrow(newdata)) | |
# omit NAs in new data | |
newdata_clean <- na.omit(newdata) | |
# get their indices | |
na_idx <- attr(newdata_clean, 'na.action') | |
# if there are no NAs then the index should | |
# include all rows, else it should name the | |
# rows to ignore | |
if (is.null(na_idx)){ | |
idx <- 1:nrow(newdata) | |
} else { | |
idx <- -na_idx | |
} | |
# Use the predict function in gam to predict | |
# our new values | |
p[idx] <- gam::predict.gam(model, | |
newdata_clean, | |
type = 'response') | |
return (p) | |
}, | |
packages = 'gam') | |
} | |
############## | |
## Cite Me! ## | |
############## | |
ZoonCitation('NaiveRandomRaster') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment