User Tools

Site Tools


en:history:2015-04-03-anadatr

2015-04-03 Revisiting homework, constrained ordination, Monte-Carlo permutation test and forward selection

anadatr03042015.R
#### Recalling the homework no. 2 - Exercise 2 from Environmental variables section
 
vltava.spe <- read.delim ('https://raw.githubusercontent.com/zdealveindy/anadat-r/master/data/vltava-spe.txt', row.names = 1)
vltava.env <- read.delim ('https://raw.githubusercontent.com/zdealveindy/anadat-r/master/data/vltava-env.txt')
library (vegan)
 
vltava.spe.hell <- decostand (log1p (vltava.spe), method = 'hell')
 
# this is the same as the following
#vltava.spe.log <- log1p (vltava.spe)
#vltava.spe.hell <- decostand (vltava.spe.log, method = 'hell')
 
pca <- rda (vltava.spe.hell)
 
# to see the structure of pca object
str (pca)
 
# eigenvalues of individual axes
eigen <- pca$CA$eig
barplot (eigen)
 
# variation explained by individual axes
variation <- pca$CA$eig/pca$tot.chi*100
barplot (variation)
 
# ordination diagram with symbols and colors separated according to groups
ordiplot (pca, type = 'n', display = 'sites')
points (pca, col = vltava.env$GROUP, pch = vltava.env$GROUP)
 
# generated matrix of random variables
# this will set up always the same random number
set.seed (1234)
#hist (runif (1000))
 
# this generates matrix with 9 times 97 random values (9 variables x 97 samples)
rand.env <- matrix (rnorm (9*97), ncol = 9)
colnames (rand.env) <- paste ('rand', 1:9)    # plus change names of the variables
 
# fit the random variables to ordination axes
ef <- envfit (pca, rand.env)
ef
 
# draw ordination diagram with points with different colors + symbols
ordiplot (pca, type = 'n', display = 'sites')
points (pca, col = vltava.env$GROUP, pch = vltava.env$GROUP)
plot (ef)
 
# adjust the p values using Bonferroni's correction for multiple testing
#str (ef)
p.values <- ef$vectors$pvals
p.values.adj <- p.adjust (p.values, method = 'bonf')
 
ef.adj <- ef
ef.adj$vectors$pvals <- p.values.adj
 
 
######
# Direct ordination with wetland data
vasc <- read.delim ('https://raw.githubusercontent.com/zdealveindy/anadat-r/master/data/vasc_plants.txt', row.names = 1)
chem <- read.delim ('https://raw.githubusercontent.com/zdealveindy/anadat-r/master/data/chemistry.txt', row.names = 1)
 
# what kind of data we have
range (vasc) # values between 0 and 9, ordinal scale, not necessary to transform
decorana (vasc)  # the first DCA axis is rather short (~ 3 SD), but still better to use method for heterogeneous data                                                                
# we decided to use tbRDA with Hellinger transformation
vasc.hell <- decostand (vasc, 'hell')
 
# RDA with two explanatory variables, using formula (or matrix) interface
rda.vasc <- rda (vasc.hell ~ Ca + Mg, data = chem)  # formula interface
# rda (vasc.hell, chem[,c('Ca', 'Mg')])  # matrix interface
 
# draw ordination diagram
ordiplot (rda.vasc)
 
# select eigenvalues and draw barplot of eigenvalues and explained variation by individual axes
str (rda.vasc)
var.constr <- rda.vasc$CCA$eig/rda.vasc$tot.chi
var.unconstr <- rda.vasc$CA$eig/rda.vasc$tot.chi
#length (var.constr)
#length (var.unconstr)
barplot (var.constr)
barplot (var.unconstr)
 
# this is barplot of variation of both constrained and unconstrained ordination axes
barplot (c(var.constr, var.unconstr))
 
# testing the significance of direct ordination (full model with two expl. var.)
anova (rda.vasc)
 
#class (rda.vasc)
#anova (rda.vasc)
 
# other Monte Carlo permutation tests: for each axis, by terms and partial variation explained by each variable:
anova (rda.vasc, by = 'axis')
anova (rda.vasc, by = 'term')
anova (rda.vasc, by = 'margin')
 
# testing the significance of the global model
rda.vasc.all <- rda (vasc.hell ~ ., data = chem)
# global model MC permutation test
anova (rda.vasc.all)
anova (rda.vasc.all, by = 'axis', parallel = 8)  # this takes long, parallel process of calculation
 
### Forward selection using the same data
rda.vasc.0 <- rda (vasc.hell ~ 1, data = chem)  # define the null model (no explanatory variables, but formula interface)
rda.vasc.all <- rda (vasc.hell ~ ., data = chem)   # define global model (all variables, formula interface)
 
# use function ordistep to proceed forward selection
rda.vasc.fs <- ordistep (object = rda.vasc.0, scope = formula (rda.vasc.all), direction = 'forward') 
 
ordiplot (rda.vasc.fs)
anova (rda.vasc.fs)
anova (rda.vasc.fs, by = 'axis')
 
# forward selection on the same species data, but with randomly generated explanatory variables
rand.env <- as.data.frame (matrix (rnorm (30*70), ncol = 30))
 
rda.vasc.rand.0 <- rda (vasc.hell ~ 1, data = rand.env)
rda.vasc.rand.all <- rda (vasc.hell ~ ., data = rand.env)
 
# from 30 randomly generated explanatory variables in average ~ 2 of them are selected as significant:
rda.vasc.rand.fs <- ordistep (object = rda.vasc.rand.0, scope = formula (rda.vasc.rand.all), direction = 'forward')
 
# run the global test
anova (rda.vasc.rand.all)  # but the global test is not significant, so actually the forward selection shoulnd not have been done
en/history/2015-04-03-anadatr.txt · Last modified: 2018/03/30 23:04 (external edit)