 Analysis of community ecology data in R

David Zelený

Others

Author: David Zelený 2015-04-03 Revisiting homework, constrained ordination, Monte-Carlo permutation test and forward selection

#### Recalling the homework no. 2 - Exercise 2 from Environmental variables section

vltava.spe <- read.delim ('https://raw.githubusercontent.com/zdealveindy/anadat-r/master/data/vltava-spe.txt', row.names = 1)
library (vegan)

vltava.spe.hell <- decostand (log1p (vltava.spe), method = 'hell')

# this is the same as the following
#vltava.spe.log <- log1p (vltava.spe)
#vltava.spe.hell <- decostand (vltava.spe.log, method = 'hell')

pca <- rda (vltava.spe.hell)

# to see the structure of pca object
str (pca)

# eigenvalues of individual axes
eigen <- pca\$CA\$eig
barplot (eigen)

# variation explained by individual axes
variation <- pca\$CA\$eig/pca\$tot.chi*100
barplot (variation)

# ordination diagram with symbols and colors separated according to groups
ordiplot (pca, type = 'n', display = 'sites')
points (pca, col = vltava.env\$GROUP, pch = vltava.env\$GROUP)

# generated matrix of random variables
# this will set up always the same random number
set.seed (1234)
#hist (runif (1000))

# this generates matrix with 9 times 97 random values (9 variables x 97 samples)
rand.env <- matrix (rnorm (9*97), ncol = 9)
colnames (rand.env) <- paste ('rand', 1:9)    # plus change names of the variables

# fit the random variables to ordination axes
ef <- envfit (pca, rand.env)
ef

# draw ordination diagram with points with different colors + symbols
ordiplot (pca, type = 'n', display = 'sites')
points (pca, col = vltava.env\$GROUP, pch = vltava.env\$GROUP)
plot (ef)

# adjust the p values using Bonferroni's correction for multiple testing
#str (ef)
p.values <- ef\$vectors\$pvals
p.values.adj <- p.adjust (p.values, method = 'bonf')

######
# Direct ordination with wetland data
vasc <- read.delim ('https://raw.githubusercontent.com/zdealveindy/anadat-r/master/data/vasc_plants.txt', row.names = 1)
chem <- read.delim ('https://raw.githubusercontent.com/zdealveindy/anadat-r/master/data/chemistry.txt', row.names = 1)

# what kind of data we have
range (vasc) # values between 0 and 9, ordinal scale, not necessary to transform
decorana (vasc)  # the first DCA axis is rather short (~ 3 SD), but still better to use method for heterogeneous data
# we decided to use tbRDA with Hellinger transformation
vasc.hell <- decostand (vasc, 'hell')

# RDA with two explanatory variables, using formula (or matrix) interface
rda.vasc <- rda (vasc.hell ~ Ca + Mg, data = chem)  # formula interface
# rda (vasc.hell, chem[,c('Ca', 'Mg')])  # matrix interface

# draw ordination diagram
ordiplot (rda.vasc)

# select eigenvalues and draw barplot of eigenvalues and explained variation by individual axes
str (rda.vasc)
var.constr <- rda.vasc\$CCA\$eig/rda.vasc\$tot.chi
var.unconstr <- rda.vasc\$CA\$eig/rda.vasc\$tot.chi
#length (var.constr)
#length (var.unconstr)
barplot (var.constr)
barplot (var.unconstr)

# this is barplot of variation of both constrained and unconstrained ordination axes
barplot (c(var.constr, var.unconstr))

# testing the significance of direct ordination (full model with two expl. var.)
anova (rda.vasc)

#class (rda.vasc)
#anova (rda.vasc)

# other Monte Carlo permutation tests: for each axis, by terms and partial variation explained by each variable:
anova (rda.vasc, by = 'axis')
anova (rda.vasc, by = 'term')
anova (rda.vasc, by = 'margin')

# testing the significance of the global model
rda.vasc.all <- rda (vasc.hell ~ ., data = chem)
# global model MC permutation test
anova (rda.vasc.all)
anova (rda.vasc.all, by = 'axis', parallel = 8)  # this takes long, parallel process of calculation

### Forward selection using the same data
rda.vasc.0 <- rda (vasc.hell ~ 1, data = chem)  # define the null model (no explanatory variables, but formula interface)
rda.vasc.all <- rda (vasc.hell ~ ., data = chem)   # define global model (all variables, formula interface)

# use function ordistep to proceed forward selection
rda.vasc.fs <- ordistep (object = rda.vasc.0, scope = formula (rda.vasc.all), direction = 'forward')

ordiplot (rda.vasc.fs)
anova (rda.vasc.fs)
anova (rda.vasc.fs, by = 'axis')

# forward selection on the same species data, but with randomly generated explanatory variables
rand.env <- as.data.frame (matrix (rnorm (30*70), ncol = 30))

rda.vasc.rand.0 <- rda (vasc.hell ~ 1, data = rand.env)
rda.vasc.rand.all <- rda (vasc.hell ~ ., data = rand.env)

# from 30 randomly generated explanatory variables in average ~ 2 of them are selected as significant:
rda.vasc.rand.fs <- ordistep (object = rda.vasc.rand.0, scope = formula (rda.vasc.rand.all), direction = 'forward')

# run the global test
anova (rda.vasc.rand.all)  # but the global test is not significant, so actually the forward selection shoulnd not have been done 