David Zelený

# 2015-04-24 Cluster analysis

```
## 1. Agglomerative cluster analysis using Vltava dataset

# Load vegan and log-transform the species data with log1p, i.e. log (x + 1)
library(vegan)
vltava.spe.t <- log1p(vltava.spe)

# Distance matrix: Bray-Curtis dissimilarities between all pairs of samples
dis <- vegdist(vltava.spe.t, method = "bray")

# Run hclust on the same distance matrix with three different clustering algorithms
cluster.average <- hclust(dis, method = "average")
cluster.complete <- hclust(dis, method = "complete")
cluster.single <- hclust(dis, method = "single")

# Draw the three dendrograms side by side in one diagram.
# dev.new () opens a new graphical window on every platform, whereas windows ()
# exists only in R for Windows (you may not need a new window at all)
dev.new()
par(mfrow = c(1, 3))  # split the plotting window into one row with three columns
plot(cluster.single)
plot(cluster.complete)
plot(cluster.average)

# Draw only one result (average-linkage clustering algorithm) in a fresh window,
# with rectangles around groups of samples: first for 4 groups (default border
# color), then for 6 groups (navy border)
dev.new()
plot(cluster.average)
rect.hclust(tree = cluster.average, k = 4)
rect.hclust(tree = cluster.average, k = 6, border = "navy")

# cutree cuts the average-linkage tree into k = 4 groups and returns, for each
# sample, the group it belongs to
result.cluster <- cutree(cluster.average, k = 4)

# Project the grouping onto an ordination diagram (NMDS) calculated from the
# same Bray-Curtis distance matrix; both color and symbol encode the group
NMDS <- metaMDS(comm = dis)
ordiplot(NMDS, type = "n")
points(NMDS, pch = result.cluster, col = result.cluster)

## 2. TWINSPAN
# This function is available in R only in the form of an experimental library,
# which needs to be installed from GitHub. Each package is installed only when
# it is missing, so re-running the script does not reinstall anything and does
# not need network access once the packages are present.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
library(devtools)
if (!requireNamespace("twinspanR", quietly = TRUE)) {
  install_github("zdealveindy/twinspanR")
}
library(twinspanR)

# let's use the example data Danube
data(danube)

# calculate standard TWINSPAN with only two levels of cutting
tw <- twinspan(danube$spe, levels = 2)
print(tw, what = "table")  # this prints the resulting two-way sorted table
result.twinspan <- cut(tw)  # this generates the vector with assignment of samples into groups

# Finally, we may visualize this result using DCA, and compare it with the
# original subjective classification made by Ellenberg

# first calculate DCA
DCA <- decorana(danube$spe)

# then draw two ordination diagrams, each with a different meaning of colors
dev.new()  # portable way to open a new graphical window (windows () exists only on Windows)
par(mfrow = c(1, 2))  # one row with two columns, one panel per classification
ordiplot(DCA, display = "sites", type = "n", main = "Result of TWINSPAN")
points(DCA, col = result.twinspan, pch = result.twinspan)  # this shows results of TWINSPAN

ordiplot(DCA, display = "sites", type = "n", main = "Ellenberg (1956)")
# this shows the original assignment of plots to vegetation types
# NOTE(review): as.numeric () assumes veg.type is a factor - confirm
points(
  DCA,
  col = as.numeric(danube$env$veg.type),
  pch = as.numeric(danube$env$veg.type)
)

## 3. Example - clustering European countries according to their characteristics (see Example 2 in Hierarchical agglomerative)

countries <- read.table ("clipboard", sep = ',', row.names = 1,