### Introduction

### Theory, Examples & Exercises

en:history:2015-04-24-anadatr

- anadatr24042015.R
## 1. Agglomerative cluster analysis using Vltava dataset ----

library (vegan)

# Species composition data (samples in rows, species in columns).
vltava.spe <- read.delim ('https://raw.githubusercontent.com/zdealveindy/anadat-r/master/data/vltava-spe.txt', row.names = 1)
vltava.spe.t <- log1p (vltava.spe)

# First, calculate the distance matrix (Bray-Curtis dissimilarities between
# all pairs of samples).
dis <- vegdist (vltava.spe.t, method = 'bray')

# Then, apply the hclust function with different clustering algorithms.
cluster.single <- hclust (dis, method = 'single')
cluster.complete <- hclust (dis, method = 'complete')
cluster.average <- hclust (dis, method = 'average')

# Draw the three results together into one diagram.
# dev.new () opens a new graphical device on any platform (the original
# windows () call exists only on Windows); you may not need it at all.
dev.new ()
par (mfrow = c(1, 3))  # splits the plotting window into one row with three columns
plot (cluster.single)
plot (cluster.complete)
plot (cluster.average)

# The following draws only one result (average-linkage clustering algorithm)
# with rectangles around groups of samples.
dev.new ()
plot (cluster.average)
rect.hclust (tree = cluster.average, k = 4)
rect.hclust (tree = cluster.average, k = 6, border = 'navy')

# Function cutree defines which sample belongs to which group.
result.cluster <- cutree (cluster.average, k = 4)

# Finally, project the result into an ordination diagram (NMDS) using the
# same distance (Bray-Curtis).
NMDS <- metaMDS (comm = dis)
ordiplot (NMDS, type = 'n')
points (NMDS, col = result.cluster, pch = result.cluster)


## 2. TWINSPAN ----

# This function is available in R only in the form of an experimental library,
# which needs to be installed from GitHub. Install only what is missing, so
# that re-running the script does not reinstall the packages every time.
if (!requireNamespace ('twinspanR', quietly = TRUE)) {
  if (!requireNamespace ('devtools', quietly = TRUE)) {
    install.packages ('devtools')
  }
  devtools::install_github ('zdealveindy/twinspanR')
}
library (twinspanR)

# Let's use the example Danube data.
data (danube)

# Calculate standard TWINSPAN with only two levels of cutting.
tw <- twinspan (danube$spe, levels = 2)
print (tw, what = 'table')  # this prints the resulting two-way sorted table
result.twinspan <- cut (tw)  # this generates the vector with assignment of samples into groups

# Finally, we may visualize this result using DCA, and compare it with the
# original subjective classification made by Ellenberg.
# First calculate DCA.
DCA <- decorana (danube$spe)

# Numeric codes of the original vegetation types, used for colours and symbols.
# as.factor () keeps this working even if veg.type is stored as character.
veg.type <- as.numeric (as.factor (danube$env$veg.type))

# Then draw two ordination diagrams, each with a different meaning of colours.
dev.new ()
par (mfrow = c(1, 2))
ordiplot (DCA, display = 'sites', type = 'n', main = 'Result of TWINSPAN')
points (DCA, col = result.twinspan, pch = result.twinspan)  # this shows results of TWINSPAN
ordiplot (DCA, display = 'sites', type = 'n', main = 'Ellenberg (1956)')
points (DCA, col = veg.type, pch = veg.type)  # this shows the original assignment of plots to vegetation types


## 3. Example - clustering European countries according to their characteristics ----
## (see Example 2 in Hierarchical agglomerative)

# Copy the comma-separated table (with header row) to the clipboard first.
# NOTE: reading from "clipboard" works on Windows; on other systems the
# clipboard connection may differ (e.g. pipe ("pbpaste") on macOS) - adjust as needed.
countries <- read.table ("clipboard", sep = ',', row.names = 1, header = TRUE)
countries.s <- scale (countries)  # standardize variables to zero mean and unit variance
distance <- dist (countries.s)  # Euclidean distances between countries

# 'ward.D' is the current name of the algorithm formerly called 'ward'
# (renamed in R 3.1.0), so the result is unchanged. Consider 'ward.D2' if
# Ward's (1963) criterion on unsquared Euclidean distances is intended.
clust.countries <- hclust (distance, method = 'ward.D')
plot (clust.countries)
rect.hclust (clust.countries, k = 4)

en/history/2015-04-24-anadatr.txt · Last modified: 2018/03/30 23:04 (external edit)