Introduction
Theory, Examples & Exercises
# Histogram of distribution - symmetrical, right-skewed, left-skewed ----
# Draws three histograms side by side into 'types_of_distribution.png'.
# Requires the vegan package to be installed (it is called via `vegan::`,
# so it does not need to be attached).
png ('types_of_distribution.png', width = 8, height = 8/3, units = 'in', res = 300)
par (mfrow = c(1, 3))

# Each variable is passed through vegan's 'range' standardisation and then
# multiplied by 100 (presumably putting all three on a common 0-100 scale).
# `normal` is a random sample, so the figure differs slightly between runs.
normal <- vegan::decostand (rnorm (1000), 'range') * 100
# Cubing the rescaled sample produces the variable plotted as right-skewed.
right <- vegan::decostand (normal^3, 'range') * 100
# log (1 + x) of the rescaled sample produces the variable plotted as left-skewed.
left <- vegan::decostand (log1p (normal), 'range') * 100

# One histogram per panel; only the data, title and fill colour differ.
panels <- list (
  list (values = normal, title = 'symmetrical', fill = 'yellow'),
  list (values = right, title = 'right-skewed', fill = 'red'),
  list (values = left, title = 'left-skewed', fill = 'lightblue')
)
for (panel in panels) {
  hist (panel$values,
        main = list (panel$title, cex = 2),
        xlab = list ('Variable X', cex = 1.5),
        ylab = list ('Frequency', cex = 1.5),
        col = panel$fill)
}

dev.off ()
# Raw vs log-transformed population data ----
# Draws population against area twice (raw axes and log-log axes) into
# 'raw-vs-log-population-area.png'.
# Data from here: https://commons.wikimedia.org/wiki/File:Population_vs_area.svg

# Population data from Wikipedia
pop <- c(
  33681000, 1134000, 61811, 5110000, 6600, 3350400, 13388910, 52000,
  79221000, 107000, 1185000, 40135000, 9863000, 19522000, 15757000, 23837000,
  25721000, 506000, 2053355, 14027000, 3767000, 2985000, 6316000, 45828172,
  3476608, 16517532, 6349000, 2719000, 2845000, 43739000, 163000, 57000,
  1475000, 1500, 33000, 4312067, 23580000, 89300, 166649000, 3170000,
  179000, 546200, 5743000, 4599000, 21075000, 1165040000, 8629900, 2067000,
  109000, 39802000, 48333000, 6952000, 178000, 71517100, 28150000, 849000,
  87000, 3291000, 523000, 172000, 30800, 2671000, 65073482, 9998000,
  5413548, 9133000, 29165000, 6320000, 10000, 84000, 4821137, 15263000,
  20000, 8935000, 111000, 513000, 11204000, 624000, 3683000, 6619000,
  3230100, 10090000, 46143700, 791000, 104000, 5336330, 6420000, 230330000,
  4422000, 306664000, 9276509, 88069000, 23000, 13010000, 800, 7602100,
  1288000, 21496700, 18498000, 10631800, 48697000, 1400, 801600, 35593,
  1409000, 28200000, 12534000, 22894000, 32710000, 127580000, 15290000, 191293104,
  3000, 48841, 10069000, 3454000, 762000, 4579000, 491700, 67000,
  86000, 31000, 21906000, 20000, 154729000, 13995000, 162221000, 400000,
  21809733, 70495782, 34895000, 6163000, 10000, 50, 10474600, 62000,
  16922000, 3982000, 10741000, 63389730, 10033000, 322100, 7008900, 5696000,
  4382100, 5515287, 92226600, 3572700, 31491578, 2171000, 1611000, 98000,
  7719100, 104000, 80000, 11206000, 1340341, 3361000, 676000, 4224000,
  27488000, 10327800, 864000, 65000, 88000, 67000, 44952732, 8303000,
  33000, 256000, 19625000, 60090400, 697000, 42272000, 29331000, 412600,
  309000, 520000, 56000, 15000, 28359313, 198000, 7411500, 3761646,
  12935000, 8356707, 6732000, 12523000, 82062200, 240000, 15571506, 38130300,
  5073000, 5482000, 30747000, 5900, 23906000, 1339000, 319326, 2257300,
  10029900, 9671900, 7466000, 109610000, 76762112, 2048900, 4839400, 9850000,
  676000, 61612300, 11262500, 20238000, 4432000, 1950000, 1331115200, 141812991
)

# Area data from Wikipedia (plotted below in km^2).
area <- c(
  9984670, 14874, 78, 488100, 122, 65300, 181035, 261,
  1104300, 180, 17364, 2780400, 1098581, 475442, 274222, 238533,
  2149690, 4033, 20273, 108889, 51197, 17818, 89342, 505992,
  111369, 41543, 406752, 10991, 309500, 945087, 964, 2166086,
  267668, 260, 1, 270467, 527968, 116, 881912, 28748,
  2831, 29, 120340, 83600, 322463, 3287263, 86600, 30355,
  389, 580367, 99678, 143100, 549, 783562, 652090, 18274,
  464, 1025520, 28896, 539, 61, 1564100, 632760, 26338,
  49035, 637657, 1285216, 236800, 21, 455, 323802, 118484,
  236, 112622, 702, 266000, 109886, 13812, 342000, 56785,
  29743, 48310, 603500, 741, 747, 338145, 1759540, 1904569,
  622984, 9629091, 450295, 331689, 151, 1240192, 0, 110879,
  2040, 238391, 1246700, 92090, 1221037, 12, 9251, 160,
  11586, 329847, 196722, 801590, 241038, 377915, 1267000, 8514877,
  12173, 1393, 245857, 75517, 214969, 51100, 2586, 199,
  468, 6, 185180, 459, 923768, 283561, 143998, 5765,
  7692024, 1648195, 2381741, 21041, 26, 5, 78867, 181,
  756102, 8870, 30528, 513120, 27750, 22966, 1108, 71740,
  69700, 43094, 300000, 33851, 446550, 824292, 36125, 726,
  41284, 344, 572, 1284000, 45228, 176215, 28051, 10400,
  447400, 163610, 23200, 54, 442, 751, 1138914, 27834,
  948, 430, 587041, 301318, 38394, 2505813, 147181, 316,
  298, 163820, 264, 91, 912050, 800, 22072, 6020,
  752618, 83871, 462840, 390757, 357022, 12189, 2724900, 312685,
  117600, 199951, 438317, 102, 120538, 5130, 103000, 64589,
  93028, 208000, 112492, 1964375, 1002000, 25713, 699, 88361,
  2235, 242900, 131957, 65610, 56594, 582000, 9639688, 17098242
)

# The two vectors are paired element by element; fail early if an edit to
# either one breaks that pairing.
stopifnot (length (pop) == length (area))

# `area` contains a 0, which has no position on a logarithmic axis. plot ()
# would drop such a point itself, but only with a warning; selecting the
# strictly positive pairs up front draws the same panel without the warning.
loggable <- pop > 0 & area > 0

png ('raw-vs-log-population-area.png', width = 8, height = 4, units = 'in', res = 300, pointsize = 10)
par (mfrow = c(1, 2))

# Left panel: both axes on their original scale.
plot (pop ~ area,
      main = list ('Raw data', cex = 2),
      xlab = list (expression (Area~(km^2)), cex = 1.5),
      ylab = list ('Population', cex = 1.5),
      pch = 16)

# Right panel: same data with both axes logarithmic.
plot (area[loggable], pop[loggable],
      main = list ('Log-transformed data', cex = 2),
      xlab = list (expression (Area~(km^2)), cex = 1.5),
      ylab = list ('Population', cex = 1.5),
      log = 'xy',
      pch = 16)

dev.off ()
# Digitize the pig outline ----
# The commented-out code below is the one-off interactive session that traced
# the outline with locator () and saved it as 'normal.pig.txt'; it is kept for
# reference only.
# library (pixmap)
# p <- read.pnm ('c:\\Users\\Zeleny\\Dropbox\\uceni\\NumEcol\\figures\\pig for transformation.ppm')
# windows ()
# plot.new ()
# plot.window (xlim = c(0, 100), ylim = c(0, 67))
# axis (1)
# axis (2)
# addlogo (p, c(0,100), c(0, 67))
# pig <- locator ()
# pig.df <- as.data.frame (pig)
# plot (pig.df)
# plot (pig.df, type = 'l', lwd = 10, log = 'xy', ann = T)
# #save (pig.df, file = 'c:\\Users\\Zeleny\\Dropbox\\uceni\\NumEcol\\figures\\pix.df.r')
# load (file = 'c:\\Users\\Zeleny\\Dropbox\\uceni\\NumEcol\\figures\\pix.df.r')
# pig.df <- pig.df/20
# write.table (pig.df, 'normal.pig.txt', sep = '\t', row.names = F)

# Pig under transformations ----
# Reads the outline coordinates (columns `x` and `y`), adds transformed copies
# of both, and draws every x-transformation against every y-transformation in
# a 7 x 7 grid saved as 'trans.pig.png'. Needs network access for the input.
pig.df <- read.delim ('https://raw.githubusercontent.com/zdealveindy/anadat-r/master/data/normal.pig.txt')

# All 49 (x column, y column) name pairs; the x name varies fastest, so one
# grid row shares a y-transformation and one grid column an x-transformation.
exp.pics <- as.matrix (expand.grid (
  a = c('log (x)', 'x^(1/3)', 'sqrt (x)', 'x', 'x^2', 'x^3', 'e^x'),
  b = c('log (y)', 'y^(1/3)', 'sqrt (y)', 'y', 'y^2', 'y^3', 'e^y')
))

# Transformed coordinates, stored under the column names used in exp.pics.
pig.df$'log (x)' <- log (pig.df$x)
pig.df$'x^(1/3)' <- pig.df$x^(1/3)
pig.df$'sqrt (x)' <- sqrt (pig.df$x)
pig.df$'x^2' <- pig.df$x^2
pig.df$'x^3' <- pig.df$x^3
pig.df$'e^x' <- exp (pig.df$x)
pig.df$'log (y)' <- log (pig.df$y)
pig.df$'y^(1/3)' <- pig.df$y^(1/3)
pig.df$'sqrt (y)' <- sqrt (pig.df$y)
pig.df$'y^2' <- pig.df$y^2
pig.df$'e^y' <- exp (pig.df$y)
pig.df$'y^3' <- pig.df$y^3

# Export the outline with exponentiated x and untransformed y as a separate
# tab-delimited data set.
right_skewed_pig <- pig.df[, c('e^x', 'y')]
names (right_skewed_pig) <- c('x', 'y')
write.table (right_skewed_pig, 'right-skewed.pig.txt', row.names = FALSE, sep = '\t')

# Layout matrix: figures 1-49 fill the 7 x 7 grid row by row, 50-56 form the
# label strip above it, 57-63 the label strip on its left; cell [1, 1] stays
# empty (0).
mat <- matrix (0, ncol = 8, nrow = 8)
mat [2:8, 2:8] <- matrix (1:49, ncol = 7, byrow = TRUE)
mat [1, 2:8] <- 50:56
mat [2:8, 1] <- 57:63

png ('trans.pig.png', width = 8, height = 5.44, res = 300, units = 'in', pointsize = 4)
par (mar = c(2, 2, 2, 2))
# Label strips are half as wide/high as the picture cells.
layout (mat, widths = c(2, rep (4, 7)), heights = c(2, rep (4, 7)))

for (i in seq_len (nrow (exp.pics))) {
  x_name <- exp.pics[i, 1]
  y_name <- exp.pics[i, 2]
  plot (x = pig.df[, x_name], y = pig.df[, y_name],
        axes = FALSE, ann = FALSE, type = 'l', lwd = 2)
  # Only the untransformed pig (centre of the grid) gets a frame and axes.
  if (x_name == 'x' && y_name == 'y') {
    box ()
    axis (1, cex.axis = 2, tick = FALSE)
    axis (2, cex.axis = 2, las = 1, tick = FALSE)
  }
}

# Strip labels, written as plotmath expressions (sqrt (x, 3) is a cube root).
# They run in the reverse order of the plotted transformations, so every
# column/row is annotated with the inverse of the transformation used to draw
# it (the e^x column is labelled log (x), and so on) - presumably the
# transformation that restores the original shape.
# NOTE(review): confirm that this inverse labelling is intended.
x_strip <- rev (c('log (x)', 'sqrt (x, 3)', 'sqrt (x)', 'x', 'x^2', 'x^3', 'e^x'))
y_strip <- rev (c('log (y)', 'sqrt (y, 3)', 'sqrt (y)', 'y', 'y^2', 'y^3', 'e^y'))

# Figures 50-56 (top strip) are filled first, then 57-63 (left strip).
for (strip_label in c(x_strip, y_strip)) {
  plot.new ()
  plot.window (xlim = c(0, 1), ylim = c(0, 1))
  text (0.5, 0.5, labels = parse (text = strip_label), cex = 4)
}

dev.off ()