Commit a23ecf8b authored by sonitaR's avatar sonitaR
Browse files

added final codes of Monday work

parent 6776306a
......@@ -236,8 +236,115 @@ env <- read.table("environmental_quant.txt", h = T, sep = "\t")
# Note that this file is different from the one you have worked with
# as wave exposure is quantitative rather than categorical
# turn the matrix into a numerical only dataframe
env1 <- data.frame(env[,-1], row.names=env[,1])
# (2 & 3) Compute the Euclidean distance matrix on standardised environmental data:
env.de<-dist(scale(env1))
# (4) Clustering analysis on the distance matrix
# AVERAGE LINKAGE
env.clust.av <- hclust(env.de, method = "average")
plot(env.clust.av)
# SIMPLE LINKAGE
env.clust.sim <- hclust(env.de, method = "single")
plot(env.clust.sim)
# COMPLETE LINKAGE
env.clust.com <- hclust(env.de, method = "complete")
plot(env.clust.com)
# WARD
env.clust.ward <- hclust(env.de, method = "ward.D2")
plot(env.clust.ward)
# Choosing the best option
# Load necessary package
require(cluster)
require(stats)
# (5) CHOOSING AND PLOTTING THE BEST DENDROGRAM
# Find the dendrogram that retains the closest relationship
# to the ditance matrix through Cophenetic correlation
# Compute the cophenetic correlation for each method
# Simple
simple.coph <- cophenetic(env.clust.sim)
cor(env.de, simple.coph)
# Complete
complete.coph <- cophenetic(env.clust.com)
cor(env.de, complete.coph)
# average
average.coph <- cophenetic(env.clust.av)
cor(env.de, average.coph)
# WARD
ward.coph <- cophenetic(env.clust.ward)
cor(env.de, ward.coph)
# Best choice in this case is also hierarchical clustering with
# average linkage method. take this forward
# Finding the optimal number of interpretable clusters in the best clustering:
# applying the Mantel test
grpdist<-function(X)
{
require(cluster)
gr<-as.data.frame(as.factor(X))
distgr<-daisy(gr,"gower")
distgr
}
kt <-data.frame(k=1:nrow(env1), r=0)
for(i in 2:(nrow(env1)-1)) {
gr <-cutree(env.clust.av, i)
distgr<-grpdist(gr)
mt<-cor(env.de, distgr, method="pearson")
kt[i,2] <-mt
}
kt
k.best<-which.max(kt$r)
plot(kt$k, kt$r, type="h", main="Mantel optimal number of clusters - average",
xlab="K(number of groups", ylab="pearsons correlation")
axis(1, k.best, paste ("optimum", k.best, sep="\n"), col="red",
font=2, col.axis="red")
points(k.best, max(kt$r), pch=16, col="red", cex=1.5)
# This analysis confirms that there are 3 clusters
# lets find out whether sites cluster according to their environmental
# characteristics in the same way they do based on their community structure:
# Silhouette PLot of the final partition
k<-3
# Silhouette plot
cutg <-cutree(env.clust.av, k=k)
sil<-silhouette(cutg,env.de)
silo<-sortSilhouette(sil)
rownames(silo)<-row.names(env1)[attr(silo,"iOrd")]
plot(silo, main="silohouette plot - Clustering Average",
cex.names=0.8, col=cutg+1, nmax.lab = 100)
# All coloured horizontal bars represent positive values,
# therefore this means that all 3 groups are coherent, and there
# are no misclassified sites
# Plot the final dendrogram:
# a) first reorder sites so that their order in the distance matrix is
# respected as much as posible
ordclust<-reorder(env.clust.av, env.de)
plot(ordclust, hang=-1, xlab="3 groups", sub="", ylab="height",
main= "Final Dendrogram", labels=cutree(ordclust, k=k))
rect.hclust(ordclust, k=k)
dend<-as.dendrogram(ordclust)
heatmap(as.matrix(env.de), Rowv=dend, symm=TRUE, margin=c(3,3))
# (6) INTERPRET. How to find what is different in one group from the others?
# USe the vegan function "tabasco"
tabasco(env1, ordclust)
# THE END ################################################################################
##########################################################################################
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment