Commit 78bb1d72 authored by sonitaR's avatar sonitaR
Browse files

added Wednesday files

parent 2c02295f
#####################################################
# MULTIVARIATE STATISTICS #
#####################################################
#####################################################
# PART 3. INTERPRETATION OF ECOLOGICAL STRUCTURES #
#####################################################
#####################################################
# E3. REDUNDANCY ANALYSIS #
#####################################################
# preparation steps
rm(list=ls())
require(reshape)
require(ade4)
require(vegan)
require(packfor)
require(MASS)
require(ellipse)
require(FactoMineR)
# PREPARING THE BIOLOGICAL DATASET FOR MULTIVARIATE ANALYSIS ###################################
# A REPEAT OF YESTERDAY'S code
setwd("C://multivar")
biomass <- read.table("fish_biomass.txt", h = T, sep = "\t")
biomass.wide<-cast(biomass,site+transect~species,sum)
biomass.melted <- melt(biomass.wide)
row.names(biomass.melted) <- 1:length(biomass.melted[,1])
names(biomass.melted) <- c(names(biomass.melted)[1:2],'biomass',names(biomass.melted)[4])
biomass.melted <- subset(biomass.melted,select=c(1:2,4,3))
mean.biomass <- aggregate(biomass ~ site+species,FUN=mean,data=biomass.melted)
meanbiomass.wide<-cast(biomass,site~species,sum)
meanbiomass.wide.df <- data.frame(meanbiomass.wide[,-1], row.names=meanbiomass.wide[,1])
# Load some graphic functions designed by Brocard et al (2011) we will need:
source("evplot.R")
source("cleanplot.pca.R")
# Hellinger-transform the species dataset
spe.hel<-decostand(meanbiomass.wide.df, "hellinger")
# Load the environmental dataset:
env.data <- read.table("environmental_quant.txt", h = T, sep = "\t")
# remove the label of sites to make it a numbers-only matrix to
# later be able to standardise
env.data <- data.frame(env.data[,-1], row.names=env.data[,1])
# Check for collinearity of explanatory:
# We do this in non-standardised variables
# Write a function to compute correlation coefficients:
# source you.tube class: https://www.youtube.com/watch?v=wXFDIgaTdLw
panel.cor <- function(x, y, digits=2, prefix="", cex.cor)
{
usr <- par("usr"); on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
r <- abs(cor(x, y, use="complete.obs"))
txt <- format(c(r, 0.123456789), digits=digits)[1]
txt <- paste(prefix, txt, sep="")
if(missing(cex.cor)) cex <- 0.8/strwidth(txt)
test <- cor.test(x,y)
Signif <- symnum(test$p.value, corr=FALSE, na=FALSE,
cutpoints = c(0, 0.05, 0.1, 1),
symbols = c("*", ".", " "))
text(0.5, 0.5, txt, cex = cex * r)
text (.8, .8, Signif, cex=cex, col=2)
}
# plot every pair of variables, asking for the plot to include correlation coefficients
pairs(env.data[ ,1:5], lower.panel = panel.smooth, upper.panel = panel.cor)
# There are high levels of collinearity between pairs of variables:
# Between:
# Wave exposure and productivity
# Rugosity and algal cover
# Rugosity and coral cover
# Algal cover and coral cover
# We could therefore make some arbitrary decisions to move forward:
# Retain wave exposure exclude productivity (but consider that WE is informative of productivity)
# Retain rugosity, exclude coral cover (but consider that high rugosity = high coral.cover = low algal.cover)
# As we have retained rugosity, we must exclude algal.cover
# So our environmental matrix should contain:
# Wave exposure and rugosity
# NOTE: To make these decisions you need to make sure you
# base them in your ecological knowledge on the dataset
# A less arbitrary way to check for collinearity is to
# examine the variance inflation values (VIFS).
# These numbers indicate the proportion by which the variance of
# a regresion coefficient is inflated by the presence of another one
# to compute the VIFs:
require(faraway)
sort(vif(env.data))
# According to Borcard et al (2011) VIFs > 10 should be avoided,
# so based on this test we could opt for droping just
# algal cover and coral cover:
# checking out the names of the variables of our env.data
names(env.data)
# Reduce this matrix to include just columns 1 and 3:
subset.env<-env.data[c(TRUE, TRUE, TRUE, FALSE, FALSE)]
# and check the new VIFs:
sort(vif(subset.env))
# all good, no value >10
# standardise the reduced environmental dataset:
stand.env<-scale(subset.env)
stand.env<-as.data.frame(stand.env)
# Run the simple RDA
formulaRDA<-rda(spe.hel ~. , data = stand.env)
summary(formulaRDA)
# Some explanations of the output:
# (1) Partitioning of variance: the overall variance is partitioned
# into "constrained" and "unconstrained" fractions.
# The "constrained" fraction is the amount of variance of the Y matrix explained
# by the X explanatory matrix. Expressed as a proportion,it is equivalent to an R2
# in univariate multiple regression.
# For this analysis the amount of explained variance is (complete):
# R/ 33%
# This means that the remaining "unconstrained" variance (complete)
# R/ 67 %
# remains unexplained by the variables included in the model
# (2) Eigenvalues and their contribution to the variance:
# This analysis yielded (how many?) canonical axes labelled RDA1 to....?
# R/ 3 - to RDA3
# and an aditional (how many?) canonical axes labelled PC1 to ...?
# R/ 8 - to PC8
# Toghether the RDA1, RDA2, RDA3 added up = 33%
# Values for each RDA are the proportions contributed by each to the 33%,
# not to the 100%
# Toghether the PC1-PC8 added up = 68%
# Values of "proportion explained" under PC1-PC8 represent
# the amount of variance represented by residual PCs but NOT EXPLAINED
# by the RDA model
# (3) Note that the amount of variance explained by PC1 (which is residual
# i.e. not attributable to wave exposure or to rugosity) is larger, than
# the amount of variability explained by RDA3, in this case, the researcher
# should exploit this information and propose a hypothesis about the
# causes for this (outside the scope of this course but fyi).
# (4) Species scores, they are the coordinates of the tips of the
# species arrows
# (5) Site scores, they are the coordinates of the site dots in the
# reduced space generated with the response (site x species) matrix
# (6) Site constraints (linear combinations of constraining variables):
# coordinates of the sites in the reduced space generated with the
# matrix of explanatory (sites x environmental) variables
# (7) Scores for each of the explanatory (environmental) variables
# to be represented as lines in the RDA ordination plot
########################################################################
# NOW GET ALL THAT INFO OUT TO MAKE THE RDA PLOT
########################################################################
(R2<-RsquareAdj(formulaRDA)$r.squared)
(R2adj <-RsquareAdj(formulaRDA)$adj.r.squared)
# The second value is the unbiased amount of variation explained
# by the model
# As you can see this is very small
# only 8% of the variation is explained by these variables
# In this case plotting an RDA is not meaningful
# given that the species-environment relationship is so poor
# but we will do it anyway to learn how
#########################################################################
plot(formulaRDA, scaling=1, main="RDA analysis of Palau fish biomass")
spe.sc<-scores(formulaRDA, choices=1:2, scaling=1, display="sp")
arrows(0, 0, spe.sc[,1], spe.sc[,2], length = 0, lty=1, col="red")
# Test of significance of the RDA results through permutation
anova.cca(formulaRDA, step=1000)
anova.cca(formulaRDA, by ="axis", step=1000)
# Here we see that the RDA1 is significant
#########################################################################
# TRY NOW TO RUN THE MODEL EXCLUDING OTHER EXPLANATORY VARIABLES
#########################################################################
#########################################################################
# Tips for the report
# The questions you need to ask of this analysis:
# Do environmental characteristics play an important role in driving the
# spatial patters of fish community structure in Palau?
# Are there some environmental characteristics that play a more
# important role than others?
# Is there any remaining variation in community composition that appears
# unexplained by the variables the researcher quantified?
# Method:
# Say which data transformations you used (briefly justify why you need the
# transformation)
# Say which method you used (RDA)
# Results: Report the plot, use the output to interpret what it means
########################################################################
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment