Commit 5ec55224 authored by sonitaR's avatar sonitaR
Browse files

uploaded least worse RDA solution with new info on interpreting plots

parent 8ebd946d
#####################################################
# MULTIVARIATE STATISTICS #
#####################################################
#####################################################
# PART 3. INTERPRETATION OF ECOLOGICAL STRUCTURES #
#####################################################
#####################################################
# E3. REDUNDANCY ANALYSIS #
#####################################################
# preparation steps
rm(list=ls())
require(reshape)
require(ade4)
require(vegan)
require(packfor)
require(MASS)
require(ellipse)
require(FactoMineR)
# PREPARING THE BIOLOGICAL DATASET FOR MULTIVARIATE ANALYSIS ###################################
# A REPEAT OF YESTERDAY'S code
setwd("C://multivar")
biomass <- read.table("fish_biomass.txt", h = T, sep = "\t")
biomass.wide<-cast(biomass,site+transect~species,sum)
biomass.melted <- melt(biomass.wide)
row.names(biomass.melted) <- 1:length(biomass.melted[,1])
names(biomass.melted) <- c(names(biomass.melted)[1:2],'biomass',names(biomass.melted)[4])
biomass.melted <- subset(biomass.melted,select=c(1:2,4,3))
mean.biomass <- aggregate(biomass ~ site+species,FUN=mean,data=biomass.melted)
meanbiomass.wide<-cast(biomass,site~species,sum)
meanbiomass.wide.df <- data.frame(meanbiomass.wide[,-1], row.names=meanbiomass.wide[,1])
# Load some graphic functions designed by Brocard et al (2011) we will need:
source("evplot.R")
source("cleanplot.pca.R")
# Hellinger-transform the species dataset
spe.hel<-decostand(meanbiomass.wide.df, "hellinger")
# Load the environmental dataset:
env.data <- read.table("environmental_quant.txt", h = T, sep = "\t")
# remove the label of sites to make it a numbers-only matrix to
# later be able to standardise
env.data <- data.frame(env.data[,-1], row.names=env.data[,1])
# Check for collinearity of explanatory:
# We do this in non-standardised variables
# Write a function to compute correlation coefficients:
# source you.tube class: https://www.youtube.com/watch?v=wXFDIgaTdLw
panel.cor <- function(x, y, digits=2, prefix="", cex.cor)
{
usr <- par("usr"); on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
r <- abs(cor(x, y, use="complete.obs"))
txt <- format(c(r, 0.123456789), digits=digits)[1]
txt <- paste(prefix, txt, sep="")
if(missing(cex.cor)) cex <- 0.8/strwidth(txt)
test <- cor.test(x,y)
Signif <- symnum(test$p.value, corr=FALSE, na=FALSE,
cutpoints = c(0, 0.05, 0.1, 1),
symbols = c("*", ".", " "))
text(0.5, 0.5, txt, cex = cex * r)
text (.8, .8, Signif, cex=cex, col=2)
}
# plot every pair of variables, asking for the plot to include correlation coefficients
pairs(env.data[ ,1:5], lower.panel = panel.smooth, upper.panel = panel.cor)
# There are high levels of collinearity between pairs of variables:
# Wave exposure and productivity (retain wave exposure)
# Rugosity and algal cover (retain rugosity)
# Retaining rugosity means getting rid of coral cover as well
# For any model we are left with: wave exposure and rugosity
# checking out the names of the variables of our env.data
names(env.data)
# Therefore remove just coral cover for now:
subset.env<-env.data[c(TRUE, FALSE, TRUE, FALSE, FALSE)]
# standardise the reduced environmental dataset:
stand.env<-scale(subset.env)
stand.env<-as.data.frame(stand.env)
# Run the simple RDA
formulaRDA<-rda(spe.hel ~. , data = stand.env)
summary(formulaRDA)
# Some explanations of the output:
# (1) Partitioning of variance: the overall variance is partitioned
# into "constrained" and "unconstrained" fractions.
# The "constrained" fraction is the amount of variance of the Y matrix explained
# by the X explanatory matrix. Expressed as a proportion,it is equivalent to an R2
# in univariate multiple regression.
# For this analysis the amount of explained variance is:
# R/ 24.6%
# This means that the remaining "unconstrained" variance:
# R/ 75.4% remains unexplained by the variables included in the model
# (2) Eigenvalues and their contribution to the variance:
# This analysis yielded 2 canonical axes labelled RDA1 to RDA2
# (note that there will always be as many RDAs as explanatory variables)
# and an aditional (9) principal component axes labelled PC1 to PC9
# Toghether the RDA1 and RDA2 explain = 24.6%
# Values for each RDA are the proportions contributed by each to the 24.6%,
# RDA1 explains 67% (of the 24.7% explained variation)
# RDA2 explains 32% (of the 24.7% explained variation)
# Toghether the PC1-PC7 explain the remaining 75.4% needed to
# explain the whole of variation among sites
# Values of "proportion explained" under PC1-PC9 represent
# the amount of variance represented by residual PCs but NOT EXPLAINED
# by the RDA model
# (3) Note that the amount of variance explained by PC1 (which is residual
# i.e. not attributable to wave exposure or to rugosity) is larger, than
# the amount of variability explained by the RDAs, in this case, the researcher
# should exploit this information and propose a hypothesis about the
# causes for this (outside the scope of this course but fyi).
# (4) Species scores: they are the coordinates of the tips of the
# species arrows
# (5) Site scores: they are the coordinates of the site dots in the
# reduced space generated with the response (site x species) matrix
# (6) Site constraints (linear combinations of constraining variables):
# fitted values
# (7) Scores for each of the explanatory (environmental) variables
# to be represented as lines in the RDA ordination plot
# extract the canonical coefficient from RDA object:
coef(formulaRDA)
# these are the strengt and direction (given by sign) of the
# correlation between each variables and the RDA (will later be represented)
# on blue right and top axes of the rda plot and indicated by the angles
# of the arrows (representing the environmental variables) in relation
# to the RDA1 and RDA2
# Check whether the whole model is statistically significant:
set.seed(111)
anova.cca(formulaRDA, step=1000)
# Marginally significant (p = 0.065)
# compute the R2 and adjusted R2 which would indicate the
(R2<-RsquareAdj(formulaRDA)$r.squared)
(R2adj <-RsquareAdj(formulaRDA)$adj.r.squared)
# The second value is the unbiased amount of variation explained
# by the model
# As you can see this is very small
# only 7% of the variation is explained by these variables
# Test the significance of all the canonical axes
set.seed(111)
anova.cca(formulaRDA, by = "axis", step=1000)
# There is one significant RDA (RDA1)
# This RDA is positively although not very strongly correlated with wave exposure (23%)
# and positively (a little more strongly related to rugosity) (28%)
# More on how to describe this output see this paper:
# Testing the significance of canonical axes in redundancy analysis (2011)
# Methods in Ecology and Evolution 2(3):269-277
# (available in Git Hub)
# Plot1 (this scaling should be used to focus mainly the angles of environmental vectors)
plot(formulaRDA, scaling=1, main="RDA analysis of Palau fish biomass")
spe.sc<-scores(formulaRDA, choices=1:2, scaling=1, display="sp")
arrows(0, 0, spe.sc[,1], spe.sc[,2], length = 0, lty=1, col="red")
# Things to interpret from plot1:
# You have sites, species (as red lines) and environmental variables (as blue arrows)
# Distances between sites represent a two-dimensional approximation
# of their fitted Euclidean distances
# Projecting the sites on the blue environmental variables tells you
# roughly which values (low or high) the sites have of each variable.
# Angles between lines of species and explanatory variables reflect their corre
# lation (value of which is indicated in blue right and top axis)
# Note that the blue axis on the right and on the top, are centred in 0
# and run from -1 to 1, and there you can see the correlation between
# environmental variables with the RDA1 (on right axis)
# or with the RD2 (on top axis)
# e.g. wave exposure is positively correlated with RDA2
# e.g. rugosity is positively correlated with RDA1 negatively correlated to RDA2
# Sites can also be projected perpendicularly on the species
# or explanatory lines and approximate the fitted value of the site along the
# variable
# Plot2 (this should be used to focus better on relationship between environmental
# variables and the RDA axes)
plot(formulaRDA, main="RDA analysis of Palau fish biomass")
var2.sc<-scores(formulaRDA, choices=1:2, display="sp")
arrows(0, 0, var2.sc[,1], var2.sc[,2], length = 0, lty=1, col="red")
# Here you can really visualise the angle between the
# arrows and the dotted lines in the middle represent the
# relationshps of the variables with the RDA1 and RDA2.
# Once more sites can be projected onto both species and environmental
# vectors indicating the respective values of the sites on species abundance
# and on the scale of the different environmental variables.
# The angle between pairs of lines (species) in this plot indicates
# how correlated they are.
# Find more technical explanation on how to interpret these plots
# on Legendre and Legendre 2012: 639-641
# The previous plots represent species positions as "weigthed averages"
# there is an ongoing discussion as to whether these should rather
# represent linear combinations of explanatory variables,
# Read more here:
vignette("decision-vegan", package ="vegan")
# and in Borcard et al p163-164
# See plot here:
plot(formulaRDA, scaling=1, display=c("sp", "lc", "cn"),
main="triplot with LC scores")
arrows(0, 0, spe.sc[, 1], spe.sc[ ,2], length=0, lty=1, col="red")
# Test the global significance of all model
set.seed(111)
anova.cca(formulaRDA, step=1000)
# However the model is just marginally significant (0.06)
# Test the significance of individual axes:
set.seed(111)
anova.cca(formulaRDA, step=1000, by="axis")
# RDA1 is significant (to interpret bear in mind which environmental variables
# is it related to)
# plot this:
par(cex=1.5)
plot(formulaRDA, scaling=1, main ="Triplot RDA-weighted average scores")
var_pars_sc<-scores(formulaRDA, choices=1:2, scaling=1, display="sp")
arrows(0,0, var_pars_sc[, 1], var_pars_sc[, 2], length=0, lty=1, col="red")
# in this plot the lenght of the arrows is important
# the longest one (rugosity) contributes the most to the
# position of the species on the sites
# check that vifs are <10:
vif.cca(var_rda_pars)
# all good
#########################################################################
# Tips for the report
# The questions you need to ask of this analysis:
# Do environmental characteristics play an important role in driving the
# spatial patters of fish community structure in Palau?
# Are there some environmental characteristics that play a more
# important role than others?
# Is there any remaining variation in community composition that appears
# unexplained by the variables the researcher quantified?
# Method:
# Say which data transformations you used (briefly justify why you need the
# transformation)
# Say which method you used (RDA)
# Results: Report the plot, use the output to interpret what it means
########################################################################
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment