Commit 8ebd946d authored by sonitaR's avatar sonitaR
Browse files

uploaded more support files for RDA and thursday material

parent 78bb1d72
#####################################################
# MULTIVARIATE STATISTICS #
#####################################################
#####################################################
# PART 4. BIODIVERSITY INDICES #
#####################################################
#####################################################
# E4. TAXONOMIC DIVERSITY INDICES #
#####################################################
# PREPARATION STEP:
# download Biodiversity RGUI()
# Type this in your browser to access the instructions:
# http://proveg.org/attachments/article/103/ANNEXE%206_Installation%20of%20BiodiversityR%20in%20Windows.pdf
# MAC USERS:
# Try to follow the same pdf of instructions
# if you have problems try checking some online queries:
# https://cran.r-project.org/web/packages/BiodiversityR/index.html
# https://stackoverflow.com/questions/27950603/biodiversityr-and-rcmdr-issues-in-mac
# https://stat.ethz.ch/pipermail/r-sig-mac/2008-September/005314.html
# https://socialsciences.mcmaster.ca/jfox/Misc/Rcmdr/installation-notes.html
# if none of these work please team up with a non-mac user for the
# following steps (you can return to your computer when you begin the univariate models)
# START OF THE PROCESS
rm(list=ls()) # cleaning memory
# Prepare again, the community file (as in previous days but this time, include a final step to save it as a file)
require(reshape)
setwd("C://multivar")
biomass <- read.table("fish_biomass.txt", h = T, sep = "\t")
biomass.wide<-cast(biomass,site+transect~species,sum)
biomass.melted <- melt(biomass.wide)
row.names(biomass.melted) <- 1:length(biomass.melted[,1])
names(biomass.melted) <- c(names(biomass.melted)[1:2],'biomass',names(biomass.melted)[4])
biomass.melted <- subset(biomass.melted,select=c(1:2,4,3))
mean.biomass <- aggregate(biomass ~ site+species,FUN=mean,data=biomass.melted)
meanbiomass.wide<-cast(biomass,site~species,sum)
meanbiomass.wide.df <- data.frame(meanbiomass.wide[,-1], row.names=meanbiomass.wide[,1])
# final step to save this as a file to your multivar folder in C
# (or to your own working directory if different)
write.table(meanbiomass.wide.df,file="speciesbiomass.txt", sep="\t")
# load the "environmental data":
env.data <- read.table("environmental_quant.txt", h = T, sep = "\t")
# Ok ready to compute biodiversity with the two source files:
# speciesbiomass.txt
# environmental_quant.txt
# Load the package (will take some time and at the end display a Pop-up window)
library(BiodiversityR)
BiodiversityRGUI()
library(Rcmdr)
# (1) "import" the datasets speciesbiomass.txt and environmental_quant.txt into Biodiversity:
# Under the top menu "Data" click "import data" and select the option "import from txt file"
# Another window will pop up, and there you will have to enter a simple name
# for each dataset try "species" and "siteinfo" respectively for biological and environmental
# datasets and chose "select community dataset"
# Under "Field Separator" indicate that separator is "Tab"
# (2) "Load" or activate the two datasets:
# Under the tab BiodiversityR, scroll down to "community matrix" and from the menu displayed choose "select community dataset"
# and from the display lists choose the "species"
# Under the tab BiodiversityR, scroll down to "environmental matrix and repeat as above selecting "siteinfo"
# this time
# (3) Compute biodiversity indices:
# Scroll down to "analysis of diversity" and chose "diversity indices" from all options
# give a name to the result element: "shannon.palau"
# Under biodiversity index select: Shannon
# Under calculation method: each.site
# Under subset options: site
# Tick= "save results" and "label results"
# Repeat process for "richness" and "evenness" giving different names to outputs
# (4) Adding indices to the environmental dataset in a new file
# Under BiodiversityR scroll down again to "analysis of diversity"
# but this time select "add diversity values to dataset"
# this will add the indices to the environmental element called "site info"
# (5) Take the element "site info" out of the BiodiversityR window and save it as a file
write.table(siteinfo,file="palauenv&indices.txt", sep=",")
# (6) Read this table into R now:
allbiodiversity <- read.table("palauenv&indices.txt", h = T, sep = ",")
# Work on adding all indices to this file before we begin the univariate modelling.
#####################################################################################
# Start of univariate models with Shannon&Wiener diversity
#####################################################################################
# (1) Data exploration
# 1.1. Checking if you have outliers in your response variable (Diversity)
par(mfrow = c(1, 2))
boxplot(allbiodiversity$Shannon,
main = "TaxDiversity")
dotchart(allbiodiversity$Shannon,
xlab = "Range of data",
ylab = "Values")
# We do not have outliers, these would show as sites
# far away from others
# 1.2. Checking if you have outliers in your explanatory variables
# lets remind ourselves of the names of variables:
names(allbiodiversity)
par(mfrow = c(2, 3), mar = c(4, 3, 3, 2))
dotchart(allbiodiversity$wave.exposure, main = "WE")
dotchart(allbiodiversity$productivity.turf.mm.day, main = "PROD")
dotchart(allbiodiversity$rugosity, main = "RUG")
dotchart(allbiodiversity$algal.cover, main = "ACOV")
dotchart(allbiodiversity$coral.cover, main = "CCOV")
# we seem to have some 3 extreme values of wave exposure
# the rest is all ok
# Apply a transformation to Wave Exposure to alleviate this:
allbiodiversity$wave.exposure <- log10(allbiodiversity$wave.exposure)
# 1.3. Check for collinearity (you also did this yesterday)
panel.cor <- function(x, y, digits=2, prefix="", cex.cor)
{
usr <- par("usr"); on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
r <- abs(cor(x, y, use="complete.obs"))
txt <- format(c(r, 0.123456789), digits=digits)[1]
txt <- paste(prefix, txt, sep="")
if(missing(cex.cor)) cex <- 0.8/strwidth(txt)
test <- cor.test(x,y)
Signif <- symnum(test$p.value, corr=FALSE, na=FALSE,
cutpoints = c(0, 0.05, 0.1, 1),
symbols = c("*", ".", " "))
text(0.5, 0.5, txt, cex = cex * r)
text (.8, .8, Signif, cex=cex, col=2)
}
# plot every pair of variables, asking for the plot to include correlation coefficients
pairs(allbiodiversity[ ,1:7], lower.panel = panel.smooth, upper.panel = panel.cor)
# As yesterday:
# Wave exposure is related to productivity (retain wave exposure)
# Rugosity is related to algal cover (retain rugosity)
# Keeping rugosity means removing coral cover
# For any model we want to fit we are left with: Wave exposure and Rugosity
# If we focus on the relationships between shannon and the environmental variables
# we have very poor apparent relationship, so we dont expect
# a great model ahead.
# 1.4. Check for Zero inflation of the response variable (too many zeroes)
sum(allbiodiversity$Shannon == 0)
sum(allbiodiversity$Shannon == 0) / nrow(allbiodiversity)
plot(table(allbiodiversity$Shannon)) #More useful for count data
# not a problem here. not many values are 0 (in the Y axis)
# 1.5. No need to test for interactions because you dont have factors
# but if you analysed the categorical wave exposure data,
# you would have to repeat this code for every quantitative variable:
# coplot(Shannon ~ rugosity | factor(wave.exposure),
# data = allbiodiversity)
# however in this particular dataset we have very few sites to test
# for interactions a lot more survey effort would have been
# required to survey a range of e.g. rugosities within different
# wave exposures.
# We are ready for model fitting carrying these decisions to the model
# (important for reporting)
# explanatory (wave exposure was log transformed because of outliers)
# For any model we want to fit we are left with: Wave exposure and Rugosity
# there is no zero inflation so no need for zero-inflated models
# Our response variable is continuous (so no need for generalised linear models):
# 2. Fitting the model (including all possible factors and interactions)
# in this case no interactions are possible:
# only consider including interactions when there are sound ecological
# hypotheses behind them, and when you have enough replication within the
# levels of the interaction.
modeldiv<-lm(Shannon~wave.exposure+rugosity, data=allbiodiversity)
summary(modeldiv)
# as expected the model has a very bad Rsquared, and is not significant,
# but before we trust it lets run diagnostic plots:
# The model should show homogeneity of variance (heteroscedasticity):
# (i.e. the first plot residuals vs. fitted should show no pattern
# all dots spread across all biplot)
par(mfrow = c(2, 2))
plot(modeldiv)
resid.vals.mod <- resid(modeldiv)
fitted.vals.mod <- fitted(modeldiv)
plot(x = F2, y = E2)
abline(h = 0, v = 0, lty = 2)
# In this case there is a U pattern which indicates some non-linearity
# (non linear modes outside scope of this course)
# We conclude that taxonomic diversity is not driven by wave exposure,
# rugosity (unlikely to be driven by any of the collinear variables)
# END ###################################################################
# Your task: Compute species richness and evenness and try to
# find whether these are explained by any of the environmental variables
#########################################################################
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment