Commit 3776d9cc authored by Christiane Hassenrück's avatar Christiane Hassenrück
Browse files

added solution for R scripts week 1 and plotting scripts from group work

parent 3be80e30
##Scatter Plot
setwd("----")
require(reshape)
#Read Data
turf_productivity <-read.table("turf_productivity.txt",h=T,sep = "\t")
biomass<-read.table("fish_biomass.txt", h= T, sep= "\t")
wave<-read.table("sites_metadata.txt", h=T, sep = "\t")
##Turf Mean & Standard Deviation
turf.mean <- c(by(data = turf_productivity$productivity.turf.mm.day,
INDICES = turf_productivity$site, FUN = mean))
turf.SD <- c(by(data = turf_productivity$productivity.turf.mm.day,
INDICES = turf_productivity$site,FUN = sd))
all.turf.data <-data.frame(turf.mean, turf.SD)
#include row names as a columm of data frame
Turf <- data.frame(site = row.names(all.turf.data), all.turf.data)
#N. of species per transect per site
#add colummn and fill with 1
biomass$Occurance<-1
species<- cast(biomass, site + transect~ .,
value = "Occurance", fun.aggregate = length)
colnames(species)<- c("site","transect","count")
##Number of Species per site mean and Standard Deviation
species.mean <- c(by(data = species$count,
INDICES = species$site,FUN = mean))
species.SD <- c(by(data = species$count,
INDICES = species$site,FUN = sd))
all.species.data <-data.frame(species.mean,species.SD)
SpeciesNum <- data.frame(site = row.names(all.species.data), all.species.data)
#Before creating data frame
##order data
TurfOrdered <- Turf[match(as.character(wave$site),
as.character(Turf$site)),]
SpeciesOrdered <-SpeciesNum[match(as.character(wave$site),
as.character(SpeciesNum$site)),]
all.equal(Turf$site,SpeciesNum$site)
all.equal(SpeciesOrdered$site,wave$site)
##Date frame of all data
PlotData <-data.frame(TurfOrdered,
SpeciesOrdered[,-1],
wave.exposure= wave$wave.exposure)
dev.off()
##Graphing----
Graph<- plot(PlotData$turf.mean,PlotData$species.mean,
xlab= "Turf Productivity",
ylab= "Number of Species",
xlim = c(0.01,.11),
ylim = c(3,11),
col= c("red","blue","green")[PlotData$wave.exposure],
pch=19)
legend(x="topright",legend = levels(PlotData$wave.exposure),
col= c("red","blue","green"),pch=19)
par(mar= c(6, 5, 3, 2))
arrows(PlotData$turf.mean, PlotData$species.mean-PlotData$species.SD,
PlotData$turf.mean, PlotData$species.mean+PlotData$species.SD,
length=0.05, angle=90, code=3)
arrows(PlotData$turf.mean-PlotData$turf.SD, PlotData$species.mean,
PlotData$turf.mean+PlotData$turf.SD, PlotData$species.mean,
length=0.05, angle=90, code=3)
segments(
x0 = seq(PlotData$turf.mean,1, 0.1) + PlotData$turf.SE, # specify the coordinates
y0 = seq(PlotData$turf.mean,1, 0.1) ,
x1 = seq(PlotData$turf.mean,1, 0.1) - PlotData$turf.SE,
y1 = seq(PlotData$turf.mean,1, 0.1)
)
\ No newline at end of file
######################################################################################
# Group 6: Mondi, Ben, Patrick, Paulina ##############################################
######################################################################################
turf.sd <- c(
by(
data = turf_productivity$productivity.turf.mm.day, #data to summarize
INDICES = turf_productivity$site,#grouping factor
FUN = sd#function to apply
)
)
##
turf.mean <-turf.mean[match(as.character(env.all$site), names(turf.mean))]
turf.sd <-turf.sd[match(as.character(env.all$site), names(turf.sd))]
##
x <- jitter(as.numeric(env.all$wave.exposure)) #use jitter to spread Low/medium/high data points along x axis
color <- env.all$wave.exposure
levels(color) <- c("blue","orange","red")
plot(x,turf.mean,#setting jitter function <-x and plotting x stops jitter from recalculating every time we plot function
main = "Turf_Plot", #sets main title of plot
xlab = "Wave Exposure", #labels x axis
ylab = "Turf Productivity",#labels y axis,
col = as.character(color),#this seperates our data points into different colors via wave exposure
pch = 19, #data point type (solid dot)
xaxt='n',#suppresses numeric values on x-axis
ylim=c(0,0.11) #sets scale of y axis
)
axis(
1,
mgp = c(2, 0.7, 0),
at = c(1.0, 2.0, 3),
labels = c("low", "medium", "high")
) #gives us our low, medium, high labels
segments(
x,
turf.mean - turf.sd,
x,
turf.mean + turf.sd
)
segments(x-.05,turf.mean - turf.sd,x+.05,turf.mean - turf.sd)
segments(x-.05,turf.mean + turf.sd,x+.05,turf.mean + turf.sd)
# hints:
# R commands: sd, by, match, jitter, segments
# defining colors by changing factor levels
match()
### barplot (beside = T) for coral and algal cover
# error bars = sd per wave exposure category (based on mean per site)
# including legend
# (1 group)
# hints:
# R commands: aggregate, barplot, segments, legend
# save object returned by barplot() to get x coordinates of bars
### stacked barplot of fish biomass per site (on genus level)
# with double x axis labels showing sites and wave exposure
# (1 group)
# hints:
# R commands: aggregate, par, segments, cumsum, apply, mtext, legend, text, table
# save object returned by barplot() to get x coordinates of bars
### heatmap of biomass on species level
# color bar showing wave exposure and genus
# (1 group)
# hints (option 1):
# R package: gplots
# R commands: ordered, t, heatmap.2, order
# defining colors by changing factor levels
# hints (option 2):
# R package: OceanView
# R commands: image2D, order, axis, text, segment, cumsum, apply, table
### ranked barplot of number of species per site
# with error bars showing standard deviation (among transects)
# bars colored by wave exposure
# (1 group)
# hints:
# R commands: by, match, barplot, segments, legend, order
# save object returned by barplot() to get x coordinates of bars
# scatterplot numer of species vs. turf productivity
# points colored by wave exposure
# error bars in both directions showing standard deviation per site
# (1 group)
plot(env.all$wave.exposure,turf.mean)
ylim=range(c(turf.mean-sd, turf.mean+sd))
# hints:
# R commands: segments, points
# defining colors by changing factor levels
# map of sampling area (tomorrow)
# (1 group)
\ No newline at end of file
......@@ -231,8 +231,10 @@ env.all <- data.frame(
env.meta[, -1] # What does this line do and why?
)
# show factor levels of wave exposure and order them by increasing exposure
# show factor levels of wave exposure and
# order them by increasing exposure
levels(env.all$wave.exposure)
env.all$wave.exposure <- ordered(
env.all$wave.exposure,
levels = c("low", "med", "high")
......@@ -242,22 +244,33 @@ env.all$wave.exposure <- ordered(
# correlation of environmental parameters
pairs(env.all[, 2:5])
# How many species are there in total?
?length
?levels
length(levels(biomass$species))
str(biomass$species)
# converting biomass table into wide data format
# independent observations in rows, species in columns
# call it biomass.wide
biomass.wide <- cast(
biomass,
"site + transect ~ species",
value = "biomass"
)
# Why didn't we need the fun.aggregate argument?
# Does the output look like expected?
str(biomass.wide)
# What went wrong and how do we fix it?
biomass.wide <- as.data.frame(biomass.wide)
biomass.wide$transect <- as.factor(biomass.wide$transect)
# Replace 'NA' with 0
# What is the structure of the output?
# Did we expect this?
biomass.wide[is.na(biomass.wide)] <- 0
# write output to file
write.table(
......@@ -270,6 +283,7 @@ write.table(
### save workspace
save.image("fish.Rdata")
# load("fish.Rdata")
#####################################################################################
......
......@@ -230,7 +230,7 @@ pairs(env.all[, 2:5])
length(levels(biomass$species))
# converting biomass table into wide data format
biomass.wide <- cast(biomass, "site + transect ~ species", value = "biomass")
biomass.wide <- cast(biomass, "site + transect ~ species", value = "biomass", fill = 0)
# Why didn't we need the fun.aggregate argument?
# Does the output look like expected?
# What went wrong and how do we fix it?
......
#########################################################
# ISATEC R introduction: data visualization #
#########################################################
### basic plot function in R ####
# set-up of standard plotting device: plotting parameters
# margins
par("mar")
# outer margins
par("oma")
# plotting area in user coordinates
par("usr")
# plotting on margins allowed?
par("xpd")
# multipanel figures
par("mfrow")
par("mfcol")
# position of axis title, axis tick marks, axis
par("mgp")
# plot titles?
par("ann")
# extra spacing ad beginning of axes?
par("xaxs")
par("yaxs")
# plotting parameters can be set before the plot to defin the plotting area, etc.
# there are some parameters which are then used by the plotting commands
# those can also be specified later (as part of the command)
# please refer to the examples in the plotting commands
# to delete any changes to default plotting parameters
# close all plotting devices
dev.off()
# or same default settings
op <- par()
# and reset
par(op)
# lets's have a look at a tyipcal plotting area of a simple 2 panel figure
par(
mfrow = c(1, 2),
oma = c(4, 0, 4, 0)
)
# for each plot in the multipanel layout
for(j in 1:2) {
# open new plot
plot.new()
# draw box around plotting region defined by user coordinates
box("plot")
# box around panel
box("figure")
# allow plotting on margins
par(xpd = NA)
# show inner margin lines
for(i in 1:4) {
mtext(
text = paste("line", 0:(floor(par("mar")[i]) - 1)),
side = i,
line = 0:(floor(par("mar")[i]) - 1)
)
}
}
# show outer margin lines
for(i in c(1, 3)) {
mtext(
text = paste("line", 0:3),
side = i,
line = 0:3,
outer = T
)
}
# you can navigate to any point in the plotting device
# as long as you know (guess) the coordinates!
### more sophisticated layout options
# more versatile plot arrangements
layout(
mat = matrix(
c(1, 1, 1,
2, 2, 3,
4, 5, 6),
3,
3,
byrow = T
),
widths = c(1, 1, 0.5),
heights = c(1.3, 1, 1)
)
# this will produce a the following multipanel plot
# 6 plots in 3 rows
# row 1: first plot over full width of plotting device and one third as high as the others
# row 2: second and third plot, the second taking up 4 times as much space as the third
# row 3: the last 3 plots, with plot 4 and 5 each taking up twice as much space as plot 6
# Let's have a look:
# use smaller margins
par(mar = c(1, 1, 1, 1))
for(i in 1:6) {
plot.new()
box("plot")
}
### plotting elements
# we will be using some of the following commands to add sepcific elements to a plot
# open new plot
dev.off()
par(mar = c(4, 4, 1, 1), ann = F)
# plot.new()
plot(
0,
0,
type = "n",
ylim = c(-0.5, 1.5),
xlim = c(0, 1),
axes = F
)
# get plot dimensions in user coordinates
par("usr")
# individual data points, requires x and y coordinates
points(
x = seq(0, 1, 0.1),
y = seq(0, 1, 0.1),
pch = 16, # point character, i.e. symbol
cex = seq(0.5, 2, length.out = 11), # point size
col = rainbow(11) # point color
)
# individual text labels (e.g. for data points), requires x and y coordinates
text(
x = seq(0, 1, 0.1),
y = seq(0, 1, 0.1),
labels = LETTERS[1:11],
pos = c(rep(3, 5), rep(1, 6)) # position of label relativ to xy
)
text(
0.5, # x
0.8, # y
labels = "value without point",
font = 2 # bold text
)
# connect individual data points (in the order of input), requires x and y coordinates
lines(
x = seq(0, 1, 0.1),
y = seq(0, 1, 0.1),
lty = 3, # line type dashed
lwd = 3 # line width
)
# line connectors between 2 points, requires start and end x and y coordinates
segments(
x0 = seq(0, 1, 0.1),
y0 = seq(0, 1, 0.1) - 0.2,
x1 = seq(0, 1, 0.1),
y1 = seq(0, 1, 0.1) + 0.2
)
# rectangular box, requires x and y coordinates of lower left and upper right hand corner
rect(
0.3,
0.75,
0.7,
0.85,
border = "red",
lty = 2,
lwd = 2
)
# connect individual data points (in the order of input) to form a polygon (fill optional),
# requires x and y coordinates
# similar to lines()
polygon(
x = c(0, 0.2, 0.2, 0.3, 0.2, 0.2, 0),
y = c(0.75, 0.75, 0.7, 0.8, 0.9, 0.85, 0.85),
col = "red"
)
# like segments, but with arrow head
arrows(
x0 = rep(0, 3),
y0 = rep(0, 3),
x1 = c(0.5, 0.6, 0.7),
y1 = c(0.1, 0.2, 0.3),
length = 0.1 # length of arrowhead
)
# draw box (rectangle) around plotting area (defined by user coordinates)
box("plot", lwd = 0.5)
# add axis to plot
# specify which side of plot to add axis
axis(
2,
mgp = c(2, 0.7, 0),
las = 1 # always horizontal
)
axis(
1,
mgp = c(2, 0.7, 0),
at = c(0.2, 0.5, 0.8),
labels = c("small", "medium", "large")
)
# add text labels on margin
mtext(
text = "Size",
side = 1, # side of plot (underneath)
line = 2 # margin line
)
# also works with
title(ylab = "Value")
# add linear curve to plot,
# e.g. linear fit, horizontal or vertical lines with specific intersection with axis
abline(a = 0.2, b = 0.5) # intercept and slope
abline(h = 0.5)
abline(v = 0.85)
### other plotting functions/packages
require(gplots)
# ggplot2
# have a look at https://github.com/chassenr/Tutorials/tree/master/R_crash_course_2013_Buttigieg
### visualizing your data ####
### scatter plot with error bars for environmental parameters
# individual points: mean per station, whiskers: sd/range
# points jittered on levels of exposure categories
# color by wave exposure
# include site names as point labels
# (4 groups)
# hints:
# R commands: sd, by, match, jitter, segments
# defining colors by changing factor levels
turf.sd <- c(by(turf$productivity.turf.mm.day, turf$site, sd))
turf.sd <- turf.sd[match(as.character(env.all$site), names(turf.sd))]
plot(
0, 0,
type = "n",
xlim = c(0.5, 3.5),
ylim = c(0, max(env.all$turf.mean + turf.sd)),
xlab = "Wave exposure",
ylab = "Turf productivity",
axes = F
)
axis(2, las = 1)
axis(1, at = 1:3, labels = levels(env.all$wave.exposure))
x <- jitter(as.numeric(env.all$wave.exposure), 1.5)
color <- env.all$wave.exposure
levels(color) <- c("blue", "orange", "darkred")
segments(
x,
env.all$turf.mean - turf.sd,
x,
env.all$turf.mean + turf.sd
)
segments(
x - 0.1,
env.all$turf.mean - turf.sd,
x + 0.1,
env.all$turf.mean - turf.sd
)
segments(
x - 0.1,
env.all$turf.mean + turf.sd,
x + 0.1,
env.all$turf.mean + turf.sd
)
points(
x,
env.all$turf.mean,
pch = 21,
col = "black",
bg = as.character(color),
cex = 1.5
)
box("plot")
### barplot (beside = T) for coral and algal cover
# error bars = sd per wave exposure category (based on mean per site)
# including legend
# (1 group)
# hints:
# R commands: aggregate, barplot, segments, legend
# save object returned by barplot() to get x coordinates of bars
y.mean <- aggregate(
env.all[, 2:3],
by = list(env.all$wave.exposure),
FUN = mean
)
y.sd <- aggregate(
env.all[, 2:3],
by = list(env.all$wave.exposure),
FUN = sd
)
bp <- barplot(
t(y.mean[, 2:3]),
beside = T,
las = 1,
ylim = c(0, 100),
names.arg = y.mean$Group.1,
ylab = "Coverage [%]",
xlab = "Wave exposure",