R seminar series section 2 # 3

Last update on March 30, 2017.


Download the datavis csv here



# Faith Musili

# R brownbag seminar November 10th, 2016

#Set your working directory

### Install package dplyr (only when it is not already installed)
# An unconditional install.packages() call would re-download and reinstall the
# package on every run of the script, and would fail without network access.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}

### Load dplyr package into R
library(dplyr)

### Read your csv data into R
# The path is relative, so datavis.csv must be in the working directory.
data <- read.csv("datavis.csv")
class(data)     # class of the object returned by read.csv()
head(data)      # first rows
tail(data)      # last rows
dim(data)       # number of rows and number of columns
colnames(data)  # column names


### select(): choose columns
# Keep only the named columns: Country, Site and VegStructure.
Select_data1 <- select(data, Country, Site, VegStructure)
Select_data1

# Keep every column except a specific one, i.e. SEVEREERO
# (a leading minus sign drops the column).
Select_data2 <- select(data, -SEVEREERO)
head(Select_data2)

# Keep a range of columns by name: everything from Country through avSlope.
Select_data3 <- select(data, Country:avSlope)
tail(Select_data3)

# NOTE: starts_with(), ends_with() and contains() ignore case by default,
# so "C" also matches column names that use a lower-case "c".

# Keep all columns whose names start with the character string "C".
Select_data4 <- select(data, starts_with("C"))
head(Select_data4)

# Keep all columns whose names end with the character string "n".
Select_data5 <- select(data, ends_with("n"))
tail(Select_data5)

# Keep all columns whose names contain the character string "H".
Select_data6 <- select(data, contains("H"))
tail(Select_data6)

##### filter(): choose rows
# Keep the rows for Sites whose vegetation structure is Wooded grassland.
filter1 <- filter(data, VegStructure == "Wooded grassland")
filter1

# Keep the rows for Sites whose vegetation structure is Wooded grassland and
# whose Clay percentage is greater than 70. Conditions separated by commas
# must all hold for a row to be kept.
filter2 <- filter(data, VegStructure == "Wooded grassland", Clay > 70)
filter2

### Pipe operator %>%: chains functions together, passing the result of each
### step on as the first argument of the next step
# Select three columns, then keep only the Forest rows.
# Note: this assigns to Select_data1 again, replacing its previous value.
Select_data1 <- data %>%
  select(Country, Site, VegStructure) %>%
  filter(VegStructure == "Forest")
Select_data1

# arrange(): re-order the rows by a particular column (here VegStructure).
d <- arrange(data, VegStructure)

# Combine select() and arrange() with the pipe operator:
# keep three columns, then sort by Site and, within Site, by VegStructure.
data %>%
  select(Country, Site, VegStructure) %>%
  arrange(Site, VegStructure)

# Combine select(), arrange() and filter() with the pipe operator:
# same sort as above, then keep only the rows with Clay of at least 75.
data %>%
  select(Country, Site, VegStructure, Clay) %>%
  arrange(Site, VegStructure) %>%
  filter(Clay >= 75)

# Same pipeline, but desc() sorts VegStructure in descending order within
# each Site.
data %>%
  select(Country, Site, VegStructure, Clay) %>%
  arrange(Site, desc(VegStructure)) %>%
  filter(Clay >= 75)

### mutate(): add new columns
# Create a new column called Carbon_PH, the ratio of Carbon to pH,
# and show the first rows of the result.
data %>%
  mutate(Carbon_PH = Carbon / pH) %>%
  head()

# Several columns can be added in a single mutate() call.
data %>%
  mutate(
    Carbon_PH = Carbon / pH,
    avSlope_avTreeDen = avSlope / avTreeDen
  ) %>%
  head()

### summarise(): collapse the data to summary values
# Compute the average Carbon by applying mean() to the Carbon column and
# name the summary value avg_Carbon.
data %>%
  summarise(avg_Carbon = mean(Carbon))

# Summarise Carbon with mean(), min() and max(); n() counts the rows.
data %>%
  summarise(
    avg_Carbon = mean(Carbon),
    min_Carbon = min(Carbon),
    max_Carbon = max(Carbon),
    total = n()
  )

### group_by(): split-apply-combine
# Split the data frame by a grouping variable (here Site), apply the summary
# functions to each group separately, then combine the output so that there
# is one row of Carbon summaries per Site.
data %>%
  group_by(Site) %>%
  summarise(
    avg_Carbon = mean(Carbon),
    min_Carbon = min(Carbon),
    max_Carbon = max(Carbon),
    total = n()
  )

Next entry

Previous entry

Related entries

Similar entries

Pingbacks

Pingbacks are closed.

Comments

No comments yet.

Post your comment