R seminar series section 2 #2

Last update on March 30, 2017.






# ---- Working directory ----
# Point the session at the seminar folder. The import calls further down use
# paths that start with "Data/", which are resolved against this directory.
# The path is specific to the author's machine.
setwd(dir = "/Users/FMusili/Documents/2.2")

# ---- Creating objects ----

# Assign with `<-`; wrapping the assignment in parentheses also prints it.
(n <- 150)

# Right-hand assignment works as well (although `<-` is the conventional
# form): this rebinds `n` to 58.
(58 -> n)

# A numeric value with a decimal part.
(x <- 1.4)

# Names are case-sensitive: `X` is a different object from `x`.
(X <- 19)

# ---- Importing data of different formats ----

# CSV files: read.csv()
# The file is read twice to show both ways of addressing it: first by its
# absolute path, then by a path relative to the working directory. The second
# call overwrites the result of the first.
species_csv <- read.csv(file = "/Users/FMusili/Documents/2.2/Data/species.csv")
species_csv <- read.csv(file = "Data/species.csv")

# Quick checks on the imported object: its class, its dimensions and a
# per-column summary.
class(species_csv)
dim(species_csv)
summary(species_csv)

# Tab-delimited text files (.txt): read.table()
# `sep = "\t"` splits fields on tabs and `header = TRUE` takes the column
# names from the first line. `TRUE` is written out because `T` is an ordinary
# variable that can be reassigned, whereas `TRUE` is a reserved word.
species_txt <- read.table(file = "Data/species.txt", sep = "\t", header = TRUE)
class(species_txt)
dim(species_txt)
summary(species_txt)

# Excel workbooks (.xlsx, .xls): read.xlsx() from the xlsx package
# Install the package only when it is missing, so that re-running the script
# does not download and reinstall it every time.
if (!requireNamespace("xlsx", quietly = TRUE)) {
  install.packages("xlsx")
}
library(xlsx)

# Read the worksheet called "Sheet1" from the workbook.
species_xlsx <- read.xlsx(file = "Data/species.xlsx", sheetName = "Sheet1")
class(species_xlsx)
dim(species_xlsx)
summary(species_xlsx)

# Stata files (.dta): read.dta() from the foreign package
library(foreign)

animals_stata <- read.dta(file = "Data/Animals.dta")

# Class, dimensions and per-column summary of the imported data.
class(animals_stata)
dim(animals_stata)
summary(animals_stata)

# SPSS files: read.spss() from the foreign package (loaded above)
# The original call passed file = "", i.e. no file at all. The path is kept in
# one place here and the import only runs once it has been filled in.
# TODO: set `spss_file` to the location of the SPSS data file.
spss_file <- ""
if (nzchar(spss_file)) {
  # to.data.frame = TRUE returns a data frame instead of a list.
  animals_spss <- read.spss(file = spss_file, to.data.frame = TRUE)
} else {
  message("Skipping the SPSS import: 'spss_file' has not been set.")
}

# Fixed-width text files: read.fwf()
# Each line is cut into three fields of 16, 5 and 6 characters.
# The argument name `widths` is written in full instead of relying on partial
# matching of `width`, and `TRUE` replaces the reassignable shorthand `T`.
# NOTE(review): with header = TRUE, read.fwf() expects the names on the first
# line to be separated by `sep` (a tab by default) -- confirm the file matches.
animal.fwf <- read.fwf(
  file = "Data/Animals_fwf.txt",
  widths = c(16, 5, 6),
  header = TRUE
)
class(animal.fwf)
dim(animal.fwf)
summary(animal.fwf)

##################Read about the other data types not demonstrated here#########

# ---- Data handling ----
# Look at the first and the last rows of the data.
head(species_csv)
tail(species_csv)

# Show the current column names.
colnames(species_csv)

# Rename all four columns at once.
colnames(species_csv) <- c(
  "Tree_species", "Height(m)", "Nitrogen_fixing", "seed_mass"
)

# Rename a single column by position and show the result ...
names(species_csv)[1] <- "premium"
colnames(species_csv)

# ... then put the original name back: the later steps of this script (type
# conversion, subsetting, filtering) refer to this column as `Tree_species`
# and fail if it is left renamed.
names(species_csv)[1] <- "Tree_species"

# Report the class of every column in each imported data frame.
sapply(species_csv, class)
sapply(species_txt, class)
sapply(animal.fwf, class)

# Store the `animal` column of the fixed-width data as character strings,
# then check the column classes again.
animal.fwf$animal <- as.character(animal.fwf$animal)
sapply(animal.fwf, class)

# Do the same for the tree species names.
# NOTE(review): this needs a column that is actually named `Tree_species` at
# this point in the script -- confirm.
species_csv$Tree_species <- as.character(species_csv$Tree_species)
sapply(species_csv, class)

# Build a vector of 11 values and bind it to the Excel data as a new column.
# NOTE(review): presumably species_xlsx has 11 rows -- verify against the file.
a <- c(0, 9, 3, 3, 3, 3, 33, 3, 2, 2, 2)
d <- cbind(species_xlsx, a)

# Keep a copy of the CSV data under a second name.
my_species <- species_csv

# ---- Subsetting columns ----

# By name: keep only the tree species and seed mass columns.
species <- subset(species_csv, select = c(Tree_species, seed_mass))
species

# By position: keep columns 2 and 3.
species_num <- species_csv[, 2:3]
species_num

# By negative index: keep every column except the first.
species_ <- species_txt[, -1]
species_

# ---- Filtering rows on a condition ----
# Keep the rows whose tree species is one of the three listed names.
Filtered_species <- subset(
  species_csv,
  Tree_species %in% c("Acacia abyssinica", "Ficus ovata", "Sesbania spinosa")
)
Filtered_species

# ---- Duplicated rows ----
# Show the rows that repeat an earlier row.
species_csv[duplicated(species_csv), ]

# Keep only the first occurrence of each row, then print the result.
species_csv <- unique(species_csv)
species_csv

# Replace a data point given a condition: heights equal to 10 become 10.1.
# The comparison is made against the number 10 rather than the string "10",
# so it no longer depends on implicit number-to-text coercion. which() turns
# the logical test into row positions and leaves out any NA comparisons.
species_csv$`Height(m)`[
  which(species_csv$`Height(m)` == 10)
] <- 10.1

# ---- Missing values (NA) ----
# Flag every missing cell, then print the per-column summary.
is.na(species_csv)
summary(species_csv)

# Drop every row that contains at least one NA.
# check.names = FALSE keeps the column names exactly as they are; without it
# data.frame() rewrites `Height(m)` into the syntactic name `Height.m.`.
species_csv <- data.frame(na.omit(species_csv), check.names = FALSE)
species_csv

#----------------------------------------------------------------

# Explore other data handling methods, e.g.:
# - merging
# - appending/binding
# - reshaping (wide and long)

Next entry

Previous entry

Related entries

Similar entries

Pingbacks

Pingbacks are closed.

Comments

No comments yet.

Post your comment