Full Transcript

#SCRIPT class 2 #To show the objects ls() #To remove objects rm(list=ls(all=TRUE)) #To select the folder to save the workspace: getwd() setwd("C:/name/name/name") #To save the workspace in a file save.image("name.Rdata") #To load the saved workspace load("name.Rdata") #Assign a valu...

# SCRIPT class 2
# Walk-through meant to be run statement by statement in an interactive
# R session.

# Workspace management ----

# To show the objects
ls()

# To remove objects (all.names = TRUE also removes objects whose name starts
# with a dot)
rm(list = ls(all.names = TRUE))

# To select the folder to save the workspace:
getwd()
# NOTE: "C:/name/name/name" is a placeholder path; replace it with an existing
# folder before running this line.
setwd("C:/name/name/name")

# To save the workspace in a file
save.image("name.Rdata")

# To load the saved workspace
load("name.Rdata")

# Objects and types ----

# Assign a value to a variable
x <- 1

# The types of objects
class(x)

# Creating a vector
y <- c(1, 5, 4, 8)
class(y)

# Create a matrix from two vectors
z <- c(3, 6, 99, 8)
m_h <- rbind(y, z) # y and z stacked as rows
m_v <- cbind(y, z) # y and z placed side by side as columns
class(m_h)

# Functions ----

# Each function is defined by its name and parameters
# Help about a function:
?mean

# Some parameters are mandatory, others optional
# General form: function_name(par1 = value1, par2 = value2, ...)
y <- c(29020, 32500, 40320, 20328, NA)
mean(y)
mean(y, na.rm = FALSE)
mean(y, na.rm = TRUE) # missing values are dropped before averaging

# 1) Import the dataframe from a text/Excel file ----

# You need to retrieve a text file / Excel sheet with observations for each
# individual in the rows and variables in the columns.
# We download the Nobel Prize data available at the following link:
# http://www.nber.org/nobel/
# We obtain the Excel sheet: Jones_Weinberg_2011_PNAS.xlsx
# NOTE(review): the call below reads a .csv file, presumably the sheet
# exported to CSV beforehand -- confirm.
nobel <- read.table("Jones_Weinberg_2011_PNAS.csv", header = TRUE, sep = ",")

# In RStudio you can use the integrated import tool
nobel <- data.frame(nobel)
class(nobel)
dim(nobel)
rownames(nobel)
colnames(nobel)
head(nobel)
tail(nobel)

# Average age of high creativity (whole sample)
nobel$age_discovery <- nobel$year_research_mid - nobel$year_birth
mean(nobel$age_discovery)

# Example: Create two new dataframes of researchers who did their
# prize-winning work before 1905 (included) and after 1985 (included)
early_period <- subset(nobel, subset = year_research_mid <= 1905)
late_period <- subset(nobel, subset = year_research_mid >= 1985)

# Age of high creativity (Early period)
early_period$age_discovery <-
  early_period$year_research_mid - early_period$year_birth
mean(early_period$age_discovery)

# Age of high creativity (Late period)
late_period$age_discovery <-
  late_period$year_research_mid - late_period$year_birth
mean(late_period$age_discovery)

# The lifespan of researchers can be calculated as follows
# Lifespan of each laureate, stored in the column `duree`
nobel$duree <- nobel$year_death - nobel$year_birth

# Problem in calculating the average lifespan: `duree` contains NAs
# (presumably laureates with no recorded year of death), so the mean is NA
mean(nobel$duree)

# We can eliminate the NAs
nobel_no_na <- subset(nobel, subset = !is.na(duree))
dim(nobel)
dim(nobel_no_na)

# The average lifespan of a Nobel Laureate is:
mean(nobel_no_na$duree)

# Save the workspace:
save.image("nobel.Rdata")

# 2) Data generation ----

# We want to create a data matrix that includes a sample of 5 individuals
# We have information about the age, gender, and weight of individuals.
# (3 variables)
age <- c(20, 25, 19, 33, 16)
gender <- c("M", "F", "M", "F", "M")
weight <- c(70, 60, 100, 75, 90)

# Create the dataset
data <- data.frame(a = age, g = gender, w = weight)
class(data)
data

# Average weight by gender: filter the rows first, then take column w ...
mean(data[data$g == "F", ]$w)
mean(data[data$g == "M", ]$w)
# ... or select rows and column in a single indexing step
mean(data[data$g == "F", "w"])
mean(data[data$g == "M", "w"])

# You can select each variable:
data$g

# You can select a unique value (row 1, column 2):
data[1, 2]

# You can change a value:
data[1, 1] <- 30
data

# 3) The dataframes already available in R or in a database package ----

library(help = "datasets")
cars <- mtcars
class(cars)
chicken <- chickwts
class(chicken)
quake <- quakes
class(quake)

# Install the Ecdat package with the command install.packages("Ecdat");
# the check avoids reinstalling it when it is already available
if (!requireNamespace("Ecdat", quietly = TRUE)) {
  install.packages("Ecdat")
}

# Load a package in the workspace:
library("Ecdat")
library(help = "Ecdat")
doctor <- DoctorAUS
class(doctor)