Data Input
Assign Values to Variables [source]
variables
- assigned with <- or ->
- = is also possible but discouraged [example]
- case sensitive, aa != AA != aA != Aa
numbers
a1<-7
a2<-"andre agassi"
vectors
b1<-c(1,2,3,4,5,6)
b2<-1:10
b3<-c("andre","agassi","was","the","best","tennis","player","ever")
access:
b1[5] # fifth element
b2[1:7] # elements 1..7
c(b3[1:2],b3[4:8]) # elements 1 and 2 as well as 4..8
b3[-3] # same result: all elements except the third
########################################################################
useful ways to create/fill vectors
repetition
aa1<-rep(4,23)
aa2<-c(1,2,3,rep(4,23),5,6,7)
aa3<-rep(c("small","large","small","large"),c(6,6,6,6))
aa4<-rep(c("small","large"),each=1,length.out=24)
sequences of values
aa5<-seq(from=1,to=10,by=2)
aa6<-seq(from=1,to=10,length.out=100)
sampling data
aa7<-sample(1:10)
aa8<-sample(1:10,30,replace=T)
random deviates/values
aa9<-rnorm(10,mean=5,sd=1)
aa10<-runif(10,min=0,max=1)
aa11<-.Random.seed[1:10]
########################################################################
matrices
- a matrix is 2 dimensional [nrow:ncol]
- all elements are of the same data type
- use data frames for different data types (same type per column)
- use tables for higher dimension arrays
c1<-matrix(1:10,nrow=2)
c2<-matrix(1:10,ncol=2)
c3<-matrix(1:10,nrow=2,byrow=TRUE) # byrow - logical, default is FALSE (fill by column)
c4<-matrix(1:10,ncol=2,byrow=T)
access:
c1[1,5]
c1[1,]
c1[,2]
c1[1,2:4]
Read Data from Files
- text files rule! [source]
- the separator is usually a tab, comma, or semicolon
- use scan() for large files (e.g. when you only want a portion of the file)
- read.table() reads data as table and converts it to data frame
- read.csv() is basically the same
mydata1<-read.table("input1.dat",sep="\t",header=F)
- column names possible [source]
- addressable by var$colname
mydata2<-read.table("input2.dat",sep="\t",header=T)
mydata2$id
mydata2$val_A
mydata2$val_B
NA Values
d1<-c(1:3,rep(NA,4),8:10)
mean(d1)
mean(d1,na.rm=T)
median(d1,na.rm=T)
Get Data from Database
- make sure you have the proper module installed and loaded
- this example uses a MySQL server on mln-web.cs.odu.edu [source]
library(RMySQL)
mydbcon<-dbConnect(MySQL(), user, password, dbname, host)
mydbdata<-dbGetQuery(mydbcon, "SELECT * FROM table")
dbDisconnect(mydbcon)