2017-01-15 13:57:04
which()
> str(euro)
 Named num [1:11] 13.76 40.34 1.96 166.39 5.95 ...
 - attr(*, "names")= chr [1:11] "ATS" "BEF" "DEM" "ESP" ...
> which(euro > 100) # 返回索引代称和索引序号
ESP ITL PTE
4 8 11
> euro[c(3, 6)] # 某条街道euro,3号和6号住的是谁?
DEM FRF
1.95583 6.55957 # 该地址住户为名为"DEM"的1.96和名为"FRF"的6.56
> euro['ITL'] # 此街道1-11号为ATS, BEF等大厦。ITL大厦的门牌8号
ITL
1936.27 # 返回该地址住户
有n维,就要有n-1个逗号
二维数组(如data.frame): data.frame[row.indices, col.indices]
> mtcars['Mazda RX4', 'mpg'] [1] 21 > mtcars[1:3, 1] [1] 21.0 21.0 22.8
array[indices on dim 1, indices on dim 2, ...]> str(Titanic) table [1:4, 1:2, 1:2, 1:2] 0 0 35 0 0 0 17 0 118 154 ... - attr(*, "dimnames")=List of 4 ..$ Class : chr [1:4] "1st" "2nd" "3rd" "Crew" ..$ Sex : chr [1:2] "Male" "Female" ..$ Age : chr [1:2] "Child" "Adult" ..$ Survived: chr [1:2] "No" "Yes" > Titanic['1st', 'Male', 'Adult', 'Yes'] # 四个维度都作唯一索引 ==> 单个元素 [1] 57
> euro[euro > 100]
ESP ITL PTE
166.386 1936.270 200.482
> mtcars[mtcars$mpg > 30, ]
mpg cyl disp hp drat wt qsec vs am gear carb
Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
split
split(x, f, drop = FALSE, ...)
- x: 向量或数据框 - f: 分组因子,将x按f因子分组切割。要与x等长。 - drop: 逻辑值,分割时找不到的因子水平是否被弃去
> split(1:10, rep(c(1,2), 5)) $`1` [1] 1 3 5 7 9 $`2` [1] 2 4 6 8 10 > str(split(morley, morley$Expt))
rbind
rbind纵向合并
> rbind(data.frame(A=c(1, 3), B=c(2, 4)),
data.frame(A=c(5, 7), B=c(6, 8)))
A B
1 1 2
2 3 4
3 5 6
4 7 8
data.table包的rbind> library(data.table) > DT1 = data.table(A=1:2, B=letters[1:2]) > DT2 = data.table(B=letters[4:5], A=4:5, C=c(TRUE, FALSE)) > rbind(DT1, DT2, use.names=TRUE, fill=TRUE) A B C 1: 1 a NA 2: 2 b NA 3: 4 d TRUE 4: 5 e FALSE
dplyr包的bind_rows> library(dplyr) > DT1 = tbl_df(data.frame(A=1:2, B=letters[1:2])) > DT2 = tbl_df(data.frame(B=letters[4:5], A=4:5, C=c(TRUE, FALSE))) > bind_rows(list(DT1, DT2), .id="from") # A tibble: 4 × 4 from A B C <chr> <int> <chr> <lgl> 1 1 1 a NA 2 1 2 b NA 3 2 4 d TRUE 4 2 5 e FALSE
cbind
cbind横向合并
> cbind(data.frame(A=c(1, 3), B=c(2, 4)),
data.frame(A=c(5, 7), B=c(6, 8)))
A B A B
1 1 2 5 6
2 3 4 7 8
data.table包的merge
merge匹配后合并
> d1 <- data.table(a=rep(1:2, each=3), b=1:6, key="a,b")
> d2 <- data.table(a=0:1, b=0:1, bb=10:11, key="a,b")
> merge(d1, d2, by=c('a', 'b'), all=TRUE)
a b bb
1: 0 0 10
2: 1 1 11
3: 1 2 NA
4: 1 3 NA
5: 2 4 NA
6: 2 5 NA
7: 2 6 NA
dplyr包的join家族(inner_join, left_join, right_join, full_join …)
> library(dplyr)
> DT1 = tbl_df(data.frame(a=rep(1:2, each=3), b=1:6))
> DT2 = tbl_df(data.frame(a=0:1, b=0:1, bb=10:11))
> inner_join(DT1, DT2, by=c("a", "b"))
# A tibble: 1 × 3
a b bb
<int> <int> <int>
1 1 1 11
> left_join(DT1, DT2, by=c("a", "b"))
# A tibble: 6 × 3
a b bb
<int> <int> <int>
1 1 1 11
2 1 2 NA
3 1 3 NA
4 2 4 NA
5 2 5 NA
6 2 6 NA
sort语法
sort(x, decreasing = FALSE, na.last = NA, ...)
示例
> sort(BOD$demand, decreasing = TRUE) # 逆序 [1] 19.8 19.0 16.0 15.6 10.3 8.3
只能用于向量或因子,不能对数据框排序
order语法
order(..., na.last = TRUE, decreasing = FALSE, method = c("shell", "radix"))
示例
> order(BOD$demand, decreasing = TRUE) # 逆序
[1] 6 3 4 5 2 1
> BOD[order(BOD$demand, decreasing=TRUE), ]
Time demand
6 7 19.8
3 3 19.0
4 4 16.0
5 5 15.6
2 2 10.3
1 1 8.3
data.table::setorder
setorder(x, ..., na.last=FALSE)
setorderv(x, cols, order=1L, na.last=FALSE)
> DT <- data.table( + A=sample(3, 4, TRUE), + B=sample(letters[1:3], 4, TRUE), + C=sample(4)) > DT A B C 1: 2 b 1 2: 2 a 4 3: 3 b 3 4: 1 c 2
> setorder(DT, A, -B) > DT A B C 1: 1 c 2 2: 2 b 1 3: 2 a 4 4: 3 b 3
setorderv(DT, c("A", "B"), c(1, -1))
增加一行,直接录值
> BOD[nrow(BOD)+1, ] <- c(8, 11) > BOD Time demand 1 1 8.3 2 2 10.3 3 3 19.0 4 4 16.0 5 5 15.6 6 7 19.8 7 8 11.0
搭个新数据框,rbind进旧数据框
切片取
> BOD <- BOD[1:6, ]
切片弃:
> BOD <- BOD[-7, ] > BOD Time demand 1 1 8.3 2 2 10.3 3 3 19.0 4 4 16.0 5 5 15.6 6 7 19.8
data.table::rbindlist合并list(Df1, Df.new, Df2)> newBOD <- data.frame(Time=6, demand=15) > BOD <- data.table::rbindlist(list( + BOD[1:5, ], newBOD, BOD[6, ]), use.names=TRUE) > BOD Time demand 1: 1 8.3 2: 2 10.3 3: 3 19.0 4: 4 16.0 5: 5 15.6 6: 6 15.0 7: 7 19.8
> index <- c(1, 2, 5, 4, 3, 6) > BOD <- BOD[index, ] > BOD Time demand 1: 1 8.3 2: 2 10.3 3: 5 15.6 4: 4 16.0 5: 3 19.0 6: 7 19.8
直接录值
> BOD$newCol <- TRUE > BOD Time demand newCol 1: 1 8.3 TRUE 2: 2 10.3 TRUE 3: 3 19.0 TRUE 4: 4 16.0 TRUE 5: 5 15.6 TRUE 6: 7 19.8 TRUE
搭个新数据框,merge或cbind进旧数据框
切片取:
> BOD <- BOD[, c("Time", "demand")]
切片弃:
BOD <- BOD[, -3]
设为NULL:
BOD$newCol <- NULL
do.call("cbind")合并Df1, 插入的数据框, Df2> newDf <- data.frame(newCol=TRUE)
> do.call('cbind', list(BOD[,1], newDf, BOD[,2]))
Time newCol demand
1: 1 TRUE 8.3
2: 2 TRUE 10.3
3: 3 TRUE 19.0
4: 4 TRUE 16.0
5: 5 TRUE 15.6
6: 7 TRUE 19.8
> newCol <- c("demand", "Time")
> BOD <- BOD[, newCol, with=FALSE]
> BOD
demand Time
1: 8.3 1
2: 10.3 2
3: 19.0 3
4: 16.0 4
5: 15.6 5
6: 19.8 7
Thank you!