2017-01-15 13:58:52
> system.time(1:999999 + 1) # 向量化计算
user system elapsed
0.00 0.02 0.01
> system.time(sapply(1:999999, function(i) i + 1)) # 隐式循环
user system elapsed
0.87 0.01 0.89
> system.time({v <- vector(length=999999)
+ for (i in 1:999999) v[i] <- i + 1}) # 显式循环
user system elapsed
1.08 0.00 1.08
apply家族成员
apply函数族是R菜鸟和入门的分野
lapply语法 (list-apply)
lapply(X, FUN, ...) # 返回一个list
示例
> lst <- split(mtcars$mpg, mtcars$gear) # 按gear (3|4|5) 分拆成含三个分量的list
> lapply(lst, mean) # 对这三个分量分别应用函数mean
$`3`
[1] 16.10667

$`4`
[1] 24.53333

$`5`
[1] 21.38
sapply: lapply的扩展版语法 (simplified-apply)
sapply(X, FUN, ..., simplify = TRUE, USE.NAMES = TRUE) # 返回列表或向量
sapply是lapply的扩展版。simplify和USE.NAMES都为FALSE时,sapply与lapply等价。
示例
> lst <- split(mtcars$mpg, mtcars$gear)
> sapply(lst, mean)
3 4 5
16.10667 24.53333 21.38000
mapply: lapply的多参数版语法 (multiple-apply)
mapply(FUN, ..., MoreArgs = NULL, SIMPLIFY = TRUE, USE.NAMES = TRUE)
示例
> mapply(rep, times = 1:3, MoreArgs = list(x = 42))
[[1]]
[1] 42

[[2]]
[1] 42 42

[[3]]
[1] 42 42 42

> mapply(round, digits=0:5, list(x=pi))
[1] 3.00000 3.10000 3.14000 3.14200 3.14160 3.14159
vapply: sapply的可定制返回值版语法 (value-apply)
vapply(X, FUN, FUN.VALUE, ..., USE.NAMES = TRUE)
示例
> vapply(lst, fivenum, c(
+ Min.=0, "1st Qu."=0, Median=0, "3rd Qu."=0, Max.=0))
3 4 5
Min. 10.4 17.80 15.0
1st Qu. 14.5 21.00 15.8
Median 15.5 22.80 19.7
3rd Qu. 18.4 28.85 26.0
Max. 21.5 33.90 30.4
也可以sapply后再row.names()赋值
rapply: lapply的递归版语法 (recursive-apply)
rapply(object, f, classes = "ANY", deflt = NULL,
how = c("unlist", "replace", "list"), ...)
示例
> X <- list(list(a = pi, b = list(c = 1:1)), d = "a test")
> rapply(X, sqrt, classes = "numeric", how = "replace")
[[1]]
[[1]]$a
[1] 1.772454

[[1]]$b
[[1]]$b$c
[1] 1

$d
[1] "a test"
显式循环(for, while, …)都可以用隐式循环lapply或sapply替代
语法
apply(X, MARGIN, FUN, ...)
示例
> apply(mtcars, 2, mean)
mpg cyl disp hp drat wt qsec
20.090625 6.187500 230.721875 146.687500 3.596563 3.217250 17.848750
vs am gear carb
0.437500 0.406250 3.687500 2.812500
> apply(mtcars, 1, mean)
Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive
29.90727 29.98136 23.59818 38.73955
Hornet Sportabout Valiant Duster 360 Merc 240D
53.66455 35.04909 59.72000 24.63455
...
> apply(Titanic, 3, sum) # 第三维是Age
Child Adult
109 2092
> apply(Titanic, 4, sum) # 第四维是Survived
No Yes
1490 711
> apply(Titanic, c(1, 3), sum)
Age
Class Child Adult
1st 6 319
2nd 24 261
3rd 79 627
Crew 0 885
tapply函数语法 (table-apply)
tapply(X, INDEX, FUN = NULL, ..., simplify = TRUE)
示例
> str(warpbreaks)
'data.frame': 54 obs. of 3 variables:
$ breaks : num 26 30 54 25 70 52 51 26 67 18 ...
$ wool : Factor w/ 2 levels "A","B": 1 1 1 1 1 1 1 1 1 1 ...
$ tension: Factor w/ 3 levels "L","M","H": 1 1 1 1 1 1 1 1 1 2 ...
> tapply(warpbreaks$breaks, warpbreaks[,-1], sum)
tension
wool L M H
A 401 216 221
B 254 259 169
by函数语法
by(data, INDICES, FUN, ..., simplify = TRUE)
示例
> by(warpbreaks[, c(1,2)], warpbreaks[,"wool"], summary) ## 获得两个水平的小结列表
warpbreaks[, "wool"]: A
breaks wool
Min. :10.00 A:27
1st Qu.:19.50 B: 0
Median :26.00
Mean :31.04
3rd Qu.:36.00
Max. :70.00
----------------------------------------------------------------------------------------------------------
warpbreaks[, "wool"]: B
breaks wool
Min. :13.00 A: 0
1st Qu.:18.00 B:27
Median :24.00
Mean :25.26
3rd Qu.:29.00
Max. :44.00
aggregate: 一步完成分组、运算、整合
stats::aggregate
aggregate(x, ...)
aggregate(x, by, FUN, ..., simplify = TRUE, drop = TRUE)
aggregate(formula, data, FUN, ..., subset, na.action = na.omit)
aggregate(x, nfrequency = 1, FUN = sum, ndeltat = 1,
ts.eps = getOption("ts.eps"), ...)
aggregate.data.frame方法
by 必须是列表
FUN函数的补充参数可直接列在aggregate表达式里(…传入)
> aggregate(state.x77, list(Region = state.region), mean)
Region Population Income Illiteracy Life Exp Murder HS Grad Frost Area
1 Northeast 5495.111 4570.222 1.000000 71.26444 4.722222 53.96667 132.7778 18141.00
2 South 4208.125 4011.938 1.737500 69.70625 10.581250 44.34375 64.6250 54605.12
3 North Central 4803.000 4611.083 0.700000 71.76667 5.275000 54.51667 138.8333 62652.00
4 West 2915.308 4702.615 1.023077 71.23462 7.215385 62.00000 102.1538 134463.00
> aggregate(airquality[, 1:4], list(Month=airquality$Month), mean, na.rm=TRUE)
Month Ozone Solar.R Wind Temp
1 5 23.61538 181.2963 11.622581 65.54839
2 6 29.44444 190.1667 10.266667 79.10000
3 7 59.11538 216.4839 8.941935 83.90323
4 8 59.96154 171.8571 8.793548 83.96774
5 9 31.44828 167.4333 10.180000 76.90000
aggregate.formula方法
formula: ~连接的表达式,可以是一/多对一/多
subset: 子集向量
> aggregate(.~Month, data=airquality[, -6], mean, na.rm=TRUE)
Month Ozone Solar.R Wind Temp
1 5 24.12500 182.0417 11.504167 66.45833
2 6 29.44444 184.2222 12.177778 78.22222
3 7 59.11538 216.4231 8.523077 83.88462
4 8 60.00000 173.0870 8.860870 83.69565
5 9 31.44828 168.2069 10.075862 76.89655
> aggregate(cbind(Ozone, Temp) ~ Month, data = airquality, mean)
Month Ozone Temp
1 5 23.61538 66.73077
2 6 29.44444 78.22222
3 7 59.11538 83.88462
4 8 59.96154 83.96154
5 9 31.44828 76.89655
eapply函数语法 (environment-apply)
eapply(env, FUN, ..., all.names = FALSE, USE.NAMES = TRUE)
示例
> env <- new.env(hash = FALSE)
> env$a <- 1:10
> env$beta <- exp(-3:3)
> utils::ls.str(env)
a : int [1:10] 1 2 3 4 5 6 7 8 9 10
beta : num [1:7] 0.0498 0.1353 0.3679 1 2.7183 ...
> unlist(eapply(env, mean))
beta a
4.535125 5.500000
Thank you!