2017-08-02 20:41:47
apply家族函数Reduce、Filter等也都是泛函
把传统的循环结构
for (x in xs) fun(x)
转化为更紧凑的形式
lapply(xs, fun)

moveN <- function(x, n, fun=mean, ...){
stopifnot(length(x)>n)
o <- sapply(n:(length(x)),
function(i) fun(x[(i-n+1):i]), ...)
c(rep(NA, n-1), o)
}
library(xts); data(sample_matrix)
mvAvg <- sapply(
c(5, 10, 30, 60), function(n)
moveN(sample_matrix[,4], n, mean))
dimnames(mvAvg) <- list(
dimnames(sample_matrix)[[1]],
c("d5", "d10", "d30", "d60"))plot(as.xts(sample_matrix[,'Close']))
for (i in 1:4) print(
lines(as.xts(mvAvg[,i]), col=i+1))
legend('bottomleft',
legend=colnames(mvAvg),
text.col=2:5)

apply家族函数进行并行计算扩充diamonds数据集,然后建模
library(ggplot2)
diamonds1 <- as.data.frame(do.call("rbind",
rep(list(diamonds), 100))) ## "349.8 Mb"
df.lst <- split(diamonds1, diamonds1$clarity)
models <- lapply(df.lst, function(df)
lm(price~carat, df))
(结果略。)
按"p"查看传统方法和并行方法的耗时对比
lapply的并行版mclapply (Linux/OSX) 或parLapply (Windows) 优化library(parallel)
cl <- makeCluster(detectCores()) # 创建集群
models <- parLapply(cl, df.lst, function(df)
lm(price~carat, df))
sapply(models, function(m) coef(m)[[2]])
I1 SI2 SI1 VS2
4209.793 7870.255 8090.348 8626.157
VS1 VVS2 VVS1 IF
9000.726 10077.188 10469.250 11625.441
stopCluster(cl) # 最后记得关闭集群
plyr包| in\out | list | data frame | array |
|---|---|---|---|
| list | lapply) |
sapply() |
|
| data frame | by() |
||
| array | apply() |
| in\out | list | data frame | array |
|---|---|---|---|
| list | llply() |
ldply() |
laply() |
| data frame | dlply() |
ddply() |
daply() |
| array | alply() |
adply() |
aaply() |
integrate(): f()函数的曲线下面积uniroot(): f()何时取值为零optimise(): f()取最大/小值的位置integrate(dnorm, -1.96, 1.96)
## 0.9500042 with absolute error < 1e-11
uniroot(function(x) cos(x) - x, lower = -pi,
upper = pi, tol = 1e-9)$root
## [1] 0.7390851
unlist(optimise(cos, c(0, 2 * pi)))
## minimum objective ## 3.141593 -1.000000
unlist(optimise(sin, c(0, 2 * pi),
maximum=TRUE))
## maximum objective ## 1.570794 1.000000
(按"p"看代码)




Hadley Wickham的一个例子: 基于核心工作函数,通过泛函繁衍出整个家族
rm_na <- function(x, y, identity) {
if (is.na(x) && is.na(y)) {
identity
} else if (is.na(x)) {
y
} else {
x
}
}
add <- function(x, y, na.rm = FALSE) {
if (na.rm && (is.na(x) || is.na(y)))
rm_na(x, y, 0)
else x + y
}r_add <- function(xs, na.rm = TRUE) {
Reduce(function(x, y)
add(x, y, na.rm = na.rm), xs, init = 0)
} # r_add 等价于sum()
c_add <- function(xs, na.rm = FALSE) {
Reduce(function(x, y)
add(x, y, na.rm = na.rm), xs,
accumulate = TRUE)
} # c_add 等价于cumsum()
row_sum <- function(x, na.rm = FALSE) {
apply(x, 1, r_add, na.rm = na.rm)
} # row_sum等价于rowSums()
col_sum <- function(x, na.rm = FALSE) {
apply(x, 2, r_add, na.rm = na.rm)
} # col_sum等价于colSums()Negate(), failwith()capture_it(), time_it()partialignore <- function(...) NULL
# 构造行为FO tee
tee <- function(fun, on_input = ignore,
on_output = ignore) {
function(...) {
on_input(...)
output <- fun(...)
on_output(output)
output
}
}
# 求解函数g的单位根
g <- function(x) cos(x) - x
zero <- uniroot(g, c(-5, 5))
show_x <- function(x, ...)
cat(sprintf("%+.08f", x), "\n")
zero$root ## 0.739
# 函数求解的位置
zero <- uniroot(tee(
g, on_input = show_x), c(-5, 5))
## -5.00000000 +5.00000000 +0.28366219 ## +0.87520341 +0.72298040 +0.73863091 ## +0.73908529 +0.73902425 +0.73908529
# 函数的值
zero <- uniroot(tee(
g, on_output = show_x), c(-5, 5))
## +5.28366219 -4.71633781 +0.67637474 ## -0.23436269 +0.02685676 +0.00076012 ## -0.00000026 +0.00010189 -0.00000026
(按p看notes)
delay_by <- function(sec.delay, f) {
function(...) {
Sys.sleep(sec.delay)
f(...)
}
}
delayed_sample <- delay_by(1, sample)
sapply(1:2, function(size) delayed_sample(1:10, size))
[[1]] [1] 5 [[2]] [1] 2 4
system.time(sapply(1:2, function(size) delayed_sample(1:10, size)))
user system elapsed 0.00 0.03 2.14
lst <- list(a=1:2, b=list(c(3, 4)),
c=NULL,NULL)
lst[!is.null(lst)]清除NULLrmNulls <- function(x)
Filter(Negate(is.null), x)
rmNulls(lst)
$a [1] 1 2 $b $b[[1]] [1] 3 4
使用泛函时,一处出错,则整体退出
sapply(list(1, pi, 'a'), log)
Error in FUN(X[[i]], ...) : non-numeric argument to mathematical function
用failwith可以增加容错性
sapply(list(1, pi, 'a'), plyr::failwith(
NA, log))
Error in f(...) : non-numeric argument to mathematical function [1] 0.00000 1.14473 NA
capture_it <- function(f) {
function(...)
capture.output(f(...))
}
str_it <- capture_it(str) str_it(Titanic)[3]
[1] " table [1:4, 1:2, 1:2, 1:2]
0 0 35 0 0 0 17 0 118 154 ..."
[2] " - attr(*, \"dimnames\")=List of 4"
[3] " ..$ Class :
chr [1:4] \"1st\" \"2nd\" \"3rd\" \"Crew\""
[4] " ..$ Sex :
chr [1:2] \"Male\" \"Female\""
[5] " ..$ Age :
chr [1:2] \"Child\" \"Adult\""
[6] " ..$ Survived:
chr [1:2] \"No\" \"Yes\""time_it <- function(f) {
function(...) system.time(f(...))
}
compute_mean <- list( base = function(x) mean(x), sum = function(x) sum(x) / length(x) ) x <- runif(1e6) call_fun <- function(f, ...) f(...) lapply(compute_mean, time_it(call_fun), x)
## $base ## user system elapsed ## 0.000 0.000 0.002 ## ## $sum ## user system elapsed ## 0.000 0.000 0.001
pryr::partialrmNulls <- partial(Filter, Negate(is.null)) mean2 <- partial(mean, na.rm=TRUE)
splat <- function(f) {
function(args) do.call(f, args)
}
x <- c(NA, runif(100), 1000) args <- list( list(x), list(x, na.rm = TRUE), list(x, na.rm = TRUE, trim = 0.1) ) sapply(args, splat(mean))
[1] NA 10.4307917 0.5433686
pryr::compose或pryr::%.%library(pryr) sapply(mtcars, compose(length, unique))
mpg cyl disp hp drat wt qsec vs am gear carb 25 3 27 22 22 29 30 2 2 3 6
square <- function(x) x^2 deviation <- function(x) x - mean(x) sd2 <- sqrt %.% mean %.% square %.% deviation sd2(1:10) # 同 1:10 %>% deviation %>% square %>% mean %>% sqrt
## [1] 2.872281
Thank you!