2017-08-02 20:41:47
apply
家族函数Reduce
、Filter
等也都是泛函把传统的循环结构
for (x in xs) fun(x)
转化为更紧凑的形式
lapply(xs, fun)
moveN <- function(x, n, fun=mean, ...){ stopifnot(length(x)>n) o <- sapply(n:(length(x)), function(i) fun(x[(i-n+1):i]), ...) c(rep(NA, n-1), o) } library(xts); data(sample_matrix) mvAvg <- sapply( c(5, 10, 30, 60), function(n) moveN(sample_matrix[,4], n, mean)) dimnames(mvAvg) <- list( dimnames(sample_matrix)[[1]], c("d5", "d10", "d30", "d60"))
plot(as.xts(sample_matrix[,'Close'])) for (i in 1:4) print( lines(as.xts(mvAvg[,i]), col=i+1)) legend('bottomleft', legend=colnames(mvAvg), text.col=2:5)
apply
家族函数进行并行计算扩充diamonds数据集,然后建模
library(ggplot2) diamonds1 <- as.data.frame(do.call("rbind", rep(list(diamonds), 100))) ## "349.8 Mb" df.lst <- split(diamonds1, diamonds1$clarity) models <- lapply(df.lst, function(df) lm(price~carat, df))
(结果略。)
按"p"查看传统方法和并行方法的耗时对比
lapply
的并行版mclapply
(Linux/OSX) 或parLapply
(Windows) 优化library(parallel) cl <- makeCluster(detectCores()) # 创建集群 models <- parLapply(cl, df.lst, function(df) lm(price~carat, df)) sapply(models, function(m) coef(m)[[2]])
I1 SI2 SI1 VS2 4209.793 7870.255 8090.348 8626.157 VS1 VVS2 VVS1 IF 9000.726 10077.188 10469.250 11625.441
stopCluster(cl) # 最后记得关闭集群
plyr
包in\out | list | data frame | array |
---|---|---|---|
list | lapply) |
sapply() |
|
data frame | by() |
||
array | apply() |
in\out | list | data frame | array |
---|---|---|---|
list | llply() |
ldply() |
laply() |
data frame | dlply() |
ddply() |
daply() |
array | alply() |
adply() |
aaply() |
integrate()
: f()函数的曲线下面积uniroot()
: f()何时取值为零optimise()
: f()取最大/小值的位置integrate(dnorm, -1.96, 1.96)
## 0.9500042 with absolute error < 1e-11
uniroot(function(x) cos(x) - x, lower = -pi, upper = pi, tol = 1e-9)$root
## [1] 0.7390851
unlist(optimise(cos, c(0, 2 * pi)))
## minimum objective ## 3.141593 -1.000000
unlist(optimise(sin, c(0, 2 * pi), maximum=TRUE))
## maximum objective ## 1.570794 1.000000
(按"p"看代码)
Hadley Wickham的一个例子: 基于核心工作函数,通过泛函繁衍出整个家族
rm_na <- function(x, y, identity) { if (is.na(x) && is.na(y)) { identity } else if (is.na(x)) { y } else { x } }
add <- function(x, y, na.rm = FALSE) { if (na.rm && (is.na(x) || is.na(y))) rm_na(x, y, 0) else x + y }
r_add <- function(xs, na.rm = TRUE) { Reduce(function(x, y) add(x, y, na.rm = na.rm), xs, init = 0) } # r_add 等价于sum()
c_add <- function(xs, na.rm = FALSE) { Reduce(function(x, y) add(x, y, na.rm = na.rm), xs, accumulate = TRUE) } # c_add 等价于cumsum()
row_sum <- function(x, na.rm = FALSE) { apply(x, 1, r_add, na.rm = na.rm) } # row_sum等价于rowSums()
col_sum <- function(x, na.rm = FALSE) { apply(x, 2, r_add, na.rm = na.rm) } # col_sum等价于colSums()
Negate()
, failwith()
capture_it()
, time_it()
partial
ignore <- function(...) NULL # 构造行为FO tee tee <- function(fun, on_input = ignore, on_output = ignore) { function(...) { on_input(...) output <- fun(...) on_output(output) output } }
# 求解函数g的单位根 g <- function(x) cos(x) - x zero <- uniroot(g, c(-5, 5)) show_x <- function(x, ...) cat(sprintf("%+.08f", x), "\n")
zero$root ## 0.739
# 函数求解的位置 zero <- uniroot(tee( g, on_input = show_x), c(-5, 5))
## -5.00000000 +5.00000000 +0.28366219 ## +0.87520341 +0.72298040 +0.73863091 ## +0.73908529 +0.73902425 +0.73908529
# 函数的值 zero <- uniroot(tee( g, on_output = show_x), c(-5, 5))
## +5.28366219 -4.71633781 +0.67637474 ## -0.23436269 +0.02685676 +0.00076012 ## -0.00000026 +0.00010189 -0.00000026
(按p看notes)
delay_by <- function(sec.delay, f) { function(...) { Sys.sleep(sec.delay) f(...) } } delayed_sample <- delay_by(1, sample) sapply(1:2, function(size) delayed_sample(1:10, size))
[[1]] [1] 5 [[2]] [1] 2 4
system.time(sapply(1:2, function(size) delayed_sample(1:10, size)))
user system elapsed 0.00 0.03 2.14
lst <- list(a=1:2, b=list(c(3, 4)), c=NULL,NULL)
lst[!is.null(lst)]
清除NULLrmNulls <- function(x) Filter(Negate(is.null), x) rmNulls(lst)
$a [1] 1 2 $b $b[[1]] [1] 3 4
使用泛函时,一处出错,则整体退出
sapply(list(1, pi, 'a'), log)
Error in FUN(X[[i]], ...) : non-numeric argument to mathematical function
用failwith
可以增加容错性
sapply(list(1, pi, 'a'), plyr::failwith( NA, log))
Error in f(...) : non-numeric argument to mathematical function [1] 0.00000 1.14473 NA
capture_it <- function(f) { function(...) capture.output(f(...)) }
str_it <- capture_it(str) str_it(Titanic)[3]
[1] " table [1:4, 1:2, 1:2, 1:2] 0 0 35 0 0 0 17 0 118 154 ..." [2] " - attr(*, \"dimnames\")=List of 4" [3] " ..$ Class : chr [1:4] \"1st\" \"2nd\" \"3rd\" \"Crew\"" [4] " ..$ Sex : chr [1:2] \"Male\" \"Female\"" [5] " ..$ Age : chr [1:2] \"Child\" \"Adult\"" [6] " ..$ Survived: chr [1:2] \"No\" \"Yes\""
time_it <- function(f) { function(...) system.time(f(...)) }
compute_mean <- list( base = function(x) mean(x), sum = function(x) sum(x) / length(x) ) x <- runif(1e6) call_fun <- function(f, ...) f(...) lapply(compute_mean, time_it(call_fun), x)
## $base ## user system elapsed ## 0.000 0.000 0.002 ## ## $sum ## user system elapsed ## 0.000 0.000 0.001
pryr::partial
rmNulls <- partial(Filter, Negate(is.null)) mean2 <- partial(mean, na.rm=TRUE)
splat <- function(f) { function(args) do.call(f, args) }
x <- c(NA, runif(100), 1000) args <- list( list(x), list(x, na.rm = TRUE), list(x, na.rm = TRUE, trim = 0.1) ) sapply(args, splat(mean))
[1] NA 10.4307917 0.5433686
pryr::compose
或pryr::%.%
library(pryr) sapply(mtcars, compose(length, unique))
mpg cyl disp hp drat wt qsec vs am gear carb 25 3 27 22 22 29 30 2 2 3 6
square <- function(x) x^2 deviation <- function(x) x - mean(x) sd2 <- sqrt %.% mean %.% square %.% deviation sd2(1:10) # 同 1:10 %>% deviation %>% square %>% mean %>% sqrt
## [1] 2.872281
Thank you!