Reduce glmer model size

I'm new to R. I use glmer to fit several binomial models, and I need the fitted models only so that I can call predict and use the resulting probabilities. However, my dataset is very large, and even a single model object becomes extremely large:

> library(pryr)
> object_size(mod)
701 MB

The size of the model coefficients pales in comparison:

> object_size(coef(mod))
1.16 MB

The same goes for the size of the fitted values:

> object_size(fitted(mod))
25.6 MB

First of all, I do not understand why the model object is so large. It looks like it contains the original data frame used to fit the model, but even that does not account for the size. Why is it so huge?

-, , ? , ? , glm http://blog.yhathq.com/posts/reducing-your-r-memory-footprint-by-7000x.html, , - .

.

Edit:

:

> object_size(getME(mod, "X"))
205 MB
> object_size(getME(mod, "Z"))
36.9 MB
> object_size(getME(mod, "Zt"))
38.4 MB
> object_size(getME(mod, "Ztlist"))
41.6 MB
> object_size(getME(mod, "mmList"))
38.4 MB
> object_size(getME(mod, "y"))
3.2 MB
> object_size(getME(mod, "mu"))
3.2 MB
> object_size(getME(mod, "u"))
18.4 kB
> object_size(getME(mod, "b"))
19.5 kB
> object_size(getME(mod, "Gp"))
56 B
> object_size(getME(mod, "Tp"))
472 B
> object_size(getME(mod, "L"))
15.5 MB
> object_size(getME(mod, "Lambda"))
38.1 kB
> object_size(getME(mod, "Lambdat"))
38.1 kB
> object_size(getME(mod, "Lind"))
9.22 kB
> object_size(getME(mod, "Tlist"))
936 B
> object_size(getME(mod, "A"))
38.4 MB
> object_size(getME(mod, "RX"))
30.3 kB
> object_size(getME(mod, "RZX"))
1.05 MB
> object_size(getME(mod, "sigma"))
48 B
> object_size(getME(mod, "flist"))
4.89 MB
> object_size(getME(mod, "fixef"))
4.5 kB
> object_size(getME(mod, "beta"))
496 B
> object_size(getME(mod, "theta"))
472 B
> object_size(getME(mod, "ST"))
936 B
> object_size(getME(mod, "REML"))
48 B
> object_size(getME(mod, "is_REML"))
48 B
> object_size(getME(mod, "n_rtrms"))
48 B
> object_size(getME(mod, "n_rfacs"))
48 B
> object_size(getME(mod, "N"))
256 B
> object_size(getME(mod, "n"))
256 B
> object_size(getME(mod, "p"))
256 B
> object_size(getME(mod, "q"))
256 B
> object_size(getME(mod, "p_i"))
408 B
> object_size(getME(mod, "l_i"))
408 B
> object_size(getME(mod, "q_i"))
408 B
> object_size(getME(mod, "mod"))
48 B
> object_size(getME(mod, "m_i"))
424 B
> object_size(getME(mod, "m"))
48 B
> object_size(getME(mod, "cnms"))
624 B
> object_size(getME(mod, "devcomp"))
2.21 kB
> object_size(getME(mod, "offset"))
3.2 MB

> get_obj_size(mod@resp, "RC")
                       [,1]
family            673355488
initialize        673355488
initialize#lmResp 673355488
ptr               673355488
resDev            673355488
updateMu          673355488
updateWts         673355488
wrss              673355488
eta                 3196024
mu                  3196024
n                   3196024
offset              3196024
sqrtrwt             3196024
sqrtXwt             3196024
weights             3196024
wtres               3196024
y                   3196024
Ptr                      40
> get_obj_size(mod@pp, "RC")
                   [,1]
beta          449419408
initialize    449419408
initializePtr 449419408
ldL2          449419408
ldRX2         449419408
linPred       449419408
ptr           449419408
setTheta      449419408
sqrL          449419408
u             449419408
X             204549128
V             182171288
Ut             38448168
Zt             38448168
LamtUt         38353248
Xwts            3196024
RZX             1047176
Lambdat           38136
VtV               26192
delu              18408
u0                18408
Utr               18408
Lind               9224
beta0               496
delb                496
Vtr                 496
theta                72
Ptr                  40
+4
2

:

## Attach the modelling and the memory-measurement packages up front.
library("lme4")
library("pryr")
## Small reproducible example: binomial GLMM on the cbpp data with a
## random intercept per herd.
gm1 <- glmer(
  cbind(incidence, size - incidence) ~ period + (1 | herd),
  family = binomial,
  data = cbpp
)
object_size(gm1)  ## 505 kB

Steve Walker S3/S4/Reference class :

## Report the size of every component of an S4 or Reference-class object.
##
## obj:  the object to inspect; its components are listed with
##       slotNames(obj) when type = "S4" and with ls(obj) when type = "RC".
## type: "S4" (default) or "RC"; any other value is an error.
##
## Returns a one-column matrix with one row per component (row names are
## the component names) holding the value of object_size() for that
## component, sorted from largest to smallest.
get_obj_size <- function(obj, type = "S4") {
  ## Reject unsupported types up front: otherwise both switch() calls
  ## below silently yield NULL and the failure surfaces later as an
  ## unrelated-looking error.
  type <- match.arg(type, c("S4", "RC"))
  fields <- switch(type,
    S4 = slotNames(obj),
    RC = ls(obj)
  )
  get_field <- switch(type,
    S4 = function(x) slot(obj, x),
    RC = function(x) obj[[x]]
  )
  field_list <- setNames(lapply(fields, get_field), fields)
  ## vapply() always returns a numeric vector, even when there are no
  ## components at all (sapply() would return list() and break sort()).
  sizes <- vapply(field_list, object_size, numeric(1))
  cbind(sort(sizes, decreasing = TRUE))
}
get_obj_size(gm1)
##           [,1]
## resp    356620  ## 'response module'
## pp      355420  ## 'predictor module'
## frame     6640
## optinfo   1748
## devcomp   1424
## call      1244
## flist     1232
## cnms       224
## u          152
## beta        56
## Gp          32
## lower       32
## theta       32

, , / , /, .

, , , , , ...

get_obj_size(gm1@resp,"RC")
##                     [,1]
## initialize        356620
## initialize#lmResp 356620
## ptr               356620
## resDev            356620
## setOffset         356620
## updateMu          356620
## updateWts         356620
## wrss              356620
## family             26016
## eta                  472
## mu                   472
## n                    472
## offset               472
## sqrtrwt              472
## sqrtXwt              472
## weights              472
## wtres                472
## y                    472
## Ptr                   20

, , - object_size(getME(model,component)) , eval(formals(getME)$name); , , , (, ).

, , , , / (note), lme4 Github: predict, ).

## Return a copy of a fitted model `object` whose bulky parts are replaced
## by minimal stand-ins:
##   * @frame becomes the model frame with zero rows;
##   * @pp becomes a fresh "merPredD" built from the original module's
##     Lambdat, Lind, theta, u, u0 and Zt, with X swapped for a single
##     column of ones that has the same number of rows;
##   * @resp becomes a fresh "glmResp" with a binomial() family and an
##     empty response vector.
## NOTE(review): the family is hard-coded to binomial(), so this
## presumably only suits binomial fits -- confirm before using elsewhere.
glmer_chop <- function(object) {
    ## Build the reduced predictor module first; names are resolved inside
    ## the original module via with().
    slim_pp <- with(object@pp,
                    new("merPredD",
                        Lambdat = Lambdat,
                        Lind = Lind,
                        theta = theta,
                        u = u, u0 = u0,
                        n = nrow(X),
                        X = matrix(1, nrow = nrow(X)),
                        Zt = Zt))
    ## (the original author left .sparseDiagonal(n, shape = "g") as a
    ## commented-out alternative for Zt)
    slim <- object
    slim@frame <- model.frame(object)[0, ]
    slim@pp <- slim_pp
    slim@resp <- new("glmResp", family = binomial(), y = numeric(0))
    slim
}
## Demonstration: fit a model, chop it, and compare object sizes.
## NOTE(review): `Contraception` is not loaded anywhere in this snippet;
## presumably it comes from the mlmRev package -- confirm.
fm1 <- glmer(use ~ urban + age + livch + (1 | district),
             data = Contraception, family = binomial)
object_size(Contraception)  ## 133 kB
object_size(fm1)  ## 1.05 MB
object_size(fm2 <- glmer_chop(fm1))  ## 699 kB
get_obj_size(fm2)          ## 'pp' is 547200 bytes
get_obj_size(fm2@pp, "RC")  ## 'initialize' object is 547200
## Look inside the environment of the 'initialize' method. This line
## originally came before fm2 was created, which fails in a fresh session.
get_obj_size(environment(fm2@pp$initialize), "RC")
## Round-trip through an .rds file to see the size after serialization.
saveRDS(fm2, file = "tmp.rds")
fm2 <- readRDS("tmp.rds")
object_size(fm2)  ## 796 kB
## Drop the full model, then check that prediction still works on the
## chopped one.
rm(fm1)
pp <- predict(fm2, newdata = Contraception)
object_size(fm2) ## still 796K; no sharing

, compare_size(fm2) , , ( , compare_size/object.size ...)

+3

? , , , . - :

## Fit and predict in a single expression: the fitted model is never
## assigned to a name, only the predictions on the response scale are kept.
## NOTE(review): `y ~ x` looks schematic -- it names no data argument and no
## random-effects term, which glmer presumably requires; confirm and adapt.
predictions <- predict(glmer(y ~ x, family = binomial), type = "response")
0
source

All Articles