How to analyze Julia's memory allocation and code coverage results

I am writing a package for Bayesian inference using Gibbs sampling. Since these methods are usually computationally expensive, I am very concerned about the performance of my code. In fact, speed was the reason I switched from Python to Julia.

After implementing the Dirichlet process mixture model, I analyzed the code using Coverage.jl and the --track-allocation=user command-line option.
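
For context, the two sets of files below were produced roughly as follows (a sketch, not my exact setup: the script name and paths are placeholders, and the Coverage.jl helpers shown may differ slightly between versions):

  # Shell: run the code twice, once per instrumentation mode
  #   julia --code-coverage=user run_dpm.jl        # writes src/DPM.jl.cov on exit
  #   julia --track-allocation=user run_dpm.jl     # writes src/DPM.jl.mem on exit

  using Coverage

  # summarize the line coverage recorded in the .cov data for one source file
  covered, total = get_summary(process_file("src/DPM.jl"))
  println("covered $covered of $total lines")

  # collect the per-line byte counts recorded in the *.mem files under src/
  mallocs = analyze_malloc(["src"])
  println(mallocs)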

Here are the coverage results:

  - #=
  - DPM
  -
  - Dirichlet Process Mixture Models
  -
  - 25/08/2015
  - Adham Beyki, odinay@gmail.com
  -
  - =#
  -
  - type DPM{T}
  - bayesian_component::T
  - K::Int64
  - aa::Float64
  - a1::Float64
  - a2::Float64
  - K_hist::Vector{Int64}
  - K_zz_dict::Dict{Int64, Vector{Int64}}
  -
  - DPM{T}(c::T, K::Int64, aa::Float64, a1::Float64, a2::Float64) = new(c, K, aa, a1, a2,
  - Int64[], (Int64 => Vector{Int64})[])
  - end
  1 DPM{T}(c::T, K::Int64, aa::Real, a1::Real, a2::Real) = DPM{typeof(c)}(c, K, convert(Float64, aa),
  - convert(Float64, a1), convert(Float64, a2))
  -
  - function Base.show(io::IO, dpm::DPM)
  - println(io, "Dirichlet Mixture Model with $(dpm.K) $(typeof(dpm.bayesian_component)) components")
  - end
  -
  - function initialize_gibbs_sampler!(dpm::DPM, zz::Vector{Int64})
  - # populates the cluster labels randomly
  1 zz[:] = rand(1:dpm.K, length(zz))
  - end
  -
  - function DPM_sample_hyperparam(aa::Float64, a1::Float64, a2::Float64, K::Int64, NN::Int64, iters::Int64)
  -
  - # resampling concentration parameter based on Escobar and West 1995
  352 for n = 1:iters
  3504 eta = rand(Distributions.Beta(aa+1, NN))
  3504 rr = (a1+K-1) / (NN*(a2-log(NN)))
  3504 pi_eta = rr / (1+rr)
  -
  3504 if rand() < pi_eta
  0 aa = rand(Distributions.Gamma(a1+K, 1/(a2-log(eta))))
  - else
  3504 aa = rand(Distributions.Gamma(a1+K-1, 1/(a2-log(eta))))
  - end
  - end
  352 aa
  - end
  -
  - function DPM_sample_pp{T1, T2}(
  - bayesian_components::Vector{T1},
  - xx::T2,
  - nn::Vector{Float64},
  - pp::Vector{Float64},
  - aa::Float64)
  -
  1760000 K = length(nn)
  1760000 @inbounds for kk = 1:K
  11384379 pp[kk] = log(nn[kk]) + logpredictive(bayesian_components[kk], xx)
  - end
  1760000 pp[K+1] = log(aa) + logpredictive(bayesian_components[K+1], xx)
  1760000 normalize_pp!(pp, K+1)
  1760000 return sample(pp[1:K+1])
  - end
  -
  -
  - function collapsed_gibbs_sampler!{T1, T2}(
  - dpm::DPM{T1},
  - xx::Vector{T2},
  - zz::Vector{Int64},
  - n_burnins::Int64, n_lags::Int64, n_samples::Int64, n_internals::Int64; max_clusters::Int64=100)
  -
  -
  2 NN = length(xx) # number of data points
  2 nn = zeros(Float64, dpm.K) # count array
  2 n_iterations = n_burnins + (n_samples)*(n_lags+1)
  2 bayesian_components = [deepcopy(dpm.bayesian_component) for k = 1:dpm.K+1]
  2 dpm.K_hist = zeros(Int64, n_iterations)
  2 pp = zeros(Float64, max_clusters)
  -
  2 tic()
  2 for ii = 1:NN
  10000 kk = zz[ii]
  10000 additem(bayesian_components[kk], xx[ii])
  10000 nn[kk] += 1
  - end
  2 dpm.K_hist[1] = dpm.K
  2 elapsed_time = toq()
  -
  2 for iteration = 1:n_iterations
  -
  352 println("iteration: $iteration, KK: $(dpm.K), KK mode: $(indmax(hist(dpm.K_hist,
  - 0.5:maximum(dpm.K_hist)+0.5)[2])), elapsed time: $elapsed_time")
  -
  352 tic()
  352 @inbounds for ii = 1:NN
  1760000 kk = zz[ii]
  1760000 nn[kk] -= 1
  1760000 delitem(bayesian_components[kk], xx[ii])
  -
  - # remove the cluster if empty
  1760000 if nn[kk] == 0
  166 println("\tcomponent $kk has become inactive")
  166 splice!(nn, kk)
  166 splice!(bayesian_components, kk)
  166 dpm.K -= 1
  -
  - # shifting the labels one cluster back
  830166 idx = find(x -> x>kk, zz)
  166 zz[idx] -= 1
  - end
  -
  1760000 kk = DPM_sample_pp(bayesian_components, xx[ii], nn, pp, dpm.aa)
  -
  1760000 if kk == dpm.K+1
  171 println("\tcomponent $kk activated.")
  171 push!(bayesian_components, deepcopy(dpm.bayesian_component))
  171 push!(nn, 0)
  171 dpm.K += 1
  - end
  -
  1760000 zz[ii] = kk
  1760000 nn[kk] += 1
  1760000 additem(bayesian_components[kk], xx[ii])
  - end
  -
  352 dpm.aa = DPM_sample_hyperparam(dpm.aa, dpm.a1, dpm.a2, dpm.K, NN, n_internals)
  352 dpm.K_hist[iteration] = dpm.K
  352 dpm.K_zz_dict[dpm.K] = deepcopy(zz)
  352 elapsed_time = toq()
  - end
  - end
  -
  - function truncated_gibbs_sampler{T1, T2}(dpm::DPM{T1}, xx::Vector{T2}, zz::Vector{Int64},
  - n_burnins::Int64, n_lags::Int64, n_samples::Int64, n_internals::Int64, K_truncation::Int64)
  -
  - NN = length(xx) # number of data points
  - nn = zeros(Int64, K_truncation) # count array
  - bayesian_components = [deepcopy(dpm.bayesian_component) for k = 1:K_truncation]
  - n_iterations = n_burnins + (n_samples)*(n_lags+1)
  - dpm.K_hist = zeros(Int64, n_iterations)
  - states = (ASCIIString => Int64)[]
  - n_states = 0
  -
  - tic()
  - for ii = 1:NN
  - kk = zz[ii]
  - additem(bayesian_components[kk], xx[ii])
  - nn[kk] += 1
  - end
  - dpm.K_hist[1] = dpm.K
  -
  - # constructing the sticks
  - beta_VV = rand(Distributions.Beta(1.0, dpm.aa), K_truncation)
  - beta_VV[end] = 1.0
  - π = ones(Float64, K_truncation)
  - π[2:end] = 1 - beta_VV[1:K_truncation-1]
  - π = log(beta_VV) + log(cumprod(π))
  -
  - elapsed_time = toq()
  -
  - for iteration = 1:n_iterations
  -
  - println("iteration: $iteration, # active components: $(length(findn(nn)[1])), mode: $(indmax(hist(dpm.K_hist,
  - 0.5:maximum(dpm.K_hist)+0.5)[2])), elapsed time: $elapsed_time \n", nn)
  -
  - tic()
  - for ii = 1:NN
  - kk = zz[ii]
  - nn[kk] -= 1
  - delitem(bayesian_components[kk], xx[ii])
  -
  - # resampling label
  - pp = zeros(Float64, K_truncation)
  - for kk = 1:K_truncation
  - pp[kk] = π[kk] + logpredictive(bayesian_components[kk], xx[ii])
  - end
  - pp = exp(pp - maximum(pp))
  - pp /= sum(pp)
  -
  - # sample from pp
  - kk = sampleindex(pp)
  - zz[ii] = kk
  - nn[kk] += 1
  - additem(bayesian_components[kk], xx[ii])
  -
  - for kk = 1:K_truncation-1
  - gamma1 = 1 + nn[kk]
  - gamma2 = dpm.aa + sum(nn[kk+1:end])
  - beta_VV[kk] = rand(Distributions.Beta(gamma1, gamma2))
  - end
  - beta_VV[end] = 1.0
  - π = ones(Float64, K_truncation)
  - π[2:end] = 1 - beta_VV[1:K_truncation-1]
  - π = log(beta_VV) + log(cumprod(π))
  -
  - # resampling concentration parameter based on Escobar and West 1995
  - for internal_iters = 1:n_internals
  - eta = rand(Distributions.Beta(dpm.aa+1, NN))
  - rr = (dpm.a1+dpm.K-1) / (NN*(dpm.a2-log(NN)))
  - pi_eta = rr / (1+rr)
  -
  - if rand() < pi_eta
  - dpm.aa = rand(Distributions.Gamma(dpm.a1+dpm.K, 1/(dpm.a2-log(eta))))
  - else
  - dpm.aa = rand(Distributions.Gamma(dpm.a1+dpm.K-1, 1/(dpm.a2-log(eta))))
  - end
  - end
  - end
  -
  - nn_string = nn2string(nn)
  - if !haskey(states, nn_string)
  - n_states += 1
  - states[nn_string] = n_states
  - end
  - dpm.K_hist[iteration] = states[nn_string]
  - dpm.K_zz_dict[states[nn_string]] = deepcopy(zz)
  - elapsed_time = toq()
  - end
  - return states
  - end
  -
  -
  - function posterior{T1, T2}(dpm::DPM{T1}, xx::Vector{T2}, K::Int64, K_truncation::Int64=0)
  2 n_components = 0
  1 if K_truncation == 0
  1 n_components = K
  - else
  0 n_components = K_truncation
  - end
  -
  1 bayesian_components = [deepcopy(dpm.bayesian_component) for kk=1:n_components]
  1 zz = dpm.K_zz_dict[K]
  -
  1 NN = length(xx)
  1 nn = zeros(Int64, n_components)
  -
  1 for ii = 1:NN
  5000 kk = zz[ii]
  5000 additem(bayesian_components[kk], xx[ii])
  5000 nn[kk] += 1
  - end
  -
  1 return([posterior(bayesian_components[kk]) for kk=1:n_components], nn)
  - end

And here are the memory allocation results:

  - #=
  - DPM
  -
  - Dirichlet Process Mixture Models
  -
  - 25/08/2015
  - Adham Beyki, odinay@gmail.com
  -
  - =#
  -
  - type DPM{T}
  - bayesian_component::T
  - K::Int64
  - aa::Float64
  - a1::Float64
  - a2::Float64
  - K_hist::Vector{Int64}
  - K_zz_dict::Dict{Int64, Vector{Int64}}
  -
  - DPM{T}(c::T, K::Int64, aa::Float64, a1::Float64, a2::Float64) = new(c, K, aa, a1, a2,
  - Int64[], (Int64 => Vector{Int64})[])
  - end
  0 DPM{T}(c::T, K::Int64, aa::Real, a1::Real, a2::Real) = DPM{typeof(c)}(c, K, convert(Float64, aa),
  - convert(Float64, a1), convert(Float64, a2))
  -
  - function Base.show(io::IO, dpm::DPM)
  - println(io, "Dirichlet Mixture Model with $(dpm.K) $(typeof(dpm.bayesian_component)) components")
  - end
  -
  - function initialize_gibbs_sampler!(dpm::DPM, zz::Vector{Int64})
  - # populates the cluster labels randomly
  0 zz[:] = rand(1:dpm.K, length(zz))
  - end
  -
  - function DPM_sample_hyperparam(aa::Float64, a1::Float64, a2::Float64, K::Int64, NN::Int64, iters::Int64)
  -
  - # resampling concentration parameter based on Escobar and West 1995
  0 for n = 1:iters
  0 eta = rand(Distributions.Beta(aa+1, NN))
  0 rr = (a1+K-1) / (NN*(a2-log(NN)))
  0 pi_eta = rr / (1+rr)
  -
  0 if rand() < pi_eta
  0 aa = rand(Distributions.Gamma(a1+K, 1/(a2-log(eta))))
  - else
  0 aa = rand(Distributions.Gamma(a1+K-1, 1/(a2-log(eta))))
  - end
  - end
  0 aa
  - end
  -
  - function DPM_sample_pp{T1, T2}(
  - bayesian_components::Vector{T1},
  - xx::T2,
  - nn::Vector{Float64},
  - pp::Vector{Float64},
  - aa::Float64)
  -
  0 K = length(nn)
  0 @inbounds for kk = 1:K
  0 pp[kk] = log(nn[kk]) + logpredictive(bayesian_components[kk], xx)
  - end
  0 pp[K+1] = log(aa) + logpredictive(bayesian_components[K+1], xx)
  0 normalize_pp!(pp, K+1)
  0 return sample(pp[1:K+1])
  - end
  -
  -
  - function collapsed_gibbs_sampler!{T1, T2}(
  - dpm::DPM{T1},
  - xx::Vector{T2},
  - zz::Vector{Int64},
  - n_burnins::Int64, n_lags::Int64, n_samples::Int64, n_internals::Int64; max_clusters::Int64=100)
  -
  -
  191688 NN = length(xx) # number of data points
  96 nn = zeros(Float64, dpm.K) # count array
  0 n_iterations = n_burnins + (n_samples)*(n_lags+1)
  384 bayesian_components = [deepcopy(dpm.bayesian_component) for k = 1:dpm.K+1]
  2864 dpm.K_hist = zeros(Int64, n_iterations)
  176 pp = zeros(Float64, max_clusters)
  -
  48 tic()
  0 for ii = 1:NN
  0 kk = zz[ii]
  0 additem(bayesian_components[kk], xx[ii])
  0 nn[kk] += 1
  - end
  0 dpm.K_hist[1] = dpm.K
  0 elapsed_time = toq()
  -
  0 for iteration = 1:n_iterations
  -
  5329296 println("iteration: $iteration, KK: $(dpm.K), KK mode: $(indmax(hist(dpm.K_hist,
  - 0.5:maximum(dpm.K_hist)+0.5)[2])), elapsed time: $elapsed_time")
  -
  16800 tic()
  28000000 @inbounds for ii = 1:NN
  0 kk = zz[ii]
  0 nn[kk] -= 1
  0 delitem(bayesian_components[kk], xx[ii])
  -
  - # remove the cluster if empty
  0 if nn[kk] == 0
  161880 println("\tcomponent $kk has become inactive")
  0 splice!(nn, kk)
  0 splice!(bayesian_components, kk)
  0 dpm.K -= 1
  -
  - # shifting the labels one cluster back
  69032 idx = find(x -> x>kk, zz)
  42944 zz[idx] -= 1
  - end
  -
  0 kk = DPM_sample_pp(bayesian_components, xx[ii], nn, pp, dpm.aa)
  -
  0 if kk == dpm.K+1
  158976 println("\tcomponent $kk activated.")
  14144 push!(bayesian_components, deepcopy(dpm.bayesian_component))
  4872 push!(nn, 0)
  0 dpm.K += 1
  - end
  -
  0 zz[ii] = kk
  0 nn[kk] += 1
  0 additem(bayesian_components[kk], xx[ii])
  - end
  -
  0 dpm.aa = DPM_sample_hyperparam(dpm.aa, dpm.a1, dpm.a2, dpm.K, NN, n_internals)
  0 dpm.K_hist[iteration] = dpm.K
  14140000 dpm.K_zz_dict[dpm.K] = deepcopy(zz)
  0 elapsed_time = toq()
  - end
  - end
  -
  - function truncated_gibbs_sampler{T1, T2}(dpm::DPM{T1}, xx::Vector{T2}, zz::Vector{Int64},
  - n_burnins::Int64, n_lags::Int64, n_samples::Int64, n_internals::Int64, K_truncation::Int64)
  -
  - NN = length(xx) # number of data points
  - nn = zeros(Int64, K_truncation) # count array
  - bayesian_components = [deepcopy(dpm.bayesian_component) for k = 1:K_truncation]
  - n_iterations = n_burnins + (n_samples)*(n_lags+1)
  - dpm.K_hist = zeros(Int64, n_iterations)
  - states = (ASCIIString => Int64)[]
  - n_states = 0
  -
  - tic()
  - for ii = 1:NN
  - kk = zz[ii]
  - additem(bayesian_components[kk], xx[ii])
  - nn[kk] += 1
  - end
  - dpm.K_hist[1] = dpm.K
  -
  - # constructing the sticks
  - beta_VV = rand(Distributions.Beta(1.0, dpm.aa), K_truncation)
  - beta_VV[end] = 1.0
  - π = ones(Float64, K_truncation)
  - π[2:end] = 1 - beta_VV[1:K_truncation-1]
  - π = log(beta_VV) + log(cumprod(π))
  -
  - elapsed_time = toq()
  -
  - for iteration = 1:n_iterations
  -
  - println("iteration: $iteration, # active components: $(length(findn(nn)[1])), mode: $(indmax(hist(dpm.K_hist,
  - 0.5:maximum(dpm.K_hist)+0.5)[2])), elapsed time: $elapsed_time \n", nn)
  -
  - tic()
  - for ii = 1:NN
  - kk = zz[ii]
  - nn[kk] -= 1
  - delitem(bayesian_components[kk], xx[ii])
  -
  - # resampling label
  - pp = zeros(Float64, K_truncation)
  - for kk = 1:K_truncation
  - pp[kk] = π[kk] + logpredictive(bayesian_components[kk], xx[ii])
  - end
  - pp = exp(pp - maximum(pp))
  - pp /= sum(pp)
  -
  - # sample from pp
  - kk = sampleindex(pp)
  - zz[ii] = kk
  - nn[kk] += 1
  - additem(bayesian_components[kk], xx[ii])
  -
  - for kk = 1:K_truncation-1
  - gamma1 = 1 + nn[kk]
  - gamma2 = dpm.aa + sum(nn[kk+1:end])
  - beta_VV[kk] = rand(Distributions.Beta(gamma1, gamma2))
  - end
  - beta_VV[end] = 1.0
  - π = ones(Float64, K_truncation)
  - π[2:end] = 1 - beta_VV[1:K_truncation-1]
  - π = log(beta_VV) + log(cumprod(π))
  -
  - # resampling concentration parameter based on Escobar and West 1995
  - for internal_iters = 1:n_internals
  - eta = rand(Distributions.Beta(dpm.aa+1, NN))
  - rr = (dpm.a1+dpm.K-1) / (NN*(dpm.a2-log(NN)))
  - pi_eta = rr / (1+rr)
  -
  - if rand() < pi_eta
  - dpm.aa = rand(Distributions.Gamma(dpm.a1+dpm.K, 1/(dpm.a2-log(eta))))
  - else
  - dpm.aa = rand(Distributions.Gamma(dpm.a1+dpm.K-1, 1/(dpm.a2-log(eta))))
  - end
  - end
  - end
  -
  - nn_string = nn2string(nn)
  - if !haskey(states, nn_string)
  - n_states += 1
  - states[nn_string] = n_states
  - end
  - dpm.K_hist[iteration] = states[nn_string]
  - dpm.K_zz_dict[states[nn_string]] = deepcopy(zz)
  - elapsed_time = toq()
  - end
  - return states
  - end
  -
  -
  - function posterior{T1, T2}(dpm::DPM{T1}, xx::Vector{T2}, K::Int64, K_truncation::Int64=0)
  0 n_components = 0
  0 if K_truncation == 0
  0 n_components = K
  - else
  0 n_components = K_truncation
  - end
  -
  0 bayesian_components = [deepcopy(dpm.bayesian_component) for kk=1:n_components]
  0 zz = dpm.K_zz_dict[K]
  -
  0 NN = length(xx)
  0 nn = zeros(Int64, n_components)
  -
  0 for ii = 1:NN
  0 kk = zz[ii]
  0 additem(bayesian_components[kk], xx[ii])
  0 nn[kk] += 1
  - end
  -
  0 return([posterior(bayesian_components[kk]) for kk=1:n_components], nn)
  - end

I do not understand why, for example, a simple assignment that is executed only twice allocates 191688 units (I believe the unit is bytes, though I am not sure).

.cov:

  2 NN = length(xx) # number of data points 

.mem:

  191688 NN = length(xx) # number of data points 

Or, even worse:

.cov:

  352 @inbounds for ii = 1:NN 

.mem:

  28000000 @inbounds for ii = 1:NN 
1 answer

The answer is briefly mentioned in the docs: "Under the user setting, the first line of any function directly called from the REPL will exhibit allocation due to events that happen in the REPL code itself." Also possibly relevant: "More significantly, JIT-compilation also adds to allocation counts, because much of Julia's compiler is written in Julia (and compilation usually requires memory allocation). The recommended procedure is to force compilation by executing all the commands you want to analyze, then call Profile.clear_malloc_data() to reset all allocation counters."

Bottom line: the first line gets blamed for allocations that actually happen elsewhere (REPL events and JIT compilation), because it is where allocation reporting starts again.
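
In practice that means: force compilation with a throwaway run, reset the counters, and only then do the run whose .mem output you keep. A minimal sketch (DPM and run_sampler are placeholders for however the package and its driver are actually invoked):

  # run with: julia --track-allocation=user profile_dpm.jl
  using DPM                      # placeholder for loading the package under test

  run_sampler()                  # 1. throwaway run: triggers JIT compilation, whose
                                 #    allocations would otherwise be charged to these lines

  Profile.clear_malloc_data()    # 2. reset every per-line allocation counter

  run_sampler()                  # 3. measured run: only these allocations end up in the
                                 #    *.mem files written when julia exits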
