Templated Rcpp function to erase NA values

I would like to write a function (using Rcpp) that removes all NA values from an R vector.

As a first step, I wrote a small test function using Rcpp::cppFunction.

 library(inline) cppFunction(' Vector<INTSXP> na_test(const Vector<INTSXP>& x) { return setdiff(x, Vector<INTSXP>::create(::traits::get_na<INTSXP>())); } ') 

This works as follows:

 na_test(c(1, NA, NA, 1, 2, NA)) # [1] 1 2 

After that, I tried to generalize this function using the C++ template mechanism.

So, in an external .cpp file (compiled with the sourceCpp function), I wrote:

 template <int RTYPE> Vector<RTYPE> na_test_template(const Vector<RTYPE>& x) { return setdiff(x, Vector<RTYPE>::create(::traits::get_na<RTYPE>())); } // [[Rcpp::export(na_test_cpp)]] SEXP na_test(SEXP x) { switch(TYPEOF(x)) { case INTSXP: return na_test_template<INTSXP>(x); case REALSXP: return na_test_template<REALSXP>(x); } return R_NilValue; } 

This code compiles, but behaves differently, and I cannot explain why.

In fact:

 na_test_cpp(c(1, NA, NA, 1, 2, NA)) # [1] 2 NA NA NA 1 

Why does the same function (apparently) behave differently? What's going on here?

+6
source share
4 answers

Following your answer, I would use something like this as a template:

 template <int RTYPE> Vector<RTYPE> na_omit_template(const Vector<RTYPE>& x) { int n = x.size() ; int n_out = n - sum( is_na(x) ) ; Vector<RTYPE> out(n_out) ; for( int i=0, j=0; i<n; i++){ if( Vector<RTYPE>::is_na( x[i] ) ) continue ; out[j++] = x[i]; } return out ; } 

So the idea is to first calculate the length of the result, and then use the Rcpp vector classes directly instead of std::vector. This reduces the number of copies of the data.


With the development version of Rcpp (svn revision >= 4308), it works for me for all types, and we can use our dispatch macro RCPP_RETURN_VECTOR instead of writing the switch by hand:

 // [[Rcpp::export]] SEXP na_omit( SEXP x ){ RCPP_RETURN_VECTOR( na_omit_template, x ) ; } 

na_omit was included in Rcpp (svn revision >= 4309) with a few changes, i.e. it can handle named vectors and arbitrary sugar expressions.

+5
source

I continued to explore the solution to the template problem (see e.g. @Sameer's answer).

So, I wrote another function, and now the template mechanism is working.

In an external .cpp file:

 #include <Rcpp.h> template <int RTYPE, class T> Vector<RTYPE> na_omit_template(const Vector<RTYPE>& x) { typedef typename Vector<RTYPE>::iterator rvector_it; if (x.size() == 0) { return x; } std::vector<T> out; rvector_it it = x.begin(); for (; it != x.end(); ++it) { if (!Vector<RTYPE>::is_na(*it)) { out.push_back(*it); } } return wrap(out); } // [[Rcpp::export(na_omit_cpp)]] SEXP na_omit(SEXP x) { switch(TYPEOF(x)) { case INTSXP: return na_omit_template<INTSXP, int>(x); case REALSXP: return na_omit_template<REALSXP, double>(x); case LGLSXP: return na_omit_template<LGLSXP, bool>(x); case CPLXSXP: return na_omit_template<CPLXSXP, Rcomplex>(x); case RAWSXP: return na_omit_template<RAWSXP, Rbyte>(x); default: stop("unsupported data type"); } } 

This function removes NA values, which was my original goal.

Unfortunately, at the moment it does not work for all types of vectors, as the R examples below show.

 library(Rcpp) sourceCpp('file.cpp') na_omit_cpp(as.integer(c(1, NA, NA, 1, 2, NA))) # OK # [1] 1 1 2 na_omit_cpp(as.numeric(c(1, NA, NA, 1, 2, NA))) # [1] 1 1 2 na_omit_cpp(c(NA, 1L, NA, 3L, NA)) # OK # [1] 1 3 na_omit_cpp(c(NA, 2L, 1, NA)) # OK # [1] 2 1 na_omit_cpp(c(1.0, 1.1, 2.2, NA, 3, NA, 4)) # OK # [1] 1.0 1.1 2.2 3.0 4.0 na_omit_cpp(c(1L, NaN, NaN, 0, NA)) # OK # [1] 1 NaN NaN 0 na_omit_cpp(c(NA, NaN, 1.0, 0.0, 2.2, NA, 3.3, NA, 4.4)) # OK # [1] NaN 1.0 0.0 2.2 3.3 4.4 na_omit_cpp(as.logical(c(1, 0, 1, NA))) # OK # [1] TRUE FALSE TRUE na_omit_cpp(as.logical(c(TRUE, FALSE, NA, TRUE, NA))) # OK # [1] TRUE FALSE TRUE # empty vectors ? na_omit_cpp(c(NA)) # OK # logical(0) na_omit_cpp(numeric(0)) # OK # numeric(0) na_omit_cpp(logical(0)) # OK # logical(0) na_omit_cpp(raw(0)) # OK # raw(0) na_omit_cpp(as.raw(c(40,16,NA,0,2))) # NO! (R converts it to 00) # [1] 28 10 00 00 02 # Warning message ... na_omit_cpp(as.complex(c(-1, 2, 1, NA, 0, NA, -1))) # NO! # [1] -1+0i 2+0i 1+0i NA 0+0i NA -1+0i 

Thus, this function works in almost all cases except raw vectors and complex vectors.

Current open issues:

  • I do not know why it fails for raw and complex vectors, and I would like to understand the reason. Any ideas?
  • The previous template function shown by @Sameer has weird behavior.
  • How can I make the function accept character vectors?

I was obviously thinking of something like case STRSXP: return na_omit_template<STRSXP, ?>(x); but this statement does not work when ? is replaced with std::string or Rcpp::String.

+1
source

The template mechanism seems to be working fine.

 > na_test_cpp(as.numeric(c(1, NA, NA, 1, 2, NA))) [1] 2 NA NA NA 1 > na_test_cpp(as.integer(c(1, NA, NA, 1, 2, NA))) [1] 1 2 

This code works for INTSXP, but not for REALSXP:

 Vector<REALSXP> na_test_real(const Vector<REALSXP>& x) { return setdiff(x, Vector<REALSXP>::create(::traits::get_na<REALSXP>())); } 
0
source

Some implementations:

 // naive template <int RTYPE> Vector<RTYPE> na_omit_impl(const Vector<RTYPE>& x) { std::size_t n = x.size(); // Estimate out length std::size_t n_out = 0; for(std::size_t i = 0; i < n; ++i) { if (Vector<RTYPE>::is_na(x[i])) continue; ++n_out; } // exit if no NAs if (n_out == n) return x; // allocate vector without filling Vector<RTYPE> res = no_init(n_out); // fill result vector for(std::size_t i = 0, j = 0; i < n; ++i) { if (Vector<RTYPE>::is_na(x[i])) continue; res[j] = x[i]; ++j; } return res; } // STL algorithms template <int RTYPE> struct not_na { typedef typename Vector<RTYPE>::stored_type type; bool operator() (const type& i) { return !Vector<RTYPE>::is_na(i); } }; template <int RTYPE> Vector<RTYPE> na_omit_impl(const Vector<RTYPE>& x) { // Estimate out length std::size_t n_out = std::count_if(x.begin(), x.end(), not_na<RTYPE>()); // exit if no NAs if (n_out == x.size()) return x; // allocate vector without filling Vector<RTYPE> res = no_init(n_out); // fill result vector std::copy_if(x.begin(), x.end(), res.begin(), not_na<RTYPE>()); return res; } // Rcpp sugar template <class T> T na_omit_impl(const T& x) { return x[!is_na(x)]; } // Rcpp sugar template <class T> T na_omit_impl(const T& x) { return Rcpp::na_omit(x); } 

All implementations work with the RCPP_RETURN_VECTOR macro:

 // [[Rcpp::export]] RObject na_omit(RObject x){ RCPP_RETURN_VECTOR(na_omit_impl, x); } 
0
source

All Articles