Efficient way to calculate average over non-overlapping sub-bands of an STL card

I am converting an algorithm from C # to C ++. A small part of the algorithm is to calculate the average values ​​for certain areas in the dictionary.

The data in the dictionary is stored as follows:

Index     Value
1         10
3         28
290       78
1110      90

I need to calculate the average of all values ​​with an index less than a certain number and all index values ​​greater than a certain number. In C #, I do it like this:

if (dictionary.Where(x => x.Key < areaWidth).Count() > 0)
{
    avgValue = (int) dictionary.Where(x => x.Key < areaWidth).Average(
        x => x.Value);
}

for (var i = 0; i < line.Length; i++)
{
    if (i == areaWidth)
    {
        avgValue = -1;
        i = line.Length - areaWidth;
        var rightBorder = i - areaWidth;

        if (dictionary.Where(x => x.Key > (rightBorder)).Count() > 0)
        {
            avgValue = (int) dictionary.Where(
                x => x.Key > (rightBorder)).Average(
                                x => x.Value);
        }
    }

    if (line[i] < avgValue * 0.8)
    {
        reallyImportantValue += (avgValue - line[i]);
    }
}

I know that this is not very efficient and rather crappy code, but I knew that I still had to completely rewrite this part of the algorithm in C ++, so I decided to implement it quickly and dirty.

++ , , . ++/STL , , , , , , #.

, ++, , .


EDIT: . , STL , , , . , - , , . :

500 1000 . - , - .

+5
8

- std:: map - , , , , for ( STL ). value:

std::map<int, int> map;
map[...] = ...;

int count = 0, sum = 0;
for (std::map<int, int>::const_iterator it = map.begin();
     it != map.end() && it->first < value; ++it, ++count)
{
    sum += it->second;
}
// check for count == 0
int avg = sum / count; // do note integer division, change if appropriate

, , map.rbegin() ( std::map<...>::const_reverse_iterator), map.rend() >.

edit: STL ( , ). , value.

int ipsum(int p1, const std::pair<int, int>& p2) {
    return p1 + p2.second;
}

...

std::map<int, int> map;
int sum = std::accumulate(map.begin(), map.lower_bound(value), 0, ipsum);
+1

std::accumulate , . , , STL.

+3

EDIT: - result2 :

#include <map>
#include <algorithm>
#include <numeric>

typedef map<const unsigned int, unsigned int> Values;

struct averageMap
{
    averageMap() : lowerCount(0), lowerSum(0), upperSum(0) {}
    averageMap operator()(const averageMap& input, 
           const Values::value_type& current)
    {
        if (current.first > boundary)
        {
            upperSum += current.second;
        }
        else
        {
            lowerSum += current.second;
            ++lowerCount;
        }
        return *this;
    }

    static size_t boundary;
    size_t lowerCount;
    unsigned int lowerSum;
    unsigned int upperSum;
};

size_t averageMap::boundary(0);

struct averageRange
{
    averageRange() : count(0), sum(0) {}
    averageRange operator()(const averageRange& input, 
        const Values::value_type& current)
    {
        sum += current.second;
        ++count;

        return *this;
    }

    size_t count;
    unsigned int sum;
};


int main()
{
    Values values;

    values[1] = 10;
    values[3] = 28;
    values[290] = 78;
    values[1110] = 110;

    averageMap::boundary = 100;
    averageMap result = accumulate(values.begin(), values.end(), 
        averageMap(boundary), averageMap(boundary));

averageRange result2 = accumulate(values.lower_bound(2), values.upper_bound(300), 
    averageRange(), averageRange());

    return 0;
};

:

. accumulate , map::upper_bound, , STL , . - map >= 0.

#include <map>
#include <algorithm>
#include <numeric>
#include <vector>

using namespace std;

typedef map<unsigned int, unsigned int> Values;

int main()
{
    Values values;

    values[1] = 10;
    values[3] = 28;
    values[290] = 78;
    values[1110] = 110;

    size_t boundary(100);
    Values::iterator iter = values.upper_bound(boundary);

    vector<int> lowerRange(values.size(), -1);

    transform(values.begin(), iter, lowerRange.begin(), 
        [](std::pair<unsigned int, unsigned int> p) 
                -> int { return p.second; });

    vector<int>::iterator invalid(find(lowerRange.begin(), 
        lowerRange.end(), -1));
    size_t lowerCount(distance(lowerRange.begin(), invalid));
    lowerRange.resize(lowerCount);

    vector<int> upperRange(values.size() - lowerCount);
    transform(iter, values.end(), upperRange.begin(), 
        [](std::pair<unsigned int, unsigned int> p) 
                -> int { return p.second; });

    size_t lowerAverage = accumulate(lowerRange.begin(), 
        lowerRange.end(), 0) / lowerRange.size();
    size_t upperAverage = accumulate(upperRange.begin(), 
        upperRange.end(), 0) / upperRange.size();

    return 0;
};
+3
  • std:: lower_bound std:: upper_bound, , lower_bound , , p >= , upper_bound p > . , .

  • , std:: pairs , , boost:: transform_iterator, , . , ( ).

+2

, , std::map<>::lower_bound() std::map<>::upper_bound(). , , std::accumulate() from <numeric>. , , , second, std::pair<>.

- , std::partition():

// tmp container: should be fast with std::distance()
typedef std::vector<int> seq;

seq tmp(dict.size());
seq::iterator end(std::partition(dict.begin(), dict.end(),
                                 tmp.begin(),
                                 std::bind2nd(std::tmp(), UPPER_BOUND)));

// std::vector works well with std::distance()
seq::difference_type new_count = std::distance(tmp.begin(), end);
double lower_avg = std::accumulate(tmp.begin(), end, 0.0) / new_count;
seq::difference_type new_count = std::distance(end, tmp.end());
double higher_avg = std::accumulate(tmp.begin(), end, 0.0) / new_count;

<vector>, <algorithm>, <numeric>, <iterator> <functional>.

+1

, , , . , . , . , , .

, . . , , , , .

int key = <whatever>;

std::map<int, int>::const_iterator it = map.begin(), end = map.end();

size_t num1 = 0;
long total1 = 0;

while (it != end && it->first < key) {
    total1 += it->second;
    ++num1;
    ++it;
}

size_t num2 = map.size() - num1;
long total2 = 0;

while (it != end) {
    total2 += it->second;
    ++it;
}

int avg_less = num1 > 0 ? total1 / num1 : 0;
int avg_greater_equal = num2 > 0 ? total2 / num2 : 0;

, - , std::lower_bound . , , . - , .

(, , , , . .)

+1

, , , . StatsCollector. , , , , , . . , , value_type.

class StatsCollector
{
public:
   StatsCollector();

   void add(double val);

 // some stats you might want
   size_t count() const;
   double mean() const;
   double variance() const;
   double skewness() const;
   double kurtosis() const;
};

. , , , , , , .

( ) . . ( , std:: accumulate , , , . , -op)

struct AddPairToStats
{
  template< typename T >
  StatsCollector * operator()( StatsCollector * stats, const T& value_type ) const
  { 
     stats->add( value_type.second );
     return stats;
  }
};

, double, .

, , , :

StatsCollector stats;
std::accumuluate( iterStart, iterEnd, &stats, AddPairToStats() );

. , , , , , /4- , , ( , ).

+1

:

  • map::upper_bound/lower_bound,
  • accumulate () count

( ). :

 struct RunningAverage
 {
     double sum;
     int count;
     RunningAverage() { sum = 0; count = 0; }
     RunningAverage & operator+=(double value) 
     { sum += value; ++count; }

     RunningAverage operator+(double value) 
     { RunningAverage result = *this; result += value; return result; }

     double Avg() { return sum / count; } 
 }

, , .


[edit] , :

  • O (N) N
  • ( node)

, , , ( ). .

"" , .

I would prefer this custom “accumulate” solution because it simply expanded or changed for other operations, and the “accumulate” details remained isolated. It can also be used with a hypothetical method accumulate_pthat parallelizes access (you will also need an operator struct + struct, but it's simple).

Oh, and const correctness left as an exercise for the reader :)

0
source

All Articles