Comparing data in an efficient way (with C++)

Is there a more efficient way to compare data than using the comparison operator of a C++ list container?

I need to compare [large? 10 kByte < size < 500 kByte] amounts of data to check the integrity of external storage devices.

So I read the files one at a time and store the values in a list of unsigned chars. The list is managed by a shared_ptr, so I can pass it around the program without worrying about the size of the list.

typedef boost::shared_ptr< list< unsigned char > > = contentPtr;
namespace boost::filesystem = fs;

// Returns the content of the file at filePath as a shared list of bytes.
// The actual reading code is elided in this snippet (see the placeholder
// comment in the body).
contentPtr GetContent( fs::path filePath ){
 contentPtr actualContent (new list< unsigned char > );
 // Read the file with a stream, put read values into actual content
 return actualContent;
}

This is done twice because there are always two copies of the file. The contents of these two files should be compared, and an exception should be thrown if a mismatch is found.

void CompareContent() throw( NotMatchingException() ){
 // this part is very fast, below 50ms
 contentPtr contentA = GetContent("/fileA");
 contentPtr contentB = GetContent("/fileB");
 // the next part takes about 2secs with a file size of ~64kByte
 if( *contentA != *contentB )
      throw( NotMatchingException() );
}

:
. 100 , . ....

? ?

+5
7

std::list std::vector.

std::list , .

std::vector, , ( ).

, . .

+8

.

, , , , , , - . , , 1% .

, - , . , .

+2

(, , shared_ptr CompareContent()), , ?

, :

// compare files: walk both streams character by character with std::equal;
// the condition is true when every character read from local_f matches the
// character at the same position in host_f.
// NOTE(review): this three-iterator form of std::equal stops when local_f is
// exhausted and never checks whether host_f still has data left — confirm
// that the file sizes are compared elsewhere.
if (equal(std::istreambuf_iterator<char>(local_f),
          std::istreambuf_iterator<char>(),
          std::istreambuf_iterator<char>(host_f)))
{
    // we're good: move table to OutPath, remove other files

EDIT: , , std::deque , std::vector , GOTW # 54.. - . , , - deque istreambuf_iterator.

+1

, . boost mapped_files. . " " (, ) , ...

Cubbi, : http://www.cplusplus.com/forum/general/94032/ , , , . , , , :

#include <iostream>
#include <algorithm>
#include <boost/iostreams/device/mapped_file.hpp>
#include <boost/filesystem.hpp>

namespace io = boost::iostreams;
namespace fs = boost::filesystem;

// Returns true when the files at path1 and path2 have identical contents.
//
// The sizes are compared first; only when they match are both files
// memory-mapped and compared byte by byte. Two empty files are considered
// equal. Failures reported by Boost (e.g. a missing file) are not handled
// here and propagate to the caller.
bool files_equal(const std::string& path1, const std::string& path2)
{
    fs::path f1(path1);
    fs::path f2(path2);

    // Files of different length can never match.
    if (fs::file_size(f1) != fs::file_size(f2))
        return false;

    // zero-sized files cannot be opened with mapped_file_source
    // hence we consider all zero-sized files equal
    if (fs::file_size(f1) == 0)
        return true;

    io::mapped_file_source mf1(f1.string());
    // Map the SECOND file here; mapping f1 twice would make every pair of
    // same-sized files compare equal.
    io::mapped_file_source mf2(f2.string());

    // std::equal reads mf1.size() bytes from mf2, so make sure the second
    // mapping really is that long (a file may have changed since the size
    // check above).
    if (mf1.size() != mf2.size())
        return false;

    return std::equal(mf1.data(), mf1.data() + mf1.size(), mf2.data());
}

int main()
{
    // Compare the two sample files and report the outcome on stdout.
    const bool same = files_equal("test.1", "test.2");
    std::cout << (same ? "The files are equal.\n"
                       : "The files are not equal.\n");
}
+1

std:: list char - , O (1), .

STL, std::vector, std:: list, char *, - memcmp?

0

- , memcmp. ( , .)

0

In the interest of objectivity in the memcmp-vs-equal discussion, I propose the following test program so that you can see for yourself which, if any, is faster on your system. It also checks operator==. On my system (Borland C++ 5.5.1 for Win32):

std::equal: 1375 clock ticks
operator==: 1297 clock ticks
memcmp: 1297 clock ticks

What do you get on your system?

#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <iostream>
#include <vector>

using namespace std;

// Raw character buffer; main() allocates BufferSize chars into it.
static char* buff ;
// File-scope vectors. main() builds its own local vectors for the benchmark,
// so these two appear to be unused in the visible code.
static vector<char> v0, v1 ;

// Number of chars in the buffer and in each vector that gets compared.
static int const BufferSize = 100000 ;

// Timing helpers; their definitions follow main().
static clock_t StartTimer() ;
static clock_t EndTimer (clock_t t) ;

int main (int argc, char** argv)
  {
  // Allocate a buffer
  buff = new char[BufferSize] ;

  // Create two vectors
  vector<char> v0 (buff, buff + BufferSize) ;
  vector<char> v1 (buff, buff + BufferSize) ;

  clock_t t ;

  // Compare them 10000 times using std::equal
  t = StartTimer() ;
  for (int i = 0 ; i < 10000 ; i++)
    if (!equal (v0.begin(), v0.end(), v1.begin()))
      cout << "Error in std::equal\n", exit (1) ;
  t = EndTimer (t) ;
  cout << "std::equal: " << t << " clock ticks\n" ;

  // Compare them 10000 times using operator==
  t = StartTimer() ;
  for (int i = 0 ; i < 10000 ; i++)
    if (v0 != v1)
      cout << "Error in operator==\n", exit (1) ;
  t = EndTimer (t) ;
  cout << "operator==: " << t << " clock ticks\n" ;

  // Compare them 10000 times using memcmp
  t = StartTimer() ;
  for (int i = 0 ; i < 10000 ; i++)
    if (memcmp (&v0[0], &v1[0], v0.size()))
      cout << "Error in memcmp\n", exit (1) ;
  t = EndTimer (t) ;
  cout << "memcmp: " << t << " clock ticks\n" ;

  return 0 ;
  }

// Waits for the clock to move on to its next tick and then returns a fresh
// clock reading, so that every measurement begins right after a tick
// boundary.
static clock_t StartTimer()
  {
  const clock_t entry = clock() ;
  for (;;)
    {
    // Busy-wait until the reading differs from the one taken on entry
    if (clock() != entry)
      break ;
    }
  return clock() ;
  }

// Returns the number of clock ticks elapsed since the reading t, which is
// expected to come from StartTimer().
static clock_t EndTimer (clock_t t)
  {
  const clock_t now = clock() ;
  const clock_t elapsed = now - t ;
  return elapsed ;
  }
0
source

All Articles