When comparing my quicksort implementation with std :: sort in my compiler and my mergesort implementation, I noticed an odd pattern for large data sets: when working on 64-bit integers, quicksort is consistently faster than mergesort; however, with smaller sizes, the sort size decreases, and merging is faster.
Here is the testing code:
#include <iostream> #include <vector> #include <iterator> #include <algorithm> #include <utility> #include <random> #include <chrono> #include <limits> #include <functional> #include <cstdint> template <typename Iterator> void insertion_sort(Iterator first, Iterator last) { using namespace std; Iterator head = first; Iterator new_position; while(head != last) { new_position = head; while(new_position != first && *new_position < *prev(new_position)) { swap(*new_position, *prev(new_position)); --new_position; } ++head; } } template <typename Iterator> void recursive_mergesort_impl(Iterator first, Iterator last, std::vector<typename Iterator::value_type>& temp) { if(last - first > 32) { auto middle = first + (last-first)/2; recursive_mergesort_impl(first, middle, temp); recursive_mergesort_impl(middle, last, temp); auto last_merged = merge_move(first, middle, middle, last, temp.begin()); std::move(temp.begin(), last_merged, first); } else { insertion_sort(first, last); } } template <typename Iterator> void recursive_mergesort(Iterator first, Iterator last) { std::vector<typename Iterator::value_type> temp(last-first); recursive_mergesort_impl(first, last, temp); } // Pick a pivot and move it to front of range template <typename Iterator> template <typename Iterator> void quicksort_pivot_back(Iterator first, Iterator last) { using namespace std; auto middle = first + (last-first)/2; auto last_elem = prev(last); Iterator pivot; if(*first < *middle) { if(*middle < *last_elem) pivot = middle; else if(*first < *last_elem) pivot = last_elem; else pivot = first; } else if(*first < *last_elem) pivot = first; else if(*middle < *last_elem) pivot = last_elem; else pivot = middle; swap(*last_elem, *pivot); } template <typename Iterator, typename Function> std::pair<Iterator, Iterator> quicksort_partition(Iterator first, Iterator last, Function pivot_select) { using namespace std; pivot_select(first, last); auto pivot = prev(last); auto bottom = first; auto top = pivot; while(bottom != top) { if(*bottom < *pivot) ++bottom; else swap(*bottom, *--top); } swap(*pivot, *top++); return make_pair(bottom, top); } template <typename Iterator> void quicksort_loop(Iterator first, Iterator last) { using namespace std; while(last - first > 32) { auto bounds = quicksort_partition(first, last, quicksort_pivot_back<Iterator>); quicksort_loop(bounds.second, last); last = bounds.first; } } template <typename Iterator> void quicksort(Iterator first, Iterator last) { quicksort_loop(first, last); insertion_sort(first, last); } template <typename IntType = uint64_t, typename Duration = std::chrono::microseconds, typename Timer = std::chrono::high_resolution_clock, typename Function, typename Generator> void run_trial(Function sort_func, Generator gen, std::string name, std::size_t trial_size, std::size_t trial_count) { using namespace std; using namespace chrono; vector<IntType> data(trial_size); Duration elapsed(0); cout << "Sorting with " << name << endl; for(unsigned int i = 0; i < trial_count; ++i) { generate(data.begin(), data.end(), gen); auto start = Timer::now(); sort_func(data.begin(), data.end()); auto finish = Timer::now(); elapsed += duration_cast<Duration>(finish-start); } cout << "Done. Average elapsed time: " << elapsed.count() / trial_count << endl; cout << "Is correct: " << is_sorted(data.begin(), data.end()) << endl << endl; } int main() { using namespace std; using namespace chrono; using int_type = uint64_t; const size_t trial_size = 12800000; const int trial_count = 15; vector<int_type> data(trial_size); uniform_int_distribution<int_type> distr; mt19937_64 rnd; run_trial<int_type>(recursive_mergesort<vector<int_type>::iterator>, bind(distr, rnd), "recursive mergesort", trial_size, trial_count); run_trial<int_type>(quicksort<vector<int_type>::iterator>, bind(distr, rnd), "quicksort", trial_size, trial_count); run_trial<int_type>(sort<vector<int_type>::iterator>, bind(distr, rnd), "std::sort", trial_size, trial_count); }
Here are examples of 15 trials of 12,800,000 elements:
uint64_t :
Sorting with recursive mergesort Done. Average elapsed time: 1725431 Is correct: 1 Sorting with quicksort Done. Average elapsed time: 1238070 Is correct: 1 Sorting with std::sort Done. Average elapsed time: 1131464 Is correct: 1
uint16_t :
Sorting with recursive mergesort Done. Average elapsed time: 1186467 Is correct: 1 Sorting with quicksort Done. Average elapsed time: 2368535 Is correct: 1 Sorting with std::sort Done. Average elapsed time: 888517 Is correct: 1
I feel like the problem is with unbalanced memory accesses, however it still makes me wonder why other algorithms get acceleration while quicksort slows down.