Quicksort 3路分区太慢了

时间:2022-09-25 21:08:59

I am using quicksort 3 way partition, but it is turning out to be too slow as and when the vector size is greater than 10000. What am I doing wrong? Please guide me! Any help will be appreciated The answer should be computed in less than 2.2 sec.

我正在使用quicksort 3路分区,但是当矢量大小大于10000时,它变得太慢了。我做错了什么?请指导我!任何帮助将不胜感激。答案应在不到2.2秒的时间内计算出来。

#include <iostream>
#include <vector>
#include <cstdlib>
#include <algorithm>

using std::vector;
using std::swap;

void print(vector<int> v)
{
  for(int i = 0; i < v.size(); i++) std::cout << v[i] << " ";
  std::cout << std::endl;
}

void partition2(vector<int> &a, int l, int r, int &i, int &j) {
  int k;
  int middle=(l+r)/2;
  /*Selecting pivot as median of low, high and middle*/
  if(((a[l]<=a[middle]) && (a[middle]<=a[r])) || ((a[r]<=a[middle]) && (a[middle]<=a[l])))
      k=middle;
  else if(((a[middle]<=a[l]) && (a[l]<=a[r])) || ((a[r]<=a[l]) && (a[l]<=a[middle])))
      k=l;
  else if(((a[middle]<=a[r]) && (a[r]<=a[l])) || ((a[l]<=a[r]) && (a[r]<=a[middle])))
      k=r;

  swap(a[l], a[k]);
  //print(a);

  int low_value = a[l];
  int index_low = l;
  int index_high = l;
  int counter=l;
  for (int i = l + 1; i <= r; i++) {
    if (a[i] < low_value) {
      swap(a[i], a[index_low]);
      counter++;
      low_value=a[l];
    }
    else if(a[i]==low_value)
    {
        index_high++;
        swap(a[i], a[index_high]);      
    }
    //print(a);
  }

  i=counter;
  j=index_high;
  //swap(a[l], a[j]);
  //return j;
}

void randomized_quick_sort(vector<int> &a, int l, int r) {
  if (l >= r) {
    return;
  }

  int i,j;
  partition2(a, l, r, i, j);

  randomized_quick_sort(a, l, i-1);
  randomized_quick_sort(a, j+1, r);
}

int main() {
  int n;
  std::cin >> n;
  //while(1){
  //n=100+rand()%99999;
  //std::cout<<n<<std::endl;
  vector<int> a(n);
  for (size_t i = 0; i < a.size(); ++i) {
    std::cin >> a[i];
    //a[i]=1+rand()%99999999;
  }
  randomized_quick_sort(a, 0, a.size() - 1);
  for (size_t i = 0; i < a.size(); ++i) {
    std::cout << a[i] << ' ';
  }
  //std::cout<<"Pass\n";  
  //}
  return 0;
}

2 个解决方案

#1


0  

Everything at first glance is correct. However, there are probably just too many comparison operations. Try this option - it works on my computer for 1.6 seconds on average.

乍一看一切都是正确的。但是,可能只有太多的比较操作。试试这个选项 - 它平均在我的电脑上运行1.6秒。

#include <stdlib.h>
#include <stdio.h>
#include <iostream>
#include <vector>
#include <ctime>
#include <random>
#include <chrono>
#include <iomanip>

using namespace std;
using namespace std::chrono;

//======= quick_sort =======//
template<typename T>
int partition(vector<T>& numbers, const int& left, const int& right)
{
    swap(numbers[left], numbers[left + (right - left) / 2]);
    T mid = numbers[left];
    int i(left + 1), j(right);

    while (i <= j)
    {
        while ( i <= j && numbers[i] <= mid ) i++;
        while ( i <= j && numbers[j] > mid ) j--;
        if ( i < j ) swap(numbers[i], numbers[j]);
    }

    swap(numbers[i - 1], numbers[left]);
    return i - 1;
}

template<typename T>
void quick_sort_rec(vector<T>& numbers, const int& left, const int& right)
{
    if (left >= right) return;

    int p = partition(numbers, left, right);
    quick_sort_rec(numbers, left , p - 1);
    quick_sort_rec(numbers, p + 1 , right);
}
//=========================//


template<typename T>
T random_T(long min, long max)
{
    return (T)min + static_cast<T>(rand()) / (static_cast<T>(RAND_MAX / ((T)(max - min))));
}

template<typename T>
float time_func(void (*f)(vector<T>&, const int&, const int&), vector<T>& a)
{
    high_resolution_clock::time_point t1 = high_resolution_clock::now();
    f(a, 0, a.size() - 1);
    high_resolution_clock::time_point t2 = high_resolution_clock::now();

    return 1000.0 * (duration_cast<microseconds>(t2 - t1).count()) / (float)(CLOCKS_PER_SEC); /// CLOCKS_PER_SEC;
}

int main()
{
    srand((unsigned)(777));
    vector<int> a;

    for (int i = 0; i < 10000; i++)
    {
        a.push_back(random_T<int>(0, 1000));
    }

    cout << setprecision(10) << "quick sort rec = " << time_func(quick_sort_rec, a) << endl;
    return 0;
}

#2


0  

I run the following code to test partition2

我运行以下代码来测试partition2

int main(){
    vector<int> a = {2, 1, 1, 9, 5, 3, 4, 2, 7};
    int i, j;
    partition2(a, 0, a.size() - 1, i, j);
    for (auto i : a)
        cout << i << ' ';
    cout << '\n';
    return 0;
}

And the results are

结果是

1 1 5 9 2 3 4 2 7 

If the partition2 selecting median of low, high and middle as pivot, then the pivot should be 5 and the results should be something like

如果partition2选择low,high和middle的中位数作为pivot,那么pivot应该是5,结果应该是这样的

2 1 1 3 4 2 5 9 7 

Then I check the code

然后我检查代码

if (a[i] < low_value) {
  swap(a[i], a[index_low]);
  counter++;
  low_value=a[l];
}
else if(a[i]==low_value)
{
    index_high++;
    swap(a[i], a[index_high]);      
}

It seems to be that the code try to find the minimum value of array and then move them to beginning of array. It seems that it is doing selection sort instead of quicksort. It explains why it is slow when input size is large.

似乎代码试图找到数组的最小值,然后将它们移动到数组的开头。它似乎正在做选择排序而不是快速排序。它解释了当输入尺寸很大时为什么它很慢。

#1


0  

Everything at first glance is correct. However, there are probably just too many comparison operations. Try this option - it works on my computer for 1.6 seconds on average.

乍一看一切都是正确的。但是,可能只有太多的比较操作。试试这个选项 - 它平均在我的电脑上运行1.6秒。

#include <stdlib.h>
#include <stdio.h>
#include <iostream>
#include <vector>
#include <ctime>
#include <random>
#include <chrono>
#include <iomanip>

using namespace std;
using namespace std::chrono;

//======= quick_sort =======//
template<typename T>
int partition(vector<T>& numbers, const int& left, const int& right)
{
    swap(numbers[left], numbers[left + (right - left) / 2]);
    T mid = numbers[left];
    int i(left + 1), j(right);

    while (i <= j)
    {
        while ( i <= j && numbers[i] <= mid ) i++;
        while ( i <= j && numbers[j] > mid ) j--;
        if ( i < j ) swap(numbers[i], numbers[j]);
    }

    swap(numbers[i - 1], numbers[left]);
    return i - 1;
}

template<typename T>
void quick_sort_rec(vector<T>& numbers, const int& left, const int& right)
{
    if (left >= right) return;

    int p = partition(numbers, left, right);
    quick_sort_rec(numbers, left , p - 1);
    quick_sort_rec(numbers, p + 1 , right);
}
//=========================//


template<typename T>
T random_T(long min, long max)
{
    return (T)min + static_cast<T>(rand()) / (static_cast<T>(RAND_MAX / ((T)(max - min))));
}

template<typename T>
float time_func(void (*f)(vector<T>&, const int&, const int&), vector<T>& a)
{
    high_resolution_clock::time_point t1 = high_resolution_clock::now();
    f(a, 0, a.size() - 1);
    high_resolution_clock::time_point t2 = high_resolution_clock::now();

    return 1000.0 * (duration_cast<microseconds>(t2 - t1).count()) / (float)(CLOCKS_PER_SEC); /// CLOCKS_PER_SEC;
}

int main()
{
    srand((unsigned)(777));
    vector<int> a;

    for (int i = 0; i < 10000; i++)
    {
        a.push_back(random_T<int>(0, 1000));
    }

    cout << setprecision(10) << "quick sort rec = " << time_func(quick_sort_rec, a) << endl;
    return 0;
}

#2


0  

I run the following code to test partition2

我运行以下代码来测试partition2

int main(){
    vector<int> a = {2, 1, 1, 9, 5, 3, 4, 2, 7};
    int i, j;
    partition2(a, 0, a.size() - 1, i, j);
    for (auto i : a)
        cout << i << ' ';
    cout << '\n';
    return 0;
}

And the results are

结果是

1 1 5 9 2 3 4 2 7 

If the partition2 selecting median of low, high and middle as pivot, then the pivot should be 5 and the results should be something like

如果partition2选择low,high和middle的中位数作为pivot,那么pivot应该是5,结果应该是这样的

2 1 1 3 4 2 5 9 7 

Then I check the code

然后我检查代码

if (a[i] < low_value) {
  swap(a[i], a[index_low]);
  counter++;
  low_value=a[l];
}
else if(a[i]==low_value)
{
    index_high++;
    swap(a[i], a[index_high]);      
}

It seems to be that the code try to find the minimum value of array and then move them to beginning of array. It seems that it is doing selection sort instead of quicksort. It explains why it is slow when input size is large.

似乎代码试图找到数组的最小值,然后将它们移动到数组的开头。它似乎正在做选择排序而不是快速排序。它解释了当输入尺寸很大时为什么它很慢。