利用堆排序找出数组中前n大的元素

时间:2024-04-24 14:35:25
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <time.h>
#include <malloc.h>
#include <memory.h>
/* Capacity, in elements, of the buffers allocated by main. */
#define MAX_SIZE (1000 * 10000 + 1)

/*
 * Index arithmetic for the array-backed heap used by the heapify routines.
 * With these formulas LEFT(0) == 0 and RIGHT(0) == 1, so index 0 is its own
 * "left child" and index 1 is its only real child; from index 1 onwards the
 * layout is the usual i -> 2i, 2i + 1 binary tree.
 * Arguments are parenthesized so that expressions such as LEFT(k + 1) expand
 * correctly.
 */
#define PARENT(i) ((i) / 2)
#define RIGHT(i) ((i) * 2 + 1)
#define LEFT(i) ((i) * 2)

/* Swap a and b using t as scratch storage; a, b and t must be lvalues. */
#define EXCHANGE(a, b, t) do { (t) = (a); (a) = (b); (b) = (t); } while (0)

// Generate a random number sequence and write it to a file
// (the de-duplication step is commented out, so values may repeat)
/**
 * Write cnt pseudo-random integers, each in [0, cnt), to the text file
 * "test_data_<cnt>.txt" in the current directory, separated by single spaces.
 *
 * Values come from rand() % cnt and may repeat. Nothing is written when
 * cnt >= MAX_SIZE or when the file cannot be opened; a message is printed
 * to stdout instead.
 *
 * @param cnt number of integers to generate; also selects the file name.
 */
void gen_test_data(uint32_t cnt)
{
    if (cnt >= MAX_SIZE) {
        printf("cnt too largr\n");
        return;
    }

    /* 256 bytes is far more than "test_data_<32-bit number>.txt" needs. */
    char file_name[256];
    snprintf(file_name, sizeof file_name, "test_data_%d.txt", (int)cnt);

    FILE *fp = fopen(file_name, "w");
    if (NULL == fp) {
        printf("open %s error!\n", file_name);
        return;
    }

    for (uint32_t n = 0; n < cnt; ++n) {
        /* cnt > 0 whenever the loop body runs, so the modulo is safe. */
        int32_t nRand = (int32_t)((uint32_t)rand() % cnt);
        fprintf(fp, "%d ", (int)nRand);
    }

    /* fclose flushes buffered output; a failure here means data was lost. */
    if (fclose(fp) != 0) {
        printf("close %s error!\n", file_name);
        return;
    }
    printf("gen %s finished\n", file_name);
}

// Read the file
/**
 * Load integers from "test_data_<data_cnt>.txt" into arr.
 *
 * Reading stops at end of file, at the first token that is not an integer,
 * or once size elements have been stored, whichever comes first.
 *
 * @param arr      destination buffer with room for at least size elements.
 * @param size     capacity of arr in elements.
 * @param cnt      out: number of elements actually stored; set to 0 when
 *                 data_cnt > size or the file cannot be opened.
 * @param data_cnt selects which test file to read.
 */
void read_data(int32_t arr[], const uint32_t size, uint32_t *cnt, const uint32_t data_cnt)
{
    /* 256 bytes is far more than "test_data_<32-bit number>.txt" needs. */
    char file_name[256];

    *cnt = 0;
    if (data_cnt > size) {
        printf("data_cnt too largr\n");
        return;
    }

    snprintf(file_name, sizeof file_name, "test_data_%d.txt", (int)data_cnt);
    FILE *fp = fopen(file_name, "r");
    if (NULL == fp) {
        printf("open %s error!\n", file_name);
        return;
    }

    /*
     * Drive the loop by fscanf's return value rather than feof(): feof only
     * becomes true after a read has already failed, which would count one
     * bogus element for an empty or malformed file.
     */
    int value = 0;
    while (*cnt < size && fscanf(fp, "%d", &value) == 1) {
        arr[*cnt] = (int32_t)value;
        (*cnt)++;
    }

    fclose(fp);
}

// Quick sort
/**
 * Sort arr[low..high] (both bounds inclusive) in descending order.
 *
 * Only the smaller partition is handled recursively; the larger one is
 * processed by the enclosing loop, which keeps the recursion depth
 * logarithmic even when the partitions are badly unbalanced.
 *
 * @param arr  array to sort in place.
 * @param low  index of the first element of the range.
 * @param high index of the last element of the range; nothing happens when
 *             low >= high.
 */
void quick_sort(int32_t arr[], int32_t low, int32_t high)
{
    while (low < high) {
        int32_t i = low, j = high;
        const int32_t pivot = arr[i];

        /* Partition: values greater than the pivot end up on the left. */
        while (i < j) {
            while (i < j && arr[j] <= pivot) {
                j--;
            }
            if (i < j) {
                arr[i++] = arr[j];
            }
            while (i < j && arr[i] > pivot) {
                i++;
            }
            if (i < j) {
                arr[j--] = arr[i];
            }
        }
        arr[i] = pivot;

        if (i - low < high - i) {
            quick_sort(arr, low, i - 1);
            low = i + 1;
        } else {
            quick_sort(arr, i + 1, high);
            high = i - 1;
        }
    }
}

/**
 * Rearrange arr[low..high] so that its topn largest values occupy the first
 * topn slots of the range (in unspecified order), using quickselect.
 *
 * @param arr  array to rearrange in place.
 * @param low  index of the first element of the range.
 * @param high index of the last element of the range.
 * @param topn how many of the largest values to move to the front; the
 *             function returns without touching arr when topn > high.
 */
void get_topn_quick(int32_t arr[], int32_t low, int32_t high, const int32_t topn)
{
    int32_t want = topn; /* how many values are still needed from arr[low..high] */

    while (low < high && want <= high) {
        int32_t i = low, j = high;
        const int32_t pivot = arr[i];

        /* Partition: values >= pivot end up on the left. */
        while (i < j) {
            while (i < j && arr[j] < pivot) {
                j--;
            }
            if (i < j) {
                arr[i++] = arr[j];
            }
            while (i < j && arr[i] >= pivot) {
                i++;
            }
            if (i < j) {
                arr[j--] = arr[i];
            }
        }
        arr[i] = pivot;

        /* arr[low..i] now holds n values, all >= everything to their right. */
        const int32_t n = i - low + 1;
        if (n == want) {
            return;
        }
        if (n > want) {
            high = i - 1;       /* too many: keep selecting inside the left part */
        } else {
            low = i + 1;        /* too few: take them all, find the rest on the right */
            want -= n;
        }
    }
}

/**
 * Sift arr[i] down until the subtree rooted at i satisfies the max-heap
 * property, assuming both child subtrees already do.
 *
 * @param arr  heap storage.
 * @param size number of elements that belong to the heap.
 * @param i    index of the node to sift down.
 */
void max_heapify(int32_t arr[], const uint32_t size, uint32_t i)
{
    for (;;) {
        const uint32_t left = LEFT(i), right = RIGHT(i);
        uint32_t largest = i;

        if (left < size && arr[left] > arr[largest]) {
            largest = left;
        }
        if (right < size && arr[right] > arr[largest]) {
            largest = right;
        }
        if (largest == i) {
            return;
        }

        int32_t tmp = 0;
        EXCHANGE(arr[i], arr[largest], tmp);
        i = largest;
    }
}

/**
 * Sift arr[i] down until the subtree rooted at i satisfies the min-heap
 * property, assuming both child subtrees already do.
 *
 * @param arr  heap storage.
 * @param size number of elements that belong to the heap.
 * @param i    index of the node to sift down.
 */
void min_heapify(int32_t arr[], const uint32_t size, uint32_t i)
{
    for (;;) {
        const uint32_t left = LEFT(i), right = RIGHT(i);
        uint32_t smallest = i;

        if (left < size && arr[left] < arr[smallest]) {
            smallest = left;
        }
        if (right < size && arr[right] < arr[smallest]) {
            smallest = right;
        }
        if (smallest == i) {
            return;
        }

        int32_t tmp = 0;
        EXCHANGE(arr[i], arr[smallest], tmp);
        i = smallest;
    }
}

/**
 * Move the topn largest values of arr[0..arr_size) into arr[0..topn), in
 * min-heap order, by maintaining a min-heap of the best candidates so far.
 *
 * @param arr      array to rearrange in place.
 * @param arr_size number of elements in arr.
 * @param topn     how many of the largest values to collect; values above
 *                 arr_size are clamped, and nothing happens when topn <= 0.
 */
void get_topn_heap(int32_t arr[], const int32_t arr_size, const int32_t topn)
{
    if (arr_size <= 0 || topn <= 0) {
        return;
    }
    /* Never let the heap extend past the end of the array. */
    const int32_t heap_size = topn < arr_size ? topn : arr_size;
    int32_t i = heap_size / 2, tmp = 0;

    /* Build a min-heap (a priority queue) over arr[0..heap_size). */
    while (i >= 0) {
        min_heapify(arr, (uint32_t)heap_size, (uint32_t)i);
        i--;
    }

    for (i = heap_size; i < arr_size; ++i) {
        if (arr[i] <= arr[0]) {
            continue; /* not larger than the current minimum: cannot be in the top */
        }
        EXCHANGE(arr[0], arr[i], tmp);
        min_heapify(arr, (uint32_t)heap_size, 0);
    }
}

/**
 * Print the first cnt elements of arr on one line, each in a 4-wide field.
 */
void dump1(int32_t arr[], const uint32_t cnt)
{
    for (uint32_t i = 0; i < cnt; ++i) {
        printf("%4d ", (int)arr[i]);
    }
    printf("\n");
}

/**
 * Print arr[start..end) on one line, each element in a 5-wide field.
 */
void dump2(int32_t arr[], const uint32_t start, const uint32_t end)
{
    for (uint32_t i = start; i < end; ++i) {
        printf("%5d ", (int)arr[i]);
    }
    printf("\n");
}

/**
 * Benchmark driver: generates test files of growing size, then for each one
 * selects the top tenth with the heap method and with quickselect, prints
 * the clock() ticks each took, and prints "OK" when both agree.
 *
 * NOTE(review): the start value and growth factor of the size loops and the
 * divisor of the top fraction were missing from the source; 10 is
 * reconstructed from the surviving `cnt/10` in the commented-out dump2
 * calls -- confirm against the original.
 */
int32_t main(int32_t argc, char *argv[])
{
    (void)argc;
    (void)argv;

    int32_t *arr = malloc(sizeof *arr * MAX_SIZE);
    int32_t *heap = malloc(sizeof *heap * MAX_SIZE);
    int32_t *quick = malloc(sizeof *quick * MAX_SIZE);
    if (arr == NULL || heap == NULL || quick == NULL) {
        printf("out of memory\n");
        free(arr);
        free(heap);
        free(quick);
        return 1;
    }

    uint32_t cnt = 0, data_cnt = 0;
    for (cnt = 10; cnt <= MAX_SIZE; cnt *= 10) {
        gen_test_data(cnt);
    }

    for (data_cnt = 10; data_cnt <= MAX_SIZE; data_cnt *= 10) {
        read_data(arr, MAX_SIZE, &cnt, data_cnt);
        printf("cnt=%d\n", (int)cnt);
        if (cnt == 0) {
            continue; /* nothing was read; there is nothing to compare */
        }

        const int32_t total = (int32_t)cnt;
        const int32_t top = total / 10;

        /* Only the cnt elements that were actually read are meaningful. */
        memcpy(heap, arr, sizeof *heap * cnt);
        clock_t t = clock();
        get_topn_heap(heap, total, top);
        printf("heap use time:%ld\n", (long)(clock() - t));
        /* Sort the selected prefix so both results can be compared bytewise. */
        quick_sort(heap, 0, top - 1);
        //dump2(heap,0,cnt/10);

        memcpy(quick, arr, sizeof *quick * cnt);
        t = clock();
        get_topn_quick(quick, 0, total - 1, top);
        printf("quick use time:%ld\n", (long)(clock() - t));
        quick_sort(quick, 0, top - 1);
        //dump2(quick,0,cnt/10);

        if (memcmp(heap, quick, sizeof(int32_t) * (size_t)top) == 0) {
            printf("OK\n");
        }
    }

    free(arr);
    free(heap);
    free(quick);
    return 0;
}

函数 get_topn_heap 实现了用最小堆查找数组 arr 中最大的 topn 个数字,并将它们放置在数组中 [0, topn) 的位置

与前面的用快速排序的方法相比,用最小堆的方法效率稍低一些,快速排序方法:http://www.cnblogs.com/tangxin-blog/p/5617736.html

对比数据:

利用堆排序找出数组中前n大的元素