首页 > 代码库 > 100亿个数取前1w名
100亿个数取前1w名
#include <fcntl.h>#include <stdio.h>#include <stdlib.h>#include <string.h>#include <unistd.h>#include <sys/time.h>static unsigned int BUF_PAGES; /* 缓冲区有多少个page */static unsigned int PAGE_SIZE; /* page的大小 */static unsigned int BUF_SIZE; /* 缓冲区的大小, BUF_SIZE = BUF_PAGES*PAGE_SIZE */static int *buffer; /* 输入缓冲区 */static int *heap; /* 最小堆 */long get_time_usecs();void init_heap(int n);void adjust(int n, int i);int main(int argc, char **argv){ unsigned int K, idx, length; int fd, i, bytes, element; long start_usecs = get_time_usecs(); fd = open(argv[1], O_RDONLY); if (fd < 0) { printf("can‘t open file %s\n",argv[1]); exit(0); } PAGE_SIZE = 4096; /* page = 4KB */ BUF_PAGES = 512; BUF_SIZE = PAGE_SIZE*BUF_PAGES; /* 4KB*512 = 2M */ buffer = (int *)malloc(BUF_SIZE); if (buffer == NULL) exit(0); K = atoi(argv[2]); heap = (int *)malloc(sizeof(int)*(K+1)); if (heap == NULL) { free(buffer); exit(0); } bytes = read(fd, heap+1, K*sizeof(int)); if (bytes < K*sizeof(int)) { printf("data size is too small\n"); exit(0); } init_heap(K); idx = length = 0; for (;;) { if (idx == length) { // 输入缓冲区用完 bytes = read(fd, buffer, BUF_SIZE); if (bytes == 0) break; length = bytes/4; idx = 0; } //从buffer取出一个数,若比最小堆堆顶元素大,则替换之 element = buffer[idx++]; if (element > heap[1]) { heap[1] = element; adjust(K, 1); } } long end_usecs = get_time_usecs(); printf("the top %d numbers are: \n", K); for (i = 1; i <= K; i++) { printf("%d ", heap[i]); if (i % 6 == 0) { printf("\n"); } } printf("\n"); free(buffer); free(heap); close(fd); double secs = (double)(end_usecs - start_usecs) / (double)1000000; printf("program tooks %.02f seconds.\n", secs); return 0;}void init_heap(int n){ int i; for (i = n/2; i > 0; i--) { adjust(n, i); }}/* 节点i的左子树和右子树已是最小堆, 此函数的 * 作用是把以节点i为根的树调整为最小堆 */void adjust(int n, int i){ heap[0] = heap[i]; i = 2 * i; //左子节点 while (i <= n) { if (i < n && heap[i+1] < heap[i]) { i++; //左右节点小的那个进行比较 } if (heap[i] >= heap[0]) { break; } heap[i / 2] = heap[i]; //左右节点都比根节点大 将较小的节点上移 i = i * 2; //锁定到下一层节点 } heap[i / 2] = heap[0]; //找到合适位置 进行更换}long get_time_usecs(){ struct timeval time; struct timezone tz; memset(&tz, ‘\0‘, sizeof(struct timezone)); gettimeofday(&time, &tz); long usecs = time.tv_sec*1000000 + time.tv_usec; return usecs;}
编译: g++ -o main main.cpp
产生测试数据: dd if=/dev/urandom of=random.dat bs=1M count=1024
运行: ./main random.dat 100
算法复杂度: nlog10000
100亿个数取前1w名
声明:以上内容来自用户投稿及互联网公开渠道收集整理发布,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任,若内容有误或涉及侵权可进行投诉: 投诉/举报 工作人员会在5个工作日内联系你,一经查实,本站将立刻删除涉嫌侵权内容。