首页 > 代码库 > 运用简单的bloomfilter算法生成100万个不重复的随机数

运用简单的bloomfilter算法生成100万个不重复的随机数

本文只是简单地体会 Bloom filter 算法的基本原理,设计并实现一个生成100万个不重复随机数的程序。

选择3个分布均匀的质数作为哈希函数的模数。质数的选择很有讲究:不能太小,必须保证 Bloom filter 的位空间足够大,否则在找到100万个不重复的随机数之前,整个位空间就已经全部被置为1了。不多说,上代码。

#include<stdio.h>
#include<stdlib.h>
#include<time.h>
#include<cstdbool>
#define  MAXNUM 10000000



/*
 * Hash a value by reducing it modulo `select_number`.
 *
 * dst           - the value to hash.
 * select_number - the modulus; must be non-zero (division by zero is
 *                 undefined behaviour).
 *
 * Returns dst % select_number. With C99 and later the result carries the
 * sign of `dst`, so it is non-negative whenever `dst` is non-negative.
 */
int hash_fuction(int dst, int select_number)
{
    const int remainder = dst % select_number;

    return remainder;
}

/*
 * Generate `generate_number` pairwise-distinct pseudo-random integers in
 * [0, maxValue), using a Bloom filter to reject candidates that may already
 * have been produced.
 *
 * The filter is a bit array of MAXNUM / 8 bytes. Each candidate is hashed
 * three times (candidate modulo three primes, via hash_fuction). A candidate
 * is accepted only if at least one of its three bits is still clear, i.e. it
 * has definitely not been accepted before; its bits are then set. Because
 * bits are never cleared, an accepted value can never be accepted twice.
 * False positives only cause unseen values to be skipped.
 *
 * Candidates are drawn with rand(); this function does not call srand().
 *
 * generate_number - how many distinct values to produce.
 * maxValue        - exclusive upper bound of the produced values.
 *
 * Returns a malloc'd array of `generate_number` ints that the caller must
 * free(), or NULL when:
 *   - generate_number is negative or maxValue is not positive;
 *   - the request cannot be satisfied (more values requested than exist in
 *     [0, maxValue), or more than the filter has addressable bits);
 *   - the filter configured by MAXNUM is too small for the largest hash;
 *   - memory allocation fails.
 *
 * NOTE: when the request is close to the capacity limits above, false
 * positives make acceptance increasingly rare and the retry loop can run
 * for a very long time (or never finish); keep generate_number well below
 * both maxValue and the largest prime.
 */
int * byte_bloomfilter_random(int generate_number, int maxValue)
{
    // Moduli of the three hash functions; every hash lies in [0, prime).
    enum { PRIME_A = 524287, PRIME_B = 1046527, PRIME_C = 3967 };

    const size_t filter_bytes = (size_t)MAXNUM / 8;
    unsigned char *bloomfilter;
    int *dst;

    if (generate_number < 0 || maxValue <= 0 || generate_number > maxValue)
    {
        return NULL;
    }
    // Every accepted value sets at least one previously clear bit, and only
    // bits [0, PRIME_B) are ever addressed, so no more than PRIME_B values
    // can be accepted in total.
    if (generate_number > PRIME_B)
    {
        return NULL;
    }
    // The largest possible hash (PRIME_B - 1) must fit inside the filter.
    if (filter_bytes <= (size_t)(PRIME_B - 1) / 8)
    {
        return NULL;
    }

    // Casts are kept because the file includes <cstdbool> and may be built
    // as C++. calloc hands back a zeroed filter.
    bloomfilter = (unsigned char *)calloc(filter_bytes, sizeof *bloomfilter);
    dst = (int *)malloc((size_t)generate_number * sizeof *dst);
    if (bloomfilter == NULL || dst == NULL)
    {
        free(bloomfilter);
        free(dst);
        return NULL;
    }

    for (int i = 0; i < generate_number; i++)
    {
        for (;;)
        {
            // Multiply in unsigned long long: with a 31-bit RAND_MAX the
            // int product overflows (undefined behaviour) and may turn
            // negative, which would produce negative filter indices.
            const unsigned long long first = (unsigned long long)rand();
            const unsigned long long second = (unsigned long long)rand();
            const int candidate =
                (int)((first * second) % (unsigned long long)maxValue);

            // select 3 prime numbers and select 3 hash functions
            const int hash_a = hash_fuction(candidate, PRIME_A);
            const int hash_b = hash_fuction(candidate, PRIME_B);
            const int hash_c = hash_fuction(candidate, PRIME_C);

            // candidate >= 0, so each hash is >= 0: split it into a byte
            // index (hash / 8) and a bit mask within that byte (hash % 8).
            const size_t byte_a = (size_t)hash_a >> 3;
            const size_t byte_b = (size_t)hash_b >> 3;
            const size_t byte_c = (size_t)hash_c >> 3;
            const unsigned char mask_a = (unsigned char)(1u << (hash_a & 7));
            const unsigned char mask_b = (unsigned char)(1u << (hash_b & 7));
            const unsigned char mask_c = (unsigned char)(1u << (hash_c & 7));

            if ((bloomfilter[byte_a] & mask_a) &&
                (bloomfilter[byte_b] & mask_b) &&
                (bloomfilter[byte_c] & mask_c))
            {
                // All three bits already set: possibly seen, draw again.
                continue;
            }

            dst[i] = candidate;
            bloomfilter[byte_a] |= mask_a;
            bloomfilter[byte_b] |= mask_b;
            bloomfilter[byte_c] |= mask_c;
            break;
        }
    }

    free(bloomfilter);
    return dst;
}