search——Bloom Filter

时间:2025-02-24 18:04:35
package main import ( "fmt" "math" "strconv" ) type BloomFilter struct { *bitMap k uint64 // hash function count m uint64 // bits array length n uint64 // insert n number p float64 // 失误率 p_true float64 // 真是失误率 } func (b *BloomFilter) Insert(id uint64) { for i := 0; uint64(i) < b.k; i++ { b.bitMap.insert(getHash(strconv.Itoa(int(id)), uint64(i)) % b.bitMap.Size) } } func (b *BloomFilter) IsExist(id uint64) bool { for i := 0; uint64(i) < b.k; i++ { if !b.bitMap.isExist(getHash(strconv.Itoa(int(id)), uint64(i)) % b.bitMap.Size) { return false } } return true } func NewBloomFilter(p float64, n uint64) *BloomFilter { m := uint64(-(float64(n)*math.Log2(p))/(math.Ln2*math.Ln2) + 0.5) k := uint64(math.Ln2*(float64(m)/float64(n)) + 0.5) p_true := math.Pow(1-math.Pow(math.E, -(float64(n)*float64(k))/float64(m)), float64(k)) bl := &BloomFilter{bitMap: newBitMap(m/64 + 1)} bl.m = m // bits array length bl.n = n // insert n number bl.k = k // hash function count bl.p = p // 失误率 bl.p_true = p_true // 真实失误率 return bl } func main() { bl := NewBloomFilter(0.00000001, 100) bl.Insert(10000) fmt.Println(bl.IsExist(10000)) fmt.Println(bl.IsExist(10001)) } func main1() { p := 0.00000000001 n := 100 m := uint(-(float64(n)*math.Log2(p))/(math.Ln2*math.Ln2) + 0.5) fmt.Println(m/64 + 1) k := uint64(math.Ln2*(float64(m)/float64(n)) + 0.5) fmt.Println(m) fmt.Println(k) p_true := float64(0) p_true = math.Pow(1-math.Pow(math.E, -(float64(n)*float64(k))/float64(m)), float64(k)) fmt.Println(p_true < p) } type bitMap struct { Map []uint64 Cap uint64 Size uint64 } func newBitMap(cap uint64) *bitMap { return &bitMap{make([]uint64, cap), cap, cap * 64} } func (b *bitMap) insert(id uint64) { b.Map[id/64] |= 1 << (id % 64) } func (b *bitMap) isExist(id uint64) bool { return (b.Map[id/64])&(1<<(id%64)) > 0 } func (b *bitMap) delete(id uint64) { b.Map[id/64] &^= 1 << (id % 64) } func (b *bitMap) range_() (res []uint64) { for i := 0; uint64(i) < b.Cap; i++ { if b.Map[i] > 0 { for j, bits := 0, b.Map[i]; bits > 0; j, bits = j+1, bits>>1 { if bits&1 == 1 { res = append(res, uint64(i)*64+uint64(j)) } } } } return res } func getHash(str string, i uint64) uint64 { return hash(str) + i*hash1(str) } func hash(str string) (res uint64) { factor := ")k+,p-m~90$#2(*&6q}ev73541]n{fl['?|c" str = str + factor[:16-(len(str)%16)] for start, end := 0, len(str)/16; end > 0; start, end = start+16, end-1 { h0 := uint64(0) for i := 15; i >= 0; i-- { h0 += uint64(str[start+i]-byte(i))*uint64(pow(36, i)) ^ uint64(factor[(i+start)%36]) } h0 *= 0x12345 res += (res >> 30) ^ h0<<34 res += (res >> 32) | (h0 << 32) ^ uint64(start*start*start) ^ uint64(factor[start%36]) res += (res>>16)&(h0<<48) ^ uint64(factor[35-start%36]) ^ uint64(start-end*end) res += (res >> 17) | (h0 << 47) ^ uint64(start*start) } res += 235791113<<32 | 1719232931>>32 return res } func hash1(str string) (res uint64) { factor := ")k+,p-m~90$#2(*&6q}ev73541]n{fl['?|c" str = str + factor[:16-(len(str)%16)] for start, end := 0, len(str)/16; end > 0; start, end = start+16, end-1 { h0 := uint64(0) for i := 15; i >= 0; i-- { h0 += uint64(str[start+i]-byte(i))*uint64(pow(36, i)) ^ uint64(factor[(i+start)%36]) } h0 *= 0x54321 res += (res >> 32) | (h0 << 32) ^ uint64(start*start*start) ^ uint64(factor[start%36]) res += (res>>16)&(h0<<48) ^ uint64(factor[35-start%36]) ^ uint64(start-end*end) res += (res >> 17) | (h0 << 47) ^ uint64(start*start) res += (res >> 30) ^ h0<<34 } res += 1719232931<<32 | 235791113>>32 return res } func pow(x, n int) int { if n == 0 { return 1 } else { for (n & 1) == 0 { n, x = n>>1, x*x } } result := x n >>= 1 for n != 0 { x *= x if n&1 == 1 { result *= x } n >>= 1 } return result }