search——Bloom Filter
package main
import (
"fmt"
"math"
"strconv"
)
type BloomFilter struct {
*bitMap
k uint64 // hash function count
m uint64 // bits array length
n uint64 // insert n number
p float64 // 失误率
p_true float64 // 真是失误率
}
func (b *BloomFilter) Insert(id uint64) {
for i := 0; uint64(i) < b.k; i++ {
b.bitMap.insert(getHash(strconv.Itoa(int(id)), uint64(i)) % b.bitMap.Size)
}
}
func (b *BloomFilter) IsExist(id uint64) bool {
for i := 0; uint64(i) < b.k; i++ {
if !b.bitMap.isExist(getHash(strconv.Itoa(int(id)), uint64(i)) % b.bitMap.Size) {
return false
}
}
return true
}
func NewBloomFilter(p float64, n uint64) *BloomFilter {
m := uint64(-(float64(n)*math.Log2(p))/(math.Ln2*math.Ln2) + 0.5)
k := uint64(math.Ln2*(float64(m)/float64(n)) + 0.5)
p_true := math.Pow(1-math.Pow(math.E, -(float64(n)*float64(k))/float64(m)), float64(k))
bl := &BloomFilter{bitMap: newBitMap(m/64 + 1)}
bl.m = m // bits array length
bl.n = n // insert n number
bl.k = k // hash function count
bl.p = p // 失误率
bl.p_true = p_true // 真实失误率
return bl
}
func main() {
bl := NewBloomFilter(0.00000001, 100)
bl.Insert(10000)
fmt.Println(bl.IsExist(10000))
fmt.Println(bl.IsExist(10001))
}
func main1() {
p := 0.00000000001
n := 100
m := uint(-(float64(n)*math.Log2(p))/(math.Ln2*math.Ln2) + 0.5)
fmt.Println(m/64 + 1)
k := uint64(math.Ln2*(float64(m)/float64(n)) + 0.5)
fmt.Println(m)
fmt.Println(k)
p_true := float64(0)
p_true = math.Pow(1-math.Pow(math.E, -(float64(n)*float64(k))/float64(m)), float64(k))
fmt.Println(p_true < p)
}
type bitMap struct {
Map []uint64
Cap uint64
Size uint64
}
func newBitMap(cap uint64) *bitMap {
return &bitMap{make([]uint64, cap), cap, cap * 64}
}
func (b *bitMap) insert(id uint64) {
b.Map[id/64] |= 1 << (id % 64)
}
func (b *bitMap) isExist(id uint64) bool {
return (b.Map[id/64])&(1<<(id%64)) > 0
}
func (b *bitMap) delete(id uint64) {
b.Map[id/64] &^= 1 << (id % 64)
}
func (b *bitMap) range_() (res []uint64) {
for i := 0; uint64(i) < b.Cap; i++ {
if b.Map[i] > 0 {
for j, bits := 0, b.Map[i]; bits > 0; j, bits = j+1, bits>>1 {
if bits&1 == 1 {
res = append(res, uint64(i)*64+uint64(j))
}
}
}
}
return res
}
func getHash(str string, i uint64) uint64 {
return hash(str) + i*hash1(str)
}
func hash(str string) (res uint64) {
factor := ")k+,p-m~90$#2(*&6q}ev73541]n{fl['?|c"
str = str + factor[:16-(len(str)%16)]
for start, end := 0, len(str)/16; end > 0; start, end = start+16, end-1 {
h0 := uint64(0)
for i := 15; i >= 0; i-- {
h0 += uint64(str[start+i]-byte(i))*uint64(pow(36, i)) ^ uint64(factor[(i+start)%36])
}
h0 *= 0x12345
res += (res >> 30) ^ h0<<34
res += (res >> 32) | (h0 << 32) ^ uint64(start*start*start) ^ uint64(factor[start%36])
res += (res>>16)&(h0<<48) ^ uint64(factor[35-start%36]) ^ uint64(start-end*end)
res += (res >> 17) | (h0 << 47) ^ uint64(start*start)
}
res += 235791113<<32 | 1719232931>>32
return res
}
func hash1(str string) (res uint64) {
factor := ")k+,p-m~90$#2(*&6q}ev73541]n{fl['?|c"
str = str + factor[:16-(len(str)%16)]
for start, end := 0, len(str)/16; end > 0; start, end = start+16, end-1 {
h0 := uint64(0)
for i := 15; i >= 0; i-- {
h0 += uint64(str[start+i]-byte(i))*uint64(pow(36, i)) ^ uint64(factor[(i+start)%36])
}
h0 *= 0x54321
res += (res >> 32) | (h0 << 32) ^ uint64(start*start*start) ^ uint64(factor[start%36])
res += (res>>16)&(h0<<48) ^ uint64(factor[35-start%36]) ^ uint64(start-end*end)
res += (res >> 17) | (h0 << 47) ^ uint64(start*start)
res += (res >> 30) ^ h0<<34
}
res += 1719232931<<32 | 235791113>>32
return res
}
func pow(x, n int) int {
if n == 0 {
return 1
} else {
for (n & 1) == 0 {
n, x = n>>1, x*x
}
}
result := x
n >>= 1
for n != 0 {
x *= x
if n&1 == 1 {
result *= x
}
n >>= 1
}
return result
}