文件名称:SetSketch:填补MinHash和HyperLogLog之间的空白-C/C++开发
文件大小:27.21MB
文件格式:ZIP
更新时间:2024-06-15 20:08:05
C/C++ Miscellaneous
SetSketch:填补MinHash和HyperLogLog之间的空白。该存储库包含用于重现论文《SetSketch:填补MinHash和HyperLogLog之间的空白》(arXiv预印本)中所有结果和图表的源代码。摘要:MinHash和HyperLogLog是两种草图(sketch)算法,已成为大数据应用中集合摘要不可或缺的工具。HyperLogLog能够以很小的空间统计不同元素的个数,而MinHash适合进行快速比较……
【文件预览】:
set-sketch-paper-master
----.gitignore(211B)
----build.gradle(23KB)
----data()
--------performance_test(name=GeneralizedHyperLogLog;numRegisters=256;q=62;base=2.00000000000000000e+00;a=3.90625000000000000e-03;registerStateType=registers;aggregationMode=stream;).csv(1KB)
--------performance_test(name=SetSketch1;numRegisters=256;q=62;base=1.00099999999999989e+00;a=2.00000000000000000e+01;registerStateType=registers with lower bound;bulkAddFirstAttemptSuccessProbability=9.49999999999999956e-01;aggregationMode=bulk;).csv(2KB)
--------joint_test(name=SetSketch2;numRegisters=4096;q=65534;base=1.00099999999999989e+00;a=2.00000000000000000e+01;registerStateType=registers with lower bound;bulkAddFirstAttemptSuccessProbability=9.49999999999999956e-01;).csv(614KB)
--------performance_test(name=SetSketch2;numRegisters=256;q=62;base=1.00099999999999989e+00;a=2.00000000000000000e+01;registerStateType=registers with lower bound;bulkAddFirstAttemptSuccessProbability=9.49999999999999956e-01;aggregationMode=bulk;).csv(2KB)
--------joint_test(name=SetSketch1;numRegisters=4096;q=62;base=2.00000000000000000e+00;a=2.00000000000000000e+01;registerStateType=registers with lower bound;bulkAddFirstAttemptSuccessProbability=9.49999999999999956e-01;).csv(611KB)
--------cardinality_test(name=SetSketch2;numRegisters=256;q=65534;base=1.00099999999999989e+00;a=2.00000000000000000e+01;registerStateType=registers with lower bound;bulkAddFirstAttemptSuccessProbability=9.49999999999999956e-01;).csv(267KB)
--------performance_test(name=SetSketch2;numRegisters=4096;q=62;base=2.00000000000000000e+00;a=2.00000000000000000e+01;registerStateType=registers with lower bound;bulkAddFirstAttemptSuccessProbability=9.49999999999999956e-01;aggregationMode=bulk;).csv(2KB)
--------performance_test(name=GeneralizedHyperLogLog;numRegisters=4096;q=62;base=2.00000000000000000e+00;a=2.44140625000000000e-04;registerStateType=registers with lower bound;aggregationMode=stream;).csv(2KB)
--------performance_test(name=HyperLogLog;numRegisters=4096;q=52;base=2.00000000000000000e+00;a=2.44140625000000000e-04;registerStateType=registers with lower bound;aggregationMode=stream;).csv(1KB)
--------performance_test(name=GeneralizedHyperLogLog;numRegisters=4096;q=62;base=1.00099999999999989e+00;a=2.44140625000000000e-04;registerStateType=registers;aggregationMode=stream;).csv(1KB)
--------performance_test(name=GeneralizedHyperLogLog;numRegisters=4096;q=62;base=2.00000000000000000e+00;a=2.44140625000000000e-04;registerStateType=registers;aggregationMode=stream;).csv(1KB)
--------performance_test(name=SetSketch1;numRegisters=4096;q=62;base=2.00000000000000000e+00;a=2.00000000000000000e+01;registerStateType=registers with lower bound;bulkAddFirstAttemptSuccessProbability=9.49999999999999956e-01;aggregationMode=bulk;).csv(2KB)
--------joint_test(name=SetSketch1;numRegisters=4096;q=254;base=1.19999999999999996e+00;a=2.00000000000000000e+01;registerStateType=registers with lower bound;bulkAddFirstAttemptSuccessProbability=9.49999999999999956e-01;).csv(611KB)
--------cardinality_test(name=GeneralizedHyperLogLog;numRegisters=4096;q=62;base=2.00000000000000000e+00;a=2.44140625000000000e-04;registerStateType=registers with lower bound;).csv(267KB)
--------performance_test(name=SetSketch1;numRegisters=256;q=62;base=1.00099999999999989e+00;a=2.00000000000000000e+01;registerStateType=registers with lower bound;bulkAddFirstAttemptSuccessProbability=9.49999999999999956e-01;aggregationMode=stream;).csv(2KB)
--------performance_test(name=SetSketch1;numRegisters=4096;q=62;base=1.00099999999999989e+00;a=2.00000000000000000e+01;registerStateType=registers with lower bound;bulkAddFirstAttemptSuccessProbability=9.49999999999999956e-01;aggregationMode=stream;).csv(2KB)
--------performance_test(name=SetSketch1;numRegisters=256;q=62;base=2.00000000000000000e+00;a=2.00000000000000000e+01;registerStateType=registers with lower bound;bulkAddFirstAttemptSuccessProbability=9.49999999999999956e-01;aggregationMode=stream;).csv(2KB)
--------performance_test(name=HyperLogLog;numRegisters=4096;q=52;base=2.00000000000000000e+00;a=2.44140625000000000e-04;registerStateType=registers;aggregationMode=stream;).csv(1KB)
--------performance_test(name=SetSketch1;numRegisters=256;q=62;base=2.00000000000000000e+00;a=2.00000000000000000e+01;registerStateType=registers with lower bound;bulkAddFirstAttemptSuccessProbability=9.49999999999999956e-01;aggregationMode=bulk;).csv(2KB)
--------performance_test(name=MinHash;base=1.00000000000000000e+00;numRegisters=4096;aggregationMode=stream;).csv(1KB)
--------performance_test(name=GeneralizedHyperLogLog;numRegisters=256;q=62;base=1.00099999999999989e+00;a=3.90625000000000000e-03;registerStateType=registers with lower bound;aggregationMode=stream;).csv(2KB)
--------joint_test(name=SetSketch2;numRegisters=4096;q=254;base=1.19999999999999996e+00;a=2.00000000000000000e+01;registerStateType=registers with lower bound;bulkAddFirstAttemptSuccessProbability=9.49999999999999956e-01;).csv(612KB)
--------cardinality_test(name=SetSketch1;numRegisters=256;q=62;base=2.00000000000000000e+00;a=2.00000000000000000e+01;registerStateType=registers with lower bound;bulkAddFirstAttemptSuccessProbability=9.49999999999999956e-01;).csv(267KB)
--------cardinality_test(name=SetSketch1;numRegisters=4096;q=62;base=2.00000000000000000e+00;a=2.00000000000000000e+01;registerStateType=registers with lower bound;bulkAddFirstAttemptSuccessProbability=9.49999999999999956e-01;).csv(267KB)
--------joint_test(name=MinHash;base=1.00000000000000000e+00;numRegisters=4096;).csv(1MB)
--------performance_test(name=GeneralizedHyperLogLog;numRegisters=4096;q=62;base=1.00099999999999989e+00;a=2.44140625000000000e-04;registerStateType=registers with lower bound;aggregationMode=stream;).csv(2KB)
--------cardinality_test(name=SetSketch2;numRegisters=256;q=62;base=2.00000000000000000e+00;a=2.00000000000000000e+01;registerStateType=registers with lower bound;bulkAddFirstAttemptSuccessProbability=9.49999999999999956e-01;).csv(267KB)
--------performance_test(name=SetSketch1;numRegisters=4096;q=62;base=1.00099999999999989e+00;a=2.00000000000000000e+01;registerStateType=registers with lower bound;bulkAddFirstAttemptSuccessProbability=9.49999999999999956e-01;aggregationMode=bulk;).csv(2KB)
--------joint_test(name=GeneralizedHyperLogLog;numRegisters=4096;q=65534;base=1.00099999999999989e+00;a=2.44140625000000000e-04;registerStateType=registers with lower bound;).csv(611KB)
--------performance_test(name=MinHash;base=1.00000000000000000e+00;numRegisters=256;aggregationMode=stream;).csv(1KB)
--------performance_test(name=SetSketch2;numRegisters=256;q=62;base=1.00099999999999989e+00;a=2.00000000000000000e+01;registerStateType=registers with lower bound;bulkAddFirstAttemptSuccessProbability=9.49999999999999956e-01;aggregationMode=stream;).csv(2KB)
--------performance_test(name=GeneralizedHyperLogLog;numRegisters=256;q=62;base=2.00000000000000000e+00;a=3.90625000000000000e-03;registerStateType=registers with lower bound;aggregationMode=stream;).csv(2KB)
--------performance_test(name=SetSketch2;numRegisters=256;q=62;base=2.00000000000000000e+00;a=2.00000000000000000e+01;registerStateType=registers with lower bound;bulkAddFirstAttemptSuccessProbability=9.49999999999999956e-01;aggregationMode=bulk;).csv(2KB)
--------cardinality_test(name=SetSketch2;numRegisters=4096;q=65534;base=1.00099999999999989e+00;a=2.00000000000000000e+01;registerStateType=registers with lower bound;bulkAddFirstAttemptSuccessProbability=9.49999999999999956e-01;).csv(267KB)
--------joint_test(name=SetSketch2;numRegisters=4096;q=62;base=2.00000000000000000e+00;a=2.00000000000000000e+01;registerStateType=registers with lower bound;bulkAddFirstAttemptSuccessProbability=9.49999999999999956e-01;).csv(610KB)
--------performance_test(name=GeneralizedHyperLogLog;numRegisters=256;q=62;base=1.00099999999999989e+00;a=3.90625000000000000e-03;registerStateType=registers;aggregationMode=stream;).csv(2KB)
--------performance_test(name=SetSketch2;numRegisters=4096;q=62;base=1.00099999999999989e+00;a=2.00000000000000000e+01;registerStateType=registers with lower bound;bulkAddFirstAttemptSuccessProbability=9.49999999999999956e-01;aggregationMode=bulk;).csv(2KB)
--------cardinality_test(name=SetSketch1;numRegisters=256;q=65534;base=1.00099999999999989e+00;a=2.00000000000000000e+01;registerStateType=registers with lower bound;bulkAddFirstAttemptSuccessProbability=9.49999999999999956e-01;).csv(267KB)
--------performance_test(name=HyperLogLog;numRegisters=256;q=56;base=2.00000000000000000e+00;a=3.90625000000000000e-03;registerStateType=registers with lower bound;aggregationMode=stream;).csv(1KB)
--------joint_test(name=GeneralizedHyperLogLog;numRegisters=4096;q=254;base=1.19999999999999996e+00;a=2.44140625000000000e-04;registerStateType=registers with lower bound;).csv(610KB)
--------performance_test(name=SetSketch1;numRegisters=4096;q=62;base=2.00000000000000000e+00;a=2.00000000000000000e+01;registerStateType=registers with lower bound;bulkAddFirstAttemptSuccessProbability=9.49999999999999956e-01;aggregationMode=stream;).csv(2KB)
--------cardinality_test(name=SetSketch1;numRegisters=4096;q=65534;base=1.00099999999999989e+00;a=2.00000000000000000e+01;registerStateType=registers with lower bound;bulkAddFirstAttemptSuccessProbability=9.49999999999999956e-01;).csv(267KB)
--------performance_test(name=HyperLogLog;numRegisters=256;q=56;base=2.00000000000000000e+00;a=3.90625000000000000e-03;registerStateType=registers;aggregationMode=stream;).csv(1KB)
--------joint_test(name=SetSketch1;numRegisters=4096;q=65534;base=1.00099999999999989e+00;a=2.00000000000000000e+01;registerStateType=registers with lower bound;bulkAddFirstAttemptSuccessProbability=9.49999999999999956e-01;).csv(614KB)
--------cardinality_test(name=GeneralizedHyperLogLog;numRegisters=256;q=62;base=2.00000000000000000e+00;a=3.90625000000000000e-03;registerStateType=registers with lower bound;).csv(267KB)
--------performance_test(name=SetSketch2;numRegisters=256;q=62;base=2.00000000000000000e+00;a=2.00000000000000000e+01;registerStateType=registers with lower bound;bulkAddFirstAttemptSuccessProbability=9.49999999999999956e-01;aggregationMode=stream;).csv(2KB)
--------joint_test(name=HyperMinHash;numRegisters=4096;base=1.00067713069306641e+00;bucketBits=12;bucketSize=6;subBucketSize=10;registerStateType=registers;).csv(728KB)
--------cardinality_test(name=SetSketch2;numRegisters=4096;q=62;base=2.00000000000000000e+00;a=2.00000000000000000e+01;registerStateType=registers with lower bound;bulkAddFirstAttemptSuccessProbability=9.49999999999999956e-01;).csv(267KB)
--------performance_test(name=SetSketch2;numRegisters=4096;q=62;base=2.00000000000000000e+00;a=2.00000000000000000e+01;registerStateType=registers with lower bound;bulkAddFirstAttemptSuccessProbability=9.49999999999999956e-01;aggregationMode=stream;).csv(2KB)
--------joint_test(name=GeneralizedHyperLogLog;numRegisters=4096;q=62;base=2.00000000000000000e+00;a=2.44140625000000000e-04;registerStateType=registers with lower bound;).csv(609KB)
--------performance_test(name=SetSketch2;numRegisters=4096;q=62;base=1.00099999999999989e+00;a=2.00000000000000000e+01;registerStateType=registers with lower bound;bulkAddFirstAttemptSuccessProbability=9.49999999999999956e-01;aggregationMode=stream;).csv(2KB)
--------performance_test(dummy;aggregationMode=stream;).csv(1KB)
--------cardinality_test(name=GeneralizedHyperLogLog;numRegisters=256;q=65534;base=1.00099999999999989e+00;a=3.90625000000000000e-03;registerStateType=registers with lower bound;).csv(267KB)
--------cardinality_test(name=GeneralizedHyperLogLog;numRegisters=4096;q=65534;base=1.00099999999999989e+00;a=2.44140625000000000e-04;registerStateType=registers with lower bound;).csv(267KB)
----.gitmodules(90B)
----paper()
--------probability_densities.pdf(1.23MB)
--------helper_func_error.pdf(1.23MB)
--------joint_MinHash_1000_1_0.pdf(1.3MB)
--------joint_SetSketch2_1000000.pdf(1.32MB)
--------joint_SetSketch2_1000.pdf(1.32MB)
--------joint_HyperMinHash_1000_1_0006771306930664.pdf(1.3MB)
--------collision_probability.pdf(805KB)
--------cardinality_ml.pdf(1.34MB)
--------joint_SetSketch1_1000000.pdf(1.32MB)
--------mse_upperbound_estimation.pdf(1.26MB)
--------cardinality_simple.pdf(1.34MB)
--------joint_SetSketch1_1000.pdf(1.32MB)
--------theoretical_variance.pdf(783KB)
--------performance.pdf(1.23MB)
--------joint_GeneralizedHyperLogLog_1000000.pdf(1.32MB)
--------joint_GeneralizedHyperLogLog_1000.pdf(1.31MB)
--------expected_relative_error.pdf(675KB)
--------joint_MinHash_1000000_1_0.pdf(1.3MB)
--------joint_HyperMinHash_1000000_1_0006771306930664.pdf(1.3MB)
----README.md(2KB)
----python()
--------joint_charts.py(15KB)
--------performance_charts.py(9KB)
--------theoretical_variance.py(4KB)
--------cardinality_error_charts.py(11KB)
--------color_defs.py(2KB)
--------probability_densities.py(4KB)
--------expected_relative_error.py(3KB)
--------collision_probability.py(7KB)
--------random_test.py(5KB)
--------helper_func_error.py(4KB)
----c++()
--------performance_test.cpp(10KB)
--------sketch.hpp(78KB)
--------bitstream_random.hpp(11KB)
--------util.hpp(3KB)
--------exponential_distribution.hpp(17KB)
--------joint_estimation_test.cpp(18KB)
--------bitstream_test.cpp(2KB)
--------wyhash()
--------cardinality_test.cpp(6KB)
--------bulk_update_test.cpp(5KB)
--------random_test.cpp(9KB)