文件名称:set-sketch-paper:SetSketch:填补MinHash和HyperLogLog之间的空白
文件大小:13.12MB
文件格式:ZIP
更新时间:2024-02-26 20:49:12
sketch estimation minhash locality-sensitive-hashing intersection
SetSketch:填补MinHash和HyperLogLog之间的空白 该存储库包含源代码,可用于重现论文“SetSketch:填补MinHash和HyperLogLog之间的空白”中给出的所有结果和图表。 摘要 MinHash和HyperLogLog是草图(sketching)算法,已成为大数据应用中集合摘要不可或缺的工具。 HyperLogLog能够以很小的空间统计不同元素的个数,而MinHash适用于集合的快速比较,因为它可以估计Jaccard相似度和其他联合量(joint quantities)。 这项工作提出了一个名为SetSketch的新数据结构,该结构能够连续地填补这两种用例之间的空白。 它的可交换和幂等的插入操作以及……
【文件预览】:
set-sketch-paper-master
----paper()
--------cardinality.pdf(1.62MB)
--------helper_func_error.pdf(1.29MB)
--------joint(GeneralizedHyperLogLog).pdf(1.3MB)
--------performance.pdf(1.23MB)
--------expected_relative_error.pdf(675KB)
--------joint(SetSketch1).pdf(1.32MB)
--------mse_upperbound_estimation.pdf(1.26MB)
--------joint(SetSketch2).pdf(1.32MB)
--------collision_probability.pdf(787KB)
----.gitmodules(90B)
----README.md(2KB)
----data()
--------joint_test(name=SetSketch1;numRegisters=16384;q=62;base=2.00000000000000000e+00;a=3.00000000000000000e+01;seed=d532d9748fd00ce0;).csv(187KB)
--------performance_test(name=GeneralizedHyperLogLog;numRegisters=256;q=62;base=2.00000000000000000e+00;a=3.90625000000000000e-03;seed=a35fb5263a6b8b0c;).csv(1KB)
--------performance_test(name=SetSketch1;numRegisters=1024;q=62;base=2.00000000000000000e+00;a=3.00000000000000000e+01;seed=6deaea5dae02750d;).csv(1KB)
--------cardinality_test(name=GeneralizedHyperLogLog;numRegisters=1024;q=65534;base=1.00099999999999989e+00;a=9.76562500000000000e-04;seed=01cd2ba5e72869c4;).csv(266KB)
--------performance_test(name=GeneralizedHyperLogLog;numRegisters=4096;q=62;base=2.00000000000000000e+00;a=2.44140625000000000e-04;seed=1d8f5a77f5bbb902;).csv(1KB)
--------performance_test(name=GeneralizedHyperLogLog;numRegisters=4096;q=65534;base=1.00099999999999989e+00;a=2.44140625000000000e-04;seed=ecc818200c3c6ef3;).csv(1KB)
--------performance_test(name=SetSketch1;numRegisters=1024;q=65534;base=1.00099999999999989e+00;a=3.00000000000000000e+01;seed=524c5f4ac61ddad7;).csv(1KB)
--------cardinality_test(name=GeneralizedHyperLogLog;numRegisters=1024;q=62;base=2.00000000000000000e+00;a=9.76562500000000000e-04;seed=14662929af74678a;).csv(266KB)
--------cardinality_test(name=SetSketch1;numRegisters=16384;q=62;base=2.00000000000000000e+00;a=3.00000000000000000e+01;seed=559075ffabc64341;).csv(266KB)
--------cardinality_test(name=SetSketch2;numRegisters=1024;q=65534;base=1.00099999999999989e+00;a=3.00000000000000000e+01;seed=31d2dc0fbf11f576;).csv(266KB)
--------performance_test(name=SetSketch2;numRegisters=256;q=62;base=2.00000000000000000e+00;a=3.00000000000000000e+01;seed=e6a78c5e8aa8a7ac;).csv(1KB)
--------performance_test(name=HyperLogLog;numRegisters=4096;q=52;base=2.00000000000000000e+00;a=2.44140625000000000e-04;seed=0000000000000000;).csv(1KB)
--------performance_test(dummy;).csv(680B)
--------performance_test(name=SetSketch1;numRegisters=4096;q=65534;base=1.00099999999999989e+00;a=3.00000000000000000e+01;seed=923f3c815254dac0;).csv(1KB)
--------cardinality_test(name=GeneralizedHyperLogLog;numRegisters=4096;q=65534;base=1.00099999999999989e+00;a=2.44140625000000000e-04;seed=32ee6041633624f4;).csv(266KB)
--------cardinality_test(name=SetSketch2;numRegisters=256;q=65534;base=1.00099999999999989e+00;a=3.00000000000000000e+01;seed=106826bc0c793764;).csv(266KB)
--------joint_test(name=GeneralizedHyperLogLog;numRegisters=16384;q=65534;base=1.00099999999999989e+00;a=6.10351562500000000e-05;seed=3f0da0e0183cd125;).csv(189KB)
--------performance_test(name=GeneralizedHyperLogLog;numRegisters=16384;q=65534;base=1.00099999999999989e+00;a=6.10351562500000000e-05;seed=ba731718850df005;).csv(1KB)
--------performance_test(name=SetSketch2;numRegisters=1024;q=65534;base=1.00099999999999989e+00;a=3.00000000000000000e+01;seed=6e3c64196517a9ed;).csv(1KB)
--------performance_test(name=SetSketch2;numRegisters=16384;q=65534;base=1.00099999999999989e+00;a=3.00000000000000000e+01;seed=2ea3947092370ade;).csv(1KB)
--------performance_test(name=HyperLogLog;numRegisters=16384;q=50;base=2.00000000000000000e+00;a=6.10351562500000000e-05;seed=0000000000000000;).csv(1KB)
--------joint_test(name=GeneralizedHyperLogLog;numRegisters=16384;q=254;base=1.19999999999999996e+00;a=6.10351562500000000e-05;seed=559075ffabc64341;).csv(188KB)
--------cardinality_test(name=GeneralizedHyperLogLog;numRegisters=256;q=62;base=2.00000000000000000e+00;a=3.90625000000000000e-03;seed=d532d9748fd00ce0;).csv(266KB)
--------cardinality_test(name=SetSketch1;numRegisters=256;q=65534;base=1.00099999999999989e+00;a=3.00000000000000000e+01;seed=c6af5f872e8bb2bc;).csv(266KB)
--------cardinality_test(name=SetSketch2;numRegisters=16384;q=62;base=2.00000000000000000e+00;a=3.00000000000000000e+01;seed=d6960482253fc240;).csv(266KB)
--------performance_test(name=GeneralizedHyperLogLog;numRegisters=256;q=65534;base=1.00099999999999989e+00;a=3.90625000000000000e-03;seed=bd44696fa13ed2b4;).csv(1KB)
--------cardinality_test(name=SetSketch2;numRegisters=4096;q=62;base=2.00000000000000000e+00;a=3.00000000000000000e+01;seed=62fdfc2132af106f;).csv(266KB)
--------cardinality_test(name=SetSketch2;numRegisters=4096;q=65534;base=1.00099999999999989e+00;a=3.00000000000000000e+01;seed=5ba34df0ff814e7a;).csv(266KB)
--------performance_test(name=SetSketch1;numRegisters=4096;q=62;base=2.00000000000000000e+00;a=3.00000000000000000e+01;seed=c02f012f9f81ffd5;).csv(1KB)
--------performance_test(name=HyperLogLog;numRegisters=1024;q=54;base=2.00000000000000000e+00;a=9.76562500000000000e-04;seed=0000000000000000;).csv(1KB)
--------performance_test(name=SetSketch1;numRegisters=256;q=65534;base=1.00099999999999989e+00;a=3.00000000000000000e+01;seed=78461f7915348ba1;).csv(1KB)
--------performance_test(name=HyperLogLog;numRegisters=256;q=56;base=2.00000000000000000e+00;a=3.90625000000000000e-03;seed=0000000000000000;).csv(1KB)
--------performance_test(name=SetSketch1;numRegisters=256;q=62;base=2.00000000000000000e+00;a=3.00000000000000000e+01;seed=63b031c36fcd348d;).csv(1KB)
--------cardinality_test(name=SetSketch1;numRegisters=4096;q=62;base=2.00000000000000000e+00;a=3.00000000000000000e+01;seed=ddcd924a08baac8c;).csv(266KB)
--------cardinality_test(name=GeneralizedHyperLogLog;numRegisters=256;q=65534;base=1.00099999999999989e+00;a=3.90625000000000000e-03;seed=66f86268110b0bfc;).csv(266KB)
--------performance_test(name=SetSketch2;numRegisters=4096;q=62;base=2.00000000000000000e+00;a=3.00000000000000000e+01;seed=8121ebd56b11d982;).csv(1KB)
--------performance_test(name=GeneralizedHyperLogLog;numRegisters=16384;q=62;base=2.00000000000000000e+00;a=6.10351562500000000e-05;seed=e771a67e6a075a79;).csv(1KB)
--------joint_test(name=SetSketch1;numRegisters=16384;q=254;base=1.19999999999999996e+00;a=3.00000000000000000e+01;seed=14662929af74678a;).csv(188KB)
--------cardinality_test(name=SetSketch1;numRegisters=4096;q=65534;base=1.00099999999999989e+00;a=3.00000000000000000e+01;seed=2cc151ec0c9f1e7c;).csv(266KB)
--------cardinality_test(name=SetSketch2;numRegisters=16384;q=65534;base=1.00099999999999989e+00;a=3.00000000000000000e+01;seed=845cbab0654df26a;).csv(266KB)
--------cardinality_test(name=GeneralizedHyperLogLog;numRegisters=4096;q=62;base=2.00000000000000000e+00;a=2.44140625000000000e-04;seed=82a8545b35daf016;).csv(266KB)
--------performance_test(name=SetSketch2;numRegisters=256;q=65534;base=1.00099999999999989e+00;a=3.00000000000000000e+01;seed=3d7088784ba2c9f9;).csv(1KB)
--------performance_test(name=SetSketch1;numRegisters=16384;q=65534;base=1.00099999999999989e+00;a=3.00000000000000000e+01;seed=c06b311001bd8a92;).csv(1KB)
--------joint_test(name=SetSketch2;numRegisters=16384;q=62;base=2.00000000000000000e+00;a=3.00000000000000000e+01;seed=ee135ccdbaf83684;).csv(187KB)
--------cardinality_test(name=SetSketch2;numRegisters=256;q=62;base=2.00000000000000000e+00;a=3.00000000000000000e+01;seed=3f0da0e0183cd125;).csv(266KB)
--------joint_test(name=SetSketch2;numRegisters=16384;q=65534;base=1.00099999999999989e+00;a=3.00000000000000000e+01;seed=525738371a6744a3;).csv(189KB)
--------performance_test(name=SetSketch2;numRegisters=1024;q=62;base=2.00000000000000000e+00;a=3.00000000000000000e+01;seed=759934cdee109cdd;).csv(1KB)
--------performance_test(name=GeneralizedHyperLogLog;numRegisters=1024;q=62;base=2.00000000000000000e+00;a=9.76562500000000000e-04;seed=c45fcbf37f7900da;).csv(1KB)
--------performance_test(name=SetSketch2;numRegisters=16384;q=62;base=2.00000000000000000e+00;a=3.00000000000000000e+01;seed=80ccde04166e17e6;).csv(1KB)
--------cardinality_test(name=GeneralizedHyperLogLog;numRegisters=16384;q=62;base=2.00000000000000000e+00;a=6.10351562500000000e-05;seed=ee135ccdbaf83684;).csv(266KB)
--------joint_test(name=SetSketch2;numRegisters=16384;q=254;base=1.19999999999999996e+00;a=3.00000000000000000e+01;seed=1b297f07fc96869e;).csv(188KB)
--------cardinality_test(name=SetSketch1;numRegisters=1024;q=65534;base=1.00099999999999989e+00;a=3.00000000000000000e+01;seed=0153b464e5066cab;).csv(266KB)
--------performance_test(name=SetSketch2;numRegisters=4096;q=65534;base=1.00099999999999989e+00;a=3.00000000000000000e+01;seed=5aec1299c64ec447;).csv(1KB)
--------joint_test(name=SetSketch1;numRegisters=16384;q=65534;base=1.00099999999999989e+00;a=3.00000000000000000e+01;seed=82a8545b35daf016;).csv(189KB)
--------performance_test(name=SetSketch1;numRegisters=16384;q=62;base=2.00000000000000000e+00;a=3.00000000000000000e+01;seed=3d6aac04d1dec8cf;).csv(1KB)
--------performance_test(name=GeneralizedHyperLogLog;numRegisters=1024;q=65534;base=1.00099999999999989e+00;a=9.76562500000000000e-04;seed=38ed7e2c9017e399;).csv(1KB)
--------cardinality_test(name=SetSketch1;numRegisters=256;q=62;base=2.00000000000000000e+00;a=3.00000000000000000e+01;seed=1b297f07fc96869e;).csv(266KB)
--------cardinality_test(name=SetSketch2;numRegisters=1024;q=62;base=2.00000000000000000e+00;a=3.00000000000000000e+01;seed=1592d0e8b64ff10d;).csv(266KB)
--------cardinality_test(name=SetSketch1;numRegisters=1024;q=62;base=2.00000000000000000e+00;a=3.00000000000000000e+01;seed=525738371a6744a3;).csv(266KB)
--------joint_test(name=GeneralizedHyperLogLog;numRegisters=16384;q=62;base=2.00000000000000000e+00;a=6.10351562500000000e-05;seed=ddcd924a08baac8c;).csv(188KB)
--------cardinality_test(name=GeneralizedHyperLogLog;numRegisters=16384;q=65534;base=1.00099999999999989e+00;a=6.10351562500000000e-05;seed=c2950eb3d4a4c77e;).csv(266KB)
--------cardinality_test(name=SetSketch1;numRegisters=16384;q=65534;base=1.00099999999999989e+00;a=3.00000000000000000e+01;seed=9cc92d7fa9393899;).csv(266KB)
----c++()
--------sketch.hpp(43KB)
--------random_test.cpp(9KB)
--------cardinality_test.cpp(8KB)
--------wyhash()
--------joint_estimation_test.cpp(24KB)
--------bitstream_random.hpp(11KB)
--------exponential_distribution.hpp(17KB)
--------bitstream_test.cpp(2KB)
--------util.hpp(3KB)
--------performance_test.cpp(9KB)
----.gitignore(197B)
----python()
--------collision_probability.py(7KB)
--------performance_charts.py(7KB)
--------expected_relative_error.py(3KB)
--------helper_func_error.py(5KB)
--------cardinality_error_charts.py(9KB)
--------random_test.py(5KB)
--------color_defs.py(2KB)
--------joint_charts.py(13KB)
----build.gradle(19KB)