Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Source file bloom.ml
(* A counted Bloom filter *)

module A = BatArray
module Fp = Fingerprint
module L = BatList
module Log = Dolog.Log

type t = int array array (* input feature index (0..N-1) to output feature
                            indexes mapping (0..M-1) *)

(* [distinct_rands rng n bound] draws [n] pairwise-distinct integers
   from [0, bound) using [rng], by rejection sampling: a candidate already
   drawn is discarded and another one is drawn.
   The result is in reverse draw order (most recent draw first).
   @raise Invalid_argument if [n < 0] or [n > bound]; without this check
   the rejection loop could never collect [n] distinct values and would
   spin forever. *)
let distinct_rands rng n bound =
  if n < 0 || n > bound then
    invalid_arg
      (Printf.sprintf
         "Bloom.distinct_rands: cannot draw %d distinct values from [0,%d)"
         n bound);
  let rec loop acc count =
    if count = n then
      acc
    else
      let cand = Random.State.int rng bound in
      if List.mem cand acc then
        loop acc count (* retry *)
      else
        loop (cand :: acc) (count + 1) in
  loop [] 0

(* n: input vector dimension
   k: number of "hash" functions;
      number of output features "turned ON" by a single input feature
   m: output vector dimension

   Returns [(n, k, m, mappings)] where [mappings.(i)] holds the [k] distinct
   output indexes (each in [0, m)) assigned to input feature [i].
   The RNG is created from a fixed seed, so the same [(n, k, m)] always
   produces the same mapping.
   @raise Invalid_argument if [n > 0] and [k > m] (propagated from
   [distinct_rands]: [k] distinct indexes do not exist in [0, m)). *)
let init n k m =
  let res = Array.make_matrix n k 0 in
  let rng = Random.State.make [|3141596|] in
  for i = 0 to n - 1 do
    let rands = distinct_rands rng k m in
    L.iteri (fun j rand ->
        res.(i).(j) <- rand
      ) rands
  done;
  (* log the number of collisions
     (different input features mapping to the same set of output features).
     Each row is sorted in a private copy first, so that two rows holding the
     same indexes in a different order are recognized as the same set;
     [res] itself is left untouched. *)
  let collisions = ref 0 in
  let canonical =
    A.map (fun row ->
        let sorted_row = A.copy row in
        A.sort compare sorted_row;
        sorted_row
      ) res in
  (* after sorting the rows themselves, equal sets are adjacent *)
  A.sort compare canonical;
  for i = 1 to n - 1 do
    if canonical.(i - 1) = canonical.(i) then
      incr collisions
  done;
  (if !collisions > 0 then
     Log.warn "Bloom.init(%d,%d,%d): %d collisions" n k m !collisions);
  (n, k, m, res)

(* [encode (n, k, m, mappings) fp] projects [fp] onto a dense counted vector
   of length [m]: for each [(key, value)] pair returned by
   [Fp.key_value_pairs fp], [value] is added to each of the [k] output
   cells listed in [mappings.(key)].
   The first tuple component is unused.
   NOTE(review): every [key] must be a valid index into [mappings]
   (presumably 0..n-1); otherwise the array access fails -- confirm that
   callers guarantee this. *)
let encode (_n, k, m, mappings) fp =
  let kvs = Fp.key_value_pairs fp in (* sparse input vector *)
  let res = A.make m 0 in (* dense output vector *)
  L.iter (fun (key, value) ->
      let output_indexes = mappings.(key) in
      (* increment all corresponding output features *)
      for i = 0 to k - 1 do
        let j = output_indexes.(i) in
        res.(j) <- res.(j) + value
      done
    ) kvs;
  res