Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Source file FV_tree.ml
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410(* This file is free software, part of Logtk. See file "license" for more details. *)(** {1 Feature Vector indexing} *)(** Feature Vector indexing (see Schulz 2004) for efficient forward
and backward subsumption *)moduleT=TermmoduleFmt=CCFormattypelabels=Index_intf.labelstypefeature=|Nofint|SofID.Set.t|MofintID.Map.t|Loflabelstypefeature_vector=featureIArray.t(** a vector of feature *)letmk_ni=Niletmk_ss=Ssletmk_mm=Mmletmk_ls=LsmoduleFeature:sigtypet=featureincludeInterfaces.ORDwithtypet:=tincludeInterfaces.EQwithtypet:=tincludeInterfaces.PRINTwithtypet:=tvaladd:t->t->tvalmax:t->t->tvalleq:t->t->boolend=structtypet=featureletppout(f:t):unit=matchfwith|Ni->Fmt.intouti|Ss->Fmt.fprintfout"(@[set@ %a@])"(Fmt.iterID.pp)(ID.Set.to_iters)|Mm->Fmt.fprintfout"(@[mset@ %a@])"Fmt.(iter(pair~sep:silentID.ppint))(ID.Map.to_iterm)|Ll->Fmt.fprintfout"(@[labels@ %a@])"Fmt.(iterint)(Util.Int_set.to_iterl)letto_string=Fmt.to_stringppletcomparef1f2:int=letto_int=functionN_->0|S_->1|M_->2|L_->3inmatchf1,f2with|Ni1,Ni2->CCInt.comparei1i2|Ss1,Ss2->ID.Set.compares1s2|Mm1,Mm2->ID.Map.compareCCInt.comparem1m2|Lm1,Lm2->Util.Int_set.comparem1m2|N_,_|S_,_|M_,_|L_,_->CCInt.compare(to_intf1)(to_intf2)letequalf1f2=comparef1f2=0(* combination of features *)letcombine_~default~opf1f2:feature=matchf1,f2with|Ni1,Ni2->mk_n(i1+i2)|Ss1,Ss2->mk_s(ID.Set.unions1s2)|Mm1,Mm2->ID.Map.merge(fun_o1o2->Some(op(CCOpt.get_or~defaulto1)(CCOpt.get_or~defaulto2)))m1m2|>mk_m|Ls1,Ls2->Util.Int_set.unions1s2|>mk_l|N_,_|S_,_|M_,_|L_,_->assertfalseletadd=combine_~default:0~op:(+)letmax=combine_~default:0~op:maxletleq(f1:t)(f2:t):bool=matchf1,f2with|Ni1,Ni2->i1<=i2|Ss1,Ss2->ID.Set.subsets1s2|Mm1,Mm2->ID.Map.for_all(funki1->i1<=ID.Map.get_or~default:~-1km2)m1|Ls1,Ls2->Util.Int_set.subsets1s2|N_,_|S_,_|M_,_|L_,_->assertfalseend(** {2 Features} *)moduleMake(C:Index_intf.CLAUSE)=structmoduleC=CmoduleFeature_fun=structtypet={name:string;f:Index_intf.lits->Index_intf.labels->feature;}(** a function that computes a given feature on clauses *)letcomputefc=f.f(C.to_litsc)(C.labelsc)letnamef=f.nameletppoutf=Fmt.stringoutf.nameletto_string=Fmt.to_stringppletmakenamef={name;f}letsize_plus=make"size+"(funlits_->Iter.filterSLiteral.is_poslits|>Iter.length|>mk_n)letsize_minus=make"size-"(funlits_->Iter.filterSLiteral.is_neglits|>Iter.length|>mk_n)letweight_litlit=SLiteral.to_iterlit|>Iter.mapT.ho_weight|>Iter.fold(+)0letweight_namefilter=makename(funlits_->Iter.filterfilterlits|>Iter.mapweight_lit|>Iter.fold(+)0|>mk_n)letweight_plus=weight_"weight+"SLiteral.is_posletweight_minus=weight_"weight-"SLiteral.is_negletlabels=make"labels"(fun_labels->mk_llabels)letnot_app_vart=not(T.is_app_vart)(* sequence of symbols of clause, of given sign *)letsymbols_filterlits:ID.tIter.t=lits|>Iter.filterfilter|>Iter.flat_mapSLiteral.to_iter|>Iter.flat_map(T.Seq.symbols~filter_term:not_app_var)letset_sym_filterlits_=symbols_filterlits|>ID.Set.of_iter|>mk_sletset_sym_plus=make"set_symb+"(set_sym_SLiteral.is_pos)letset_sym_minus=make"set_symb-"(set_sym_SLiteral.is_neg)letmultiset_sym_filterlits_=symbols_filterlits|>Iter.fold(funmid->letn=ID.Map.get_or~default:0idminID.Map.addid(n+1)m)ID.Map.empty|>mk_mletmultiset_sym_plus=make"mset_symb+"(multiset_sym_SLiteral.is_pos)letmultiset_sym_minus=make"mset_symb-"(multiset_sym_SLiteral.is_neg)(* sequence of symbols of clause with their depth *)letsymbols_depth_filterlits:(ID.t*int)Iter.t=(* ignores app vars and does not count opening the functions *)letsubterms_depthtk=letrecrecursedeptht=ifnot(T.is_app_vart)then(k(t,depth);matchT.viewtwith|Const_|DB_|Var_->()|Fun(_,u)->recursedepthu(* not increasing the depth when we go under a fun
-- no need to eta expand *)|AppBuiltin(_,l)->List.iter(recurse(depth+1))l|App(hd,l)->recursedepthhd;List.iter(recurse(depth+1))l)inrecurse0tinlits|>Iter.filterfilter|>Iter.flat_mapSLiteral.to_iter|>Iter.flat_mapsubterms_depth|>Iter.filter_map(fun(t,d)->matchT.viewtwith|T.Constid->Some(id,d)|_->None)letdepth_sym_filterlits_=symbols_depth_filterlits|>Iter.fold(funm(id,d)->letd'=ID.Map.get_or~default:0idminID.Map.addid(maxdd')m)ID.Map.empty|>mk_mletdepth_sym_plus=make"depth_sym+"(depth_sym_SLiteral.is_pos)letdepth_sym_minus=make"depth_sym-"(depth_sym_SLiteral.is_neg)endtypefeature_funs=Feature_fun.tIArray.tletcompute_fvfunslitslabels:feature_vector=IArray.map(funfeat->feat.Feature_fun.flitslabels)funsletcompute_fv_cfunsc:feature_vector=compute_fvfuns(C.to_litsc)(C.labelsc)(** {2 Feature Trie} *)moduleFeat_map=CCMap.Make(Feature)moduleC_set=Set.Make(C)(* TODO: replace intmap by RAL? or simply a list? or dynamic array *)typetrie=|TrieNodeoftrieFeat_map.t(** map feature -> trie *)|TrieLeafofC_set.t(** leaf with a set of clauses *)letempty_trien=matchnwith|TrieNodemwhenFeat_map.is_emptym->true|TrieLeafsetwhenC_set.is_emptyset->true|_->false(* get/add/remove the leaf for the given list of ints. The
continuation k takes the leaf, and returns a leaf
that replaces the old leaf.
This function returns the new trie. *)letgoto_leaf(trie:trie)(fv:feature_vector)k=(* the root of the tree *)letroot=triein(* function to go to the given leaf, building it if needed *)letrecgototrieirebuild=ifi=IArray.lengthfvthenmatchtriewith|(TrieLeafset)asleaf->(* found leaf *)beginmatchksetwith|new_leafwhenleaf==new_leaf->root(* no change, return same tree *)|new_leaf->rebuildnew_leaf(* replace by new leaf *)end|TrieNode_->assertfalseelsematchtrie,IArray.getfviwith|TrieNodem,c->begintry(* insert in subtrie *)letsubtrie=Feat_map.findcminletrebuild'subtrie=matchsubtriewith|_whenempty_triesubtrie->rebuild(TrieNode(Feat_map.removecm))|_->rebuild(TrieNode(Feat_map.addcsubtriem))ingotosubtrie(i+1)rebuild'withNot_found->(* no subtrie found *)letsubtrie=ifi+1=IArray.lengthfvthenTrieLeafC_set.emptyelseTrieNodeFeat_map.emptyandrebuild'subtrie=matchsubtriewith|_whenempty_triesubtrie->rebuild(TrieNode(Feat_map.removecm))|_->rebuild(TrieNode(Feat_map.addcsubtriem))ingotosubtrie(i+1)rebuild'end|TrieLeaf_,_->assertfalse(* wrong arity *)ingototrie0(funt->t)(** {2 Subsumption Index} *)typet={trie:trie;funs:feature_funs;}letempty_withfuns={trie=TrieNodeFeat_map.empty;funs;}letname="feature_vector_idx"letfeature_funsidx=idx.funs(* NOTE: order of features matter. Put first
the ones with coarse discrimination and cheap features (integers),
to prune early and to ensure some sharing. Very accurate features
should come last, where most instances have been pruned already. *)letdefault_feature_funs:feature_funs=IArray.of_list[Feature_fun.size_plus;Feature_fun.size_minus;Feature_fun.weight_plus;Feature_fun.weight_minus;Feature_fun.labels;Feature_fun.set_sym_plus;Feature_fun.set_sym_minus;Feature_fun.depth_sym_plus;Feature_fun.depth_sym_minus;Feature_fun.multiset_sym_plus;Feature_fun.multiset_sym_minus;]letempty()=empty_withdefault_feature_funsletaddidxc=(* feature vector of [c] *)letfv=compute_fv_cidx.funscin(* insertion *)letkset=TrieLeaf(C_set.addcset)inlettrie'=goto_leafidx.triefvkin{idxwithtrie=trie';}letadd_seq=Iter.foldaddletadd_list=List.fold_leftaddletremoveidxc=letfv=compute_fv_cidx.funscin(* remove [c] from the trie *)letkset=TrieLeaf(C_set.removecset)inlettrie'=goto_leafidx.triefvkin{idxwithtrie=trie';}letremove_seqidxseq=Iter.foldremoveidxseq(* retrieve all clauses which have a feature vector [fv] such that
[check (compute c).(i) fv.(i) = true] *)letretrieve_~checkidxlitslabelsf:unit=(* feature vector of the hc *)letfv=compute_fvidx.funslitslabelsinletrecfold_higherinode=ifi=IArray.lengthfvthenmatchnodewith|TrieLeafset->C_set.iterfset|TrieNode_->assertfalseelsematchnode,IArray.getfviwith|TrieNodemap,feat->Feat_map.iter(funfeat'subnode->ifcheck~feat_query:feat~feat_tree:feat'thenfold_higher(i+1)subnode(* go in the branch *)else())map|TrieLeaf_,_->assertfalseinfold_higher0idx.trie(* clauses that subsume (potentially) the given clause *)letretrieve_subsumingidxlitslabelsf:unit=retrieve_idxlitslabelsf~check:(fun~feat_query~feat_tree->Feature.leqfeat_treefeat_query)(* clauses that are subsumed (potentially) by the given clause *)letretrieve_subsumedidxlitslabelsf:unit=retrieve_idxlitslabelsf~check:(fun~feat_query~feat_tree->Feature.leqfeat_queryfeat_tree)(* clauses that are potentially alpha-equivalent to the given clause*)letretrieve_alpha_equividxlitslabelsf:unit=retrieve_idxlitslabelsf~check:(fun~feat_query~feat_tree->Feature.equalfeat_queryfeat_tree)letretrieve_subsuming_cidxcf=retrieve_subsumingidx(C.to_litsc)(C.labelsc)fletretrieve_subsumed_cidxcf=retrieve_subsumedidx(C.to_litsc)(C.labelsc)fletretrieve_alpha_equiv_cidxcf=retrieve_alpha_equividx(C.to_litsc)(C.labelsc)fletiteridxf=letreciter=function|TrieLeafset->C_set.iterfset|TrieNodemap->Feat_map.iter(fun_t'->itert')mapiniteridx.trieletfoldfaccidx=letrecfoldacc=function|TrieLeafset->C_set.fold(funxacc->faccx)setacc|TrieNodemap->Feat_map.fold(fun_t'acc->foldacct')mapaccinfoldaccidx.trieend