Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Source file FeatureVector.ml
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342(* This file is free software, part of Logtk. See file "license" for more details. *)(** {1 Feature Vector indexing} *)(** Feature Vector indexing (see Schulz 2004) for efficient forward
and backward subsumption *)moduleT=Termtypelits=Index_intf.litsmoduleMake(C:Index.CLAUSE)=structmoduleC=C(* TODO use array? *)typefeature_vector=intlist(** a vector of feature *)(** {2 Features} *)moduleFeature=structtypet={name:string;f:lits->int;}(** a function that computes a given feature on clauses *)letcomputefc=f.fcletnamef=f.nameletppoutf=CCFormat.stringoutf.nameletto_string=CCFormat.to_stringppletsize_plus={name="size+";f=(funlits->Iter.filterSLiteral.is_poslits|>Iter.length);}letsize_minus={name="size-";f=(funlits->Iter.filterSLiteral.is_neglits|>Iter.length);}letrec_depth_termdeptht=letpref,t=T.open_funtinletdepth=depth+(List.lengthpref)inmatchT.viewtwith|T.Var_|T.Const_|T.DB_->depth|T.Fun_->assertfalse(* we just opened a function*)|T.AppBuiltin(_,l)|T.App(_,l)->ifCCList.is_emptyl||T.is_var(T.head_termt)thendepth(* for logical operators*)else(letmax_arg_depth=Iter.max_exn(Iter.map(_depth_termdepth)(Iter.of_listl))inmax_arg_depth+1)(* sum of depths at which symbols occur. Eg f(a, g(b)) will yield 4 (f
is at depth 0) *)letsum_of_depths={name="sum_of_depths";f=(funlits->Iter.fold(funacclit->SLiteral.fold(funacct->acc+_depth_term0t)acclit)0lits);}let_select_sign~signlits=lits|>Iter.filter(funl->SLiteral.signl=sign)(* sequence of symbols of clause, of given sign *)let_symbols~signlits=letfilter_termt=not(T.is_app_vart)in_select_sign~signlits|>Iter.flat_mapSLiteral.to_iter|>Iter.flat_map(T.Seq.symbols~filter_term)letcount_symb_plussymb={name=CCFormat.sprintf"count+(%a)"ID.ppsymb;f=(funlits->Iter.length(_symbols~sign:truelits));}letcount_symb_minussymb={name=CCFormat.sprintf"count-(%a)"ID.ppsymb;f=(funlits->Iter.length(_symbols~sign:falselits));}(* max depth of the symbol in the term, or -1 *)letmax_depth_termsymbt=letsymbs_depths=T.Seq.subterms_deptht|>Iter.filter_map(fun(t,depth)->matchT.Classic.viewtwith|T.Classic.App(s,_)whenID.equalssymb->Somedepth|_->None)inmatchIter.maxsymbs_depthswith|None->0|Somem->mlet_max_depth_lits~signsymblits=Iter.fold(fundepthlit->ifsign=SLiteral.signlitthenSLiteral.fold(fundeptht->maxdepth(max_depth_termsymbt))depthlitelsedepth)0litsletmax_depth_plussymb={name=CCFormat.sprintf"max_depth+(%a)"ID.ppsymb;f=(_max_depth_lits~sign:truesymb);}letmax_depth_minussymb={name=CCFormat.sprintf"max_depth-(%a)"ID.ppsymb;f=(_max_depth_lits~sign:falsesymb);}endletcompute_fvfeatureslits=List.map(funfeat->feat.Feature.flits)features(** {2 Feature Trie} *)moduleIntMap=Map.Make(CCInt)moduleCSet=Set.Make(structtypet=C.tletcompare=C.compareend)(* TODO: replace intmap by RAL? or simply a list? or dynamic array *)typetrie=|TrieNodeoftrieIntMap.t(** map feature -> trie *)|TrieLeafofCSet.t(** leaf with a set of clauses *)letempty_trien=matchnwith|TrieNodemwhenIntMap.is_emptym->true|TrieLeafsetwhenCSet.is_emptyset->true|_->false(** get/add/remove the leaf for the given list of ints. The
continuation k takes the leaf, and returns a leaf
that replaces the old leaf.
This function returns the new trie. *)letgoto_leaftrietk=(* the root of the tree *)letroot=triein(* function to go to the given leaf, building it if needed *)letrecgototrietrebuild=matchtrie,twith|(TrieLeafset)asleaf,[]->(* found leaf *)(matchksetwith|new_leafwhenleaf==new_leaf->root(* no change, return same tree *)|new_leaf->rebuildnew_leaf)(* replace by new leaf *)|TrieNodem,c::t'->(try(* insert in subtrie *)letsubtrie=IntMap.findcminletrebuild'subtrie=matchsubtriewith|_whenempty_triesubtrie->rebuild(TrieNode(IntMap.removecm))|_->rebuild(TrieNode(IntMap.addcsubtriem))ingotosubtriet'rebuild'withNot_found->(* no subtrie found *)letsubtrie=ift'=[]thenTrieLeafCSet.emptyelseTrieNodeIntMap.emptyandrebuild'subtrie=matchsubtriewith|_whenempty_triesubtrie->rebuild(TrieNode(IntMap.removecm))|_->rebuild(TrieNode(IntMap.addcsubtriem))ingotosubtriet'rebuild')|TrieNode_,[]->assertfalse(* ill-formed term *)|TrieLeaf_,_->assertfalse(* wrong arity *)ingototriet(funt->t)(** {2 Subsumption Index} *)typet={trie:trie;features:Feature.tlist;}letempty_withfeatures={trie=TrieNodeIntMap.empty;features;}letname="feature_vector_idx"letfeaturesidx=idx.featuresletdefault_features=[Feature.size_plus;Feature.size_minus;Feature.sum_of_depths](** maximam number of features in addition to basic ones *)letmax_features=25letfeatures_of_signature?(ignore=fun_->false)sigma=(* list of (salience: float, feature) *)letfeatures=ref[]in(* create features for the symbols *)Signature.itersigma(funs(ty,_)->ifignoresthen()(* base symbols don't count *)elseletarity=matchType.aritytywith|Type.NoArity->0|Type.Arity(_,i)->iinifType.equaltyType.TPTP.othenfeatures:=[1+arity,Feature.count_symb_pluss;1+arity,Feature.count_symb_minuss]@!featureselsefeatures:=[0,Feature.max_depth_pluss;0,Feature.max_depth_minuss;1+arity,Feature.count_symb_pluss;1+arity,Feature.count_symb_minuss]@!features);(* only take a limited number of features *)letfeatures=List.sort(fun(s1,_)(s2,_)->s2-s1)!featuresinletfeatures=CCList.takemax_featuresfeaturesinletfeatures=List.map(fun(_,f)->f)featuresinletfeatures=default_features@featuresinUtil.debugf2"FV features: [%a]"(funk->k(CCFormat.listFeature.pp)features);featuresletof_signaturesignature=letfeatures=features_of_signaturesignatureinempty_withfeaturesletempty()=empty_withdefault_featuresletaddidxc=(* feature vector of [c] *)letfv=compute_fvidx.features(C.to_litsc)in(* insertion *)letkset=TrieLeaf(CSet.addcset)inlettrie'=goto_leafidx.triefvkin{idxwithtrie=trie';}letadd_seq=Iter.foldaddletadd_list=List.fold_leftaddletremoveidxc=letfv=compute_fvidx.features(C.to_litsc)in(* remove [c] from the trie *)letkset=TrieLeaf(CSet.removecset)inlettrie'=goto_leafidx.triefvkin{idxwithtrie=trie';}letremove_seqidxseq=Iter.foldremoveidxseq(* clauses that subsume (potentially) the given clause *)letretrieve_subsumingidxlits_f=(* feature vector of [c] *)letfv=compute_fvidx.featureslitsinletrecfold_lowerfvnode=matchfv,nodewith|[],TrieLeafset->CSet.iterfset|i::fv',TrieNodemap->IntMap.iter(funjsubnode->ifj<=ithenfold_lowerfv'subnode(* go in the branch *)else())map|_->failwith"number of features in feature vector changed"infold_lowerfvidx.trie(** clauses that are subsumed (potentially) by the given clause *)letretrieve_subsumedidxlits_f=(* feature vector of the hc *)letfv=compute_fvidx.featureslitsinletrecfold_higherfvnode=matchfv,nodewith|[],TrieLeafset->CSet.iterfset|i::fv',TrieNodemap->IntMap.iter(funjsubnode->ifj>=ithenfold_higherfv'subnode(* go in the branch *)else())map|_->failwith"number of features in feature vector changed"infold_higherfvidx.trie(** clauses that are potentially alpha-equivalent to the given clause*)letretrieve_alpha_equividxlits_f=(* feature vector of the hc *)letfv=compute_fvidx.featureslitsinletrecfold_higherfvnode=matchfv,nodewith|[],TrieLeafset->CSet.iterfset|i::fv',TrieNodemap->IntMap.iter(funjsubnode->ifj=ithenfold_higherfv'subnode(* go in the branch *)else())map|_->failwith"number of features in feature vector changed"infold_higherfvidx.trieletretrieve_subsuming_cidxcf=retrieve_subsumingidx(C.to_litsc)(C.labelsc)fletretrieve_subsumed_cidxcf=retrieve_subsumedidx(C.to_litsc)(C.labelsc)fletretrieve_alpha_equiv_cidxcf=retrieve_alpha_equividx(C.to_litsc)(C.labelsc)fletiteridxf=letreciter=function|TrieLeafset->CSet.iterfset|TrieNodemap->IntMap.iter(fun_t'->itert')mapiniteridx.trieletfoldfaccidx=letrecfoldacc=function|TrieLeafset->CSet.fold(funxacc->faccx)setacc|TrieNodemap->IntMap.fold(fun_t'acc->foldacct')mapaccinfoldaccidx.trieend