package molenc
Molecular encoder/featurizer using rdkit and OCaml
Install
Dune Dependency
Authors
Maintainers
Sources
v16.13.0.tar.gz
sha256=deb4a9f58f49bd9cefb7cf2004ad7ce750aa949655e6f277d4c3e61dfa23c6d6
md5=e90db1862c04f7eb39cf437d33ddf9b3
doc/src/molenc/MSE_mol.ml.html
Source file MSE_mol.ml
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103
(* Copyright (C) 2020, Francois Berenger

   Yamanishi laboratory,
   Department of Bioscience and Bioinformatics,
   Faculty of Computer Science and Systems Engineering,
   Kyushu Institute of Technology,
   680-4 Kawazu, Iizuka, Fukuoka, 820-8502, Japan. *)

(* Multi-Scale-Encoded molecule *)

open Printf

module L = MyList
module Log = Dolog.Log
module String = BatString
module StringMap = BatMap.String

(* A molecule: its name plus a map from feature string to integer count *)
type t = { name: string;
           map: int StringMap.t }

(* build a molecule from its name and its feature-to-count map *)
let create name map =
  { name; map }

(* name of the molecule *)
let get_name x =
  x.name

(* feature-to-count map of the molecule *)
let get_map x =
  x.map

(* Parse one feature line made of a string token followed by an integer
   and return them as a (feature, count) pair.
   On failure, the offending line is reported on stderr and the exception
   raised by the parser is re-raised unchanged. *)
let feat_count_of_string s =
  try Scanf.sscanf s "%s %d" (fun feat count -> (feat, count))
  with exn ->
    (* terminate the diagnostic with a newline so that it does not run
       into whatever is printed next on stderr *)
    (eprintf "MSE_mol.feat_count_of_string: cannot parse: %s\n" s;
     raise exn)

(* to construct one molecule with all its constituent lines already read
   from the input file.
   The first line is the name line (it must start with a '#' char);
   each following line is parsed by feat_count_of_string.
   Fails with Failure on an empty list.
   A duplicated feature is only logged as a warning; the count read last
   replaces the previous one. *)
let read_one = function
  | [] -> failwith "MSE_mol.read_one: empty list"
  | name_line :: feat_count_strs ->
    (* molecule separator is a line starting with a '#' char *)
    assert(String.get name_line 0 = '#');
    let name = String.lchop name_line in (* remove it *)
    let map =
      List.fold_left (fun acc line ->
          let feat, count = feat_count_of_string line in
          (* feature cannot already be here; otherwise,
             there was a problem during encoding of the molecule *)
          if StringMap.mem feat acc then
            Log.warn "mol: %s dup feat: %s" name feat;
          StringMap.add feat count acc
        ) StringMap.empty feat_count_strs in
    create name map

(* Name line of the next molecule, already consumed from the input while
   looking for the end of the current one; the empty string means that no
   name line is pending. This is global state shared by all calls to
   get_lines. *)
let previous_name = ref ""

(* raised inside get_lines to leave its reading loop *)
exception Break

(* get lines for just one molecule
   (i.e. for one call to read_one after).
   The returned list starts with the name line, followed by the feature
   lines in file order.
   If no name line is pending, one is read first; End_of_file raised by
   that read is not caught here and so propagates to the caller. *)
let get_lines input =
  let acc = ref [] in
  if !previous_name = "" then
    begin
      let line = input_line input in
      assert(BatString.starts_with line "#"); (* enforce name line *)
      previous_name := line
    end;
  acc := [!previous_name];
  try
    while true do
      let line' = input_line input in
      if BatString.starts_with line' "#" then
        (* this is the start of another molecule:
           remember its name line for the next call *)
        begin
          previous_name := line';
          raise Break
        end
      else
        acc := line' :: !acc
    done;
    assert(false) (* for typing: should never be reached at exec *)
  with Break -> L.rev !acc
     | End_of_file ->
       begin
         (* input exhausted: no name line is pending anymore *)
         previous_name := "";
         L.rev !acc
       end

(* Decode all the molecules found in a list of lines already in memory.
   Each molecule is one name line (starting with a '#' char) followed by
   zero or more feature lines; molecules are returned in input order.
   Feature lines are handed to read_one in input order, so a duplicated
   feature is resolved the same way as for lines coming from get_lines.
   A name line immediately followed by another name line gives a molecule
   with an empty feature map.
   Raises Assert_failure if a molecule does not start with a name line. *)
let of_lines lines =
  (* split a list at its first line starting with '#': the lines before
     it (in order) and the remaining lines *)
  let rec take_feats acc = function
    | l :: ls when not (String.starts_with l "#") -> take_feats (l :: acc) ls
    | rest -> (L.rev acc, rest) in
  let rec loop acc = function
    | [] -> L.rev acc
    | name :: rest when String.starts_with name "#" ->
      let feat_counts, remaining_mols = take_feats [] rest in
      let mol = read_one (name :: feat_counts) in
      loop (mol :: acc) remaining_mols
    | _ :: _ ->
      (* malformed input: a molecule must start with a '#' name line *)
      assert(false) in
  loop [] lines
sectionYPositions = computeSectionYPositions($el), 10)"
x-init="setTimeout(() => sectionYPositions = computeSectionYPositions($el), 10)"
>