package biotk

  1. Overview
  2. Docs
Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source

Source file cisbp.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
open Core

(** URL of the zip archive [PWMs.zip] from the CIS-BP 1.02 bulk downloads
    ("EntireDataset"). *)
let pwm_archive_url = "http://cisbp.ccbr.utoronto.ca/data/1.02/DataFiles/Bulk_downloads/EntireDataset/PWMs.zip"

(** URL of the zip archive [TF_Information_all_motifs.txt.zip] from the same
    CIS-BP 1.02 bulk download directory. *)
let tf_information_archive_url = "http://cisbp.ccbr.utoronto.ca/data/1.02/DataFiles/Bulk_downloads/EntireDataset/TF_Information_all_motifs.txt.zip"

(** Parser for a tab-separated transcription factor information table
    (presumably the [TF_Information_all_motifs.txt] file shipped in the
    archive at [tf_information_archive_url] -- confirm against callers). *)
module TF_information = struct
  (** One data line of the table. Record fields are declared in the same
      order as the 26 columns consumed by [parse_fields]. Fields of type
      [_ option] are [None] when the corresponding column holds the
      placeholder ["."]. *)
  type item = {
    tf_id : string ;
    family_id : string ;
    tsource_id : string ;
    motif_id : string option ;
    msource_id : string option ;
    dbid : string ;
    tf_name : string ;
    tf_species : string ;
    tf_status : string ;
    family_name : string ;
    dbds : string list ;
    dbd_count : int ;
    cutoff : float ;
    dbid2 : string option ;
    motif_type : string option ;
    msource_identifier : string option ;
    msource_type : string option ;
    msource_author : string option ;
    msource_year : int option ;
    pmid : string option ;
    msource_version : string option ;
    tfsource_name : string ;
    tfsource_url : string ;
    tfsource_year : int ;
    tfsource_month : string ;
    tfsource_day : int ;
  }

  (** A whole table: one [item] per non-empty data line. *)
  type t = item list

  (** [opt f s] is [None] when [s] is the placeholder ["."], and
      [Some (f s)] otherwise. *)
  let opt f = function
    | "." -> None
    | s -> Some (f s)

  (** Identity converter, so that string columns can be passed to [opt] and
      [list] like the other converters. *)
  let string x = x

  (** Integer column converter; raises if the text is not an integer. *)
  let int x = Int.of_string x

  (** Float column converter; raises if the text is not a float. *)
  let float x = Float.of_string x

  (** [list f s] splits [s] on commas and converts each piece with [f].
      Note that an empty [s] yields a one-element list. *)
  let list f x =
    String.split ~on:',' x
    |> List.map ~f

  (** Builds an [item] from the fields of one line, already split on tabs.

      @raise Failure if the line does not have exactly 26 fields; the
      message embeds the offending line. Numeric conversions raise on
      malformed numbers. *)
  let parse_fields = function
    | [ tf_id ;
        family_id ;
        tsource_id ;
        motif_id ;
        msource_id ;
        dbid ;
        tf_name ;
        tf_species ;
        tf_status ;
        family_name ;
        dbds ;
        dbd_count ;
        cutoff ;
        dbid2 ;
        motif_type ;
        msource_identifier ;
        msource_type ;
        msource_author ;
        msource_year ;
        pmid ;
        msource_version ;
        tfsource_name ;
        tfsource_url ;
        tfsource_year ;
        tfsource_month ;
        tfsource_day ;
      ] -> {
        tf_id ;
        family_id ;
        tsource_id ;
        motif_id = opt string motif_id ;
        msource_id = opt string msource_id ;
        dbid ;
        tf_name ;
        tf_species ;
        tf_status ;
        family_name ;
        dbds = list string dbds ;
        dbd_count = int dbd_count ;
        cutoff = float cutoff ;
        dbid2 = opt string dbid2 ;
        motif_type = opt string motif_type ;
        msource_identifier = opt string msource_identifier ;
        msource_type = opt string msource_type ;
        msource_author = opt string msource_author ;
        msource_year = opt int msource_year ;
        pmid = opt string pmid ;
        msource_version = opt string msource_version ;
        tfsource_name ;
        tfsource_url ;
        tfsource_year = int tfsource_year ;
        tfsource_month ;
        tfsource_day = int tfsource_day ;
      }
    | fields -> failwithf "incorrect line format: %s" (String.concat ~sep:"\t" fields) ()

  (** [from_file fn] reads the table stored in file [fn]. The first line is
      treated as a header and dropped, empty lines are skipped, and every
      remaining line is split on tabs and handed to [parse_fields].

      Lines are read with [In_channel.read_lines ~fix_win_eol:true] so that
      a trailing carriage return (CRLF files) does not end up glued to the
      last column, where it would make the integer conversion fail.

      @raise Failure on a line that [parse_fields] rejects. *)
  let from_file fn =
    In_channel.read_lines ~fix_win_eol:true fn
    |> Fn.flip List.drop 1
    |> List.filter ~f:(String.( <> ) "")
    |> List.map ~f:(fun line -> parse_fields (String.split ~on:'\t' line))
end

(** Reader for motif matrices stored as tab-separated text files
    (presumably the per-motif files of the archive at [pwm_archive_url] --
    confirm against callers). *)
module Motif = struct
  (** One row per data line of the file; each row holds the four values of
      the columns bound to [a], [c], [g] and [t] in [parse_line], in that
      order. *)
  type t = float array array

  (** [parse_line l] splits [l] on tabs and expects exactly five fields: the
      first one is ignored, the other four are converted to floats.

      @raise Failure if [l] does not have exactly five fields.
      [Float.of_string] raises on a malformed number. *)
  let parse_line l =
    match String.split l ~on:'\t' with
    | [ _ ; a ; c ; g ; t ] ->
      Array.map [| a ; c ; g ; t |] ~f:Float.of_string
    | _ ->
      failwithf "Cis_bp.Motif.parse_line: unable to parse %s" l ()

  (** [from_file fn] reads a motif from file [fn]. The first line is treated
      as a header and dropped; empty lines are skipped (as in
      [TF_information.from_file]) so that a stray blank line does not abort
      the whole read. A file with no data line yields an empty matrix. *)
  let from_file fn =
    In_channel.read_lines ~fix_win_eol:true fn
    |> Fn.flip List.drop 1
    |> List.filter ~f:(String.( <> ) "")
    |> List.map ~f:parse_line
    |> Array.of_list

  (** [read_all_in_dir dir] parses every entry of [dir] whose name ends in
      [.txt] and returns [(file_name, motif)] pairs, where [file_name] is
      the bare entry name (without [dir]). The order is whatever
      [Sys.readdir] returns, which is not specified. *)
  let read_all_in_dir dir =
    Stdlib.Sys.readdir dir
    |> Array.to_list
    |> List.filter ~f:(fun fn -> Filename.check_suffix fn ".txt")
    |> List.map ~f:(fun fn -> fn, from_file (Filename.concat dir fn))


  (** [pwm m] scales every value of [m] by 100, converts it to an integer
      and passes the resulting integer matrix to [Pwm.make] together with
      [Pwm.flat_background ()].

      The conversion rounds to the nearest integer instead of truncating:
      products such as [0.29 *. 100.] evaluate to [28.999999999999996] in
      floating point and truncation would silently turn them into 28. *)
  let pwm m =
    let bg = Pwm.flat_background () in
    let counts =
      Array.map m ~f:(fun pos ->
          Array.map pos ~f:(fun p -> Float.iround_nearest_exn (p *. 100.))
        )
    in
    Pwm.make counts bg
end
OCaml

Innovation. Community. Security.