package biotk

  1. Overview
  2. Docs
Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source

Source file encode.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
open Core_kernel

(** Parser for the ENCODE experiment list: a tab-separated table whose first
    line is a header and whose remaining lines each describe one experiment
    using exactly eleven columns. *)
module Experiment_list = struct
  (** A parsed table: one {!item} per data row, in file order. *)
  type t = item list

  (** One row of the table. Except for [data_type] and
      [experimental_factors], every column is kept as the raw string read
      from the file. *)
  and item = {
    data_type : [  | `CAGE | `ChIA_PET | `ChIP_seq
                   | `Combined | `DNA_PET | `DNase_DGF
                   | `DNase_seq | `Exon_array | `FAIRE_seq
                   | `GENCODE | `Genotype | `_5C ] ;
    cell_type : string ;
    experimental_factors : (string * string) list ;
    treatment : string ;
    lab : string ;
    pi : string ;
    assembly : string ;
    status : string ;
    geo_accession : string ;
    dcc_accession : string ;
    data_unrestricted : string
  }

  (** [Parse_error (row, i)] reports a data row that could not be converted
      to an {!item}. [row] is the row with its fields re-joined by tab
      characters; [i] is the zero-based index of the row among the data
      rows, i.e. the header line is not counted. *)
  exception Parse_error of string * int

  (** [data_type_of_string s] maps the label found in the data type column
      to the corresponding variant.
      @raise Failure if [s] is not one of the known labels. *)
  let data_type_of_string = function
    | "5C" -> `_5C
    | "CAGE" -> `CAGE
    | "ChIA-PET" -> `ChIA_PET
    | "ChIP-seq" -> `ChIP_seq
    | "Combined" -> `Combined
    | "DNA-PET" -> `DNA_PET
    | "DNase-DGF" -> `DNase_DGF
    | "DNase-seq" -> `DNase_seq
    | "Exon Array" -> `Exon_array
    | "FAIRE-seq" -> `FAIRE_seq
    | "GENCODE" -> `GENCODE
    | "Genotype" -> `Genotype
    | x -> failwith ("Encode.data_type_of_string: unknown data type ``" ^ x ^ "''")

  (* NOTE(review): this is still an unimplemented stub. It raises
     [Assert_failure] for every input, so building an item from any
     well-formed row currently fails. The textual format of the column is
     not visible from this file and must be confirmed before implementing. *)
  let experimental_factors_of_string _ = assert false

  (** [item_of_line i fields] builds an {!item} from the already split
      columns of data row number [i].
      @raise Parse_error if [fields] does not hold exactly eleven columns or
      if the data type column is not a known label. *)
  let item_of_line i fields =
    let parse_error () = Parse_error (String.concat ~sep:"\t" fields, i) in
    match fields with
    | [ data_type ; cell_type ; experimental_factors ; treatment ; lab ; pi ;
        assembly ; status ; geo_accession ; dcc_accession ;
        data_unrestricted ] ->
      (* Report an unknown data type as a [Parse_error] carrying the row and
         its index, so that [parse] can turn it into an [`Error] value
         instead of letting a bare [Failure] escape. *)
      let data_type =
        try data_type_of_string data_type
        with Failure _ -> raise (parse_error ())
      in
      {
        data_type ;
        cell_type ;
        experimental_factors = experimental_factors_of_string experimental_factors ;
        treatment ;
        lab ;
        pi ;
        assembly ;
        status ;
        geo_accession ;
        dcc_accession ;
        data_unrestricted ;
      }
    | _ -> raise (parse_error ())

  (** [parse_exn fn] reads the file [fn], discards its first line (the
      header) and converts every following line with {!item_of_line}. A file
      without any line yields the empty list.
      @raise Parse_error on the first row that cannot be converted.
      Exceptions raised by [In_channel.read_lines] while reading [fn] are
      not caught. *)
  let parse_exn fn =
    match In_channel.read_lines fn with
    | [] -> [] (* no header and no rows: nothing to parse *)
    | _header :: rows ->
      List.mapi rows ~f:(fun i row ->
          item_of_line i (String.split row ~on:'\t'))

  (** [parse fn] is {!parse_exn} with [Parse_error] turned into a value:
      [`Ok items] on success, [`Error (row, i)] for the first row that
      cannot be converted. Other exceptions still propagate. *)
  let parse fn =
    try `Ok (parse_exn fn)
    with Parse_error (l,lno) -> `Error (l,lno)

  (* NOTE(review): both URLs request [output=csv], whereas [parse_exn] splits
     rows on tab characters. Confirm which separator the downloaded files
     actually use. *)
  let human_url = "https://spreadsheets.google.com/pub?key=0AvQL5qBL6AfEdFJqaU16U3JjT2hUX0JjeFFKVk56QlE&hl=en&output=csv"
  let mouse_url = "https://spreadsheets.google.com/pub?key=0AvQL5qBL6AfEdEJKd3RmYl9tbkc0d04wZDdiXzRrbmc&output=csv"
end
OCaml

Innovation. Community. Security.