package bistro-bio

  1. Overview
  2. Docs
Bistro workflows for computational biology

Install

Dune Dependency

Authors

Maintainers

Sources

bistro-0.6.0.tbz
sha256=146177faaaa9117a8e2bf0fd60cb658662c0aa992f35beb246e6fd0766050e66
sha512=553fe0c20f236316449b077a47e6e12626d193ba1916e9da233e5526dd39090e8677277e1c79baace3bdc940cb009f25431730a8efc00ae4ed9cc42a0add9609

doc/src/bistro-bio.examples/chen2008.ml.html

Source file chen2008.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
open Core_kernel
open Bistro
open Bistro_bio
open Biotk

(** The ChIP samples handled by this example, along with the metadata
    needed to obtain their reads and their published peak files. *)
module Sample = struct
  type t = [
    | `ES_WT_ChIP_Nanog_Chen2008
    | `ES_WT_ChIP_Pou5f1_Chen2008
    | `ES_WT_ChIP_Sox2_Chen2008
    | `ES_WT_ChIP_Essrb_Chen2008
  ]
  [@@deriving show,enumerate]

  (** [srr_id sample] is the non-empty list of SRR run identifiers
      associated with [sample]. *)
  let srr_id sample =
    (* Each sample is described as (first run, remaining runs) so that the
       non-empty list is assembled in a single place. *)
    let first_run, other_runs =
      match sample with
      | `ES_WT_ChIP_Nanog_Chen2008 ->
        "SRR002004",
        ["SRR002005";"SRR002006";"SRR002007";"SRR002008";"SRR002009";"SRR002010";"SRR002011"]
      | `ES_WT_ChIP_Pou5f1_Chen2008 ->
        "SRR002012", ["SRR002013";"SRR002014";"SRR002015"]
      | `ES_WT_ChIP_Sox2_Chen2008 ->
        "SRR002023", ["SRR002024";"SRR002025";"SRR002026"]
      | `ES_WT_ChIP_Essrb_Chen2008 ->
        "SRR001992", ["SRR001993";"SRR001994";"SRR001995"]
    in
    List1.cons first_run other_runs

  (** [source x] wraps every run identifier of [x] into a single-end
      [Fastq_sample.SRA_dataset]. *)
  let source x =
    let sra_dataset_of_run run =
      Fastq_sample.SRA_dataset { srr_id = run ; library_type = `single_end }
    in
    List1.map (srr_id x) ~f:sra_dataset_of_run

  (** [string_of_sample sample] is the tag name of [sample], spelled without
      the leading backtick. *)
  let string_of_sample sample =
    match sample with
    | `ES_WT_ChIP_Nanog_Chen2008 -> "ES_WT_ChIP_Nanog_Chen2008"
    | `ES_WT_ChIP_Pou5f1_Chen2008 -> "ES_WT_ChIP_Pou5f1_Chen2008"
    | `ES_WT_ChIP_Sox2_Chen2008 -> "ES_WT_ChIP_Sox2_Chen2008"
    | `ES_WT_ChIP_Essrb_Chen2008 -> "ES_WT_ChIP_Essrb_Chen2008"

  (** Common prefix of the URLs of the published peak files. *)
  let base_url = "ftp://ftp.ncbi.nlm.nih.gov/pub/geo/DATA/supplementary/samples/GSM288nnn/"

  (** [published_peaks_url_suffix sample] is the part of the peak file URL
      that is specific to [sample]; it is meant to be appended to
      [base_url]. *)
  let published_peaks_url_suffix sample =
    match sample with
    | `ES_WT_ChIP_Nanog_Chen2008 -> "GSM288345/GSM288345_ES_Nanog.txt.gz"
    | `ES_WT_ChIP_Pou5f1_Chen2008 -> "GSM288346/GSM288346_ES_Oct4.txt.gz"
    | `ES_WT_ChIP_Sox2_Chen2008 -> "GSM288347/GSM288347_ES_Sox2.txt.gz"
    | `ES_WT_ChIP_Essrb_Chen2008 -> "GSM288355/GSM288355%5FES%5FEsrrb%2Etxt%2Egz"

  (** [published_peaks sample] is a workflow that passes the peak file URL
      of [sample] through [Bistro_unix.wget], then [gunzip], then
      [crlf2lf]. *)
  let published_peaks sample : text file =
    let peaks_url = base_url ^ published_peaks_url_suffix sample in
    let open Bistro_unix in
    crlf2lf (gunzip (wget peaks_url))

  (** [to_string] is the [show] printer derived for [t]. *)
  let to_string = show

  (** Every sample gets the same reference genome, [Ucsc_gb `mm10]. *)
  let reference_genome _sample = Dnaseq_with_reference_genome.Ucsc_gb `mm10
end

(* Applies the [Fastq_sample.Make] functor to [Sample]; the resulting module
   is included in the argument of [Dnaseq_with_reference_genome.Make]
   below. *)
module FQS = Fastq_sample.Make(Sample)

(* Applies [Dnaseq_with_reference_genome.Make] to an anonymous structure
   that merges the definitions of [Sample] with those of [FQS]. *)
module Dnaseq = Dnaseq_with_reference_genome.Make(struct
    (* [Sample] comes first, so a name also defined by [FQS] would resolve
       to the [FQS] definition. *)
    include Sample
    include FQS
  end
  )
OCaml

Innovation. Community. Security.