package bistro-bio

  1. Overview
  2. Docs
Bistro workflows for computational biology

Install

Dune Dependency

Authors

Maintainers

Sources

bistro-0.6.0.tbz
sha256=146177faaaa9117a8e2bf0fd60cb658662c0aa992f35beb246e6fd0766050e66
sha512=553fe0c20f236316449b077a47e6e12626d193ba1916e9da233e5526dd39090e8677277e1c79baace3bdc940cb009f25431730a8efc00ae4ed9cc42a0add9609

doc/src/bistro-bio/bedtools.ml.html

Source file bedtools.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
open Core_kernel
open Bistro
open Bistro.Shell_dsl

(** Container image list handed to the [Workflow.shell] calls of this file:
    one Docker image described by account [pveber], name [bedtools] and
    tag [2.21.0]. *)
let img =
  let bedtools_image =
    docker_image ~account:"pveber" ~name:"bedtools" ~tag:"2.21.0" ()
  in
  [ bedtools_image ]

(** [bedtools ?stdout subcmd args] builds a shell command running the
    [bedtools] program with [subcmd] as its first argument, followed by
    [args]. [stdout], when given, is forwarded unchanged to [cmd]. *)
let bedtools ?stdout subcmd args =
  let argv = string subcmd :: args in
  cmd "bedtools" ?stdout argv

(** Tag naming an interval file format. The type parameter ['a] is not
    used by either constructor.
    NOTE(review): presumably ['a] ties the tag to the format of the file
    passed alongside it; confirm against the interface file. *)
type 'a input =
  | Bed
  | Gff

(** Tag value for the [Bed] case. *)
let bed = Bed

(** Tag value for the [Gff] case. *)
let gff = Gff

(** Raw shell commands, for embedding in a larger script rather than being
    wrapped in a workflow of their own. *)
module Cmd = struct
  (** [slop_args ?strand ?header mode] builds the option templates of a
      [bedtools slop] call.

      - [strand] and [header] are passed through [flag] with the switches
        [-s] and [-header] respectively.
      - [mode] selects the side and the amount: [`both n], [`left n] and
        [`right n] take an integer and map to [-b], [-l] and [-r];
        [`both_pct p], [`left_pct p] and [`right_pct p] take a float, map
        to the same three switches and additionally emit [-pct]. *)
  let slop_args ?strand ?header mode = [
    option (flag string "-s") strand ;
    option (flag string "-header") header ;
    seq (
      match mode with
      | `both n      -> [ opt "-b" int n ]
      | `left n      -> [ opt "-l" int n ]
      | `right n     -> [ opt "-r" int n ]
      | `left_pct p  -> [ opt "-l" float p ; string "-pct" ]
      (* The right-hand side is selected with [-r]. This case used to emit
         [-l], which silently applied the fraction to the left side. *)
      | `right_pct p -> [ opt "-r" float p ; string "-pct" ]
      | `both_pct p  -> [ opt "-b" float p ; string "-pct" ]
    )
  ]

  (** [slop ?strand ?header ~mode input chrom_size] is the command
      [bedtools slop] with the options from {!slop_args}, [-i] set to
      [input] and [-g] set to [chrom_size]; its standard output is
      redirected to [dest]. *)
  let slop ?strand ?header ~mode input chrom_size =
    bedtools "slop" ~stdout:dest [
      seq (slop_args ?strand ?header mode) ;
      opt "-i" dep input ;
      opt "-g" dep chrom_size ;
    ]
end

(** [slop ?strand ?header ~mode fmt input chrom_size] wraps [Cmd.slop] in a
    shell workflow described as [bedtools.slop] and run with [img].

    The first positional argument is ignored by the body.
    NOTE(review): presumably it is a format tag used only for typing;
    confirm against the interface file. *)
let slop ?strand ?header ~mode _ input chrom_size =
  let command = Cmd.slop ?strand ?header ~mode input chrom_size in
  Workflow.shell ~descr:"bedtools.slop" ~img [ command ]

(** [intersect ... fmt file files] is a shell workflow, described as
    [bedtools.intersect] and run with [img], that executes
    [bedtools intersect] and redirects its standard output to [dest].

    [file] is passed as [-a]; [files] are passed as [-b], separated by
    single spaces. [g] is passed as a dependency after [-g]; [f] and [_F]
    are floats passed after [-f] and [-F]. Every other optional argument
    goes through [flag] with the switch of the same name ([_S] maps to
    [-S]).

    The first positional argument is ignored by the body.
    NOTE(review): presumably it is a format tag used only for typing;
    confirm against the interface file. *)
let intersect ?ubam ?wa ?wb ?loj ?wo ?wao ?u ?c ?v ?f ?_F ?r ?e ?s ?_S
    ?split ?sorted ?g ?header ?filenames ?sortout _ file files =
  (* Shorthand for [option (flag string name) value]. *)
  let switch name value = option (flag string name) value in
  (* Shorthand for [option (opt name float) value]. *)
  let fraction name value = option (opt name float) value in
  let args = [
    switch "-ubam" ubam ;
    switch "-wa" wa ;
    switch "-wb" wb ;
    switch "-loj" loj ;
    switch "-wo" wo ;
    switch "-wao" wao ;
    switch "-u" u ;
    switch "-c" c ;
    switch "-v" v ;
    fraction "-f" f ;
    fraction "-F" _F ;
    switch "-r" r ;
    switch "-e" e ;
    switch "-s" s ;
    switch "-S" _S ;
    switch "-split" split ;
    switch "-sorted" sorted ;
    option (opt "-g" dep) g ;
    switch "-header" header ;
    switch "-filenames" filenames ;
    switch "-sortout" sortout ;
    opt "-a" dep file ;
    opt "-b" (list dep ~sep:" ") files ;
  ]
  in
  Workflow.shell ~descr:"bedtools.intersect" ~img [
    cmd "bedtools intersect" ~stdout:dest args
  ]

(** [closest ... fmt query beds] is a shell workflow, run with [img], that
    executes [bedtools closest] and redirects its standard output to
    [dest].

    - [strand]: [`same] emits [-s], [`opposite] emits [-S].
    - [io], [iu], [id], [fu], [fd], [header]: passed through [flag] with
      the switch of the same name.
    - [ties]: emitted after [-t] as [all], [first] or [last].
    - [mdb]: emitted after [-mdb] as [each] or [all].
    - [k]: integer emitted after [-k].
    - [query] is passed as [-a]; [beds] are passed as [-b], separated by
      single spaces.

    The first positional argument is ignored by the body.
    NOTE(review): presumably it is a format tag used only for typing;
    confirm against the interface file. *)
let closest ?strand ?io ?iu ?id ?fu ?fd ?ties ?mdb ?k ?header _ query beds =
  (* The description used to read bedtools.intersect (copy-paste slip) and
     the program was spelled with a dot instead of a space, which does not
     name the bedtools executable. Both now match the sibling wrappers. *)
  Workflow.shell ~descr:"bedtools.closest" ~img [
    cmd "bedtools closest" ~stdout:dest [
      option ((function `same -> "-s" | `opposite -> "-S") % string) strand ;
      option (flag string "-io") io ;
      option (flag string "-iu") iu ;
      option (flag string "-id") id ;
      option (flag string "-fu") fu ;
      option (flag string "-fd") fd ;
      option (opt "-t" ((function `all -> "all" | `first -> "first" | `last -> "last") % string)) ties ;
      option (opt "-mdb" ((function `each -> "each" | `all -> "all") % string)) mdb ;
      option (opt "-k" int) k ;
      option (flag string "-header") header ;
      opt "-a" dep query ;
      opt "-b" (list dep ~sep:" ") beds ;
    ]
  ]

(** [bamtobed ... bam] is a shell workflow, described as
    [bedtools.bamtobed] and run with [img], that executes
    [bedtools bamtobed] with [bam] passed as [-i] and redirects its
    standard output to [dest]. The workflow is declared with [~np:8] and
    [~mem] set to [3 * 1024].

    Every optional argument goes through [flag] with the switch of the
    same name.
    NOTE(review): the bedtools manual describes [-tag] as taking a tag
    name, whereas it is emitted here without a value; confirm before
    relying on [?tag]. *)
let bamtobed ?bed12 ?split ?splitD ?ed ?tag ?cigar bam =
  (* Shorthand for [option (flag string name) value]. *)
  let switch name value = option (flag string name) value in
  let args = [
    switch "-bed12" bed12 ;
    switch "-split" split ;
    switch "-splitD" splitD ;
    switch "-ed" ed ;
    switch "-tag" tag ;
    switch "-cigar" cigar ;
    opt "-i" dep bam ;
  ]
  in
  Workflow.shell ~descr:"bedtools.bamtobed" ~img ~mem:(Workflow.int  (3 * 1024)) ~np:8 [
    cmd "bedtools bamtobed" ~stdout:dest args
  ]


(** Command-line spelling of a strand: [`plus] becomes [+] and [`minus]
    becomes [-]. *)
let strand_arg = function
  | `plus -> string "+"
  | `minus -> string "-"

(** Command-line spelling of a column operation: each tag is rendered as
    the keyword of the same name, e.g. [`count_distinct] becomes
    [count_distinct]. *)
let operation_arg op =
  let keyword =
    match op with
    | `sum -> "sum"
    | `min -> "min"
    | `max -> "max"
    | `absmin -> "absmin"
    | `mean -> "mean"
    | `median -> "median"
    | `collapse -> "collapse"
    | `distinct -> "distinct"
    | `count -> "count"
    | `count_distinct -> "count_distinct"
  in
  string keyword

(** [concat_beds_dep beds] renders [beds] as a single input argument.

    For a non-empty list this is a process substitution that runs [cat] on
    all the files (separated by single spaces) and pipes the result
    through [sort -k1,1 -k2,2n]. For the empty list it is the empty
    string. *)
let concat_beds_dep beds =
  match beds with
  | [] -> string ""
  | _ :: _ ->
    let opening = string "<(cat " in
    let files = list ~sep:" " dep beds in
    let closing = string "| sort -k1,1 -k2,2n)" in
    seq ~sep:"" [ opening ; files ; closing ]

(** [merge ?s ?_S ?d ?c ?o beds] is a shell workflow, described as
    [bedtools.merge] and run with [img], that executes [bedtools merge]
    and redirects its standard output to [dest].

    - [s]: passed through [flag] with the switch [-s].
    - [_S]: a strand, rendered by [strand_arg] after [-S].
    - [d]: integer emitted after [-d].
    - [c]: integers joined by commas after [-c].
    - [o]: operations, rendered by [operation_arg] and joined by commas
      after [-o].
    - [beds]: rendered by [concat_beds_dep] and passed as [-i]. *)
let merge ?s ?_S ?d ?c ?o beds =
  let args = [
    string "merge" ;
    option (flag string "-s") s ;
    option (opt "-S" strand_arg) _S ;
    option (opt "-d" int) d ;
    option (opt "-c" (list ~sep:"," int)) c ;
    option (opt "-o" (list ~sep:"," operation_arg)) o ;
    opt "-i" concat_beds_dep beds ;
  ]
  in
  let merge_cmd = cmd "bedtools" ~stdout:dest args in
  Workflow.shell ~descr:"bedtools.merge" ~img [ merge_cmd ]
OCaml

Innovation. Community. Security.