package bistro-bio

  1. Overview
  2. Docs
Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source

Source file bedtools.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
open Core_kernel
open Bistro
open Bistro.Shell_dsl

(** Container image list handed to every workflow of this file through
    [~img]: a single docker image, [pveber/bedtools], tag [2.21.0]. *)
let img =
  [ docker_image
      ~account:"pveber"
      ~name:"bedtools"
      ~tag:"2.21.0"
      ()
  ]

(** [bedtools ?stdout subcmd args] is a [cmd] invocation of the [bedtools]
    program whose first argument is [subcmd], followed by [args].
    [?stdout] is forwarded unchanged to [cmd]. *)
let bedtools ?stdout subcmd args =
  let argv = string subcmd :: args in
  cmd "bedtools" ?stdout argv

(** Tag with two constant constructors. The parameter ['a] is phantom:
    neither constructor carries a value of that type. *)
type 'a input =
  | Bed
  | Gff

(** The [Bed] tag as a value. *)
let bed = Bed

(** The [Gff] tag as a value. *)
let gff = Gff

(** Command-level builders: these produce shell commands and argument
    templates, not workflows. *)
module Cmd = struct
  (** [slop_args ?strand ?header mode] is the list of argument templates
      shared by slop invocations.

      - [?strand] is passed through [option (flag string "-s")].
      - [?header] is passed through [option (flag string "-header")].
      - [mode] selects the extension arguments:
        [`both n] gives [-b n], [`left n] gives [-l n], [`right n] gives
        [-r n]; the [_pct] variants take a float, use the same option
        letter as their integer counterpart and append [-pct]. *)
  let slop_args ?strand ?header mode = [
    option (flag string "-s") strand ;
    option (flag string "-header") header ;
    (* Explicit separator so that the option and the trailing "-pct" are
       always rendered as two distinct, space-separated arguments. *)
    seq ~sep:" " (
      match mode with
      | `both n      -> [ opt "-b" int n ]
      | `left n      -> [ opt "-l" int n ]
      | `right n     -> [ opt "-r" int n ]
      | `left_pct p  -> [ opt "-l" float p ; string "-pct" ]
      (* The right-hand percentage must use "-r", like [`right]. *)
      | `right_pct p -> [ opt "-r" float p ; string "-pct" ]
      | `both_pct p  -> [ opt "-b" float p ; string "-pct" ]
    )
  ]

  (** [slop ?strand ?header ~mode input chrom_size] is the
      [bedtools slop] command built from [slop_args], with [input] given
      as [-i] and [chrom_size] as [-g] (both through [dep]), and
      [~stdout:dest]. *)
  let slop ?strand ?header ~mode input chrom_size =
    bedtools "slop" ~stdout:dest [
      (* Space-separate the shared arguments when flattening them into a
         single template. *)
      seq ~sep:" " (slop_args ?strand ?header mode) ;
      opt "-i" dep input ;
      opt "-g" dep chrom_size ;
    ]
end

(** [slop ?strand ?header ~mode _ input chrom_size] wraps the command built
    by [Cmd.slop] in a shell workflow described as ["bedtools.slop"] and
    using [img]. The first positional argument is ignored at runtime. *)
let slop ?strand ?header ~mode _ input chrom_size =
  let command = Cmd.slop ?strand ?header ~mode input chrom_size in
  Workflow.shell ~descr:"bedtools.slop" ~img [ command ]

(** [intersect ... _ file files] is a shell workflow described as
    ["bedtools.intersect"], using [img], that runs [bedtools intersect]
    with [file] as [-a], the space-separated [files] as [-b], and
    [~stdout:dest].

    Every optional argument maps to the command-line option of the same
    name (a leading underscore in the label stands for an upper-case
    option: [?_F] is [-F], [?_S] is [-S]). [?f] and [?_F] take a float,
    [?g] takes a dependency, all the others go through [flag].
    The first positional argument is ignored at runtime. *)
let intersect ?ubam ?wa ?wb ?loj ?wo ?wao ?u ?c ?v ?f ?_F ?r ?e ?s ?_S
    ?split ?sorted ?g ?header ?filenames ?sortout _ file files =
  (* Shorthand for the pattern repeated for every on/off option. *)
  let switch name value = option (flag string name) value in
  let arguments = [
    switch "-ubam" ubam ;
    switch "-wa" wa ;
    switch "-wb" wb ;
    switch "-loj" loj ;
    switch "-wo" wo ;
    switch "-wao" wao ;
    switch "-u" u ;
    switch "-c" c ;
    switch "-v" v ;
    option (opt "-f" float) f ;
    option (opt "-F" float) _F ;
    switch "-r" r ;
    switch "-e" e ;
    switch "-s" s ;
    switch "-S" _S ;
    switch "-split" split ;
    switch "-sorted" sorted ;
    option (opt "-g" dep) g ;
    switch "-header" header ;
    switch "-filenames" filenames ;
    switch "-sortout" sortout ;
    opt "-a" dep file ;
    opt "-b" (list dep ~sep:" ") files ;
  ]
  in
  Workflow.shell ~descr:"bedtools.intersect" ~img [
    cmd "bedtools intersect" ~stdout:dest arguments ;
  ]

(** [closest ... _ query beds] is a shell workflow described as
    ["bedtools.closest"], using [img], that runs [bedtools closest] with
    [query] as [-a], the space-separated [beds] as [-b], and
    [~stdout:dest].

    - [?strand]: [`same] is rendered as [-s], [`opposite] as [-S].
    - [?io], [?iu], [?id], [?fu], [?fd], [?header]: go through [flag] with
      the option of the same name.
    - [?ties]: [-t] followed by [all], [first] or [last].
    - [?mdb]: [-mdb] followed by [each] or [all].
    - [?k]: [-k] followed by an integer.

    The first positional argument is ignored at runtime. *)
let closest ?strand ?io ?iu ?id ?fu ?fd ?ties ?mdb ?k ?header _ query beds =
  (* The description and the program string name the "closest"
     subcommand, following the pattern of the sibling wrappers. *)
  Workflow.shell ~descr:"bedtools.closest" ~img [
    cmd "bedtools closest" ~stdout:dest [
      option ((function `same -> "-s" | `opposite -> "-S") % string) strand ;
      option (flag string "-io") io ;
      option (flag string "-iu") iu ;
      option (flag string "-id") id ;
      option (flag string "-fu") fu ;
      option (flag string "-fd") fd ;
      option (opt "-t" ((function `all -> "all" | `first -> "first" | `last -> "last") % string)) ties ;
      option (opt "-mdb" ((function `each -> "each" | `all -> "all") % string)) mdb ;
      option (opt "-k" int) k ;
      option (flag string "-header") header ;
      opt "-a" dep query ;
      opt "-b" (list dep ~sep:" ") beds ;
    ]
  ]

(** [bamtobed ?bed12 ?split ?splitD ?ed ?tag ?cigar bam] is a shell workflow
    described as ["bedtools.bamtobed"], using [img], that runs
    [bedtools bamtobed] with [bam] as [-i] and [~stdout:dest]. Each optional
    argument goes through [flag] with the option of the same name.

    The workflow is declared with [~mem:(Workflow.int (3 * 1024))] and
    [~np:8]. *)
let bamtobed ?bed12 ?split ?splitD ?ed ?tag ?cigar bam =
  (* Shorthand for the pattern repeated for every on/off option. *)
  let switch name value = option (flag string name) value in
  let memory = Workflow.int (3 * 1024) in
  let arguments = [
    switch "-bed12" bed12 ;
    switch "-split" split ;
    switch "-splitD" splitD ;
    switch "-ed" ed ;
    switch "-tag" tag ;
    switch "-cigar" cigar ;
    opt "-i" dep bam ;
  ]
  in
  Workflow.shell ~descr:"bedtools.bamtobed" ~img ~mem:memory ~np:8 [
    cmd "bedtools bamtobed" ~stdout:dest arguments ;
  ]


(** [strand_arg s] is the template for a strand: ["+"] for [`plus] and
    ["-"] for [`minus]. *)
let strand_arg = function
  | `plus -> string "+"
  | `minus -> string "-"

(** [operation_arg op] is the template holding the textual name of [op];
    each tag is rendered as its own name (e.g. [`count_distinct] gives
    ["count_distinct"]). *)
let operation_arg op =
  let label =
    match op with
    | `sum -> "sum"
    | `min -> "min"
    | `max -> "max"
    | `absmin -> "absmin"
    | `mean -> "mean"
    | `median -> "median"
    | `collapse -> "collapse"
    | `distinct -> "distinct"
    | `count -> "count"
    | `count_distinct -> "count_distinct"
  in
  string label

(** [concat_beds_dep beds] is the empty string template when [beds] is
    empty. Otherwise it is the text [<(cat f1 f2 ...| sort -k1,1 -k2,2n)]
    where the [fi] are the space-separated [dep] of each element of
    [beds]. *)
let concat_beds_dep beds =
  match beds with
  | [] -> string ""
  | _ :: _ ->
    let files = list ~sep:" " dep beds in
    seq ~sep:"" [
      string "<(cat " ;
      files ;
      string "| sort -k1,1 -k2,2n)"
    ]

(** [merge ?s ?_S ?d ?c ?o beds] is a shell workflow described as
    ["bedtools.merge"], using [img], that runs [bedtools merge] with
    [~stdout:dest].

    - [?s] goes through [flag] with [-s].
    - [?_S] is [-S] followed by a strand (see [strand_arg]).
    - [?d] is [-d] followed by an integer.
    - [?c] is [-c] followed by comma-separated integers.
    - [?o] is [-o] followed by comma-separated operation names
      (see [operation_arg]).
    - [beds] is given as [-i] through [concat_beds_dep]. *)
let merge ?s ?_S ?d ?c ?o beds =
  let columns = list ~sep:"," int in
  let operations = list ~sep:"," operation_arg in
  let command =
    bedtools "merge" ~stdout:dest [
      option (flag string "-s") s ;
      option (opt "-S" strand_arg) _S ;
      option (opt "-d" int) d ;
      option (opt "-c" columns) c ;
      option (opt "-o" operations) o ;
      opt "-i" concat_beds_dep beds ;
    ]
  in
  Workflow.shell ~descr:"bedtools.merge" ~img [ command ]
OCaml

Innovation. Community. Security.