package rune

  1. Overview
  2. Docs

Source file backend_intf.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
(* backend_intf.ml *)

open Ir

type 'a compiled_artifact = { native_artifact : 'a; entry_points : string list }
type 'a callable_kernel = { native_kernel : 'a; name : string }

type ('a, 'b) device_buffer = {
  native_buffer : 'b;
  size_in_bytes : int;
  dtype : 'a Dtype.t;
}

type 'b any_device_buffer =
  | Any_Device_Buffer : ('a, 'b) device_buffer -> 'b any_device_buffer
[@@unboxed]

module type S = sig
  val name : string

  (* ───── shared opaque handles ───── *)

  type device_info
  type device_buffer_native
  type compiled_artifact_native
  type callable_kernel_native

  (* ───── high-level buffer wrappers ───── *)

  type nonrec 'a device_buffer = ('a, device_buffer_native) device_buffer
  type nonrec any_device_buffer = device_buffer_native any_device_buffer
  type nonrec compiled_artifact = compiled_artifact_native compiled_artifact
  type nonrec callable_kernel = callable_kernel_native callable_kernel

  (* ───── sub-module interfaces ───── *)

  module Device_info : sig
    val get_default : unit -> device_info
    val max_shared_memory : device_info -> int
    val max_workgroup_size : device_info -> int array
    val supports_dtype : device_info -> Dtype.any -> bool

    (* helpers for the renderer *)
    val renderer_float4_str : device_info -> string option
    val renderer_smem_prefix : device_info -> string
    val renderer_barrier_str : device_info -> string
  end

  module Renderer : sig
    val render :
      device_info:device_info ->
      lowered_ir:Ir.Lowered.graph_t ->
      kernel_name:string ->
      string
  end

  module Compiler : sig
    type compile_options

    val default_options : device_info -> compile_options

    val compile :
      device_info:device_info ->
      source_code:string ->
      options:compile_options ->
      (compiled_artifact, string) result
  end

  module Runtime : sig
    val allocate_buffer :
      device_info:device_info ->
      size_in_bytes:int ->
      dtype:'a Dtype.t ->
      ('a device_buffer, string) result

    val copy_to_device :
      dest_buffer:'a device_buffer ->
      host_data:nativeint ->
      host_data_offset_bytes:int ->
      copy_size_bytes:int ->
      (unit, string) result

    val copy_from_device :
      src_buffer:'a device_buffer ->
      host_dest_ptr:nativeint ->
      device_data_offset_bytes:int ->
      copy_size_bytes:int ->
      (unit, string) result

    val get_kernel :
      artifact:compiled_artifact ->
      kernel_name:string ->
      (callable_kernel, string) result

    val launch_kernel :
      ?local_dims:int array (* None = let backend pick *) ->
      device_info:device_info ->
      global_dims:int array (* [|gx; gy; gz|] *) ->
      args:any_device_buffer list ->
      callable_kernel ->
      (unit, string) result

    val synchronize : device_info:device_info -> unit
  end
end
OCaml

Innovation. Community. Security.