package oranger
OCaml wrapper for the ranger (C++) random forests implementation
Install
Dune Dependency
Authors
Maintainers
Sources
v4.3.5.tar.gz
sha256=bb15d9a6f1c42b6b0cb716efa70660cebb6d3ff1a1e821ac216e31a88d3a3b35
md5=daf20938fe65d211b6121a6c2e473827
doc/src/oranger/RF.ml.html
Source file RF.ml
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114
open Printf

module A = BatArray
module L = BatList
module LO = Line_oriented
module Log = Dolog.Log

type filename = string

(** Kind of forest to train. *)
type mode = Regression
          | Classification

(** Integer code passed to the [--treetype] option of [ml_rf_ranger]. *)
let int_of_mode = function
  | Classification -> 1
  | Regression -> 3

(** [train ?debug ?nprocs mode nb_trees mtry data_fn dep_var_name model_out_fn]
    runs [ml_rf_ranger] through the shell to train a forest of [nb_trees]
    trees on [data_fn], using [dep_var_name] as the dependent variable.

    - [debug]: add [--verbose] to the command line (default: [false]).
    - [nprocs]: value given to [--nthreads] (default: [1]).
    - [mtry]: when [Some m], [--mtry m] is added; nothing is added for [None].

    Returns [true] if the command exited with status 0; in that case the file
    [model_out_fn ^ ".forest"] is renamed to [model_out_fn].
    Returns [false] for any other termination status.
    [Sys.rename] may raise [Sys_error] if the expected file is missing. *)
let train
    ?debug:(debug = false)
    ?nprocs:(nprocs = 1)
    (mode: mode)
    (nb_trees: int)
    (mtry: int option)
    (data_fn: filename)
    (dep_var_name: string)
    (model_out_fn: filename): bool =
  let mtry_str = match mtry with
    | None -> ""
    | Some m -> sprintf "--mtry %d" m in
  (* the command goes through a shell: quote every user-supplied string so
     that spaces or shell metacharacters cannot break or alter the command *)
  let cmd =
    sprintf
      "ml_rf_ranger %s --file %s --depvarname %s --treetype %d --ntree %d \
       %s --write --outprefix %s --nthreads %d"
      (if debug then "--verbose" else "")
      (Filename.quote data_fn)
      (Filename.quote dep_var_name)
      (int_of_mode mode)
      nb_trees
      mtry_str
      (Filename.quote model_out_fn)
      nprocs in
  Log.info "cmd: %s" cmd;
  let status, log = BatUnix.run_and_read cmd in
  Log.info "%s" log;
  match status with
  | WEXITED 0 ->
    (Sys.rename (model_out_fn ^ ".forest") model_out_fn;
     true)
  | _ -> false

(** Parse a float from [s] after stripping it with [BatString.strip]. *)
let robust_float_of_string s =
  Scanf.sscanf (BatString.strip s) "%f" (fun x -> x)

(** Matches a string whose first character is an ASCII letter. *)
let alpha_start = Re.Str.regexp "^[a-zA-Z]"

(** [ok_line l] is [true] iff [l] is not empty and does not start
    with an ASCII letter. *)
let ok_line l =
  (l <> "") && (not (Re.Str.string_match alpha_start l 0))

(** [read_raw_class_predictions nb_trees fn] reads the per-tree predictions
    stored in [fn] and returns one [(mean, stddev)] pair per sample,
    in sample order.

    Only lines accepted by [ok_line] are parsed. The value for tree [t] and
    sample [s] is read at index [t * nb_samples + s], i.e. the file is
    expected to list all samples of the first tree, then all samples of the
    second one, and so on.

    @raise Invalid_argument if [nb_trees <= 0]. *)
let read_raw_class_predictions nb_trees fn =
  if nb_trees <= 0 then
    invalid_arg
      (sprintf "RF.read_raw_class_predictions: nb_trees = %d (must be > 0)"
         nb_trees);
  let pred_strings =
    (* keep only numeric lines; they don't start with a letter;
       remove empty lines *)
    LO.filter fn ok_line in
  let nb_preds = L.length pred_strings in
  Log.info "nb integer preds: %d" nb_preds;
  if nb_preds mod nb_trees <> 0 then
    (* a wrong nb_trees shifts every per-sample column: make it visible *)
    Log.warn "%d predictions is not a multiple of %d trees; \
              per-sample statistics may be misaligned"
      nb_preds nb_trees;
  let nb_samples = nb_preds / nb_trees in
  Log.info "nb samples: %d" nb_samples;
  let preds = A.of_list (L.map robust_float_of_string pred_strings) in
  let last_tree = nb_trees - 1 in
  L.init nb_samples (fun samp_i ->
      (* gather the prediction of each tree for this sample;
         trees are listed from the last one to the first one *)
      let curr_preds =
        L.init nb_trees (fun k ->
            preds.(((last_tree - k) * nb_samples) + samp_i)) in
      (* compute mean and stddev *)
      (L.favg curr_preds, Utls.stddev curr_preds)
    )

(** [predict ?debug ?nprocs nb_trees data_fn model_fn] runs [ml_rf_ranger]
    through the shell in prediction mode ([--predict model_fn --predall])
    on [data_fn].

    - [debug]: add [--verbose] to the command line (default: [false]).
    - [nprocs]: value given to [--nthreads] (default: [1]).
    - [nb_trees]: number of trees in the model; it is used to regroup the
      raw per-tree predictions by sample.

    Returns [Some l], where [l] holds one [(mean, stddev)] pair per sample,
    if the command exited with status 0; [None] otherwise.
    The [.prediction] and [.log] files written next to the temporary output
    prefix are removed in all cases, including when an exception escapes.

    @raise Invalid_argument if [nb_trees <= 0] (after the command was run). *)
let predict
    ?debug:(debug = false)
    ?nprocs:(nprocs = 1)
    (nb_trees: int)
    (data_fn: filename)
    (model_fn: filename): (float * float) list option =
  Utls.with_temp_file "/tmp" "oranger_" "" (fun predictions_fn ->
      let raw_preds_fn = predictions_fn ^ ".prediction" in
      (* the log file is created by ranger, even if we did not ask for it *)
      let log_fn = predictions_fn ^ ".log" in
      (* Either file can be missing: nothing is guaranteed after a failed
         run, and ranger presumably skips the log file in verbose mode
         (TODO confirm). Only remove what exists. *)
      let cleanup () =
        L.iter
          (fun fn -> if Sys.file_exists fn then Sys.remove fn)
          [raw_preds_fn; log_fn] in
      (* quote file names: the command is interpreted by a shell *)
      let cmd =
        sprintf
          "ml_rf_ranger %s \
           --file %s --predict %s --nthreads %d --outprefix %s --predall"
          (if debug then "--verbose" else "")
          (Filename.quote data_fn)
          (Filename.quote model_fn)
          nprocs
          (Filename.quote predictions_fn) in
      Log.info "cmd: %s" cmd;
      let res =
        try
          let status, log = BatUnix.run_and_read cmd in
          Log.info "%s" log;
          begin match status with
            | WEXITED 0 ->
              Some (read_raw_class_predictions nb_trees raw_preds_fn)
            | _ -> None
          end
        with exn ->
          (* do not leak temporary files when running or parsing fails *)
          (cleanup ();
           raise exn) in
      cleanup ();
      res
    )
sectionYPositions = computeSectionYPositions($el), 10)"
x-init="setTimeout(() => sectionYPositions = computeSectionYPositions($el), 10)"
>