Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Page
Library
Module
Module type
Parameter
Class
Class type
Source
RegrStats.ml
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110
(* Performance measures for regression models
   cf. chapter 12 "regression models" in book
   Varnek, A. ed., 2017. Tutorials in chemoinformatics. John Wiley & Sons. *)

module A = BatArray
module L = BatList

(** [square x] is [x *. x]. *)
let square x = x *. x

(** [sum_residuals_with f exp pred] is the sum of [f (e -. p)] over the
    elements [e] of [exp] paired by index with the elements [p] of [pred].
    Shared by the measures below so that the length check and the
    accumulation are written only once.
    @raise Assert_failure if the two arrays have different lengths
    (only when assertions are enabled). *)
let sum_residuals_with (f: float -> float)
    (a1: float array) (a2: float array): float =
  assert (A.length a1 = A.length a2);
  A.fold_lefti (fun acc i x -> acc +. f (x -. a2.(i))) 0.0 a1

(** Root Mean Squared Error
    [rmse exp pred]: square root of the mean of the squared residuals.
    @raise Assert_failure if the lists have different lengths. *)
let rmse (l1: float list) (l2: float list): float =
  let a1 = A.of_list l1 in
  let a2 = A.of_list l2 in
  let n = A.length a2 in
  sqrt (sum_residuals_with square a1 a2 /. float n)

(** Mean Absolute Error
    [mae exp pred]: mean of the absolute residuals.
    @raise Assert_failure if the lists have different lengths. *)
let mae (l1: float list) (l2: float list): float =
  let a1 = A.of_list l1 in
  let a2 = A.of_list l2 in
  let n = A.length a2 in
  sum_residuals_with abs_float a1 a2 /. float n

(** standard deviation of residuals
    [std_dev_res exp pred]: square root of the sum of squared residuals
    divided by [n - 2], where [n] is the number of points.
    The divisor is zero or negative when [n <= 2], so the result is only
    meaningful for more than two points.
    @raise Assert_failure if the lists have different lengths. *)
let std_dev_res (l1: float list) (l2: float list): float =
  let a1 = A.of_list l1 in
  let a2 = A.of_list l2 in
  let n = A.length a2 in
  sqrt (sum_residuals_with square a1 a2 /. float (n - 2))

(** coefficient of determination (for arrays)
    [r2_a exp pred]: one minus the ratio of the sum of squared residuals
    to the sum of squared deviations of [exp] from its own average.
    When all values of [exp] are equal, that denominator is zero and the
    result is not a finite number.
    @raise Assert_failure if the arrays have different lengths. *)
let r2_a a1 a2 =
  let sum_squared_diffs = sum_residuals_with square a1 a2 in
  let avg_exp = A.favg a1 in
  let sum_squared_exp_diffs =
    A.fold_left (fun acc x -> acc +. square (x -. avg_exp)) 0.0 a1 in
  1.0 -. (sum_squared_diffs /. sum_squared_exp_diffs)

(** coefficient of determination
    [r2 exp pred]: list version of [r2_a]. *)
let r2 (l1: float list) (l2: float list): float =
  r2_a (A.of_list l1) (A.of_list l2)

(** raw Regression Error Characteristic Curve
    (raw means not scaled by a null model)
    [raw_REC_curve exp pred] is a list of
    [(error_tolerance, fraction_of_points_within_tolerance)] pairs, where
    the tolerances are the values of [L.frange 0.0 `To max_err 100] and
    [max_err] is the largest absolute residual.
    Two empty lists give the empty curve.
    Lists of different lengths are passed to [A.map2] as is; it is
    expected to reject them with [Invalid_argument].
    Cf. Bi, J. and Bennett, K.P., 2003.
    Regression error characteristic curves.
    In Proceedings of the 20th international conference on machine learning
    (ICML-03) (pp. 43-50). *)
let raw_REC_curve (l1: float list) (l2: float list): (float * float) list =
  let a1 = A.of_list l1 in
  let a2 = A.of_list l2 in
  let errors = A.map2 (fun x y -> abs_float (x -. y)) a1 a2 in
  let n = A.length errors in
  if n = 0 then
    (* no residuals: there is no largest error to span the X axis with *)
    []
  else
    (* largest error under BatFloat.compare; a single pass is enough since
       the counting below does not depend on the order of the errors *)
    let max_err =
      A.fold_left (fun best e ->
          if BatFloat.compare e best > 0 then e else best
        ) errors.(0) errors in
    (* 100 steps on the X axis
       NOTE(review): if every residual is 0.0 then max_err is 0.0 and the
       outcome depends on how L.frange treats an empty span; confirm *)
    let xs = L.frange 0.0 `To max_err 100 in
    let m = float n in
    (* WARNING: not very efficient algorithm;
       one full pass over the errors per tolerance *)
    L.map (fun err_tol ->
        let ok_count =
          A.fold_left (fun acc err ->
              if err <= err_tol then acc + 1 else acc
            ) 0 errors in
        (err_tol, float ok_count /. m)
      ) xs