package catala

  1. Overview
  2. Docs
Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source

Source file suggestions.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
(* This file is part of the Catala compiler, a specification language for tax
   and social benefits computation rules. Copyright (C) 2023 Inria, contributor:
   Aminata Boiguillé <aminata.boiguille@etu.sorbonne-universite.fr>, Emile
   Rolley <emile.rolley@tuta.io>

   Licensed under the Apache License, Version 2.0 (the "License"); you may not
   use this file except in compliance with the License. You may obtain a copy of
   the License at

   http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
   WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
   License for the specific language governing permissions and limitations under
   the License. *)

(** Computes the levenshtein distance between two strings, used to provide error
    messages suggestions *)
let levenshtein_distance (s : string) (t : string) : int =
  let three_way_minimum a b c = min a (min b c) in
  let m = String.length s and n = String.length t in
  (* for all i and j, d.(i).(j) will hold the Levenshtein distance between the
     first i characters of s and the first j characters of t *)
  let d = Array.make_matrix (m + 1) (n + 1) 0 in

  for i = 0 to m do
    d.(i).(0) <- i
    (* the distance of any first string to an empty second string *)
  done;
  for j = 0 to n do
    d.(0).(j) <- j
    (* the distance of any second string to an empty first string *)
  done;

  for j = 1 to n do
    for i = 1 to m do
      if s.[i - 1] = t.[j - 1] then d.(i).(j) <- d.(i - 1).(j - 1)
        (* no operation required *)
      else
        d.(i).(j) <-
          three_way_minimum
            (d.(i - 1).(j) + 1) (* a deletion *)
            (d.(i).(j - 1) + 1) (* an insertion *)
            (d.(i - 1).(j - 1) + 1) (* a substitution *)
    done
  done;

  d.(m).(n)

(*We create a list composed by strings that satisfy the following rule : they
  have the same levenshtein distance, which is the minimum distance between the
  reference word "keyword" and all the strings in "candidates" (with the
  condition that this minimum is equal to or less than one third of the length
  of keyword + 1, in order to get suggestions close to "keyword")*)
let suggestion_minimum_levenshtein_distance_association
    (candidates : string list)
    (keyword : string) : string list =
  let rec strings_minimum_levenshtein_distance
      (minimum : int)
      (result : string list)
      (candidates' : string list) : string list =
    (*As we iterate through the "candidates'" list, we create a list "result"
      with all strings that have the last minimum levenshtein distance found
      ("minimum").*)
    match candidates' with
    (*When a new minimum levenshtein distance is found, the new result list is
      our new element "current_string" followed by strings that have the same
      minimum distance. It will be the "result" list if there is no levenshtein
      distance smaller than this new minimum.*)
    | current_string :: tail ->
      let current_levenshtein_distance =
        levenshtein_distance current_string keyword
      in
      if current_levenshtein_distance < minimum then
        strings_minimum_levenshtein_distance current_levenshtein_distance
          [current_string] tail
        (*The "result" list is updated (we append "current_string" to "result")
          when a new string shares the same minimum levenshtein distance
          "minimum"*)
      else if current_levenshtein_distance = minimum then
        strings_minimum_levenshtein_distance minimum
          (result @ [current_string])
          tail
        (*If a levenshtein distance greater than the minimum is found, "result"
          doesn't change*)
      else strings_minimum_levenshtein_distance minimum result tail
    (*The "result" list is returned at the end of the "candidates'" list.*)
    | [] -> result
  in
  strings_minimum_levenshtein_distance
    (1 + (String.length keyword / 3))
    (*In order to select suggestions that are not too far away from the
      keyword*)
    [] candidates

let format (ppf : Format.formatter) (suggestions_list : string list) =
  match suggestions_list with
  | [] -> ()
  | _ :: _ ->
    Format.pp_print_string ppf "Maybe you wanted to write : ";
    Format.pp_print_list
      ~pp_sep:(fun ppf () -> Format.fprintf ppf ",@ or ")
      (fun ppf string -> Format.fprintf ppf "@{<yellow>\"%s\"@}" string)
      ppf suggestions_list;
    Format.pp_print_string ppf " ?"
OCaml

Innovation. Community. Security.