package fuzzy_match
Libraries for fuzzy string matching
Install
Dune Dependency
Authors
Maintainers
Sources
fuzzy_match-v0.16.0.tar.gz
sha256=4a175c1df53f1b93cc69ff01d5cd1e02726611808df696e493aebe47c4fdeb67
doc/src/fuzzy_match.search/fuzzy_search.ml.html
Source file fuzzy_search.ml
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298
open! Core module Case_mode = struct type t = Smart end module Query = struct type t = { queries : string array (* The pattern split by spaces *) ; raw : string ; case_sensitive : bool } let create query = let queries = String.split query ~on:' ' |> List.filter ~f:(fun s -> not (String.is_empty s)) |> Array.of_list in let case_mode = Case_mode.Smart in let case_sensitive = match case_mode with | Smart -> String.exists query ~f:Char.is_uppercase in { queries; raw = query; case_sensitive } ;; let is_empty t = String.is_empty t.raw end module Start_end_indices = struct type t = { mutable found : bool ; mutable start_idx : int ; mutable end_idx : int } let the_one_and_only = { found = false; start_idx = 0; end_idx = 0 } end let equal_ignore_case char1 char2 = Char.equal (Char.lowercase char1) (Char.lowercase char2) ;; let[@inline] find_start_end_indices ~query ~item () = let dst = Start_end_indices.the_one_and_only in let item_idx = ref 0 in let query_idx = ref 0 in while !item_idx < String.length item && !query_idx < String.length query do let item_char = String.get item !item_idx in let query_char = String.get query !query_idx in if equal_ignore_case item_char query_char then incr query_idx; incr item_idx done; if !query_idx < String.length query then dst.found <- false else ( let end_idx = !item_idx in let item_idx = ref (end_idx - 1) in let query_idx = ref (String.length query - 1) in while !query_idx >= 0 do let item_char = String.get item !item_idx in let query_char = String.get query !query_idx in if equal_ignore_case item_char query_char then decr query_idx; decr item_idx done; let start_idx = !item_idx + 1 in dst.found <- true; dst.start_idx <- start_idx; dst.end_idx <- end_idx); dst ;; let fold_matching_indices_single_query query ~item ~init ~f = let { Start_end_indices.start_idx; end_idx; found } = find_start_end_indices ~query ~item () in let acc = ref init in if found then ( let query_idx = ref 0 in for item_idx = start_idx to end_idx - 1 do let item_char = String.get item item_idx in let query_char = String.get query !query_idx in if equal_ignore_case item_char query_char then ( acc := f !acc item_idx; incr query_idx) done); !acc ;; let matching_indices (query : Query.t) ~item = if Query.is_empty query then Some [||] else ( let indices = Array.fold query.queries ~init:Int.Set.empty ~f:(fun set query -> fold_matching_indices_single_query query ~item ~init:set ~f:Set.add) in if Set.is_empty indices then None else Some (Set.to_array indices)) ;; let split_by_matching_sections (query : Query.t) ~item = match matching_indices query ~item with | None -> None | Some [||] -> Some [ `Not_matching, item ] | Some matches -> let sections = Queue.create () in let add_section matching start end_inclusive = Queue.enqueue sections (matching, String.sub item ~pos:start ~len:(end_inclusive - start + 1)) in let first = matches.(0) in if first > 0 then add_section `Not_matching 0 (first - 1); let matching_range_start = ref first in let matching_range_end = ref first in Array.iter matches ~f:(fun idx -> if idx > !matching_range_end + 1 then ( add_section `Matching !matching_range_start !matching_range_end; add_section `Not_matching (!matching_range_end + 1) (idx - 1); matching_range_start := idx); matching_range_end := idx); add_section `Matching !matching_range_start !matching_range_end; if !matching_range_end < String.length item - 1 then add_section `Not_matching (!matching_range_end + 1) (String.length item - 1); Some (Queue.to_list sections) ;; module Char_class = struct type t = | Upper | Lower | Digit | Non_word let of_char = function | 'A' .. 'Z' -> Upper | 'a' .. 'z' -> Lower | '0' .. '9' -> Digit | _ -> Non_word ;; end let start_of_item_bonus = 1 let start_of_word_bonus = 480 let camel_case_bonus = 360 let non_word_bonus = 480 let match_bonus = 320 let start_gap_penalty = 60 let continue_gap_penalty = 20 (* Wrong case penalty is only used when there is at least one uppercase character so we make it extreme *) let wrong_case_penalty = 120 let first_char_multiplier = 2 let score_upper_bound ~query_length = (* Only characters that are part of the query can have positive score. The maximum positive score is when the first character of the query matches the first character of the item and it is also the start of a word. *) ((start_of_item_bonus + start_of_word_bonus) * first_char_multiplier * query_length) + 1 ;; (* For a [single_score_query] higher scores are better and we take score_upper_bound - score_single_query in score. Assumption: - [query] and [item] are not empty. - When there is no match, return 0. When the query is "" and item is not "" (infinitely many matches?), return score upper bound. *) let score_single_query query ~item ~case_sensitive = let { Start_end_indices.found; start_idx; end_idx } = find_start_end_indices ~query ~item () in if not found then 0 else ( let start_char_class = if start_idx = 0 then Char_class.Non_word else Char_class.of_char (String.get item (start_idx - 1)) in let query_idx = ref 0 in let score = ref 0 in let in_gap = ref false in let prev_char_class = ref start_char_class in let prev_char_score = ref 0 in for item_idx = start_idx to end_idx - 1 do let item_char = String.get item item_idx in let query_char = String.get query !query_idx in if equal_ignore_case item_char query_char then ( let char_class = Char_class.of_char item_char in let this_char_score = let base_score = match !prev_char_class, char_class with | Non_word, (Upper | Lower | Digit) -> start_of_word_bonus | Lower, Upper | _, Digit -> camel_case_bonus | _, Non_word -> non_word_bonus | _, _ -> match_bonus in let with_start_of_item_bonus = if item_idx = 0 then base_score + start_of_item_bonus else base_score in let with_consecutive_bonus = if !in_gap then with_start_of_item_bonus else Int.max with_start_of_item_bonus !prev_char_score in let with_wrong_case_penalty = if Char.equal item_char query_char || not case_sensitive then with_consecutive_bonus else with_consecutive_bonus - wrong_case_penalty in if !query_idx = 0 then first_char_multiplier * with_wrong_case_penalty else with_wrong_case_penalty in incr query_idx; score := !score + this_char_score; prev_char_class := char_class; prev_char_score := this_char_score; in_gap := false) else ( let this_char_score = if !in_gap then -continue_gap_penalty else -start_gap_penalty in score := !score + this_char_score; in_gap := true) done; !score) ;; let score query ~item = match String.is_empty item, Query.is_empty query with | true, _ -> 0 | _, true -> 1 | _, _ -> let { Query.queries; raw; case_sensitive } = query in let raw_score = ref 0 in let any_mismatch = ref false in for i = 0 to Array.length queries - 1 do let query = queries.(i) in let score = score_single_query query ~item ~case_sensitive in if score = 0 then any_mismatch := true; raw_score := !raw_score + score done; (match !raw_score with | 0 -> 0 | _ -> (* All subqueries must match some part of the item. *) if !any_mismatch then 0 else score_upper_bound ~query_length:(String.length raw) - !raw_score) ;; let score_opt query ~item = match score query ~item with | 0 -> None | x -> Some x ;; module For_testing = struct let find_start_end_indices ~query ~item () = let { Start_end_indices.found; start_idx; end_idx } = find_start_end_indices ~query ~item () in Option.some_if found (start_idx, end_idx) ;; end let search query ~items = List.filter_map items ~f:(fun item -> match score query ~item with | 0 -> None | score -> (* Tie break by shortest string first, then alphabetical. *) Some (score, String.length item, item)) |> List.sort ~compare:[%compare: int * int * string] |> List.map ~f:Tuple3.get3 ;; let search' query ~items = let items_by_score = Array.filter_map items ~f:(fun item -> match score query ~item with | 0 -> None | score -> (* Tie break by shortest string first, then alphabetical. *) Some (score, String.length item, item)) in Array.sort items_by_score ~compare:[%compare: int * int * string]; Array.map items_by_score ~f:Tuple3.get3 ;;
sectionYPositions = computeSectionYPositions($el), 10)"
x-init="setTimeout(() => sectionYPositions = computeSectionYPositions($el), 10)"
>