Module `Stringx`

module Levenshtein : sig ... end

val center : string -> int -> string -> string

center s len pad centers s in a string of length len, padding with pad. If s is longer than len, it is returned unchanged. Padding is inserted symmetrically. pad must be non-empty or it is ignored.

This function is Unicode-aware and counts characters, not bytes. If pad is multibyte, it is repeated and truncated as needed.

Examples:

center "hello" 10 " " returns "  hello   "
center "abc" 7 "あ" returns "ああabcああ"

parameter s
The string to center (UTF-8)

parameter len
The total length (in Unicode characters) of the result

parameter pad
The padding string (UTF-8, non-empty)

returns
The centered string

val count : string -> string -> int

count str pattern counts how many Unicode characters in str match pattern.

The pattern supports:

character sets: e.g., "aeiou"
ranges: e.g., "a-k", "あ-ん"
negation with ^: e.g., "^a-k", "^0-9"

This function is Unicode-aware and handles UTF-8 properly.

Examples:

count "hello" "aeiou" returns 2
count "abc123" "^a-z" returns 3
count "こんにちは" "あ-ん" returns 5

parameter str
The input string (UTF-8)

parameter pattern
The character pattern (see above)

returns
The number of matching characters

val delete : string -> string -> string

delete str pattern removes all Unicode characters in str that match pattern.

The pattern supports:

character sets: e.g., "aeiou"
ranges: e.g., "a-k", "あ-ん"
negation with ^: e.g., "^a-k", "^0-9"

This function is Unicode-aware and handles UTF-8 properly.

Examples:

delete "hello" "aeiou" returns "hll"
delete "こんにちは" "こ" returns "んにちは"
delete "abc123" "^a-z" returns "abc"

parameter str
The input string (UTF-8)

parameter pattern
The character pattern (see above)

returns
The string with matched characters removed

val len : string -> int

len str returns the number of Unicode code points (runes) in UTF-8 string str.

This function is Unicode-aware and counts characters, not bytes.

Examples:

len "hello" returns 5
len "こんにちは" returns 5
len "🍎🍏🍊" returns 3

parameter str
The input string (UTF-8)

returns
The number of Unicode code points in str

val reverse : string -> string

reverse s reverses a UTF-8 encoded string s.

This function is Unicode-aware and reverses by code points, not bytes.

Examples:

reverse "hello" returns "olleh"
reverse "こんにちは" returns "はちにんこ"
reverse "🍎🍏🍊" returns "🍊🍏🍎"

parameter s
The input string (UTF-8)

returns
The reversed string

val contains : string -> string -> bool

contains s substr reports whether substr is within s.

Returns true if substr is the empty string, or if substr occurs anywhere in s. Returns false otherwise.

This function is Unicode-agnostic and operates on bytes, not code points.

Examples:

contains "seafood" "foo" returns true
contains "seafood" "bar" returns false
contains "seafood" "" returns true
contains "" "" returns true

parameter s
The input string

parameter substr
The substring to search for

returns
true if substr is found in s, false otherwise

val contains_any : string -> string -> bool

contains_any s chars reports whether any Unicode code points in chars are within s.

Returns false if chars is empty. Unicode-aware and compares by code points.

Examples:

contains_any "team" "i" returns false
contains_any "fail" "ui" returns true
contains_any "ure" "ui" returns true
contains_any "failure" "ui" returns true
contains_any "foo" "" returns false
contains_any "" "" returns false

parameter s
The input string (UTF-8)

parameter chars
The set of Unicode code points to search for (UTF-8)

returns
true if any code point in chars is found in s, false otherwise

val has_prefix : string -> string -> bool

has_prefix s prefix reports whether the string s begins with prefix.

Returns true if prefix is the empty string, or if s starts with prefix. Returns false otherwise.

This function is Unicode-agnostic and operates on bytes, not code points.

Examples:

has_prefix "Gopher" "Go" returns true
has_prefix "Gopher" "C" returns false
has_prefix "Gopher" "" returns true

parameter s
The input string

parameter prefix
The prefix to test

returns
true if s starts with prefix, false otherwise

val has_suffix : string -> string -> bool

has_suffix s suffix reports whether the string s ends with suffix.

Returns true if suffix is the empty string, or if s ends with suffix. Returns false otherwise.

This function is Unicode-agnostic and operates on bytes, not code points.

Examples:

has_suffix "Amigo" "go" returns true
has_suffix "Amigo" "O" returns false
has_suffix "Amigo" "Ami" returns false
has_suffix "Amigo" "" returns true

parameter s
The input string

parameter suffix
The suffix to test

returns
true if s ends with suffix, false otherwise

val count_substring : string -> string -> int

count_substring s substr counts the number of non-overlapping instances of substr in s.

If substr is the empty string, returns 1 + the number of Unicode code points in s.

Matching is Unicode-agnostic and operates on bytes, not code points; only the empty-substr case described above counts code points.

Examples:

count_substring "cheese" "e" returns 3
count_substring "five" "" returns 5
count_substring "banana" "na" returns 2
count_substring "aaaaa" "aa" returns 2
count_substring "" "" returns 1
count_substring "" "a" returns 0

parameter s
The input string

parameter substr
The substring to count

returns
The number of non-overlapping instances of substr in s

val equal_fold : string -> string -> bool

equal_fold s t reports whether s and t, interpreted as UTF-8 strings, are equal under simple Unicode case-folding (ASCII only).

This is a simple case-insensitive comparison for ASCII letters only. (It does not perform full Unicode case folding.)

Examples:

equal_fold "Go" "go" returns true
equal_fold "AB" "ab" returns true
equal_fold "ß" "ss" returns false

parameter s
The first string (UTF-8)

parameter t
The second string (UTF-8)

returns
true if s and t are equal under simple case folding, false otherwise

val fields : string -> string list

fields s splits the string s around each instance of one or more consecutive Unicode whitespace characters, returning a list of substrings of s or an empty list if s contains only whitespace.

Whitespace is defined by Unicode (see is_space).

Examples:

fields " foo bar baz " returns ["foo"; "bar"; "baz"]
fields " " returns []
fields "a\tb\nc" returns ["a"; "b"; "c"]

parameter s
The input string (UTF-8)

returns
List of non-whitespace substrings of s

val fields_func : string -> (Uchar.t -> bool) -> string list

fields_func s f splits the string s at each run of Unicode code points c satisfying f c, returning a list of substrings of s or an empty list if all code points in s satisfy f or s is empty.

Examples:

fields_func " foo1;bar2,baz3..." (fun c -> not (is_letter c || is_number c)) returns ["foo1"; "bar2"; "baz3"]

parameter s
The input string (UTF-8)

parameter f
The predicate function on Unicode code points

returns
List of non-separator substrings of s

val index : string -> string -> int

index s substr returns the index of the first instance of substr in s, or -1 if substr is not present.

The index is a byte offset (not code point index).

Examples:

index "chicken" "ken" returns 4
index "chicken" "dmr" returns -1
index "abc" "" returns 0
index "" "" returns 0
index "" "a" returns -1

parameter s
The input string

parameter substr
The substring to search for

returns
The byte index of the first occurrence, or -1 if not found

val repeat : string -> int -> string

repeat s count returns a new string consisting of count copies of s.

Raises Invalid_argument if count is negative.

Examples:

repeat "na" 2 returns "nana"
repeat "🍎" 3 returns "🍎🍎🍎"
repeat "" 5 returns ""
repeat "a" 0 returns ""
repeat "abc" (-1) raises Invalid_argument

parameter s
The string to repeat

parameter count
The number of times to repeat s

returns
The repeated string

val join : string list -> string -> string

join elems sep concatenates the elements of elems, inserting sep between each element.

Returns the empty string if elems is empty.

Examples:

join ["foo"; "bar"; "baz"] ", " returns "foo, bar, baz"
join [] ", " returns ""
join ["a"] ", " returns "a"

parameter elems
The list of strings to join

parameter sep
The separator string

returns
The joined string

val trim : string -> string -> string

trim s cutset returns s with all leading and trailing Unicode code points contained in cutset removed.

This function is Unicode-aware and trims by code points, not bytes.

Examples:

trim "¡¡¡Hello, Camels!!!" "!¡" returns "Hello, Camels"

parameter s
The input string (UTF-8)

parameter cutset
The set of Unicode code points to trim (UTF-8)

returns
The trimmed string

val trim_func : string -> (Uchar.t -> bool) -> string

trim_func s f returns s with all leading and trailing Unicode code points c satisfying f c removed.

This function is Unicode-aware and trims by code points, not bytes.

Examples:

trim_func "¡¡¡Hello, Camels!!!" (fun c -> not (is_letter c || is_number c)) returns "Hello, Camels"

parameter s
The input string (UTF-8)

parameter f
The predicate function on Unicode code points

returns
The trimmed string

val trim_left : string -> string -> string

trim_left s cutset returns s with all leading Unicode code points contained in cutset removed.

This function is Unicode-aware and trims by code points, not bytes.

Examples:

trim_left "¡¡¡Hello, Camels!!!" "!¡" returns "Hello, Camels!!!"

parameter s
The input string (UTF-8)

parameter cutset
The set of Unicode code points to trim (UTF-8)

returns
The trimmed string

val trim_left_func : string -> (Uchar.t -> bool) -> string

trim_left_func s f returns s with all leading Unicode code points c satisfying f c removed.

This function is Unicode-aware and trims by code points, not bytes.

Examples:

trim_left_func "¡¡¡Hello, Camels!!!" (fun c -> not (is_letter c || is_number c)) returns "Hello, Camels!!!"

parameter s
The input string (UTF-8)

parameter f
The predicate function on Unicode code points

returns
The trimmed string

val trim_right : string -> string -> string

trim_right s cutset returns s with all trailing Unicode code points contained in cutset removed.

This function is Unicode-aware and trims by code points, not bytes.

Examples:

trim_right "¡¡¡Hello, Camels!!!" "!¡" returns "¡¡¡Hello, Camels"

parameter s
The input string (UTF-8)

parameter cutset
The set of Unicode code points to trim (UTF-8)

returns
The trimmed string

val trim_right_func : string -> (Uchar.t -> bool) -> string

trim_right_func s f returns s with all trailing Unicode code points c satisfying f c removed.

This function is Unicode-aware and trims by code points, not bytes.

Examples:

trim_right_func "¡¡¡Hello, Camels!!!" (fun c -> not (is_letter c || is_number c)) returns "¡¡¡Hello, Camels"

parameter s
The input string (UTF-8)

parameter f
The predicate function on Unicode code points

returns
The trimmed string

val trim_space : string -> string

trim_space s returns s with all leading and trailing Unicode whitespace removed.

This function is Unicode-aware and trims by code points, not bytes. Whitespace is defined by Unicode (see is_space).

Examples:

trim_space " \t\n Hello, Camels \n\t\r\n" returns "Hello, Camels"

parameter s
The input string (UTF-8)

returns
The trimmed string

val trim_suffix : string -> string -> string

trim_suffix s suffix returns s without the provided trailing suffix string. If s does not end with suffix, s is returned unchanged.

This function is byte-based, not Unicode-aware.

Examples:

trim_suffix "¡¡¡Hello, Camels!!!" ", Camels!!!" returns "¡¡¡Hello"
trim_suffix "¡¡¡Hello, Camels!!!" ", Marmots!!!" returns "¡¡¡Hello, Camels!!!"
trim_suffix "abc" "" returns "abc"

parameter s
The input string

parameter suffix
The suffix to remove

returns
s without the trailing suffix, or s if suffix is not present

val to_lower : string -> string

to_lower s returns s with uppercase ASCII letters (A-Z) mapped to their lower case; all other characters are left unchanged.

This function currently only lowercases ASCII letters (A-Z). Unicode-aware lowercasing is not yet implemented.

Examples:

to_lower "Camel" returns "camel"
to_lower "CAMEL" returns "camel"
to_lower "こんにちは" returns "こんにちは"

parameter s
The input string (UTF-8)

returns
The lowercased string

val to_title : string -> string

to_title s returns s with ASCII letters mapped to their title case (identical to upper case for ASCII); all other characters are left unchanged.

Currently, only ASCII letters are supported (A-Z, a-z). TODO: Support full Unicode title case in the future.

Examples:

to_title "her royal highness" returns "HER ROYAL HIGHNESS"
to_title "loud noises" returns "LOUD NOISES"
to_title "брат" returns "брат"

parameter s
The input string (UTF-8)

returns
The title-cased string

val to_upper : string -> string

to_upper s returns s with lowercase ASCII letters (a-z) mapped to their upper case; all other characters are left unchanged.

This function currently only uppercases ASCII letters (a-z). TODO: Support full Unicode uppercasing in the future.

Examples:

to_upper "Camel" returns "CAMEL"
to_upper "camel" returns "CAMEL"
to_upper "こんにちは" returns "こんにちは"

parameter s
The input string (UTF-8)

returns
The uppercased string

val to_camel_case : string -> string

Convert words separated by space, underscore, or hyphen to camel case.

Words are split on '_', '-', or space.
The first word is lowercased (even if originally all uppercase).
Subsequent words are capitalized (first letter uppercase, rest lowercase).
All-uppercase words are handled (e.g. "GOLANG_IS_GREAT" → "golangIsGreat").
If there are no separators, the original string is returned (e.g. "alreadyCamel" → "alreadyCamel").
Leading and trailing underscores are preserved (e.g. "_complex__case_" → "_complexCase_").
Multiple consecutive separators are treated as a single word boundary.
Hyphens and spaces are also treated as word boundaries.

Examples:

to_camel_case "some_words" = "someWords"
to_camel_case "_complex__case_" = "_complexCase_"
to_camel_case "OCAML_IS_GREAT" = "ocamlIsGreat"
to_camel_case "alreadyCamel" = "alreadyCamel"
to_camel_case "foo-BarBaz" = "fooBarBaz"
to_camel_case "word" = "word"
to_camel_case "" = ""

val to_kebab_case : string -> string

to_kebab_case s converts a string to kebab-case.

Uppercase ASCII letters are converted to lowercase.
Word boundaries are detected at transitions from lowercase to uppercase, from letter to digit, and at underscores, spaces, or hyphens.
All word boundaries are replaced with a single hyphen '-'.
Multiple consecutive separators are treated as a single hyphen.
Leading and trailing hyphens are removed.
If the input is empty, returns the empty string.

Examples:

to_kebab_case "FirstName" = "first-name"
to_kebab_case "HTTPServer" = "http-server"
to_kebab_case "NoHTTPS" = "no-https"
to_kebab_case "GO_PATH" = "go-path"
to_kebab_case "GO PATH" = "go-path"
to_kebab_case "GO-PATH" = "go-path"
to_kebab_case "http2xx" = "http-2xx"
to_kebab_case "HTTP20xOK" = "http-20x-ok"
to_kebab_case "Duration2m3s" = "duration-2m3s"
to_kebab_case "Bld4Floor3rd" = "bld4-floor-3rd"
to_kebab_case "abc" = "abc"
to_kebab_case "A" = "a"
to_kebab_case "FooBarBaz" = "foo-bar-baz"
to_kebab_case "" = ""

val to_pascal_case : string -> string

Convert words separated by space, underscore, or hyphen to PascalCase.

Words are split on '_', '-', or space.
Each word is capitalized (first letter uppercase, rest lowercase).
All-uppercase words are handled (e.g. "OCAML_IS_GREAT" → "OcamlIsGreat").
If there are no separators, the first letter is uppercased, the rest are unchanged.
Leading and trailing underscores and separators are removed in the output.
Multiple consecutive separators are treated as a single word boundary.
Hyphens and spaces are also treated as word boundaries.

Examples:

to_pascal_case "some_words" = "SomeWords"
to_pascal_case "_complex__case_" = "ComplexCase"
to_pascal_case "OCAML_IS_GREAT" = "OcamlIsGreat"
to_pascal_case "alreadyPascal" = "AlreadyPascal"
to_pascal_case "foo-BarBaz" = "FooBarBaz"
to_pascal_case "word" = "Word"
to_pascal_case "" = ""

val to_snake_case : string -> string

to_snake_case s converts a string to snake_case.

Uppercase ASCII letters are converted to lowercase.
Word boundaries are detected at transitions from lowercase to uppercase, from letter to digit, and at underscores, spaces, or hyphens.
All word boundaries are replaced with a single underscore '_'.
Multiple consecutive separators are treated as a single underscore.
Leading and trailing underscores are removed.
If the input is empty, returns the empty string.

Examples:

to_snake_case "FirstName" = "first_name"
to_snake_case "HTTPServer" = "http_server"
to_snake_case "NoHTTPS" = "no_https"
to_snake_case "GO_PATH" = "go_path"
to_snake_case "GO PATH" = "go_path"
to_snake_case "GO-PATH" = "go_path"
to_snake_case "http2xx" = "http_2xx"
to_snake_case "HTTP20xOK" = "http_20x_ok"
to_snake_case "Duration2m3s" = "duration_2m3s"
to_snake_case "Bld4Floor3rd" = "bld4_floor_3rd"

package stringx

Module Stringx

Module `Stringx`