Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Source file zone.ml
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563(* Functions for parsing time zone database files (zic files).
A time zone file consists (conceptually - the representation is more
compact) of an ordered list of (Time.t * [local_time_type]) that mark
the boundaries (marked from the epoch) at which various time adjustment
regimes are in effect. This can also be thought of as breaking down all
time past the epoch into ranges with a [local_time_type] that describes the
offset from GMT to apply to each range to get local time.
*)openImportopenStd_internalopen!Int.Replace_polymorphic_compareincludeZone_intfexceptionInvalid_file_formatofstring[@@derivingsexp]moduleStable=structmoduleFull_data=structmoduleV1=structmoduleIndex=structtypet=intletnext=Int.succletprev=Int.predletbefore_first_transition=-1(* Some existing clients expect [index >= 0], so we never serialize a negative
index. This conversion can be removed if new stable versions are minted. *)letto_externalt=max0t(* When the index of a time zone with no transitions is converted via to_external,
its value becomes 0 even though its transition array is empty (and it should
have been -1). When the converted value is changed back to a Zone.t through
of_external, returning this value for its index could result in unsafe array
accesses to the transition array of the zone (since there is no transition at
index 0). Also, it does not make sense to keep the converted index because it
is intended to be a mutable value used for caching. So of_external always sets
the index to -1, which is a safe value. *)letof_external(_:t)=-1includeBinable.Of_binable_without_uuid[@alert"-legacy"](Int)(structtypet=intletto_binable=to_externalletof_binable=of_externalend)includeSexpable.Of_sexpable(Int)(structtypet=intletto_sexpable=to_externalletof_sexpable=of_externalend)endmoduleRegime=structtypet={utc_offset_in_seconds:Int63.Stable.V1.t;is_dst:bool;abbrv:string}[@@derivingbin_io,sexp]end(* holds information about when leap seconds should be applied - unused
because we are translating based on a epoch system clock (see the Core_zone
documentation). *)moduleLeap_second=structtypet={time_in_seconds_since_epoch:Int63.Stable.V1.t;seconds:int}[@@derivingbin_io,sexp]endmoduleTransition=structtypet={start_time_in_seconds_since_epoch:Int63.Stable.V1.t;new_regime:Regime.t}[@@derivingbin_io,sexp]endtypet={name:string;original_filename:stringoption;digest:Md5.As_binary_string.toption;transitions:Transition.tarray;(* caches the index of the last transition we used to make lookups faster *)mutablelast_regime_index:Index.t;default_local_time_type:Regime.t;leap_seconds:Leap_second.tlist}[@@derivingbin_io,sexp](* this relies on zones with the same name having the same transitions *)letcomparet1t2=String.comparet1.namet2.nameletoriginal_filenamezone=zone.original_filenameletdigestzone=zone.digestmoduleZone_file:sigvalinput_tz_file:zonename:string->filename:string->tend=structletbool_of_inti=i<>0letinput_long_as_int32=letlong=Bytes.create4inletint32_of_charchr=Int32.of_int_exn(int_of_charchr)infunic->In_channel.really_input_exnic~buf:long~pos:0~len:4;letsb1=Int32.shift_left(int32_of_char(Bytes.getlong0))24inletsb2=Int32.shift_left(int32_of_char(Bytes.getlong1))16inletsb3=Int32.shift_left(int32_of_char(Bytes.getlong2))8inletsb4=int32_of_char(Bytes.getlong3)inInt32.bit_or(Int32.bit_orsb1sb2)(Int32.bit_orsb3sb4);;(* Note that this is only safe to use on numbers that will fit into a 31-bit
int. UNIX timestamps won't, for example. In our case this is only used
to hold small numbers that are never interpreted as timestamps. *)letinput_long_as_intic=Int32.to_int_exn(input_long_as_int32ic)letinput_long_as_int63ic=Int63.of_int32(input_long_as_int32ic)letinput_long_long_as_int63ic=letint63_of_charchr=Int63.of_int_exn(int_of_charchr)inletshiftcbits=Int63.shift_left(int63_of_charc)bitsinletlong_long=Bytes.create8inIn_channel.really_input_exnic~buf:long_long~pos:0~len:8;letresult=shift(Bytes.getlong_long0)56inletresult=Int63.bit_orresult(shift(Bytes.getlong_long1)48)inletresult=Int63.bit_orresult(shift(Bytes.getlong_long2)40)inletresult=Int63.bit_orresult(shift(Bytes.getlong_long3)32)inletresult=Int63.bit_orresult(shift(Bytes.getlong_long4)24)inletresult=Int63.bit_orresult(shift(Bytes.getlong_long5)16)inletresult=Int63.bit_orresult(shift(Bytes.getlong_long6)8)inletresult=Int63.bit_orresult(int63_of_char(Bytes.getlong_long7))inresult;;letinput_listic~len~f=letrecloopclst=ifc>0thenloop(c-1)(fic::lst)elseList.revlstinlooplen[];;letinput_arrayic~len~f=Array.of_list(input_listic~len~f)letinput_regimeic=letutc_offset_in_seconds=input_long_as_int63icinletis_dst=bool_of_int(Option.value_exn(In_channel.input_byteic))inletabbrv_index=Option.value_exn(In_channel.input_byteic)inletltabbrv={Regime.utc_offset_in_seconds;is_dst;abbrv}inlt,abbrv_index;;letinput_abbreviationsic~len=letraw_abbrvs=input_listic~len~f:(funic->Option.value_exn(In_channel.input_charic))inletbuf=Buffer.createleninlet_,indexed_abbrvs=List.foldraw_abbrvs~init:(0,Map.Poly.empty)~f:(fun(index,abbrvs)c->matchcwith|'\000'->letdata=Buffer.contentsbufinletnext_index=index+String.lengthdata+1inletabbrvs=Map.setabbrvs~key:index~datainBuffer.clearbuf;next_index,abbrvs|c->Buffer.add_charbufc;index,abbrvs)inifBuffer.lengthbuf<>0thenraise(Invalid_file_format"missing \000 terminating character in input_abbreviations");indexed_abbrvs;;letinput_tz_file_gen~input_transition~input_leap_secondic=letutc_local_count=input_long_as_inticinletstd_wall_count=input_long_as_inticinletleap_count=input_long_as_inticinlettransition_count=input_long_as_inticinlettype_count=input_long_as_inticinletabbrv_char_count=input_long_as_inticinlettransition_times=input_listic~f:input_transition~len:transition_countinlettransition_indices=input_listic~f:(funic->Option.value_exn(In_channel.input_byteic))~len:transition_countinletregimes=input_listic~f:input_regime~len:type_countinletabbreviations=input_abbreviationsic~len:abbrv_char_countinletleap_seconds=input_listic~f:input_leap_second~len:leap_countin(* The following two arrays indicate two boolean values per regime that
represent a three-value type that would translate to:
type transition_type = UTC | Standard | Wall_clock
However, these are only used by the system library when handling the case where the
TZ variable is set, not to a time zone name, but instead is of the form:
TZ = "std offset dst offset, rule"
Which is deeply obscure, and almost certainly a mistake to use. This library makes
no pretense about handling this case. We continue to read them in for
completeness, and because it's possible that we will later discover a case where
they are used. *)let_std_wall_indicators=input_arrayic~len:std_wall_count~f:(funic->bool_of_int(Option.value_exn(In_channel.input_byteic)))inlet_utc_local_indicators=input_arrayic~len:utc_local_count~f:(funic->bool_of_int(Option.value_exn(In_channel.input_byteic)))inletregimes=Array.of_list(List.mapregimes~f:(fun(lt,abbrv_index)->letabbrv=Map.find_exnabbreviationsabbrv_indexinltabbrv))inletraw_transitions=List.map2_exntransition_timestransition_indices~f:(funtimeindex->letregime=regimes.(index)intime,regime)inlettransitions=letrecmake_transitionsaccl=matchlwith|[]->Array.of_list(List.revacc)|(start_time_in_seconds_since_epoch,new_regime)::rest->make_transitions({Transition.start_time_in_seconds_since_epoch;new_regime}::acc)restinmake_transitions[]raw_transitionsinletdefault_local_time_type=matchArray.findregimes~f:(funr->notr.Regime.is_dst)with|None->regimes.(0)|Someltt->lttinfunname~original_filename~digest->{name;original_filename=Someoriginal_filename;digest=Somedigest;transitions;last_regime_index=Index.before_first_transition;default_local_time_type;leap_seconds};;letinput_leap_second_gen~input_leap_secondic=lettime_in_seconds_since_epoch=input_leap_secondicinletseconds=input_long_as_inticin{Leap_second.time_in_seconds_since_epoch;seconds};;letread_headeric=letmagic=letbuf=Bytes.create4inIn_channel.really_input_exnic~buf~pos:0~len:4;Bytes.unsafe_to_string~no_mutation_while_string_reachable:bufinifnot(String.equalmagic"TZif")thenraise(Invalid_file_format"magic characters TZif not present");letversion=matchIn_channel.input_charicwith|Some'\000'->`V1|Some'2'->`V2|Some'3'->`V3|None->raise(Invalid_file_format"expected version, found nothing")|Somebad_version->raise(Invalid_file_format(sprintf"version (%c) is invalid"bad_version))in(* space reserved for future use in the format *)In_channel.really_input_exnic~buf:(Bytes.create15)~pos:0~len:15;version;;letinput_tz_file_v1ic=letinput_leap_second=input_leap_second_gen~input_leap_second:input_long_as_int63ininput_tz_file_gen~input_transition:input_long_as_int63~input_leap_secondic;;(*
version 2 timezone files have the format:
part 1 - exactly the same as v1
part 2 - same format as v1, except that 8 bytes are used to store
transition times and leap seconds
part 3 - a newline-encloded, POSIX-TZ-environment-variable-style
string for use in handling instants after the last transition time
stored in the file (with nothing between the newlines if there is no
POSIX representation for such instants)
We handle files in this format by parsing the first part exactly as a v1
timezone file and then continuing to parse with 64bit reading functions in the
right places.
Version 3 timezone files are the same as version 2, except the
POSIX-TZ-environment-variable-style string in part 3 may use two minor
extensions to the POSIX TZ format (the hours part of its transition
times may be signed and range from -167 through 167 instead of the
POSIX-required unsigned values from 0 through 24; and DST is in effect all
year if it starts January 1 at 00:00 and ends December 31 at 24:00 plus the
difference between daylight saving and standard time).
As we don't actually do anything with part 3 anyway, we can just read v3
files as v2.
*)letinput_tz_file_v2_or_v3~versionic=let(_:string->original_filename:string->digest:Md5_lib.t->t)=input_tz_file_v1icin(* the header is fully repeated *)assert([%compare.equal:[`V1|`V2|`V3]](read_headeric)version);letinput_leap_second=input_leap_second_gen~input_leap_second:input_long_long_as_int63ininput_tz_file_gen~input_transition:input_long_long_as_int63~input_leap_secondic;;letinput_tz_file~zonename~filename=tryprotectx(In_channel.createfilename)~finally:In_channel.close~f:(funic->letmake_zone=matchread_headericwith|`V1->input_tz_file_v1ic|(`V2|`V3)asversion->input_tz_file_v2_or_v3~versionicinletdigest=Md5.digest_file_blockingfilenameinletr=make_zonezonename~original_filename:filename~digestinr)with|Invalid_file_formatreason->raise(Invalid_file_format(sprintf"%s - %s"filenamereason));;endletof_utc_offset_explicit_name~name~hours:offset=assert(offset>=-24&&offset<=24);letutc_offset_in_seconds=Int63.of_int(offset*60*60)in{name;original_filename=None;digest=None;transitions=[||];last_regime_index=Index.before_first_transition;default_local_time_type={Regime.utc_offset_in_seconds;is_dst=false;abbrv=name};leap_seconds=[]};;letof_utc_offset~hours:offset=letname=ifoffset=0then"UTC"elsesprintf"UTC%s%d"(ifoffset<0then"-"else"+")(absoffset)inof_utc_offset_explicit_name~name~hours:offset;;endendendincludeStable.Full_data.V1letsexp_of_tt=Sexp.Atomt.nameletlikely_machine_zones=ref["America/New_York";"Europe/London";"Asia/Hong_Kong";"America/Chicago"];;letinput_tz_file=Zone_file.input_tz_fileletutc=of_utc_offset~hours:0letnamezone=zone.nameletreset_transition_cachet=t.last_regime_index<-Index.before_first_transition(* Raises if [index >= Array.length t.transitions] *)letget_regime_exntindex=ifindex<0thent.default_local_time_typeelset.transitions.(index).new_regime;;(* In "absolute mode", a number of seconds is interpreted as an offset of that many
seconds from the UNIX epoch, ignoring leap seconds.
In "date and ofday mode", you interpret the number of seconds as a number of days in
combination with a number of seconds since midnight, which gives you a calendar day and
a clock face time. Then you take the time that those represent in some relevant
timezone.
Of course, if the timezone in question has DST shifts, the date and ofday might
represent two or zero times. These times will be interpreted according to either the
previous UTC offset or the next one, in a way whose precise details you probably
shouldn't depend on.
(For the curious, what we do is: compute the "relative time" of the shift according to
the new regime, and assign relative times to the old regime or new regime depending on
which side of the shift time they occur. Since this amounts to using the old regime
when the clocks move forward and the new regime when the clocks move back, it's
equivalent to calculating the corresponding Time.t's relative to both the old and the
new regime and picking the one that occurs later. Yes, later. I had to draw a diagram
to persuade myself that it's that way round, but it is.)
*)moduleMode=structtypet=|Absolute|Date_and_ofdayendleteffective_start_time~mode(x:Transition.t)=letopenInt63.Oinmatch(mode:Mode.t)with|Absolute->x.start_time_in_seconds_since_epoch|Date_and_ofday->x.start_time_in_seconds_since_epoch+x.new_regime.utc_offset_in_seconds;;letindex_lower_bound_contains_seconds_since_epochtindex~modeseconds=index<0||Int63.(>=)seconds(effective_start_time~modet.transitions.(index));;letindex_upper_bound_contains_seconds_since_epochtindex~modeseconds=index+1>=Array.lengtht.transitions||Int63.(<)seconds(effective_start_time~modet.transitions.(index+1));;letbinary_search_index_of_seconds_since_epocht~modeseconds:Index.t=Array.binary_search_segmentedt.transitions`Last_on_left~segment_of:(funtransition->ifInt63.(<=)(effective_start_timetransition~mode)secondsthen`Leftelse`Right)|>Option.value~default:Index.before_first_transition;;letindex_of_seconds_since_epocht~modeseconds=letindex=letindex=t.last_regime_indexinifnot(index_lower_bound_contains_seconds_since_epochtindex~modeseconds)(* time is before cached index; try previous index *)then(letindex=index-1inifnot(index_lower_bound_contains_seconds_since_epochtindex~modeseconds)(* time is before previous index; fall back on binary search *)thenbinary_search_index_of_seconds_since_epocht~modeseconds(* time is before cached index and not before previous, so within previous *)elseindex)elseifnot(index_upper_bound_contains_seconds_since_epochtindex~modeseconds)(* time is after cached index; try next index *)then(letindex=index+1inifnot(index_upper_bound_contains_seconds_since_epochtindex~modeseconds)(* time is after next index; fall back on binary search *)thenbinary_search_index_of_seconds_since_epocht~modeseconds(* time is after cached index and not after next, so within next *)elseindex(* time is within cached index *))elseindexint.last_regime_index<-index;index;;moduleTime_in_seconds:sigincludeZone_intf.Time_in_secondsend=structmoduleSpan=structtypet=Int63.tletof_int63_seconds=Fn.idletto_int63_seconds_round_down_exn=Fn.idendmoduleAbsolute=structtypet=Int63.tletof_span_since_epoch=Fn.idletto_span_since_epoch=Fn.idendmoduleDate_and_ofday=structtypet=Int63.tletof_synthetic_span_since_epoch=Fn.idletto_synthetic_span_since_epoch=Fn.idendincludeAbsoluteendletindexttime=Time_in_seconds.to_span_since_epochtime|>Time_in_seconds.Span.to_int63_seconds_round_down_exn|>index_of_seconds_since_epocht~mode:Absolute;;letindex_of_date_and_ofdayttime=Time_in_seconds.Date_and_ofday.to_synthetic_span_since_epochtime|>Time_in_seconds.Span.to_int63_seconds_round_down_exn|>index_of_seconds_since_epocht~mode:Date_and_ofday;;letindex_has_prev_clock_shifttindex=index>=0&&index<Array.lengtht.transitionsletindex_has_next_clock_shifttindex=index_has_prev_clock_shiftt(index+1)letindex_prev_clock_shift_time_exntindex=lettransition=t.transitions.(index)intransition.start_time_in_seconds_since_epoch|>Time_in_seconds.Span.of_int63_seconds|>Time_in_seconds.of_span_since_epoch;;letindex_next_clock_shift_time_exntindex=index_prev_clock_shift_time_exnt(index+1)letindex_prev_clock_shift_amount_exntindex=lettransition=t.transitions.(index)inletafter=transition.new_regimeinletbefore=ifindex=0thent.default_local_time_typeelset.transitions.(index-1).new_regimeinInt63.(-)after.utc_offset_in_secondsbefore.utc_offset_in_seconds|>Time_in_seconds.Span.of_int63_seconds;;letindex_next_clock_shift_amount_exntindex=index_prev_clock_shift_amount_exnt(index+1);;letindex_abbreviation_exntindex=letregime=get_regime_exntindexinregime.abbrv;;letindex_offset_from_utc_exntindex=letregime=get_regime_exntindexinTime_in_seconds.Span.of_int63_secondsregime.utc_offset_in_seconds;;