Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Source file git.ml
(* Yoann Padioleau
*
* Copyright (C) 2009 Yoann Padioleau
* Copyright (C) 2010-2012 Facebook
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* version 2.1 as published by the Free Software Foundation, with the
* special exception on linking described in file license.txt.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file
* license.txt for more details.
*)

open Common

module Date = Common2

open Lib_vcs

(*****************************************************************************)
(* Prelude *)
(*****************************************************************************)

(*****************************************************************************)
(* Types, globals *)
(*****************************************************************************)

(* Extension of the cache files holding the result of git annotate.
 * It is passed to Common.cache_computation by annotate2 and used by
 * cleanup_cache_files to find the cache files to delete.
 *)
let ext_git_annot_cache = ".git_annot"

(*****************************************************************************)
(* Wrappers *)
(*****************************************************************************)

(* Logging helpers for this module, built from the version-control
 * verbosity flag.
 * NOTE(review): presumably pr2 prints only when the flag is set; confirm
 * against Common2.mk_pr2_wrappers.
 *)
let pr2, _pr2_once = Common2.mk_pr2_wrappers Flag_version_control.verbose

(*****************************************************************************)
(* Helpers *)
(*****************************************************************************)

(* this may loop forever ... better to realpath +> split "/" and then
* process I think. Also dangerous. I think it would be good
* also when walking the parents to check if there is a .svn or .hg
* and whatever and then raise an exception
*
* let parent_path_with_dotgit_opt subdir =
* let subdir = Common.relative_to_absolute subdir in
* let rec aux subdir =
* if Sys.file_exists (Filename.concat subdir "/.git")
* then Some subdir
* else
* let parent = Common.dirname subdir in
* if parent = "/"
* then None
* else aux parent
* in
* aux subdir
*
* let parent_path_with_dotgit a =
* Common.some (parent_path_with_dotgit_opt a)
*
* todo: walking of the parent (subject to GIT_CEILING_DIRS)
*)

(* Tell whether Version_control.detect_vcs_source_tree reports basedir
 * as a Git source tree.
 *)
let is_git_repository basedir =
  Version_control.detect_vcs_source_tree basedir =*= Some (Version_control.Git)

(* Return an ancestor directory of file (expected to be an absolute path)
 * that contains a .git entry.
 * NOTE(review): Common2.inits presumably lists prefixes shortest first, so
 * after List.rev the deepest directory is tried first; the behavior when
 * no candidate matches is whatever Common.find_some does (presumably an
 * exception) -- confirm both.
 *)
let find_root_from_absolute_path file =
  let xs = Common.split "/" file in
  let xxs = Common2.inits xs in
  xxs +> List.rev +> Common.find_some (fun xs ->
    let dir = "/" ^ Common.join "/" xs in
    let gitdir = Filename.concat dir ".git" in
    if Sys.file_exists gitdir
    then Some dir
    else None
  )

(* Delete the git annotate cache files found under dir.
 * Each file name is logged with pr2 before being removed.
 *)
let cleanup_cache_files dir =
  let cache_ext = [ext_git_annot_cache] in
  cache_ext +> List.iter (fun ext ->
    let files = Common2.files_of_dir_or_files_no_vcs ext [dir] in
    files +> List.iter (fun file ->
      (* safety net: never remove a file that does not have the cache
       * extension *)
      assert (Common2.filesuffix file = ext);
      pr2 file;
      (* quote the path so that spaces or shell metacharacters in a file
       * name cannot alter the rm command *)
      Common.command2 (spf "rm -f %s" (Filename.quote file))
    )
  );
  ()

(* Remove from the lines of a git patch the metadata lines matched below
 * (index, file mode changes, missing final newline marker).
 *)
let clean_git_patch xs =
  xs +> Common.exclude (fun s ->
    s =~ "^index[ \t]"
    || s =~ "^deleted file mode"
    || s =~ "^new file mode"
    || s =~ "^old mode"
    || s =~ "^new mode"
    || s =~ ".*No newline at end of file"
    || false
  )

(* Run the shell command s after Lib_vcs.goto_dir basedir.
 * Raises Failure when the exit code returned by Sys.command is not 0.
 *)
let exec_cmd ~basedir s =
  let cmd = Lib_vcs.goto_dir basedir ^ s in
  pr2 (spf "executing: %s" s);
  let ret = Sys.command cmd in
  if ret <> 0
  then failwith ("pb with command: " ^ s)

(*****************************************************************************)
(* Single file operations, "command output binding" *)
(*****************************************************************************)

(* ex:
e7ff626d (Linus Torvalds 2004-09-23 18:49:25 -0700 1)/*
^9a235ca ( pad 2009-11-21 15:50:04 -0800 1) <?php
can use -M and -C to use better tracking algorithm, can detect
move of lines in same file or across file in same commit.
does git annotate follow rename ?
*)

(* Regexp for one line of git annotate output. The five groups are, in
 * order: commit id, author, year, month, day.
 * NOTE(review): the second example line above starts with a caret, which
 * [A-Za-z0-9]+ does not accept. Such lines would be reported as wrong
 * lines and dropped by annotate2, which shifts the indexes of the lines
 * that follow -- confirm and extend the regexp if needed.
 *)
let annotate_regexp =
  "^\\([A-Za-z0-9]+\\)[ \t]+" ^
  "([ \t]*\\(.*\\)" ^
  "[ \t]+" ^
  "\\([0-9]+\\)" ^ "-" ^ "\\([0-9]+\\)" ^ "-" ^ "\\([0-9]+\\)" ^
  "[ \t]" ^
  "[0-9]+" ^ ":" ^ "[0-9]+" ^ ":" ^ "[0-9]+" ^
  "[ \t]" ^
  "[-+]" ^ "[0-9]+" ^
  "[ \t]+" ^
  "[0-9]+" ^ ")" ^
  ".*$" (* the rest is line of code *)

(* related? git blame and git pickaxe ? *)

(* Run git annotate on filename (relative to basedir) and return an array
 * of (commit id, author, date) annotations. Index 0 holds
 * dummy_annotation so that index i corresponds to line i of the file.
 *
 * - use_cache is forwarded to Common.cache_computation, keyed on the
 *   full filename and ext_git_annot_cache.
 * - use_dash_C adds the -C option to git annotate.
 *
 * Output lines that do not match annotate_regexp are logged and skipped.
 *)
let annotate2 ?(basedir="") ?(use_cache=false) ?(use_dash_C=true) filename =
  let full_filename = Filename.concat basedir filename in

  (* git blame is really slow, so cache its result *)
  Common.cache_computation ~use_cache full_filename ext_git_annot_cache
    (fun () ->
      (* adding -C leads to better information
       * adding HEAD so that can get the full information of a file that
       * has been modified in the working tree.
       *)
      let cmd =
        goto_dir basedir ^
        spf "git annotate %s HEAD -- %s 2>&1"
          (if use_dash_C then "-C" else "")
          (* shell-quote the name so that special characters are safe *)
          (Filename.quote filename)
      in
      (* pr2 cmd; *)
      (* todo? check status. can have a file not under git in which case we
       * get a 'fatal: no such path ... in HEAD
       *)
      let (xs, _status) = Common2.cmd_to_list_and_status cmd in
      (*let ys = Common.cat (Common.filename_of_db (basedir,filename)) in*)

      let annots =
        xs +> Common.map_filter (fun s ->
          if s =~ annotate_regexp
          then
            let (commitid, author, year, month, day) = Common.matched5 s in
            Some (VersionId commitid,
                  Author author,
                  Common2.mk_date_dmy
                    (s_to_i day) (s_to_i month) (s_to_i year))
          else begin
            pr2 ("git annotate wrong line: " ^ s);
            None
          end
        )
      in
      (* files lines are 1_based, so add this dummy 0 entry *)
      Array.of_list (dummy_annotation::annots)
    )

(* Same as annotate2, wrapped in Common.profile_code. *)
let annotate ?basedir ?use_cache ?use_dash_C a =
  Common.profile_code "Git.annotate" (fun () ->
    annotate2 ?basedir ?use_cache ?use_dash_C a)

(* ------------------------------------------------------------------------ *)

(* Return the raw lines of git annotate for filename that match
 * annotate_regexp, as an array whose index 0 is an empty string so that
 * index i corresponds to line i of the file. Non-matching lines are
 * silently dropped.
 *)
let annotate_raw ?(basedir="") filename =
  let cmd =
    goto_dir basedir ^
    (* quoted like in annotate2 so that both functions accept the same
     * file names *)
    "git annotate HEAD -- " ^ Filename.quote filename ^ " 2>&1"
  in
  (* pr2 cmd; *)
  let xs = Common.cmd_to_list cmd in
  (*let ys = Common.cat (Common.filename_of_db (basedir,filename)) in*)

  let annots =
    xs +> Common.map_filter (fun s ->
      if s =~ annotate_regexp
      then Some s
      else begin
        (* pr2 ("git annotate wrong line: " ^ s); *)
        None
      end
    )
  in
  (* files lines are 1_based, so add this dummy 0 entry *)
  Array.of_list (""::annots)

(* ------------------------------------------------------------------------ *)

(* ex:
Sat, 31 Dec 2005 15:21:18 +0800
*)

(* Regexp for a date as printed by the %aD git log format. The three
 * groups are, in order: day, month name, year.
 *)
let date_regexp =
  "[A-Za-z]+," ^
  "[ \t]+" ^
  "\\([0-9]+\\)" ^
  "[ \t]+" ^
  "\\([A-Za-z]+\\)" ^
  "[ \t]+" ^
  "\\([0-9]+\\)" ^
  "[ \t]+" ^
  ".*$"

(* Return the date of the first line printed by git log --reverse for
 * file, as a Date.DMY value.
 * Raises Failure when git prints nothing or when the first line does not
 * match date_regexp.
 *)
let date_file_creation2 ?(basedir="") file =
  (* note: can't use -1 with git log cos it will show only 1 entry, but
   * the last one, despite the use of --reverse
   *)
  let cmd =
    goto_dir basedir ^
    (* shell-quote the name so that special characters are safe *)
    "git log --reverse --pretty=format:%aD " ^ Filename.quote file ^ " 2>&1"
  in
  (* pr2 cmd; *)
  let xs = Common.cmd_to_list cmd in
  match xs with
  | s::_xs ->
      if s =~ date_regexp
      then
        let (day, month_str, year) = matched3 s in
        Date.DMY (Date.Day (s_to_i day),
                  Common2.month_of_string month_str,
                  Date.Year (s_to_i year))
      else failwith ("git log wrong line: " ^ s)
  | _ ->
      failwith ("git log wrong output")

(* Same as date_file_creation2, wrapped in Common.profile_code. *)
let date_file_creation ?basedir a =
  Common.profile_code "Git.date_file" (fun () ->
    date_file_creation2 ?basedir a)

(*****************************************************************************)
(* Repository operations *)
(*****************************************************************************)

(* Return the branch names printed by git branch, without the star that
 * prefixes one of them. Raises Failure on an unexpected line.
 *)
let branches ~basedir =
  let cmd = goto_dir basedir ^ "git branch --no-color" in
  let xs = Common.cmd_to_list cmd in
  xs +> List.map (fun s ->
    if s =~ "[ \t]*\\*[ \t]+\\(.*\\)"
    then matched1 s
    else
      if s =~ "[ \t]+\\(.*\\)"
      then matched1 s
      else failwith ("wrong line in git branch: " ^ s)
  )

(* Split a line of git log --pretty=oneline into (commit id, summary).
 * Raises Failure when the line does not contain a space.
 *)
let id_and_summary_oneline s =
  if s =~ "\\([^ ]+\\) \\(.*\\)"
  then
    let (commit, summary) = Common.matched2 s in
    (VersionId commit, summary)
  else failwith ("wrong line in git log: " ^ s)

(* Return the (commit id, summary) pairs printed by git log, in the order
 * git prints them. extra_args is appended verbatim to the command line.
 *)
let commits ?(extra_args="") ~basedir () =
  let cmd =
    goto_dir basedir ^
    spf "git log --no-color --pretty=oneline %s" extra_args
  in
  let xs = Common.cmd_to_list cmd in
  xs +> List.map id_and_summary_oneline

(* Return the files reported by git grep --files-with-matches for str.
 * str is inserted verbatim in the command line, so the caller is
 * responsible for any quoting.
 * Raises CmdError on an exit status other than 0, or than 1 with no
 * output.
 *)
let grep ~basedir str =
  let cmd =
    goto_dir basedir ^
    spf "git grep --files-with-matches %s" str
  in
  let (xs, status) = Common2.cmd_to_list_and_status cmd in
  (* According to git grep man page, non-zero exit code is expected when
   * there are no matches
   *)
  match xs, status with
  | [], Unix.WEXITED 1 -> []
  | xs, Unix.WEXITED 0 -> xs
  | _ ->
      raise (CmdError (status,
                       spf "CMD = %s, RESULT = %s"
                         cmd (String.concat "\n" xs)))

(* Write the content of file as of commitid in a fresh temporary file and
 * return the name of that temporary file.
 * Raises Failure (via exec_cmd) when git show fails.
 *)
let show ~basedir file commitid =
  let tmpfile = Common.new_temp_file "git_show" ".cat" in
  let str_commit = Lib_vcs.s_of_versionid commitid in
  let cmd = spf "git show %s:%s > %s" str_commit file tmpfile in
  exec_cmd ~basedir cmd;
  tmpfile

(*****************************************************************************)
(* single commit operations *)
(*****************************************************************************)

(* Return the unprocessed output lines of git show for commitid. *)
let commit_raw_patch ~basedir commitid =
  let (VersionId scommit) = commitid in
  let cmd =
    goto_dir basedir ^
    spf "git show --no-color %s" scommit
  in
  let xs = Common.cmd_to_list cmd in
  xs

(* Return the one-line summary of commitid.
 * Raises Failure when git log prints nothing or an unexpected line.
 *)
let commit_summary ~basedir commitid =
  let (VersionId scommit) = commitid in
  let cmd =
    goto_dir basedir ^
    (* (spf "git show --no-color --pretty=oneline %s" scommit)) in *)
    spf "git log --pretty=oneline -1 %s" scommit
  in
  let xs = Common.cmd_to_list cmd in
  match xs with
  | [] ->
      (* still a Failure, as with List.hd, but with a usable message *)
      failwith ("git log wrong output for commit: " ^ scommit)
  | x::_ -> x +> id_and_summary_oneline +> snd

(* Return the lines of the body of the commit message of commitid. *)
let commit_info ~basedir commitid =
  let (VersionId scommit) = commitid in
  let cmd =
    goto_dir basedir ^
    (* (spf "git show --no-color --pretty=oneline %s" scommit)) in *)
    spf "git log --format='%%b' -1 %s" scommit
  in
  let xs = Common.cmd_to_list cmd in
  xs

(* Return the patch of commitid as parsed by Lib_vcs.parse_commit_patch,
 * after the metadata lines have been removed by clean_git_patch.
 *)
let commit_patch ~basedir commitid =
  let (VersionId scommit) = commitid in
  let cmd =
    goto_dir basedir ^
    spf "git show --no-color %s" scommit
  in
  let xs = Common.cmd_to_list cmd in
  let xs = clean_git_patch xs in
  Lib_vcs.parse_commit_patch xs

(* Return the id of the last commit printed by git log --since for the
 * given relative date string.
 * Raises Failure when no commit is printed.
 *)
let commit_of_relative_time ~basedir relative_data_string =
  let cmd =
    goto_dir basedir ^
    spf "git log --no-color --pretty=oneline --since=\"%s\""
      relative_data_string
  in
  let xs = Common.cmd_to_list cmd in
  match xs with
  | [] ->
      failwith ("git log: no commit since " ^ relative_data_string)
  | _ ->
      let last = Common2.list_last xs in
      id_and_summary_oneline last +> fst

(* Return the status of every file touched by commitid, each output line
 * being parsed by Lib_vcs.parse_file_status.
 *)
let files_involved_in_diff ~basedir commitid =
  let str_commit = Lib_vcs.s_of_versionid commitid in
  let cmd =
    goto_dir basedir ^
    spf "git show --name-status --pretty=\"format:\" %s" str_commit
  in
  let xs = Common.cmd_to_list cmd in
  (* the previous command has a first empty line before the list of files.
   * NOTE(review): some git versions appear to omit that line, so it is
   * skipped when present instead of being asserted; an empty output now
   * gives an empty list instead of an exception.
   *)
  let files =
    match xs with
    | "" :: rest -> rest
    | _ -> xs
  in
  files +> List.map Lib_vcs.parse_file_status

(*****************************************************************************)
(* multiple commits operations *)
(*****************************************************************************)

(* Return the ids of the commits printed by git log old_id..recent_id,
 * in the reverse of the order in which git prints them.
 *)
let commits_between_commitids ~basedir ~old_id ~recent_id =
  let cmd =
    goto_dir basedir ^
    spf "git log --no-color --pretty=oneline %s..%s"
      (s_of_versionid old_id)
      (s_of_versionid recent_id)
  in
  let xs = Common.cmd_to_list cmd in
  xs +> List.map id_and_summary_oneline +> List.map fst +> List.rev

(* For every file touched by one of the commits, accumulate the
 * (commit id, file info) pairs of the commits touching it, and return
 * the content of the table with h#to_list.
 * A commit whose patch raises an exception is logged with pr2 and
 * skipped, so that one bad patch does not abort the whole run.
 *)
let file_to_commits ~basedir commits =
  let h = Common2.hash_with_default (fun () -> []) in
  let total = List.length commits in
  commits +> Common.index_list_1 +> List.iter (fun (vid, cnt) ->
    Common2.log2 (spf "patch %d/%d" cnt total);
    try
      let patch = commit_patch ~basedir vid in
      let (_strs, patchinfo) = patch in

      patchinfo +> List.iter (fun (filename, fileinfo) ->
        (* TODO use fileinfo *)
        h#update filename (fun old -> (vid, fileinfo)::old)
      )
    with e ->
      pr2 (spf "PB with patch: %s, exn = %s"
             (Lib_vcs.s_of_versionid vid)
             (Common.exn_to_s e))
      (* TODO *)
  );
  h#to_list

(* very useful when have to send automatic diffs to people, to not penalize
* the people who have just refactored the code and are actually not really
* responsible for the code in the file.
*)
let refactoring_commits ?(since="--since='1 year ago'") ?(threshold=50) repo =
  let basedir = Common.fullpath repo in
  let commits = commits ~basedir ~extra_args:since () in
  pr2 (spf "#commits = %d" (List.length commits));

  let refactoring_ids =
    commits +> Console.progress (fun k -> List.filter (fun (id, _x) ->
      k ();
      let (Lib_vcs.VersionId scommit) = id in
      (* the directory is shell-quoted, and && is used so that git is not
       * run from the wrong directory when cd fails *)
      let cmd =
        spf "cd %s && git show --oneline --no-color --stat %s"
          (Filename.quote basedir) scommit
      in
      let xs = Common.cmd_to_list cmd in
      (* basic heuristic: more than N files in a diff => refactoring diff *)
      List.length xs > threshold
    ))
  in

  (* the selected commits are both logged and written, one per line as
   * id followed by summary, in this fixed file *)
  let tmpfile = "/tmp/refactoring_diffs.list" in
  pr2 (spf "writing data in %s" tmpfile);
  Common.with_open_outfile tmpfile (fun (pr, _chan) ->
    refactoring_ids +> List.iter (fun (id, s) ->
      pr2_gen (id, s);
      pr (spf "%s %s\n" (Lib_vcs.s_of_versionid id) s)
    )
  );
  ()

(* Read a file whose lines start with a commit id followed by a space
 * (the format written by refactoring_commits) and return the commit ids,
 * truncated to 8 characters.
 * Raises Failure on a line that does not have this shape.
 *)
let parse_skip_revs_file file =
  file +> Common.cat +> List.map (fun s ->
    if s =~ "^\\([^ ]+\\) "
    then
      let id = Common.matched1 s in
      (* git annotate returns commitid of length 8, so must match that.
       * Shorter ids are kept as is instead of making String.sub raise
       * Invalid_argument. *)
      Lib_vcs.VersionId (String.sub id 0 (min 8 (String.length id)))
    else failwith ("wrong entry in skiprevs file: " ^ s)
  )

(*****************************************************************************)
(* line level operations, preparing commits *)
(*****************************************************************************)

(* Write the lines of patch_string_list in a temporary file, run
 * git apply on it from basedir, and log the output of git with pr2.
 * The exit status of git apply is not checked.
 *)
let apply_patch ~basedir patch_string_list =
  let tmpfile = Common.new_temp_file "git" ".patch" in
  let s = Common2.unlines patch_string_list in
  Common.write_file ~file:tmpfile s;

  let cmd = goto_dir basedir ^ "git apply " ^ tmpfile ^ " 2>&1" in
  let xs = Common.cmd_to_list cmd in
  xs +> List.iter pr2;
  ()

(* ------------------------------------------------------------------------ *)
(*
* Return which person(s) to blame for some deadcode (in fact certain lines).
* Do majority, except a whitelist, and if nothing found then
* do majority of file, and if nothing found (because of whitelist)
* then say "NOBODYTOBLAME"
*
* One improvement suggested by sgrimm is to use git annotate -C (or
* git blame -C) which tries to detect move of code and give a more
* accurate author. See h_version-control/git.ml.
*
* For instance on www/lib/common.php,
* git annotate -C vs git annotate gives:
*
* 138,147c138,147
* < 2ea63cc5 ( jwiseman 2007-07-03 01:39:41 +0000 138) *
* < d6106bdb ( jwiseman 2007-07-05 21:58:37 +0000 139) * @param int $id the id of a user or an object
* < d6106bdb ( jwiseman 2007-07-05 21:58:37 +0000 140) * @param string $exit_fn the function to call when the connection fails
* < d6106bdb ( jwiseman 2007-07-05 21:58:37 +0000 141) * @param array $args arguments to $exit_fn
* < 2ea63cc5 ( jwiseman 2007-07-03 01:39:41 +0000 142) * @return resource a write connection to the specified db
* < 2ea63cc5 ( jwiseman 2007-07-03 01:39:41 +0000 143) * @author jwiseman
* < 2ea63cc5 ( jwiseman 2007-07-03 01:39:41 +0000 144) */
* < d6106bdb ( jwiseman 2007-07-05 21:58:37 +0000 145)function require_write_conn($id, $exit_fn='go_home', $args=null) {
* < 2ea63cc5 ( jwiseman 2007-07-03 01:39:41 +0000 146) $conn_w = id_get_conn($id, 'w');
* < 2ea63cc5 ( jwiseman 2007-07-03 01:39:41 +0000 147) if (!$conn_w) {
* ---
* > effa6f73 ( mcslee 2007-10-18 06:43:09 +0000 138) *
* > effa6f73 ( mcslee 2007-10-18 06:43:09 +0000 139) * @param int $id the id of a user or an object
* > effa6f73 ( mcslee 2007-10-18 06:43:09 +0000 140) * @param string $exit_fn the function to call when the connection fails
* > effa6f73 ( mcslee 2007-10-18 06:43:09 +0000 141) * @param array $args arguments to $exit_fn
* > effa6f73 ( mcslee 2007-10-18 06:43:09 +0000 142) * @return resource a write connection to the specified db
* > effa6f73 ( mcslee 2007-10-18 06:43:09 +0000 143) * @author jwiseman
* > effa6f73 ( mcslee 2007-10-18 06:43:09 +0000 144) */
* > effa6f73 ( mcslee 2007-10-18 06:43:09 +0000 145)function require_write_conn($id, $exit_fn='go_home', $args=null) {
* > effa6f73 ( mcslee 2007-10-18 06:43:09 +0000 146) $conn_w = id_get_conn($id, 'w');
* > effa6f73 ( mcslee 2007-10-18 06:43:09 +0000 147) if (!$conn_w) {
*
* It is clear that the first series of blame is better, as
* it contains multiple commits, and because mcslee was probably just
* moving code around and not actually modifying the code.
*
* Note that by default git blame does already some analysis such as
* detecting renaming of files. But it does not do more than that. For
* intra files moves, you want git annotate -C.
*
* With -C it takes 130min to run the deadcode analysis on www.
* Fortunately once it's cached, it takes only 2 minutes.
*
*)
let get_2_best_blamers_of_lines
    ~basedir
    ?use_cache
    ?(is_valid_author=(fun _ -> true))
    ?(skip_revs=[])
    filename
    lines_to_remove
  =
  (* git blame is really slow, so useful to cache its result *)
  let annots = annotate ~basedir ?use_cache filename in

  (* authors of the given lines; lines_to_remove holds 1-based line
   * numbers used directly as indexes in annots *)
  let toblame =
    lines_to_remove +> Common.map_filter (fun i ->
      let (version, Lib_vcs.Author author, _date) = annots.(i) in

      (* todo: commitid string sometimes are specified by their full
       * length, sometimes only by its first 8 characters. Maybe should
       * have a commitid_equal and use that. Right now
       * I assume the skip_revs contain just like the result from
       * git annotate 8-chars commit ids
       *)
      if is_valid_author author && not (List.mem version skip_revs)
      then Some author
      else None
    )
  in

  let hblame = Common.hashset_of_list toblame in

  (* fallback candidates: valid authors of the file that are not already
   * in toblame *)
  let other_authors =
    annots +> Array.to_list +> Common.map_filter (fun x ->
      let (version, Lib_vcs.Author author, _date) = x in
      if is_valid_author author
         && not (Common2.hmem author hblame)
         && not (List.mem version skip_revs)
      then Some author
      else None
    )
  in

  let counts =
    Common2.count_elements_sorted_highfirst toblame +> List.map fst in
  let counts' =
    Common2.count_elements_sorted_highfirst other_authors +> List.map fst in

  (* authors of the lines come first, then the other authors of the file *)
  Common2.take_safe 2 (counts @ counts')

(* Return the result of Common2.maximum_dmy on the annotation dates of
 * the given lines of filename, ignoring the lines whose commit id is in
 * skip_revs.
 * NOTE(review): the behavior when every line is skipped depends on what
 * Common2.maximum_dmy does with an empty list -- confirm.
 *)
let max_date_of_lines ~basedir ?use_cache ?(skip_revs=[])
    filename lines_to_remove =
  let annots = annotate ~basedir ?use_cache filename in

  (* todo? use only the lines_to_remove or the whole file to
   * decide of the "date" of the patch ? *)
  let toblame =
    lines_to_remove +> Common.map_filter (fun i ->
      let (version, Lib_vcs.Author _author, date) = annots.(i) in
      if not (List.mem version skip_revs)
      then Some date
      else None
    )
  in
  Common2.maximum_dmy toblame

(*****************************************************************************)
(* Archeology *)
(*****************************************************************************)

(* src:
* http://www.bramschoenmakers.nl/en/node/645
*
* Sometimes it's handy to checkout a branch based on a point in time but:
*
* $ git checkout master@{2009-07-27 13:37}
*
* will not work, because it uses the reflog (which expires after some time).
*
* The trick (as found on Nabble) is to lookup the revision on a certain
* date and check out that revision. This can be done in a single command:
*
* $ git checkout `git rev-list -n 1 --before="2009-07-27 13:37" master`
*)