View source with raw comments or as raw
    1/*  Part of SWISH
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2014-2022, VU University Amsterdam
    7                              SWI-Prolog Solutions b.v.
    8    All rights reserved.
    9
   10    Redistribution and use in source and binary forms, with or without
   11    modification, are permitted provided that the following conditions
   12    are met:
   13
   14    1. Redistributions of source code must retain the above copyright
   15       notice, this list of conditions and the following disclaimer.
   16
   17    2. Redistributions in binary form must reproduce the above copyright
   18       notice, this list of conditions and the following disclaimer in
   19       the documentation and/or other materials provided with the
   20       distribution.
   21
   22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   33    POSSIBILITY OF SUCH DAMAGE.
   34*/
   35
   36:- module(gitty,
   37          [ gitty_open/2,               % +Store, +Options
   38            gitty_close/1,              % +Store
   39            gitty_driver/2,             % +Store, -Driver
   40
   41            gitty_file/3,               % +Store, ?Name, ?Hash
   42            gitty_file/4,               % +Store, ?Name, ?Ext, ?Hash
   43            gitty_create/5,             % +Store, +Name, +Data, +Meta, -Commit
   44            gitty_update/5,             % +Store, +Name, +Data, +Meta, -Commit
   45            gitty_commit/3,             % +Store, +Name, -Meta
   46            gitty_plain_commit/3,       % +Store, +Name, -Meta
   47            gitty_data/4,               % +Store, +Name, -Data, -Meta
   48            gitty_history/4,            % +Store, +Name, -History, +Options
   49            gitty_hash/2,               % +Store, ?Hash
   50
   51            gitty_fsck/1,               % +Store
   52            gitty_save/4,               % +Store, +Data, +Type, -Hash
   53            gitty_load/4,               % +Store, +Hash, -Data, -Type
   54
   55            gitty_reserved_meta/1,      % ?Key
   56            is_gitty_hash/1,            % @Term
   57
   58            gitty_diff/4,               % +Store, ?Start, +End, -Diff
   59
   60            data_diff/3,                % +String1, +String2, -Diff
   61            udiff_string/2              % +Diff, -String
   62          ]).   63:- use_module(library(sha)).   64:- use_module(library(lists)).   65:- use_module(library(apply)).   66:- use_module(library(option)).   67:- use_module(library(process)).   68:- use_module(library(debug)).   69:- use_module(library(error)).   70:- use_module(library(filesex)).   71
   72:- if(exists_source(library(bdb))).   73:- use_module(gitty_driver_bdb, []).   74:- endif.   75:- use_module(gitty_driver_files, []).

Single-file GIT like version system

This library provides a first implementation of a lightweight versioned file store with dynamic meta-data. The store is partly modelled after GIT. Like GIT, it uses a content-based store. In fact, the stored objects are compatible with GIT. Unlike GIT though, there are no trees. Each entry (file) has its own history. Each commit is associated with a dict that can carry aribitrary meta-data. The following fields are reserved for gitties bookkeeping:

name:Name
Name of the entry (file)
time:TimeStamp
Float representing when the object was added to the store
data:Hash
Object hash of the contents
previous:Hash
Hash of the previous commit.

The key commit is reserved and returned as part of the meta-data of the newly created (gitty_create/5) or updated object (gitty_update/5). */

  101:- dynamic
  102    gitty_store_type/2.             % +Store, -Module
 gitty_open(+Store, +Options) is det
Open a gitty store according to Options. Defined options are:
driver(+Driver)
Backend driver to use. One of files or bdb. When omitted and the store exists, the current store is examined. If the store does not exist, the default is files.

Other options are passed to the driver method gitty_open(Store, Options).

  118gitty_open(Store, Options) :-
  119    (   exists_directory(Store)
  120    ->  true
  121    ;   existence_error(directory, Store)
  122    ),
  123    (   option(driver(Driver), Options)
  124    ->  true
  125    ;   default_driver(Store, Driver)
  126    ),
  127    set_driver(Store, Driver),
  128    gitty_driver_open(Store, Options).
  129
  130default_driver(Store, Driver) :-
  131    directory_file_path(Store, ref, RefDir),
  132    exists_directory(RefDir),
  133    !,
  134    Driver = files.
  135default_driver(Store, Driver) :-
  136    directory_file_path(Store, heads, RefDir),
  137    exists_file(RefDir),
  138    !,
  139    Driver = bdb.
  140default_driver(_, files).
  141
  142set_driver(Store, Driver) :-
  143    must_be(atom, Store),
  144    (   driver_module(Driver, Module)
  145    ->  retractall(gitty_store_type(Store, _)),
  146        asserta(gitty_store_type(Store, Module))
  147    ;   domain_error(gitty_driver, Driver)
  148    ).
  149
  150driver_module(files, gitty_driver_files).
  151driver_module(bdb,   gitty_driver_bdb).
  152
  153store_driver_module(Store, Module) :-
  154    atom(Store),
  155    !,
  156    gitty_store_type(Store, Module).
 gitty_driver(+Store, -Driver)
Get the current gitty driver
  162gitty_driver(Store, Driver) :-
  163    store_driver_module(Store, Module),
  164    driver_module(Driver, Module),
  165    !.
 gitty_driver_open(+Store, +Options) is det
Initialise the driver
  171gitty_driver_open(Store, Options) :-
  172    store_driver_module(Store, M),
  173    M:gitty_open(Store, Options).
 gitty_close(+Store) is det
Close access to the Store.
  179gitty_close(Store) :-
  180    store_driver_module(Store, M),
  181    M:gitty_close(Store).
 gitty_file(+Store, ?Head, ?Hash) is nondet
 gitty_file(+Store, ?Head, ?Ext, ?Hash) is nondet
True when Hash is an entry in the gitty Store and Head is the HEAD revision.
  189gitty_file(Store, Head, Hash) :-
  190    gitty_file(Store, Head, _Ext, Hash).
  191gitty_file(Store, Head, Ext, Hash) :-
  192    store_driver_module(Store, M),
  193    M:gitty_file(Store, Head, Ext, Hash).
 gitty_create(+Store, +Name, +Data, +Meta, -Commit) is det
Create a new object Name from Data and meta information.
Arguments:
Commit- is a dit describing the new Commit
  201gitty_create(Store, Name, _Data, _Meta, _) :-
  202    gitty_file(Store, Name, _Hash),
  203    !,
  204    throw(error(gitty(file_exists(Name)),_)).
  205gitty_create(Store, Name, Data, Meta, CommitRet) :-
  206    save_object(Store, Data, blob, Hash),
  207    get_time(Now),
  208    Commit = gitty{time:Now}.put(Meta)
  209                            .put(_{ name:Name,
  210                                    data:Hash
  211                                  }),
  212    format(string(CommitString), '~q.~n', [Commit]),
  213    save_object(Store, CommitString, commit, CommitHash),
  214    CommitRet = Commit.put(commit, CommitHash),
  215    catch(gitty_update_head(Store, Name, -, CommitHash, Hash),
  216          E,
  217          ( delete_object(Store, CommitHash),
  218            throw(E))).
 gitty_update(+Store, +Name, +Data, +Meta, -Commit) is det
Update document Name using Data and the given meta information
  224gitty_update(Store, Name, Data, Meta, CommitRet) :-
  225    gitty_file(Store, Name, OldHead),
  226    (   _{previous:OldHead} >:< Meta
  227    ->  true
  228    ;   throw(error(gitty(commit_version(Name, OldHead, Meta.previous)), _))
  229    ),
  230    gitty_plain_commit(Store, OldHead, OldMeta0),
  231    filter_identity(OldMeta0, OldMeta),
  232    get_time(Now),
  233    save_object(Store, Data, blob, Hash),
  234    Commit = gitty{}.put(OldMeta)
  235                    .put(_{time:Now})
  236                    .put(Meta)
  237                    .put(_{ name:Name,
  238                            data:Hash,
  239                            previous:OldHead
  240                          }),
  241    format(string(CommitString), '~q.~n', [Commit]),
  242    save_object(Store, CommitString, commit, CommitHash),
  243    CommitRet = Commit.put(commit, CommitHash),
  244    catch(gitty_update_head(Store, Name, OldHead, CommitHash, Hash),
  245          E,
  246          ( delete_object(Store, CommitHash),
  247            throw(E))).
 filter_identity(+Meta0, -Meta)
Remove identification information from the previous commit.
To be done
- : the identity properties should not be hardcoded here.
  256filter_identity(Meta0, Meta) :-
  257    delete_keys([ author,user,avatar,identity,peer,
  258                  external_identity, identity_provider, profile_id,
  259                  commit_message
  260                ], Meta0, Meta).
  261
  262delete_keys([], Dict, Dict).
  263delete_keys([H|T], Dict0, Dict) :-
  264    del_dict(H, Dict0, _, Dict1),
  265    !,
  266    delete_keys(T, Dict1, Dict).
  267delete_keys([_|T], Dict0, Dict) :-
  268    delete_keys(T, Dict0, Dict).
 gitty_update_head(+Store, +Name, +OldCommit, +NewCommit, +DataHash) is det
Update the head of a gitty store for Name. OldCommit is the current head and NewCommit is the new head. If Name is created, and thus there is no head, OldCommit must be -.

This operation can fail because another writer has updated the head. This can both be in-process or another process.

Errors
- gitty(file_exists(Name) if the file already exists
- gitty(not_at_head(Name, OldCommit) if the head was moved by someone else.
  285gitty_update_head(Store, Name, OldCommit, NewCommit, DataHash) :-
  286    store_driver_module(Store, Module),
  287    Module:gitty_update_head(Store, Name, OldCommit, NewCommit, DataHash).
 gitty_data(+Store, +NameOrHash, -Data, -Meta) is semidet
Get the data in object Name and its meta-data
  293gitty_data(Store, Name, Data, Meta) :-
  294    gitty_commit(Store, Name, Meta),
  295    load_object(Store, Meta.data, Data).
 gitty_commit(+Store, +NameOrHash, -Meta) is semidet
True if Meta holds the commit data of NameOrHash. A key commit is added to the meta-data to specify the commit hash.
  302gitty_commit(Store, Hash, Meta) :-
  303    is_gitty_hash(Hash),
  304    !,
  305    load_commit(Store, Hash, Meta).
  306gitty_commit(Store, Name, Meta) :-
  307    must_be(atom, Name),
  308    gitty_file(Store, Name, Head),
  309    load_commit(Store, Head, Meta).
  310
  311load_commit(Store, Hash, Meta) :-
  312    gitty_plain_commit(Store, Hash, Meta0),
  313    (   gitty_file(Store, Meta0.name, Hash)
  314    ->  Meta = Meta0.put(symbolic, "HEAD")
  315    ;   Meta = Meta0
  316    ).
 gitty_plain_commit(+Store, +Hash, -Meta) is semidet
Load the commit object with Hash.
  322gitty_plain_commit(Store, Hash, Meta) :-
  323    store_driver_module(Store, Module),
  324    Module:load_plain_commit(Store, Hash, Meta0),
  325    Meta = Meta0.put(commit, Hash).
 gitty_history(+Store, +NameOrHash, -History, +Options) is det
History is a dict holding a key history with a list of dicts representating the history of Name in Store. The toplevel dict also contains skipped, indicating the number of skipped items from the HEAD. Options:
depth(+Depth)
Number of entries in the history. If not present, defaults to 5.
includes(+HASH)
Ensure Hash is included in the history. This means that the history includes the entry with HASH an (depth+1)//2 entries after the requested HASH.
  342gitty_history(Store, Name, json{history:History,skipped:Skipped}, Options) :-
  343    history_hash_start(Store, Name, Hash0),
  344    option(depth(Depth), Options, 5),
  345    (   option(includes(Hash), Options)
  346    ->  read_history_to_hash(Store, Hash0, Hash, History00),
  347        length(History00, Before),
  348        After is max(Depth-Before, (Depth+1)//2),
  349        read_history_depth(Store, Hash, After, History1),
  350        length(History1, AfterLen),
  351        BeforeLen is Depth - AfterLen,
  352        list_prefix(BeforeLen, History00, History0),
  353        length(History00, Len00),
  354        length(History0, Len0),
  355        Skipped is Len00-Len0,
  356        append(History0, History1, History)
  357    ;   read_history_depth(Store, Hash0, Depth, History),
  358        Skipped is 0
  359    ).
  360
  361history_hash_start(Store, Name, Hash) :-
  362    gitty_file(Store, Name, Head),
  363    !,
  364    Hash = Head.
  365history_hash_start(_, Hash, Hash).
  366
  367
  368read_history_depth(_, _, 0, []) :- !.
  369read_history_depth(Store, Hash, Left, [H|T]) :-
  370    load_commit(Store, Hash, H),
  371    !,
  372    Left1 is Left-1,
  373    (   read_history_depth(Store, H.get(previous), Left1, T)
  374    ->  true
  375    ;   T = []
  376    ).
  377read_history_depth(_, _, _, []).
 read_history_to_hash(+Store, +Start, +Upto, -History)
Read the history upto, but NOT including Upto.
  383read_history_to_hash(Store, Hash, Upto, [H|T]) :-
  384    Upto \== Hash,
  385    load_commit(Store, Hash, H),
  386    (   read_history_to_hash(Store, H.get(previous), Upto, T)
  387    ->  true
  388    ;   T = []
  389    ).
  390read_history_to_hash(_, _, _, []).
  391
  392list_prefix(0, _, []) :- !.
  393list_prefix(_, [], []) :- !.
  394list_prefix(N, [H|T0], [H|T]) :-
  395    N2 is N - 1,
  396    list_prefix(N2, T0, T).
 save_object(+Store, +Data:string, +Type, -Hash) is det
Save an object in a git compatible way. Data provides the data as a string.
See also
- http://www.gitguys.com/topics/what-is-the-format-of-a-git-blob/
bug
- We currently delete objects if the head cannot be moved. This can lead to a race condition. We need to leave that to GC.
  409save_object(Store, Data, Type, Hash) :-
  410    size_in_bytes(Data, Size),
  411    format(string(Hdr), '~w ~d\u0000', [Type, Size]),
  412    sha_new_ctx(Ctx0, []),
  413    sha_hash_ctx(Ctx0, Hdr, Ctx1, _),
  414    sha_hash_ctx(Ctx1, Data, _, HashBin),
  415    hash_atom(HashBin, Hash),
  416    store_object(Store, Hash, Hdr, Data).
  417
  418store_object(Store, Hash, Hdr, Data) :-
  419    store_driver_module(Store, Module),
  420    Module:store_object(Store, Hash, Hdr, Data).
  421
  422size_in_bytes(Data, Size) :-
  423    setup_call_cleanup(
  424        open_null_stream(Out),
  425        ( format(Out, '~s', [Data]),
  426          byte_count(Out, Size)
  427        ),
  428        close(Out)).
 gitty_fsck(+Store) is det
Check the integrity of store.
  435gitty_fsck(Store) :-
  436    forall(gitty_hash(Store, Hash),
  437           fsck_object_msg(Store, Hash)),
  438    store_driver_module(Store, M),
  439    M:gitty_fsck(Store).
  440
  441fsck_object_msg(Store, Hash) :-
  442    fsck_object(Store, Hash),
  443    !.
  444fsck_object_msg(Store, Hash) :-
  445    print_message(error, gitty(Store, fsck(bad_object(Hash)))).
 fsck_object(+Store, +Hash) is semidet
Test the integrity of object Hash in Store.
  451:- public
  452    fsck_object/2,
  453    check_object/4.  454
  455fsck_object(Store, Hash) :-
  456    load_object(Store, Hash, Data, Type, Size),
  457    check_object(Hash, Data, Type, Size).
  458
  459check_object(Hash, Data, Type, Size) :-
  460    format(string(Hdr), '~w ~d\u0000', [Type, Size]),
  461    sha_new_ctx(Ctx0, []),
  462    sha_hash_ctx(Ctx0, Hdr, Ctx1, _),
  463    sha_hash_ctx(Ctx1, Data, _, HashBin),
  464    hash_atom(HashBin, Hash).
 load_object(+Store, +Hash, -Data) is det
 load_object(+Store, +Hash, -Data, -Type, -Size) is det
Load the given object.
  474load_object(Store, Hash, Data) :-
  475    load_object(Store, Hash, Data, _, _).
  476load_object(Store, Hash, Data, Type, Size) :-
  477    store_driver_module(Store, Module),
  478    Module:load_object(Store, Hash, Data, Type, Size).
 gitty_save(+Store, +Data, +Type, -Hash) is det
 gitty_load(+Store, +Hash, -Data, -Type) is det
Low level objects store. These predicate allows for using the store as an arbitrary content store.
Arguments:
Data- is a string
Type- is an atom denoting the object type.
  489gitty_save(Store, Data, Type, Hash) :-
  490    save_object(Store, Data, Type, Hash).
  491gitty_load(Store, Hash, Data, Type) :-
  492    load_object(Store, Hash, Data, Type, _Size).
 gitty_hash(+Store, ?Hash) is nondet
True when Hash is an object in the store.
  498gitty_hash(Store, Hash) :-
  499    store_driver_module(Store, Module),
  500    Module:gitty_hash(Store, Hash).
 delete_object(+Store, +Hash)
Delete an existing object
  506delete_object(Store, Hash) :-
  507    store_driver_module(Store, Module),
  508    Module:delete_object(Store, Hash).
 gitty_reserved_meta(?Key) is nondet
True when Key is a gitty reserved key for the commit meta-data
  514gitty_reserved_meta(name).
  515gitty_reserved_meta(time).
  516gitty_reserved_meta(data).
  517gitty_reserved_meta(previous).
 is_gitty_hash(@Term) is semidet
True if Term is a possible gitty (SHA1) hash
  524is_gitty_hash(SHA1) :-
  525    atom(SHA1),
  526    atom_length(SHA1, 40),
  527    atom_codes(SHA1, Codes),
  528    maplist(hex_digit, Codes).
  529
  530hex_digit(C) :- between(0'0, 0'9, C), !.
  531hex_digit(C) :- between(0'a, 0'f, C).
  532
  533
  534                 /*******************************
  535                 *          FSCK SUPPORT        *
  536                 *******************************/
  537
  538:- public
  539    delete_object/2,
  540    delete_head/2,
  541    set_head/3.
 delete_head(+Store, +Head) is det
Delete Head from the administration. Used if the head is inconsistent.
  548delete_head(Store, Head) :-
  549    store_driver_module(Store, Module),
  550    Module:delete_head(Store, Head).
 set_head(+Store, +File, +Head) is det
Register Head as the Head hash for File, removing possible old head.
  557set_head(Store, File, Head) :-
  558    store_driver_module(Store, Module),
  559    Module:set_head(Store, File, Head).
  560
  561
  562                 /*******************************
  563                 *             DIFF             *
  564                 *******************************/
 gitty_diff(+Store, ?Hash1, +FileOrHash2OrData, -Dict) is det
True if Dict representeds the changes in Hash1 to FileOrHash2. If Hash1 is unbound, it is unified with the previous of FileOrHash2. Returns _{initial:true} if Hash1 is unbound and FileOrHash2 is the initial commit. Dict contains:
from:Meta1
to:Meta2
Meta-data for the two diffed versions
data:UDiff
String holding unified diff representation of changes to the data. Only present of data has changed
tags:_315402{added:AddedTags, deleted:DeletedTags}
If tags have changed, the added and deleted ones.
Arguments:
FileOrHash2OrData- is a file name, hash or a term data(String) to compare a given string with a gitty version.
  586gitty_diff(Store, C1, data(Data2), Dict) :-
  587    !,
  588    must_be(atom, C1),
  589    gitty_data(Store, C1, Data1, _Meta1),
  590    (   Data1 \== Data2
  591    ->  udiff_string(Data1, Data2, UDIFF),
  592        Dict = json{data:UDIFF}
  593    ;   Dict = json{}
  594    ).
  595gitty_diff(Store, C1, C2, Dict) :-
  596    gitty_data(Store, C2, Data2, Meta2),
  597    (   var(C1)
  598    ->  C1 = Meta2.get(previous)
  599    ;   true
  600    ),
  601    !,
  602    gitty_data(Store, C1, Data1, Meta1),
  603    Pairs = [ from-Meta1, to-Meta2|_],
  604    (   Data1 \== Data2
  605    ->  udiff_string(Data1, Data2, UDIFF),
  606        memberchk(data-UDIFF, Pairs)
  607    ;   true
  608    ),
  609    meta_tag_set(Meta1, Tags1),
  610    meta_tag_set(Meta2, Tags2),
  611    (   Tags1 \== Tags2
  612    ->  ord_subtract(Tags1, Tags2, Deleted),
  613        ord_subtract(Tags2, Tags1, Added),
  614        memberchk(tags-_{added:Added, deleted:Deleted}, Pairs)
  615    ;   true
  616    ),
  617    once(length(Pairs,_)),                  % close list
  618    dict_pairs(Dict, json, Pairs).
  619gitty_diff(_Store, '0000000000000000000000000000000000000000', _C2,
  620           json{initial:true}).
  621
  622
  623meta_tag_set(Meta, Tags) :-
  624    sort(Meta.get(tags), Tags),
  625    !.
  626meta_tag_set(_, []).
 udiff_string(+Data1, +Data2, -UDIFF) is det
Produce a unified difference between two strings. Note that we can avoid one temporary file using diff's - arg and the second by passing =/dev/fd/NNN= on Linux systems. See http://stackoverflow.com/questions/3800202
  635:- if(true).  636
  637% Note that cleanup on possible errors is   rather hard. The created tmp
  638% stream must be closed and the file must  be deleted. We also close the
  639% file before running diff (necessary  on   Windows  to  avoid a sharing
  640% violation). Therefore reclaim_tmp_file/2 first uses   close/2 to close
  641% if not already done and then deletes the file.
  642
  643udiff_string(Data1, Data2, UDIFF) :-
  644    setup_call_cleanup(
  645        tmp_file_stream(utf8, File1, Tmp1),
  646        ( save_string(Data1, Tmp1),
  647          setup_call_cleanup(
  648              tmp_file_stream(utf8, File2, Tmp2),
  649              ( save_string(Data2, Tmp2),
  650                process_diff(File1, File2, UDIFF)
  651              ),
  652              reclaim_tmp_file(File2, Tmp2))
  653        ),
  654        reclaim_tmp_file(File1, Tmp1)).
  655
  656save_string(String, Stream) :-
  657    call_cleanup(
  658        format(Stream, '~s', [String]),
  659        close(Stream)).
  660
  661reclaim_tmp_file(File, Stream) :-
  662    close(Stream, [force(true)]),
  663    delete_file(File).
  664
  665process_diff(File1, File2, String) :-
  666    setup_call_cleanup(
  667        process_create(path(diff),
  668                       ['-u', file(File1), file(File2)],
  669                       [ stdout(pipe(Out)),
  670                         process(PID)
  671                       ]),
  672        read_string(Out, _, String),
  673        ( close(Out),
  674          process_wait(PID, Status)
  675        )),
  676    assertion(normal_diff_exit(Status)).
  677
  678normal_diff_exit(exit(0)).              % equal
  679normal_diff_exit(exit(1)).              % different
  680
  681:- else.  682
  683udiff_string(Data1, Data2, UDIFF) :-
  684    data_diff(Data1, Data2, Diffs),
  685    maplist(udiff_string, Diffs, Strings),
  686    atomics_to_string(Strings, UDIFF).
  687
  688:- endif.  689
  690
  691                 /*******************************
  692                 *         PROLOG DIFF          *
  693                 *******************************/
  694
  695/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  696Attempt at a built-in diff utility. Doing   it in Prolog may seem weird,
  697but is good for tasting  ones  own   dog  food.  In  addition, it avoids
  698temporary files and relatively expensive fork()  calls. As it turns out,
  699implementing an efficient LCS (Longest  Common   Sequence)  in Prolog is
  700rather hard. We'll leave the  code  for   reference,  but  might  seek a
  701different solution for the real thing.  Options are:
  702
  703  - Use external diff after all
  704  - Add a proper Prolog implementation of LCS
  705  - Add LCS in C.
  706- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 data_diff(+Data1, +Data2, -UDiff) is det
Diff two data strings line-by-line. UDiff is a list of terms of the form below, where L1 and L2 provide the starting line in Data1 and Data2 and S1 and S2 provide the number of affected lines.
udiff(L1,S1,L2,S2,Diff)

Diff is a list holding

+ Line
Line was added to Data1 to get Data2
- Line
Line was deleted from Data1 to get Data2
Line1 - Line2
Line was replaced
=(Line)
Line is identical (context line).
  731data_diff(Data, Data, UDiff) :-
  732    !,
  733    UDiff = [].
  734data_diff(Data1, Data2, Diff) :-
  735    split_string(Data1, "\n", "", List1),
  736    split_string(Data2, "\n", "", List2),
  737    list_diff(List1, List2, Diff).
  738
  739list_diff(List1, List2, UDiff) :-
  740    list_lcs(List1, List2, Lcs),
  741    make_diff(List1, List2, Lcs, c(), 1, 1, Diff),
  742    join_diff(Diff, UDiff).
 make_diff(+List1, +List2, +Lcs, +Context0, +Line1, +Line2, -Diff)
  746make_diff([], [], [], _, _, _, []) :- !.
  747make_diff([H|T1], [H|T2], [H|C], c(_,C0,C1), L1, L2, Diff) :-
  748    !,
  749    L11 is L1+1,
  750    L21 is L2+1,
  751    make_diff(T1, T2, C, c(C0,C1,H), L11, L21, Diff).
  752make_diff([H|T1], [H|T2], [H|C], C0, L1, L2, Diff) :-
  753    !,
  754    L11 is L1+1,
  755    L21 is L2+1,
  756    add_context(C0, H, C1),
  757    (   compound_name_arity(C1, _, L1)
  758    ->  Diff = Diff1
  759    ;   Diff = [=(H)|Diff1]
  760    ),
  761    make_diff(T1, T2, C, C1, L11, L21, Diff1).
  762make_diff([H|T1], [H2|T2], [H|C], C0, L1, L2, [d(L1,L2,C0,+H2)|Diff]) :-
  763    !,
  764    L21 is L2+1,
  765    make_diff([H|T1], T2, [H|C], c(), L1, L21, Diff).
  766make_diff([], [H2|T2], [], C0, L1, L2, [d(L1,L2,C0,+H2)|Diff]) :-
  767    !,
  768    L21 is L2+1,
  769    make_diff([], T2, [], c(), L1, L21, Diff).
  770make_diff([H1|T1], [H|T2], [H|C], C0, L1, L2, [d(L1,L2,C0,-H1)|Diff]) :-
  771    !,
  772    L11 is L1+1,
  773    make_diff(T1, [H|T2], [H|C], c(), L11, L2, Diff).
  774make_diff([H1|T1], [], [], C0, L1, L2, [d(L1,L2,C0,-H1)|Diff]) :-
  775    !,
  776    L11 is L1+1,
  777    make_diff(T1, [], [], c(), L11, L2, Diff).
  778make_diff([H1|T1], [H2|T2], C, C0, L1, L2, [d(L1,L2,C0,H1-H2)|Diff]) :-
  779    !,
  780    L11 is L1+1,
  781    L21 is L2+1,
  782    make_diff(T1, T2, C, c(), L11, L21, Diff).
  783
  784add_context(c(_,B,C),N,c(B,C,N)).
  785add_context(c(A,B),  N,c(A,B,N)).
  786add_context(c(A),    N,c(A,N)).
  787add_context(c(),     N,c(N)).
 join_diff(+Diff, -UDiff) is det
  791join_diff([], []).
  792join_diff([d(L10,L20,C,L)|T0], [udiff(L1,S1,L2,S2,Diff)|T]) :-
  793    pre_context(C, S0, Diff, [L|DiffT]),
  794    L1 is L10-S0,
  795    L2 is L20-S0,
  796    diff_affected(L,S10,S20),
  797    S11 is S10+S0,
  798    S21 is S20+S0,
  799    collect_diff(T0,S11,S21,S1,S2,0,DiffT,T1),
  800    join_diff(T1, T).
  801
  802pre_context(c(),      0, L, L).
  803pre_context(c(A),     1, [=(A)|L], L).
  804pre_context(c(A,B),   2, [=(A),=(B)|L], L).
  805pre_context(c(A,B,C), 3, [=(A),=(B),=(C)|L], L).
  806
  807collect_diff([d(_,_,_,L)|T0], S10,S20,S1,S2,C,[L|Diff],T) :-
  808    C < 3,
  809    !,
  810    diff_affected(L,S1x,S2x),
  811    S11 is S10+S1x,
  812    S21 is S20+S2x,
  813    collect_diff(T0,S11,S21,S1,S2,0,Diff,T).
  814collect_diff([=(L)|T0], S10,S20,S1,S2,C0,[=(L)|Diff],T) :-
  815    !,
  816    S11 is S10+1,
  817    S21 is S20+1,
  818    C1 is C0+1,
  819    collect_diff(T0,S11,S21,S1,S2,C1,Diff,T).
  820collect_diff(T,S1,S2,S1,S2,_,[],T).
  821
  822diff_affected(+(_),   0, 1).
  823diff_affected(-(_),   0, 1).
  824diff_affected(-(_,_), 1, 1).
 udiff_string(+UDiff, -String) is det
True when String is the string representation of UDiff.
  830udiff_string(udiff(L1,S1,L2,S2,Diff), Final) :-
  831    format(string(Hdr), '@@ -~d,~d +~d,~d @@', [L1,S1,L2,S2]),
  832    udiff_blocks(Diff, Blocks),
  833    maplist(block_lines, Blocks, LineSets),
  834    append(LineSets, Lines),
  835    atomics_to_string([Hdr|Lines], "\n", Final).
  836
  837block_lines(=(U), Lines) :- maplist(string_concat(' '), U, Lines).
  838block_lines(+(U), Lines) :- maplist(string_concat('+'), U, Lines).
  839block_lines(-(U), Lines) :- maplist(string_concat('-'), U, Lines).
  840
  841udiff_blocks([], []) :- !.
  842udiff_blocks([=(H)|T0], [=([H|E])|T]) :-
  843    !,
  844    udiff_cp(T0, E, T1),
  845    udiff_blocks(T1, T).
  846udiff_blocks(U, List) :-
  847    udiff_block(U, D, A, T1),
  848    udiff_add(D,A,List,ListT),
  849    udiff_blocks(T1, ListT).
  850
  851udiff_add([],A,[+A|T],T) :- !.
  852udiff_add(D,[],[-D|T],T) :- !.
  853udiff_add(D,A,[-D,+A|T],T).
  854
  855udiff_cp([=(H)|T0], [H|E], T) :-
  856    !,
  857    udiff_cp(T0, E, T).
  858udiff_cp(L, [], L).
  859
  860udiff_block([-L|T], [L|D], A, Rest) :-
  861    !,
  862    udiff_block(T, D, A, Rest).
  863udiff_block([+L|T], D, [L|A], Rest) :-
  864    !,
  865    udiff_block(T, D, A, Rest).
  866udiff_block([L1-L2|T], [L1|D], [L2|A], Rest) :-
  867    !,
  868    udiff_block(T, D, A, Rest).
  869udiff_block(T, [], [], T).
 list_lcs(+List1, +List2, -Lcs) is det
To be done
- Too slow. See http://wordaligned.org/articles/longest-common-subsequence
  875:- thread_local lcs_db/2.  876
  877list_lcs([], [], []) :- !.
  878list_lcs([H|L1], [H|L2], [H|Lcs]) :-
  879    !,
  880    list_lcs(L1, L2, Lcs).
  881list_lcs(List1, List2, Lcs) :-
  882    reverse(List1, Rev1),
  883    reverse(List2, Rev2),
  884    copy_prefix(Rev1, Rev2, RevDiff1, RevDiff2, RevLcs, RevT),
  885    list_lcs2(RevDiff1, RevDiff2, RevT),
  886    reverse(RevLcs, Lcs).
  887
  888list_lcs2(List1, List2, Lcs) :-
  889    variant_sha1(List1+List2, Hash),
  890    call_cleanup(
  891        lcs(List1, List2, Hash, Lcs),
  892        retractall(lcs_db(_,_))).
  893
  894copy_prefix([H|T1], [H|T2], L1, L2, [H|L], LT) :-
  895    !,
  896    copy_prefix(T1, T2, L1, L2, L, LT).
  897copy_prefix(R1, R2, R1, R2, L, L).
  898
  899
  900lcs(_,_,Hash,Lcs) :-
  901    lcs_db(Hash,Lcs),
  902    !.
  903lcs([H|L1], [H|L2], _, [H|Lcs]) :-
  904    !,
  905    variant_sha1(L1+L2,Hash),
  906    lcs(L1, L2, Hash, Lcs).
  907lcs(List1, List2, Hash, Lcs) :-
  908    List1 = [H1|L1],
  909    List2 = [H2|L2],
  910    variant_sha1(L1+[H2|L2],Hash1),
  911    variant_sha1([H1|L1]+L2,Hash2),
  912    lcs(    L1 , [H2|L2], Hash1, Lcs1),
  913    lcs([H1|L1],     L2 , Hash2, Lcs2),
  914    longest(Lcs1, Lcs2, Lcs),
  915    !,
  916    asserta(lcs_db(Hash, Lcs)).
  917lcs(_,_,_,[]).
  918
  919longest(L1, L2, Longest) :-
  920    length(L1, Length1),
  921    length(L2, Length2),
  922    (   Length1 > Length2
  923    ->  Longest = L1
  924    ;   Longest = L2
  925    ).
  926
  927                 /*******************************
  928                 *            MESSAGES          *
  929                 *******************************/
  930:- multifile
  931    prolog:error_message//1.  932
  933prolog:error_message(gitty(not_at_head(Name, _OldCommit))) -->
  934    [ 'Gitty: cannot update head for "~w" because it was \c
  935           updated by someone else'-[Name] ].
  936prolog:error_message(gitty(file_exists(Name))) -->
  937    [ 'Gitty: File exists: ~p'-[Name] ].
  938prolog:error_message(gitty(commit_version(Name, _Head, _Previous))) -->
  939    [ 'Gitty: ~p: cannot update (modified by someone else)'-[Name] ]