1/* Part of SWI-Prolog 2 3 Author: Jan Wielemaker 4 E-mail: J.Wielemaker@vu.nl 5 WWW: http://www.swi-prolog.org 6 Copyright (c) 2009-2022, VU University Amsterdam 7 CWI, Amsterdam, 8 SWI-Prolog Solutions b.v. 9 All rights reserved. 10 11 Redistribution and use in source and binary forms, with or without 12 modification, are permitted provided that the following conditions 13 are met: 14 15 1. Redistributions of source code must retain the above copyright 16 notice, this list of conditions and the following disclaimer. 17 18 2. Redistributions in binary form must reproduce the above copyright 19 notice, this list of conditions and the following disclaimer in 20 the documentation and/or other materials provided with the 21 distribution. 22 23 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 26 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 27 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 28 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 29 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 30 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 31 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 33 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 POSSIBILITY OF SUCH DAMAGE. 35*/ 36 37:- module(csv, 38 [ csv//1, % +Rows 39 csv//2, % +Rows, +Options 40 41 csv_read_file/2, % +File, -Data 42 csv_read_file/3, % +File, -Data, +Options 43 csv_read_stream/3, % +Stream, -Data, +Options 44 45 csv_read_file_row/3, % +File, -Row, +Options 46 csv_read_row/3, % +Stream, -Row, +CompiledOptions 47 csv_options/2, % -Compiled, +Options 48 49 csv_write_file/2, % +File, +Data 50 csv_write_file/3, % +File, +Data, +Options 51 csv_write_stream/3 % +Stream, +Data, +Options 52 ]). 53:- use_module(library(record),[(record)/1, op(_,_,record)]). 54 55:- autoload(library(apply),[maplist/2]). 56:- use_module(library(debug),[debug/3]). 57:- autoload(library(error),[must_be/2,domain_error/2]). 58:- autoload(library(lists),[append/3]). 59:- autoload(library(option),[option/2,select_option/4]). 60:- autoload(library(pure_input), 61 [phrase_from_file/3,phrase_from_stream/2]). 62:- autoload(library(readutil),[read_line_to_codes/2]). 63:- autoload(library(dcg/basics),[string//1,eos//0]).
79:- predicate_options(csv//2, 2, 80 [ separator(code), % must be code 81 strip(boolean), 82 ignore_quotes(boolean), 83 convert(boolean), 84 case(oneof([down,preserve,up])), 85 functor(atom), 86 arity(-nonneg), % actually ?nonneg 87 match_arity(boolean) 88 ]). 89:- predicate_options(csv_read_file/3, 3, 90 [ pass_to(csv//2, 2), 91 pass_to(phrase_from_file/3, 3) 92 ]). 93:- predicate_options(csv_read_file_row/3, 3, 94 [ line(-integer), 95 pass_to(csv//2, 2), 96 pass_to(open/4, 4) 97 ]). 98:- predicate_options(csv_write_file/3, 3, 99 [ pass_to(csv//2, 2), 100 pass_to(open/4, 4) 101 ]). 102:- predicate_options(csv_write_stream/3, 3, 103 [ pass_to(csv//2, 2) 104 ]). 105 106 107:- record 108 csv_options(separator:integer=0',, 109 strip:boolean=false, 110 ignore_quotes:boolean=false, 111 convert:boolean=true, 112 case:oneof([down,preserve,up])=preserve, 113 functor:atom=row, 114 arity:integer, 115 match_arity:boolean=true, 116 skip_header:atom).
\t
for
.tsv
files and ,
otherwise.
Suppose we want to create a predicate table/6 from a CSV file
that we know contains 6 fields per record. This can be done
using the code below. Without the option arity(6)
, this would
generate a predicate table/N, where N is the number of fields
per record in the data.
?- csv_read_file(File, Rows, [functor(table), arity(6)]), maplist(assert, Rows).
140csv_read_file(File, Rows) :- 141 csv_read_file(File, Rows, []). 142 143csv_read_file(File, Rows, Options) :- 144 default_separator(File, Options, Options1), 145 make_csv_options(Options1, Record, RestOptions), 146 phrase_from_file(csv_roptions(Rows, Record), File, RestOptions). 147 148 149default_separator(File, Options0, Options) :- 150 ( option(separator(_), Options0) 151 -> Options = Options0 152 ; file_name_extension(_, Ext0, File), 153 downcase_atom(Ext0, Ext), 154 ext_separator(Ext, Sep) 155 -> Options = [separator(Sep)|Options0] 156 ; Options = Options0 157 ). 158 159ext_separator(csv, 0',). 160ext_separator(tsv, 0'\t).
167csv_read_stream(Stream, Rows, Options) :-
168 make_csv_options(Options, Record, _),
169 phrase_from_stream(csv_roptions(Rows, Record), Stream).
separator(0';)
parses
a semicolon separated file.true
(default false), threat double quotes as a normal
character.true
(default false
), strip leading and trailing
blank space. RFC4180 says that blank space is part of the
data.#
. After
skipping comment lines this option causes csv//2 to skip empty
lines. Note that an empty line may not contain white space
characters (space or tab) as these may provide valid data.true
(default), use name/2 on the field data. This
translates the field into a number if possible.down
, downcase atomic values. If up
, upcase them
and if preserve
(default), do not change the case.row
.domain_error(row_arity(Expected), Found)
if a row is
found with different arity.false
(default true
), do not reject CSV files where
lines provide a varying number of fields (columns). This
can be a work-around to use some incorrect CSV files.221csv(Rows) --> 222 csv(Rows, []). 223 224csv(Rows, Options) --> 225 { make_csv_options(Options, Record, _) }, 226 csv_roptions(Rows, Record). 227 228csv_roptions(Rows, Record) --> 229 { ground(Rows) }, 230 !, 231 emit_csv(Rows, Record). 232csv_roptions(Rows, Record) --> 233 skip_header(Record), 234 csv_data(Rows, Record). 235 236skip_header(Options) --> 237 { csv_options_skip_header(Options, CommentStart), 238 nonvar(CommentStart), 239 atom_codes(CommentStart, Codes) 240 }, 241 !, 242 skip_header_lines(Codes), 243 skip_blank_lines. 244skip_header(_) --> 245 []. 246 247skip_header_lines(CommentStart) --> 248 string(CommentStart), 249 !, 250 ( string(_Comment), 251 end_of_record 252 -> skip_header_lines(CommentStart) 253 ). 254skip_header_lines(_) --> 255 []. 256 257skip_blank_lines --> 258 eos, 259 !. 260skip_blank_lines --> 261 end_of_record, 262 !, 263 skip_blank_lines. 264skip_blank_lines --> 265 []. 266 267csv_data([], _) --> 268 eos, 269 !. 270csv_data([Row|More], Options) --> 271 row(Row, Options), 272 !, 273 { debug(csv, 'Row: ~p', [Row]) }, 274 csv_data(More, Options). 275 276 277row(Row, Options) --> 278 fields(Fields, Options), 279 { csv_options_functor(Options, Functor), 280 Row =.. [Functor|Fields], 281 functor(Row, _, Arity), 282 check_arity(Options, Arity) 283 }. 284 285check_arity(Options, Arity) :- 286 csv_options_arity(Options, Arity), 287 !. 288check_arity(Options, _) :- 289 csv_options_match_arity(Options, false), 290 !. 291check_arity(Options, Arity) :- 292 csv_options_arity(Options, Expected), 293 domain_error(row_arity(Expected), Arity). 294 295fields([F|T], Options) --> 296 field(F, Options), 297 ( separator(Options) 298 -> fields(T, Options) 299 ; end_of_record 300 -> { T = [] } 301 ). 302 303field(Value, Options) --> 304 "\"", 305 { csv_options_ignore_quotes(Options, false) }, 306 !, 307 string_codes(Codes), 308 { make_value(Codes, Value, Options) }. 309field(Value, Options) --> 310 { csv_options_strip(Options, true) }, 311 !, 312 stripped_field(Value, Options). 313field(Value, Options) --> 314 { csv_options_separator(Options, Sep) }, 315 field_codes(Codes, Sep), 316 { make_value(Codes, Value, Options) }. 317 318 319stripped_field(Value, Options) --> 320 ws, 321 ( "\"", 322 { csv_options_strip(Options, false) } 323 -> string_codes(Codes), 324 ws 325 ; { csv_options_separator(Options, Sep) }, 326 field_codes(Codes0, Sep), 327 { strip_trailing_ws(Codes0, Codes) } 328 ), 329 { make_value(Codes, Value, Options) }. 330 331ws --> " ", !, ws. 332ws --> "\t", !, ws. 333ws --> "". 334 335strip_trailing_ws(List, Stripped) :- 336 append(Stripped, WS, List), 337 all_ws(WS). 338 339all_ws([]). 340all_ws([32|T]) :- all_ws(T). 341all_ws([9|T]) :- all_ws(T).
349string_codes(List) --> 350 [H], 351 ( { H == 0'" } 352 -> ( "\"" 353 -> { List = [H|T] }, 354 string_codes(T) 355 ; { List = [] } 356 ) 357 ; { List = [H|T] }, 358 string_codes(T) 359 ). 360 361field_codes([], Sep), [Sep] --> [Sep], !. 362field_codes([], _), "\n" --> "\r\n", !. 363field_codes([], _), "\n" --> "\n", !. 364field_codes([], _), "\n" --> "\r", !. 365field_codes([H|T], Sep) --> [H], !, field_codes(T, Sep). 366field_codes([], _) --> []. % unterminated last record
373make_value(Codes, Value, Options) :- 374 csv_options_convert(Options, Convert), 375 csv_options_case(Options, Case), 376 make_value(Convert, Case, Codes, Value). 377 378make_value(true, preserve, Codes, Value) :- 379 !, 380 name(Value, Codes). 381make_value(true, Case, Codes, Value) :- 382 !, 383 ( number_string(Value, Codes) 384 -> true 385 ; make_value(false, Case, Codes, Value) 386 ). 387make_value(false, preserve, Codes, Value) :- 388 !, 389 atom_codes(Value, Codes). 390make_value(false, down, Codes, Value) :- 391 !, 392 string_codes(String, Codes), 393 downcase_atom(String, Value). 394make_value(false, up, Codes, Value) :- 395 string_codes(String, Codes), 396 upcase_atom(String, Value). 397 398separator(Options) --> 399 { csv_options_separator(Options, Sep) }, 400 [Sep]. 401 402end_of_record --> "\n". % Unix files 403end_of_record --> "\r\n". % DOS files 404end_of_record --> "\r". % MacOS files 405end_of_record --> eos. % unterminated last record
In addition to the options of csv_read_file/3, this predicate processes the option:
424csv_read_file_row(File, Row, Options) :- 425 default_separator(File, Options, Options1), 426 make_csv_options(Options1, RecordOptions, Options2), 427 select_option(line(Line), Options2, RestOptions, _), 428 setup_call_cleanup( 429 open(File, read, Stream, RestOptions), 430 csv_read_stream_row(Stream, Row, Line, RecordOptions), 431 close(Stream)). 432 433csv_read_stream_row(Stream, Row, Line, Options) :- 434 between(1, infinite, Line), 435 ( csv_read_row(Stream, Row0, Options), 436 Row0 \== end_of_file 437 -> Row = Row0 438 ; !, 439 fail 440 ).
end_of_file
upon reaching the
end of the input.450csv_read_row(Stream, Row, _Record) :- 451 at_end_of_stream(Stream), 452 !, 453 Row = end_of_file. 454csv_read_row(Stream, Row, Record) :- 455 read_lines_to_codes(Stream, Codes, Record, even), 456 phrase(row(Row0, Record), Codes), 457 !, 458 Row = Row0. 459 460read_lines_to_codes(Stream, Codes, Options, QuoteQuantity) :- 461 read_line_to_codes(Stream, Codes0), 462 Codes0 \== end_of_file, 463 ( ( csv_options_ignore_quotes(Options, true) 464 ; check_quotes(Codes0, QuoteQuantity, even) 465 ) 466 -> Codes = Codes0 467 ; append(Codes0, [0'\n|Tail], Codes), 468 read_lines_to_codes(Stream, Tail, Options, odd) 469 ). 470 471check_quotes([], QuoteQuantity, QuoteQuantity) :- 472 !. 473check_quotes([0'"|T], odd, Result) :- 474 !, 475 check_quotes(T, even, Result). 476check_quotes([0'"|T], even, Result) :- 477 !, 478 check_quotes(T, odd, Result). 479check_quotes([_|T], QuoteQuantity, Result) :- 480 check_quotes(T, QuoteQuantity, Result).
490csv_options(Compiled, Options) :- 491 make_csv_options(Options, Compiled, _Ignored). 492 493 494 /******************************* 495 * OUTPUT * 496 *******************************/
\t
for
.tsv
files and ,
otherwise.506csv_write_file(File, Data) :- 507 csv_write_file(File, Data, []). 508 509csv_write_file(File, Data, Options) :- 510 must_be(list, Data), 511 default_separator(File, Options, Options1), 512 make_csv_options(Options1, OptionsRecord, RestOptions), 513 setup_call_cleanup( 514 open(File, write, Out, RestOptions), 515 maplist(csv_write_row(Out, OptionsRecord), Data), 516 close(Out)). 517 518csv_write_row(Out, OptionsRecord, Row) :- 519 phrase(emit_row(Row, OptionsRecord), String), 520 format(Out, '~s', [String]). 521 522emit_csv([], _) --> []. 523emit_csv([H|T], Options) --> 524 emit_row(H, Options), 525 emit_csv(T, Options). 526 527emit_row(Row, Options) --> 528 { Row =.. [_|Fields] }, 529 emit_fields(Fields, Options), 530 "\r\n". % RFC 4180 demands \r\n 531 532emit_fields([], _) --> 533 "". 534emit_fields([H|T], Options) --> 535 emit_field(H, Options), 536 ( { T == [] } 537 -> [] 538 ; { csv_options_separator(Options, Sep) }, 539 [Sep], 540 emit_fields(T, Options) 541 ). 542 543emit_field(H, Options) --> 544 { ( atom(H) 545 -> atom_codes(H, Codes) 546 ; string(H) 547 -> string_codes(H, Codes) 548 ) 549 }, 550 !, 551 ( { needs_quotes(H, Options) } 552 -> "\"", emit_string(Codes), "\"" 553 ; emit_codes(Codes) 554 ). 555emit_field([], _) --> 556 !, 557 { atom_codes('[]', Codes) }, 558 emit_codes(Codes). 559emit_field(H, _) --> 560 { number_codes(H,Codes) }, 561 emit_codes(Codes). 562 563needs_quotes(Atom, _) :- 564 sub_atom(Atom, _, _, _, '"'), 565 !. 566needs_quotes(Atom, _) :- 567 sub_atom(Atom, _, _, _, '\n'), 568 !. 569needs_quotes(Atom, _) :- 570 sub_atom(Atom, _, _, _, '\r'), 571 !. 572needs_quotes(Atom, Options) :- 573 csv_options_separator(Options, Sep), 574 char_code(Char, Sep), 575 sub_atom(Atom, _, _, _, Char), 576 !. 577 578emit_string([]) --> "". 579emit_string([0'"|T]) --> !, "\"\"", emit_string(T). 580emit_string([H|T]) --> [H], emit_string(T). 581 582emit_codes([]) --> "". 583emit_codes([0'"|T]) --> !, "\"\"", emit_codes(T). 584emit_codes([H|T]) --> [H], emit_codes(T).
save_data(File) :- setup_call_cleanup( open(File, write, Out), forall(data(C1,C2,C3), csv_write_stream(Out, [row(C1,C2,C3)], [])), close(Out)).
603csv_write_stream(Stream, Data, Options) :-
604 must_be(list, Data),
605 make_csv_options(Options, OptionsRecord, _),
606 maplist(csv_write_row(Stream, OptionsRecord), Data)
Process CSV (Comma-Separated Values) data
This library parses and generates CSV data. CSV data is represented in Prolog as a list of rows. Each row is a compound term, where all rows have the same name and arity.