36
37:- module(csv,
38 [ csv//1, 39 csv//2, 40
41 csv_read_file/2, 42 csv_read_file/3, 43 csv_read_stream/3, 44
45 csv_read_file_row/3, 46 csv_read_row/3, 47 csv_options/2, 48
49 csv_write_file/2, 50 csv_write_file/3, 51 csv_write_stream/3 52 ]). 53:- use_module(library(record),[(record)/1, op(_,_,record)]). 54
55:- autoload(library(apply),[maplist/2]). 56:- use_module(library(debug),[debug/3]). 57:- autoload(library(error),[must_be/2,domain_error/2]). 58:- autoload(library(lists),[append/3]). 59:- autoload(library(option),[option/2,select_option/4]). 60:- autoload(library(pure_input),
61 [phrase_from_file/3,phrase_from_stream/2]). 62:- autoload(library(readutil),[read_line_to_codes/2]). 63:- autoload(library(dcg/basics),[string//1,eos//0]). 64
65
78
79:- predicate_options(csv//2, 2,
80 [ separator(code), 81 strip(boolean),
82 ignore_quotes(boolean),
83 convert(boolean),
84 case(oneof([down,preserve,up])),
85 functor(atom),
86 arity(-nonneg), 87 match_arity(boolean)
88 ]). 89:- predicate_options(csv_read_file/3, 3,
90 [ pass_to(csv//2, 2),
91 pass_to(phrase_from_file/3, 3)
92 ]). 93:- predicate_options(csv_read_file_row/3, 3,
94 [ line(-integer),
95 pass_to(csv//2, 2),
96 pass_to(open/4, 4)
97 ]). 98:- predicate_options(csv_write_file/3, 3,
99 [ pass_to(csv//2, 2),
100 pass_to(open/4, 4)
101 ]). 102:- predicate_options(csv_write_stream/3, 3,
103 [ pass_to(csv//2, 2)
104 ]). 105
106
107:- record
108 csv_options(separator:integer=0',,
109 strip:boolean=false,
110 ignore_quotes:boolean=false,
111 convert:boolean=true,
112 case:oneof([down,preserve,up])=preserve,
113 functor:atom=row,
114 arity:integer,
115 match_arity:boolean=true,
116 skip_header:atom). 117
118
138
139
140csv_read_file(File, Rows) :-
141 csv_read_file(File, Rows, []).
142
143csv_read_file(File, Rows, Options) :-
144 default_separator(File, Options, Options1),
145 make_csv_options(Options1, Record, RestOptions),
146 phrase_from_file(csv_roptions(Rows, Record), File, RestOptions).
147
148
149default_separator(File, Options0, Options) :-
150 ( option(separator(_), Options0)
151 -> Options = Options0
152 ; file_name_extension(_, Ext0, File),
153 downcase_atom(Ext0, Ext),
154 ext_separator(Ext, Sep)
155 -> Options = [separator(Sep)|Options0]
156 ; Options = Options0
157 ).
158
159ext_separator(csv, 0',).
160ext_separator(tsv, 0'\t).
161
162
166
167csv_read_stream(Stream, Rows, Options) :-
168 make_csv_options(Options, Record, _),
169 phrase_from_stream(csv_roptions(Rows, Record), Stream).
170
171
220
221csv(Rows) -->
222 csv(Rows, []).
223
224csv(Rows, Options) -->
225 { make_csv_options(Options, Record, _) },
226 csv_roptions(Rows, Record).
227
228csv_roptions(Rows, Record) -->
229 { ground(Rows) },
230 !,
231 emit_csv(Rows, Record).
232csv_roptions(Rows, Record) -->
233 skip_header(Record),
234 csv_data(Rows, Record).
235
(Options) -->
237 { csv_options_skip_header(Options, CommentStart),
238 nonvar(CommentStart),
239 atom_codes(CommentStart, Codes)
240 },
241 !,
242 skip_header_lines(Codes),
243 skip_blank_lines.
244skip_header(_) -->
245 [].
246
(CommentStart) -->
248 string(CommentStart),
249 !,
250 ( string(_Comment),
251 end_of_record
252 -> skip_header_lines(CommentStart)
253 ).
254skip_header_lines(_) -->
255 [].
256
257skip_blank_lines -->
258 eos,
259 !.
260skip_blank_lines -->
261 end_of_record,
262 !,
263 skip_blank_lines.
264skip_blank_lines -->
265 [].
266
267csv_data([], _) -->
268 eos,
269 !.
270csv_data([Row|More], Options) -->
271 row(Row, Options),
272 !,
273 { debug(csv, 'Row: ~p', [Row]) },
274 csv_data(More, Options).
275
276
277row(Row, Options) -->
278 fields(Fields, Options),
279 { csv_options_functor(Options, Functor),
280 Row =.. [Functor|Fields],
281 functor(Row, _, Arity),
282 check_arity(Options, Arity)
283 }.
284
285check_arity(Options, Arity) :-
286 csv_options_arity(Options, Arity),
287 !.
288check_arity(Options, _) :-
289 csv_options_match_arity(Options, false),
290 !.
291check_arity(Options, Arity) :-
292 csv_options_arity(Options, Expected),
293 domain_error(row_arity(Expected), Arity).
294
295fields([F|T], Options) -->
296 field(F, Options),
297 ( separator(Options)
298 -> fields(T, Options)
299 ; end_of_record
300 -> { T = [] }
301 ).
302
303field(Value, Options) -->
304 "\"",
305 { csv_options_ignore_quotes(Options, false) },
306 !,
307 string_codes(Codes),
308 { make_value(Codes, Value, Options) }.
309field(Value, Options) -->
310 { csv_options_strip(Options, true) },
311 !,
312 stripped_field(Value, Options).
313field(Value, Options) -->
314 { csv_options_separator(Options, Sep) },
315 field_codes(Codes, Sep),
316 { make_value(Codes, Value, Options) }.
317
318
319stripped_field(Value, Options) -->
320 ws,
321 ( "\"",
322 { csv_options_strip(Options, false) }
323 -> string_codes(Codes),
324 ws
325 ; { csv_options_separator(Options, Sep) },
326 field_codes(Codes0, Sep),
327 { strip_trailing_ws(Codes0, Codes) }
328 ),
329 { make_value(Codes, Value, Options) }.
330
331ws --> " ", !, ws.
332ws --> "\t", !, ws.
333ws --> "".
334
335strip_trailing_ws(List, Stripped) :-
336 append(Stripped, WS, List),
337 all_ws(WS).
338
339all_ws([]).
340all_ws([32|T]) :- all_ws(T).
341all_ws([9|T]) :- all_ws(T).
342
343
348
349string_codes(List) -->
350 [H],
351 ( { H == 0'" }
352 -> ( "\""
353 -> { List = [H|T] },
354 string_codes(T)
355 ; { List = [] }
356 )
357 ; { List = [H|T] },
358 string_codes(T)
359 ).
360
361field_codes([], Sep), [Sep] --> [Sep], !.
362field_codes([], _), "\n" --> "\r\n", !.
363field_codes([], _), "\n" --> "\n", !.
364field_codes([], _), "\n" --> "\r", !.
365field_codes([H|T], Sep) --> [H], !, field_codes(T, Sep).
366field_codes([], _) --> []. 367
372
373make_value(Codes, Value, Options) :-
374 csv_options_convert(Options, Convert),
375 csv_options_case(Options, Case),
376 make_value(Convert, Case, Codes, Value).
377
378make_value(true, preserve, Codes, Value) :-
379 !,
380 name(Value, Codes).
381make_value(true, Case, Codes, Value) :-
382 !,
383 ( number_string(Value, Codes)
384 -> true
385 ; make_value(false, Case, Codes, Value)
386 ).
387make_value(false, preserve, Codes, Value) :-
388 !,
389 atom_codes(Value, Codes).
390make_value(false, down, Codes, Value) :-
391 !,
392 string_codes(String, Codes),
393 downcase_atom(String, Value).
394make_value(false, up, Codes, Value) :-
395 string_codes(String, Codes),
396 upcase_atom(String, Value).
397
398separator(Options) -->
399 { csv_options_separator(Options, Sep) },
400 [Sep].
401
402end_of_record --> "\n". 403end_of_record --> "\r\n". 404end_of_record --> "\r". 405end_of_record --> eos. 406
407
423
424csv_read_file_row(File, Row, Options) :-
425 default_separator(File, Options, Options1),
426 make_csv_options(Options1, RecordOptions, Options2),
427 select_option(line(Line), Options2, RestOptions, _),
428 setup_call_cleanup(
429 open(File, read, Stream, RestOptions),
430 csv_read_stream_row(Stream, Row, Line, RecordOptions),
431 close(Stream)).
432
433csv_read_stream_row(Stream, Row, Line, Options) :-
434 between(1, infinite, Line),
435 ( csv_read_row(Stream, Row0, Options),
436 Row0 \== end_of_file
437 -> Row = Row0
438 ; !,
439 fail
440 ).
441
442
449
450csv_read_row(Stream, Row, _Record) :-
451 at_end_of_stream(Stream),
452 !,
453 Row = end_of_file.
454csv_read_row(Stream, Row, Record) :-
455 read_lines_to_codes(Stream, Codes, Record, even),
456 phrase(row(Row0, Record), Codes),
457 !,
458 Row = Row0.
459
460read_lines_to_codes(Stream, Codes, Options, QuoteQuantity) :-
461 read_line_to_codes(Stream, Codes0),
462 Codes0 \== end_of_file,
463 ( ( csv_options_ignore_quotes(Options, true)
464 ; check_quotes(Codes0, QuoteQuantity, even)
465 )
466 -> Codes = Codes0
467 ; append(Codes0, [0'\n|Tail], Codes),
468 read_lines_to_codes(Stream, Tail, Options, odd)
469 ).
470
471check_quotes([], QuoteQuantity, QuoteQuantity) :-
472 !.
473check_quotes([0'"|T], odd, Result) :-
474 !,
475 check_quotes(T, even, Result).
476check_quotes([0'"|T], even, Result) :-
477 !,
478 check_quotes(T, odd, Result).
479check_quotes([_|T], QuoteQuantity, Result) :-
480 check_quotes(T, QuoteQuantity, Result).
481
482
489
490csv_options(Compiled, Options) :-
491 make_csv_options(Options, Compiled, _Ignored).
492
493
494 497
505
506csv_write_file(File, Data) :-
507 csv_write_file(File, Data, []).
508
509csv_write_file(File, Data, Options) :-
510 must_be(list, Data),
511 default_separator(File, Options, Options1),
512 make_csv_options(Options1, OptionsRecord, RestOptions),
513 setup_call_cleanup(
514 open(File, write, Out, RestOptions),
515 maplist(csv_write_row(Out, OptionsRecord), Data),
516 close(Out)).
517
518csv_write_row(Out, OptionsRecord, Row) :-
519 phrase(emit_row(Row, OptionsRecord), String),
520 format(Out, '~s', [String]).
521
522emit_csv([], _) --> [].
523emit_csv([H|T], Options) -->
524 emit_row(H, Options),
525 emit_csv(T, Options).
526
527emit_row(Row, Options) -->
528 { Row =.. [_|Fields] },
529 emit_fields(Fields, Options),
530 "\r\n". 531
532emit_fields([], _) -->
533 "".
534emit_fields([H|T], Options) -->
535 emit_field(H, Options),
536 ( { T == [] }
537 -> []
538 ; { csv_options_separator(Options, Sep) },
539 [Sep],
540 emit_fields(T, Options)
541 ).
542
543emit_field(H, Options) -->
544 { ( atom(H)
545 -> atom_codes(H, Codes)
546 ; string(H)
547 -> string_codes(H, Codes)
548 )
549 },
550 !,
551 ( { needs_quotes(H, Options) }
552 -> "\"", emit_string(Codes), "\""
553 ; emit_codes(Codes)
554 ).
555emit_field([], _) -->
556 !,
557 { atom_codes('[]', Codes) },
558 emit_codes(Codes).
559emit_field(H, _) -->
560 { number_codes(H,Codes) },
561 emit_codes(Codes).
562
563needs_quotes(Atom, _) :-
564 sub_atom(Atom, _, _, _, '"'),
565 !.
566needs_quotes(Atom, _) :-
567 sub_atom(Atom, _, _, _, '\n'),
568 !.
569needs_quotes(Atom, _) :-
570 sub_atom(Atom, _, _, _, '\r'),
571 !.
572needs_quotes(Atom, Options) :-
573 csv_options_separator(Options, Sep),
574 char_code(Char, Sep),
575 sub_atom(Atom, _, _, _, Char),
576 !.
577
578emit_string([]) --> "".
579emit_string([0'"|T]) --> !, "\"\"", emit_string(T).
580emit_string([H|T]) --> [H], emit_string(T).
581
582emit_codes([]) --> "".
583emit_codes([0'"|T]) --> !, "\"\"", emit_codes(T).
584emit_codes([H|T]) --> [H], emit_codes(T).
585
586
602
603csv_write_stream(Stream, Data, Options) :-
604 must_be(list, Data),
605 make_csv_options(Options, OptionsRecord, _),
606 maplist(csv_write_row(Stream, OptionsRecord), Data)