summaryrefslogtreecommitdiff
path: root/header-parsers.scm
blob: ac41a093e0171307440f3c81def0f39c5f1b3b87 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
;;;; Header value accessor procedures

;; Get the raw contents of a header
(define (header-contents name headers #!optional default)
  (cond ((assq name (headers-v headers)) => cdr)
        (else default)))

;; Get all values of a header
(define (header-values header-name headers)
  (map (cut vector-ref <> 0) (header-contents header-name headers '())))

;; Get the value of a header, assuming it has only one value
(define (header-value header-name headers #!optional default)
  (let ((contents (header-contents header-name headers '())))
    (if (null? contents)
        default
        (get-value (car contents)))))

;; Get the parameters of a header, assuming it has only one value
(define (header-params header-name headers)
  (let ((contents (header-contents header-name headers '())))
    (if (null? contents)
        '()
        (get-params (car contents)))))

;; Get a specific parameter of a header, assuming it has only one value
(define (header-param param-name header-name headers #!optional default)
  (let ((params (header-params header-name headers)))
    (cond ((not (pair? params)) default)
          ((assq param-name params) => cdr)
          (else default))))

;; Get the value from one header entry
(define get-value (cut vector-ref <> 0))
;; Fast local version
(define-compiler-syntax get-value
  (syntax-rules ()
    ((_ header-entry)
     (vector-ref header-entry 0))))

;; Get all params from one header entry
(define get-params (cut vector-ref <> 1))
;; Fast local version
(define-compiler-syntax get-params
  (syntax-rules ()
    ((_ header-entry)
     (vector-ref header-entry 1))))

;; Get one specific parameter from one header entry
(define (get-param param contents #!optional (default #f))
  (let ((params (vector-ref contents 1)))
    (cond ((not (pair? params)) default)
          ((assq param params) => cdr)
          (else default))))

(define (get-no-newline-value header)
  (let ((v (get-value header)))
    (when (string-index v #\newline)
      ;; There's duplication with quote-string error here...
      (signal-http-condition
       'raw-value
       (conc "Unencoded newline in header contents! "
             "Please encode the newline in a way "
             "appropriate for this header")
       (list string) 'unencoded-header 'value string))
    v))

;;;; Header parsers

(define (single subparser #!optional (parameter-subparsers '()))
  (lambda (contents)
    (list ((with-params subparser parameter-subparsers) contents))))

(define (multiple subparser #!optional (parameter-subparsers '()))
  (lambda (contents)
    (map (with-params subparser parameter-subparsers)
         (split-multi-header contents))))

;; RFC 2616 4.2 says multi-headers are a comma-separated list of tokens
(define (split-multi-header value)
  (let ((len (string-length value)))
    (let loop ((result '())
               (start-pos 0)   ; Where the current header value starts
               (search-pos 0)) ; Where the searching starts
      (or (and-let* (((< search-pos len))
                     (pos (string-index value (char-set #\, #\") search-pos)))
            (if (char=? #\, (string-ref value pos))
                (loop (cons (string-copy value start-pos pos) result) (add1 pos) (add1 pos))
                (let ((end-pos (escaped-string-end value (add1 pos) (char-set #\"))))
                  (loop result start-pos (add1 end-pos)))))
          (reverse (cons (string-drop value start-pos) result))))))

;; Remove all escape characters from the input, recognising "escaped escapes"
(define (unescape str)
  (let ((last-char (sub1 (string-length str))))
    (let loop ((result "")
               (start-pos 0))
      (or (and-let* ((pos (string-index str #\\ start-pos)))
            (if (= pos last-char)
                (string-append result (string-copy str start-pos))
                (loop (string-append result (string-copy str start-pos pos)
                                     (string-copy str (add1 pos) (+ pos 2)))
                      (+ pos 2))))
          (string-append result (string-copy str start-pos))))))

;; Find a matching endpoint for a token, ignoring escaped copies of the token
(define (escaped-string-end str start stop-char-set)
  (let ((len (string-length str)))
    (let loop ((start start))
      (let ((pos (string-index str (char-set-adjoin stop-char-set #\\) start)))
        (if pos
            (if (char=? #\\ (string-ref str pos))
                ;; Escaped matching closing symbol
                (if (= len (add1 pos))
                    pos
                    (loop (+ pos 2)))
                ;; Reached the matching closing symbol
                pos)
            len))))) ; No matching closing symbol?  "Insert" it at the end

;; Try to parse a token, starting at the provided offset, up until the
;; char-set where we should stop.  Returns two values: the token or #f if
;; there is no token left, and the position on which the token ends.
(define (parse-token value start-pos
                     #!optional
                     (stop-char-set (char-set-adjoin char-set:whitespace #\,)))
  (if (>= start-pos (string-length value))
      (values #f start-pos)
      (let ((stop (char-set-adjoin stop-char-set #\")))
        (let ((pos (string-index value stop start-pos)))
          (if pos
              (if (not (char=? #\" (string-ref value pos)))
                  (values (string-trim-both
                           value char-set:whitespace start-pos pos)
                          pos) ; Stop-char found, but no quoting
                  (let ((end-pos (escaped-string-end value (add1 pos)
                                                     (char-set #\"))))
                    ;; Found the double quote? Recurse on the remainder
                    (receive (rest final-pos)
                      (parse-token value (add1 end-pos) stop-char-set)
                      (values (string-append
                               (string-trim-both
                                value char-set:whitespace start-pos pos)
                               (if (= pos end-pos)
                                   (unescape (string-copy value (add1 pos)))
                                   (unescape (string-copy value (add1 pos) end-pos)))
                               (or rest ""))
                              final-pos))))
              ;; Nothing found?  Then the remainder of the string is the token
              (values (string-trim-both
                       value char-set:whitespace start-pos)
                      (string-length value)))))))

;; Comments are a bit like tokens, except they can be nested
(define (parse-comment value start-pos)
  (let* ((len (string-length value))
         (nospace-pos (and (< start-pos len)
                           (string-skip value char-set:whitespace start-pos))))
    (if (and nospace-pos (char=? (string-ref value nospace-pos) #\())
        (let loop ((result "")
                   (start-pos (add1 nospace-pos)))
          (if (>= start-pos len)
              (values result len)
              (let ((pos (string-index value (char-set #\" #\( #\)) start-pos)))
                (if pos
                    (cond ((char=? #\( (string-ref value pos)) ; Nested comment
                           (receive (nested end-pos)
                               (parse-comment value pos)
                             (loop (sprintf "~A~A(~A)"
                                     result
                                     (string-copy value start-pos pos)
                                     nested)
                                   (add1 end-pos))))
                          ((char=? #\" (string-ref value pos))
                           (let lp ((end (add1 pos))
                                    (c (string-ref value (add1 pos)))
                                    (res '()))
                             (cond ((char=? #\" c)
                                    (loop (string-append
                                           result
                                           (reverse-list->string res))
                                          (add1 end)))
                                   ((char=? #\\ c)
                                    (lp (+ end 2)
                                        (string-ref value (+ end 2))
                                        (cons (string-ref value (add1 end))
                                              res)))
                                   (else
                                    (lp (add1 end)
                                        (string-ref value (add1 end))
                                        (cons c res))))))
                          ;; Else it's a )
                          (else (values (conc result (string-copy value start-pos pos)) (add1 pos))))
                    ;; Nothing found?  Then the remainder of the string is the token
                    (values (conc result (string-copy value start-pos))
                            (string-length value))))))
        ;; No (? Then fail to match the 'comment'
        (values #f start-pos))))

(define (parse-params string start-pos param-subparsers #!optional (stop-set (char-set #\;)) (separator-or-stop-set (char-set #\; #\=)))
  (let loop ((start-pos start-pos)
             (params '()))
    (unless separator-or-stop-set
      (error "The parse-params API has changed a bit for performance reasons: if you pass the optional stop char set, you need to pass one more argument which is the same char-set extended with an equals-sign"))
    (receive (attribute-name pos)
        (parse-token string start-pos separator-or-stop-set)
      (if attribute-name
          (let ((attribute (http-name->symbol attribute-name)))
            (if (and (< pos (string-length string))
                     (char=? (string-ref string pos) #\=))
                (receive (value pos)
                    (parse-token string (add1 pos) stop-set)
                  ;; In case of no value ("foo="), use the empty string as value
                  (let ((value ((alist-ref attribute param-subparsers
                                           eq? identity)
                                (or value ""))))
                    (loop (add1 pos) (cons (cons attribute value) params))))
                ;; Missing value is interpreted as "present",
                ;; so #t. If not present, it's #f when looking it up
                (loop (add1 pos) (cons (cons attribute #t) params))))
          (values (reverse params) pos)))))

(define (parse-value+params string start-pos value-subparser param-subparsers)
  (receive (value pos)
    (parse-token string start-pos (char-set #\;))
    (if (not value)
        (values #f pos) ;; XXX this is wrong and not expected by the caller!
        (receive (params pos)
          (parse-params string (add1 pos) param-subparsers)
          (values (vector (value-subparser value) params) pos)))))

(define (with-params value-subparser parameter-subparsers)
  (lambda (entry)
    (receive (type+params pos)
      (parse-value+params entry 0 value-subparser parameter-subparsers)
      type+params)))

(define (make-key/value-subparser key/value-subparsers)
  (lambda (k/v)
    ;; We're abusing parse-params here to read value
    ;; instead of params.  This is weird, but it works :)
    (receive (key+value pos)
      (parse-params k/v 0 key/value-subparsers)
      (vector (car key+value) '())))) ;; There's only one key/value pair

(foreign-declare "#include <locale.h>")

(define-foreign-variable LC_TIME int)

(define setlocale (foreign-lambda c-string setlocale int c-string))

(define-syntax let-locale
  (syntax-rules ()
    ((let-locale ((cat val) ...) body ...)
     (let ((backup '()))
       (dynamic-wind
           (lambda () (set! backup `((cat . ,(setlocale cat val)) ...)))
           (lambda () body ...)
           (lambda () (setlocale cat (alist-ref 'cat backup)) ...))))))

(define (make-date->string-parser spec)
  (let ((date-regex
         (irregex
          (string-translate*
           spec
           '((" "  . " +")   ; Any number of spaces is very permissive
             ("%a" . "(Sun|Mon|Tue|Wed|Thu|Fri|Sat)")
             ("%A" . "(Sunday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday)")
             ("%d" . "[0-9]{1,2}")
             ("%b" . "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)")
             ("%y" . "[0-9]{1,2}")
             ("%Y" . "[0-9]{4}")
             ("%X" . "[0-9]{2}:[0-9]{2}:[0-9]{2}"))))))
    (lambda (str)
      (and (irregex-match date-regex str) ; Or irregex-search?
           (let-locale ((LC_TIME "POSIX"))
                       (string->time str spec))))))

(define rfc1123-string->time (make-date->string-parser "%a, %d %b %Y %X GMT"))

;; This is a little more relaxed than strict rfc850 (it allows abbreviated
;; weekdays) - for example Google Web Server outputs cookies in this format.
(define rfc850-string->time
  (disjoin (make-date->string-parser "%A, %d-%b-%y %X GMT")
           (make-date->string-parser "%a, %d-%b-%Y %X GMT")))

(define asctime-string->time (make-date->string-parser "%a %b %d %X %Y"))

(define http-date-string->time
  (disjoin rfc1123-string->time rfc850-string->time asctime-string->time))

;; RFC 1123 updates RFC 822's datetime spec
(define (rfc1123-subparser str)
  (or (rfc1123-string->time str)
      (signal-http-condition
       'rfc1123-subparser
       "Error parsing RFC 1123 date/time" (list str)
       'rfc1123-subparser 'value str)))

(define (rfc850-subparser str)
  (or (rfc850-string->time str)
      (signal-http-condition
       'rfc850-subparser
       "Error parsing RFC850 date/time" (list str)
       'rfc850-subparser 'value str)))

(define (asctime-subparser str)
  (or (asctime-string->time str)
      (signal-http-condition
       'asctime-subparser
       "Error parsing asctime() date/time" (list str)
       'asctime-subparser 'value str)))

;; rfc1123-date | rfc850-date | asctime-date
(define (http-date-subparser str)
  (or (http-date-string->time str)
      (signal-http-condition
       'http-date-subparser
       "Error parsing date/time" (list str)
       'http-date-subparser 'value str)))

;; Change the accuracy of a number to 'digits' number of digits to the
;; right of the decimal point.
(define (chop-number num digits)
  (let ((factor (expt 10 digits)))
    (/ (round (* num factor)) factor)))

(define (quality-subparser str)
  ;; Anything that's not a number is seen as if the value is missing, hence 1.0
  (let* ((num       (or (string->number str) 1.0))
         (imprecise (chop-number num 3)))
    (max 0.0 (min 1.0 imprecise))))

;; Just put all header strings in a list, so we can pass it on.
;; Make no assumptions about the contents (only value, don't try to parse params)
;; This is different from (multiple (without-params generic-header-parser))
;; because this does not assume it can split up comma-separated values.
;; It also will ensure that the value is raw,
(define (unknown-header-parser contents)
  (list (vector contents 'raw)))

(define symbol-subparser
  (compose string->symbol string-trim-both))

(define symbol-subparser-ci
  (compose string->symbol string-trim-both string-downcase))

(define (natnum-subparser contents)
  (let ((num (string->number contents)))
    (if num (inexact->exact (max 0 (round num))) 0)))

(define (host/port-subparser contents)
  (let* ((idx (string-index-right contents #\:))
         (portnum (and idx (string->number
                            (substring/shared contents (add1 idx))))))
    (if (and idx portnum)
        (cons (substring/shared contents 0 idx)
              (inexact->exact (round portnum)))
        (cons contents #f))))

; base64 of 128 bit hex digest as per RFC1864 (eg, Content-md5)
(define base64-subparser base64-decode)

;; This is retarded.  The websocket spec (RFC6455) explicitly says the
;; product token must be compared case-insensitively.  RFC2817,
;; "upgrading to TLS within HTTP/1.1" doesn't mention anything about
;; case sensitivity, but defines an all-uppercase token.  This means
;; that there're going to be servers which accept only "TLS/1.0"
;; as-is.  This all just means we can't add the convenience of
;; downcasing and switching to a symbol, pushing the case-(in)sensitive
;; comparison down the the user level, causing more bugs :(
(define (product-subparser contents)
  (let* ((idx (string-index contents #\/))
         (version (and idx (substring/shared contents (add1 idx)))))
    (if (and idx version)
        (cons (substring/shared contents 0 idx) version)
        (cons contents #f))))

;; Try getting a submatch from an irregex match and parse it, or return #f
(define (try-parse-submatch match group-name parser)
  (and-let* ((value (irregex-match-substring match group-name)))
    (parser value)))

;; bytes=<start>-<end>
;; bytes=<start>-
;; bytes=-<end>
(define range-subparser
  (let ((range-regex
         (irregex '(seq "bytes="
                        (=> start (* digit)) "-" (=> end (* digit))))))
    (lambda (s)
      (and-let* ((m (irregex-match range-regex s)))
        (let ((start (try-parse-submatch m 'start string->number))
              (end (try-parse-submatch m 'end string->number)))
          (if (or start end)
            (list start end)
            #f))))))

;; bytes <start>-<end>/<total>
;; bytes <start>-<end>/*
;; bytes */<total>
(define content-range-subparser
  (let ((content-range-regex
         (irregex '(seq "bytes" (+ space)
                        (or (seq "*/" (=> total (+ digit)))
                            (seq (=> start (+ digit)) "-" (=> end (+ digit))
                                 "/" (or "*"
                                         (=> total (+ digit)))))))))
    (lambda (s)
      (and-let* ((m (irregex-match content-range-regex s)))
        (let ((start (try-parse-submatch m 'start string->number))
              (end (try-parse-submatch m 'end string->number))
              (total (try-parse-submatch m 'total string->number)))
          (list start end total))))))

;; Accept *just* a filename, not a full path (simply strips directories)
;; This matches the content-disposition recommendation in RFC2616, 19.5.1:
;; "The receiving user agent SHOULD NOT respect any directory path
;;  information present in the filename-parm parameter, which is the only
;;  parameter believed to apply to HTTP implementations at this time. The
;;  filename SHOULD be treated as a terminal component only."
;; This echoes RFC2183 (and RFC1806 which it supersedes), section 2.3:
;; "The receiving MUA SHOULD NOT respect any directory path information
;;  that may seem to be present in the filename parameter.  The filename
;;  should be treated as a terminal component only."
(define (filename-subparser fn)
  (let ((base-fn (pathname-strip-directory (string-trim-both fn))))
    (and (not (member base-fn '("" "." "..")))
         (not (string-index base-fn (char-set #\/ #\nul)))
         base-fn)))

;; [W/]<string>
;; This is a full parser, because it needs to be able to distinguish
;; between W/"foo" and "W/foo". If it's preprocessed by the tokenizer
;; both get "normalised" to the same thing: W/foo
;;
;; XXX It could be a good idea if the single/multiple token parsers
;; did not do anything to their contents.  If the consuming parsers
;; want tokens, they know how to have it. OTOH, it would mean much
;; more code for all the parsers as they need to tokenize more...
(define (etag-parser contents)
  (let ((contents (string-trim-both contents)))
    (list (vector
           (if (string-prefix? "W/" contents)
               `(weak . ,(parse-token contents 2 char-set:whitespace))
               `(strong . ,(parse-token contents 0 char-set:whitespace)))
           '()))))

;; Used for both if-match and if-none-match
;; This is either a wilcard ('*') or several entities
(define (if-match-parser contents)
  (let ((contents (string-trim-both contents)))
    (if (string=? "*" contents)
        (list (vector '* '()))
        (let loop ((pos 0)
                   (etags '()))
          (let ((weak (string-prefix? "W/" contents 0 2 pos)))
            (receive (etag newpos)
              (parse-token contents (+ pos (if weak 2 0)) char-set:whitespace)
              (let ((newpos (string-skip contents char-set:whitespace newpos))
                    (value (and etag (vector (cons (if weak 'weak 'strong)
                                                   etag) '()))))
                (if value
                    (if newpos
                        (loop newpos (cons value etags))
                        (reverse! (cons value etags)))
                    (reverse! etags)))))))))

;; ( <product>[/<version>] [<comment>] )+
;; This parser is a full parser because parse-token cannot handle
;; comments yet... (if a ; is in a comment, it breaks down)
(define software-parser
  (let ((char-set:space-or-paren (char-set-union (char-set #\()
                                                 char-set:whitespace))
        (char-set:slash-or-paren (char-set #\/ #\()))
   (lambda (contents)
     (let loop ((start-pos 0)
                (products '()))
       (let*-values (((product pos)
                      (parse-token contents start-pos
                                   char-set:slash-or-paren))
                     ((version pos2)
                      (parse-token contents pos ; (add1 pos)
                                   char-set:space-or-paren))
                     ((comment pos3) (parse-comment contents pos2))
                     ;; Ugh
                     ((real-version) (and version (not (string-null? version)) (string-trim version #\/))))
         (if product
             (loop pos3 (cons (list product real-version comment) products))
             (list (vector (reverse products) '()))))))))

;;;; MAJOR TODOs
;; RFC1123 mailbox parser - just strings for now
(define mailbox-subparser identity)

;; Either an entity-tag or a http-date
(define (if-range-parser contents)
  (let ((http-date ((with-params http-date-string->time '()) contents)))
    (if (get-value http-date)
        (list http-date)
        (etag-parser contents))))

;; Either delta-seconds or http-date
(define retry-after-subparser (disjoin http-date-subparser natnum-subparser))

;; Tricky - see 2616 14.45
;; We probably shouldn't try to do too much parsing here
(define via-parser (multiple identity))

;; Tricky - see 2616 14.46
(define warning-parser split-multi-header)
;;;; END MAJOR TODOs

(define (key/value-subparser str)
  (let ((idx (string-index str #\=)))
    (cons (string->symbol (string-take str idx)) (string-drop str (add1 idx)))))

;; The 'expires' header defined by the Netscape cookie spec contains
;; an embedded comma.  RFC 2109 cookies use Max-Age instead.
(define old-style-cookie?
  (let ((old-cookie-regex
         (irregex '(seq bos (+ (~ #\= #\")) "=" (* (~ #\;)) ";" ;
                        (* any) (w/nocase "expires") (* space) "="))))
   (lambda (cookie)
     (irregex-search old-cookie-regex cookie))))

(define (string->number-list str)
  (map string->number (string-split str ",")))

(define (relative-ref/path-only s)
  (and-let* ((ref (uri-reference s))
             ((not (uri-host ref)))
             ((null? (uri-query ref)))
             ((not (uri-fragment ref))))
    ref))

;; We're using http-date-subparser for 'expires' instead of rfc1123-subparser
;; (which would be the correct thing to do) because several well-known web
;; server software packages (tested: PHP and Rails) get it wrong.  So we
;; will go by the robustness principle and allow any kind of HTTP date.
(define set-cookie-parser
  (let ((param-subparsers `((expires . ,http-date-subparser)
                            (max-age . ,string->number)
                            (version . ,string->number)
                            (port    . ,string->number-list)
                            (path    . ,relative-ref/path-only)))
        (name/value-parser (lambda (str)
                             (let ((idx (string-index str #\=)))
                               (cons (string-take str idx)
                                     (string-drop str (add1 idx)))))))
    (lambda (contents)
      (if (old-style-cookie? contents)
          (list ((with-params name/value-parser param-subparsers) contents))
          (map (with-params name/value-parser param-subparsers)
               (split-multi-header contents))))))

(define cache-control-parser
  (let ((splitter (lambda (str) ;; Is this correct?
                    (map (compose string->symbol string-trim-both)
                         (string-split str ",")))))
    (lambda (contents)
      (map
       (make-key/value-subparser `((max-age . ,natnum-subparser)
                                   (s-maxage . ,natnum-subparser)
                                   (max-stale . ,natnum-subparser)
                                   (min-fresh . ,natnum-subparser)
                                   (private . ,splitter)
                                   (no-cache . ,splitter)))
       (split-multi-header contents)))))

(define (strict-transport-security-parser contents)
  ;; This is ridiculous; there are no parameters because everything
  ;; is a parameter (or everything is a value, depending on your point
  ;; of view).  The header has no main value.  For convenience and sanity
  ;; we just return an alist as a single value.
  (list (vector (parse-params contents 0 `((max-age . ,natnum-subparser))) '())))

(define (basic-auth-param-subparser contents pos)
   (receive (secret pos)
     (parse-token contents pos (char-set #\,))
     (let* ((decoded (base64-decode secret))
            (colon-idx (string-index decoded #\:))
            (user (string-take decoded colon-idx))
            (pass (string-drop decoded (add1 colon-idx))))
       (values `((username . ,user) (password . ,pass)) pos))))

(define (digest-auth-param-subparser contents pos)
  (parse-params contents pos
                `((nc . ,(lambda (n) (string->number n 16)))
                  (uri . ,uri-reference)
                  (qop . ,symbol-subparser)
                  (algorithm . ,symbol-subparser-ci))
                (char-set #\,) (char-set #\, #\=)))

(define authorization-param-subparsers
  (make-parameter `((basic . ,basic-auth-param-subparser)
                    (digest . ,digest-auth-param-subparser))))

(define (authorization-parser contents)
  (let loop ((pos 0)
             (result '()))
    (receive (authtype pos)
      (parse-token contents pos char-set:whitespace)
      (if (not authtype)
          (reverse result)
          (let ((authtype (http-name->symbol authtype))
                (default-subparser (lambda (contents pos)
                                     (parse-params contents pos '()))))
            (receive (params pos)
              ((alist-ref authtype (authorization-param-subparsers)
                          eq? default-subparser) contents (add1 pos))
              (loop (add1 pos)
                    (cons (vector authtype params) result))))))))

(define (authenticate-parser contents)
  (let loop ((pos 0)
             (result '()))
    (receive (authtype pos)
      (parse-token contents pos char-set:whitespace)
      (if (not authtype)
          (reverse result)
          (let ((authtype (http-name->symbol authtype)))
            (receive (params pos)
              (parse-params contents pos
                            `((domain . ,(lambda (s)
                                           (map uri-reference
                                                (string-split s))))
                              (qop . ,(lambda (s)
                                        (map (compose symbol-subparser
                                                      string-trim)
                                             (string-split s ","))))
                              (algorithm . ,symbol-subparser-ci)
                              (stale . ,(lambda (s)
                                          (string-ci=? (string-trim s)
                                                       "TRUE"))))
                            (char-set #\,) (char-set #\, #\=))
              (loop (add1 pos) (cons (vector authtype params) result))))))))

(define (pragma-parser contents)
  (map (make-key/value-subparser `()) (split-multi-header contents)))

(define (te-parser contents)
  (map (make-key/value-subparser `((q . ,quality-subparser)))
       (split-multi-header contents)))

;; Cookie headers are also braindead: there can be several cookies in one header,
;; separated by either commas or semicolons. The only way to distinguish a
;; new cookie from a parameter of the current cookie is the dollar in front
;; of all parameter names.
;; Also, there's a $Version attribute that prepends all cookies, which is
;; considered to apply to all cookies that follow.
(define (cookie-parser contents)
  ;; Local version of parse-params that stops when param without $ is seen
  (define (read-params start-pos)
    (let next-param ((start-pos start-pos)
                     (params '()))
      (receive (attribute-name pos)
        (parse-token contents start-pos (char-set #\; #\=))
        (if (or (not attribute-name) ;; Still reading params?
                (not (char=? (string-ref attribute-name 0) #\$)))
            (values (reverse! params) start-pos)
            (let ((attrib (http-name->symbol (string-drop attribute-name 1))))
              (if (and (< pos (string-length contents))
                       (char=? (string-ref contents pos) #\=))
                  (receive (value pos)
                    (parse-token contents (add1 pos) (char-set #\;))
                    (let ((value (case attrib
                                   ((version port) (string->number (or value "")))
                                   ((path) (relative-ref/path-only (or value "")))
                                   (else value))))
                      (next-param (add1 pos) (cons (cons attrib value) params))))
                  ;; Missing value is interpreted as "present", so #t
                  (next-param (add1 pos) (cons (cons attrib #t) params))))))))
  (receive (global-params pos)
    (read-params 0)
    (let loop ((cookies '())
               (pos pos))
      (let*-values (((name pos) (parse-token contents pos (char-set #\= #\;)))
                    ((val pos) (parse-token contents (add1 pos) (char-set #\;))))
        (if (or (not name) (not val))
            (reverse! cookies)
            (receive (local-params pos)
              (read-params (add1 pos))
              (loop (cons (vector (cons name val)
                                  (append! local-params global-params))
                          cookies) (add1 pos))))))))

;;; Unparsers ;;;
(define (unparse-params params unparsers #!key
                        (separator "; ") (grammar 'prefix)
                        (keyword-unparser ->string)
                        (value-unparser unparse-token))
  (let loop ((params params)
             (results '()))
    (if (null? params)
        (string-join (reverse results) separator grammar)
        (let* ((name (caar params))
               (val (cdar params))
               (str (case val
                      ;; #t means param is present (no value)
                      ((#t) (keyword-unparser name))
                      ;; #f means param is missing
                      ((#f) #f)
                      (else (let* ((unparser (assq name unparsers))
                                   (unparsed-val (if unparser
                                                     ((cdr unparser) val)
                                                     val)))
                              (string-append (keyword-unparser name) "="
                                             (value-unparser unparsed-val)))))))
          (loop (cdr params) (if str (cons str results) results))))))

(define must-be-quoted-chars (char-set-adjoin char-set:iso-control #\"))

(define (quote-string string)
  (reverse-list->string
   (cons #\"
         (string-fold (lambda (c result)
                        (cond ((char=? c #\newline)
                               (signal-http-condition
                                'quote-string
                                (conc "Unencoded newline in header contents! "
                                      "Please encode the newline in a way "
                                      "appropriate for this header")
                                (list string) 'unencoded-header 'value string))
                              ((char-set-contains? must-be-quoted-chars c)
                               (cons c (cons #\\ result)))
                              (else (cons c result))))
                      '(#\")
                      string))))

;; Unparse a value as token, converting it to a string first
(define unparse-token
  (let ((default-trigger-chars (char-set-union must-be-quoted-chars
                                               (char-set #\= #\; #\,)
                                               char-set:blank)))
    (lambda (token #!optional separator-chars)
      (let ((trigger-quoting-chars
             (if separator-chars
                 (char-set-union must-be-quoted-chars separator-chars char-set:blank)
                 default-trigger-chars))
            (token-string (->string token)))
        (if (string-any trigger-quoting-chars token-string)
            (quote-string token-string)
            token-string)))))

(define (unparse-etag etag)
  (string-append
   (if (eq? 'weak (car etag)) "W/" "")
   (quote-string (cdr etag)))) ;; Etags are _always_ quoted

(define (unparse-content-range content-range)
  ; False values are used to represent wildcards "*"
  (let ((content-range-value-unparser (lambda (value)
                                        (if value
                                          (number->string value)
                                          "*"))))
    (string-append "bytes "
                   ; If the start and end are wildcards, use only one
                   (if (not (or (car content-range) (cadr content-range)))
                     "*"
                     (string-append
                       (content-range-value-unparser (car content-range))
                       "-"
                       (content-range-value-unparser (cadr content-range))))
                   "/"
                   (content-range-value-unparser (caddr content-range)))))

;; There's no need to make a specific header unparser for every header type.
;; Usually, the Scheme value representing a header can unambiguously be
;; unparsed into a header just by checking its type.
(define (default-header-unparser header-contents)
  (let loop ((headers (reverse header-contents))
             (result '()))
    (if (null? headers)
        (list (string-join result ", "))
        (let* ((header (car headers))
               (contents (get-value header))
               (value (cond
                       ((pair? contents) ; alist?
                        (let ((attribute (symbol->http-name (car contents))))
                          (if (eq? (cdr contents) #t)
                              (unparse-token attribute)
                              (string-append attribute "="
                                             (unparse-token (cdr contents))))))
                       ((uri-reference? contents)
                        (unparse-token (uri->string contents) (char-set)))
                       (else (unparse-token contents)))))
          (loop (cdr headers)
                (cons
                 (string-append
                  value (unparse-params (get-params header) '()))
                 result))))))

;; RFC2616 19.5.1 says that the "filename" attribute _must_ be quoted.
;; It's a bit annoying that our API currently can't specify for particular
;; attributes that only those must be unparsed specially, so we quote _all_
;; attributes (which, strictly speaking, is always allowed for tokens) unless
;; otherwise specified by a hack (when the value is prefixed by RAW).
;; This may be dangerous or wrong, if a server doesn't accept quoted "name"
;; attributes, for example.  Not too likely since names can contain spaces etc.
(define (content-disposition-unparser header-contents)
  (let* ((type (get-value (car header-contents)))
         (RAW (list 'raw))
         (unparser (lambda (x) (if (and (pair? x) (eq? RAW (car x)))
                                   (cdr x)
                                   (quote-string (->string x))))))
    (list (conc (unparse-token type)
                (unparse-params (get-params (car header-contents))
                                `((filename . ,pathname-strip-directory)
                                  (size . ,(lambda (x) (cons RAW (number->string x))))
                                  (creation-date . ,rfc1123-time->string)
                                  (modification-date . ,rfc1123-time->string)
                                  (read-date . ,rfc1123-time->string))
                                value-unparser: unparser)))))

(define (content-range-unparser header-contents)
  (list (unparse-content-range (get-value (car header-contents)))))

(define (etag-unparser header-contents)
  (list (unparse-etag (get-value (car header-contents)))))

(define (if-match-unparser header-contents)
  (let loop ((headers (reverse header-contents))
             (result '()))
    (cond
     ((null? headers) (list (string-join result ", ")))
     ((eq? '* (get-value (car headers))) '("*")) ;; We're done. * means anything
     (else (loop (cdr headers)
                 (cons (unparse-etag (get-value (car headers))) result))))))

(define (host/port-unparser header-contents)
  (let ((contents (get-value (car header-contents))))
    ;; XXX: urlencode?
    (if (cdr contents)
        (list (conc (car contents) ":" (cdr contents)))
        (list (car contents)))))

;; Handled specially because cookie value is not an alist but a cons of strings
(define (set-cookie-unparser header-contents)
  (map (lambda (header)
         (let* ((unparsed-params
                 (map (lambda (p)
                        (if (eq? (cdr p) #t)
                            (unparse-token (symbol->http-name (car p)))
                            (string-append
                              (unparse-token (symbol->http-name (car p)))
                              "="
                              (cond
                               ((and (eq? (car p) 'port) (pair? (cdr p)))
                                (string-join
                                 (map number->string (cdr p)) ","))
                               ((and (eq? (car p) 'path) (cdr p))
                                (uri->string (cdr p)))
                               ((eq? (car p) 'expires)
                                (let-locale ((LC_TIME "POSIX"))
                                            (time->string (cdr p) "%A, %d-%b-%y %X GMT")))
                               (else (unparse-token (cdr p)))))))
                      ;; Remove #f values
                      (filter (lambda (p) (cdr p)) (get-params header))))
                (cookie (get-value header))
                (unparsed-cookie (string-append (car cookie) "="
                                                (unparse-token (cdr cookie)))))
           (string-join (cons unparsed-cookie unparsed-params) "; ")))
       header-contents))

(define (cookie-unparser header-contents)
  (let loop ((prefix "")
             (headers (reverse header-contents))
             (result '()))
    (if (null? headers)
        (list (conc prefix (string-join result "; ")))
        (let* ((version (get-param 'version (car headers) #f))
               (params (alist-delete 'version (get-params (car headers))))
               (unparsed-params
                (map (lambda (p)
                       (if (eq? (cdr p) #t)
                           (unparse-token (conc "$" (symbol->http-name (car p))))
                           (string-append
                            (unparse-token
                             (conc "$" (symbol->http-name (car p))))
                            "="
                            (cond
                             ((and (eq? (car p) 'port) (pair? (cdr p)))
                              (string-join
                               (map number->string (cdr p)) ","))
                             ((and (eq? (car p) 'path) (cdr p))
                              (uri->string (cdr p)))
                             (else (unparse-token (cdr p)))))))
                     ;; Remove #f values
                     (filter (lambda (p) (cdr p)) params)))
               (cookie (get-value (car headers)))
               (unparsed-cookie (string-append (car cookie) "="
                                               (unparse-token (cdr cookie)))))
          ;; Doing it like this means we can't unparse cookies of
          ;; mixed versions...
          (loop (if version (sprintf "$Version: ~A; " version) prefix)
                (cdr headers)
                (cons (string-join (cons unparsed-cookie unparsed-params) "; ")
                      result))))))

(define (software-unparser header-contents)
  (list
   (string-join
    (map (lambda (content)
           (conc (unparse-token (first content))
                 (if (second content)
                     (conc "/" (unparse-token (second content)))
                     "")
                 (if (third content)
                     (conc " (" (third content) ")")
                     "")))
         (get-value (car header-contents))))))

(define (product-unparser header-contents)
  (list
   (string-join
    (map (lambda (header)
           (let* ((default-unparser ; Not great, but better than nothing
                    (lambda (params) (unparse-params params '())))
                  (product+version (get-value header)))
             (conc (unparse-token (car product+version))
                   (if (cdr product+version)
                       (conc "/" (unparse-token (cdr product+version)))
                       ""))))
         header-contents)
    ", ")))

(define (via-unparser header-contents)
  (list (string-join (map get-value header-contents) ", ")))

(define (rfc1123-unparser header-contents)
  (list (rfc1123-time->string (get-value (car header-contents)))))

(define-constant short-weekdays `#("Sun" "Mon" "Tue" "Wed" "Thu" "Fri" "Sat"))
(define-constant short-months `#("Jan" "Feb" "Mar" "Apr" "May" "Jun" "Jul" "Aug" "Sep" "Oct" "Nov" "Dec"))

(define (rfc1123-time->string time)
  (let ((padded (lambda (n)
                  (if (fx< n 10)
                      (string-append "0" (number->string n))
                      (number->string n))))
        (secs (vector-ref time 0))
        (mins (vector-ref time 1))
        (hours (vector-ref time 2))
        (mday (vector-ref time 3))
        (month (vector-ref time 4))
        (year (vector-ref time 5))
        (wday (vector-ref time 6)))
    (string-append (vector-ref short-weekdays wday) ", "
                   (padded mday) " " (vector-ref short-months month) " "
                   (number->string (+ 1900 year)) " " (padded hours) ":"
                   (padded mins) ":" (padded secs) " GMT")))

(define (basic-auth-param-subunparser params)
  (let ((user (alist-ref 'username params))
        (pass (alist-ref 'password params)))
    (if (string-index user #\:)
        (signal-http-condition
         'basic-auth-param-subunparser
         "Colon detected in username. This is not supported by basic auth!"
         (list user) 'username-with-colon 'value user)
        (base64-encode (string-append user ":" pass)))))

(define (digest-auth-param-subunparser params)
  (unparse-params params
                  `((nc . ,identity) ;; see below
                    (uri . ,uri->string)
                    (qop . ,symbol->string)
                    (algorithm . ,symbol->string))
                  separator: ", "
                  grammar: 'infix
                  keyword-unparser: symbol->string
                  value-unparser:
                  ;; Nasty exception for "nc", which is an unquoted
                  ;; padded integer...
                  (lambda (x)
                    (if (number? x)
                        (string-pad (number->string x 16) 8 #\0)
                        (quote-string (->string x))))))

(define authorization-param-subunparsers
  (make-parameter `((basic . ,basic-auth-param-subunparser)
                    (digest . ,digest-auth-param-subunparser))))

(define (authorization-unparser header-contents)
  (map (lambda (header)
         (let* ((default-unparser        ; Not great, but better than nothing
                  (lambda (params) (unparse-params params '())))
                (auth-scheme (get-value header))
                (unparser (alist-ref auth-scheme
                                     (authorization-param-subunparsers)
                                     eq? default-unparser)))
           (string-append
            (symbol->http-name auth-scheme) " "
            (->string (unparser (get-params header))))))
       header-contents))

(define (authenticate-unparser header-contents)
  (map (lambda (header)
         (string-append
          (symbol->http-name (get-value header))
          " "
          (let* ((old (get-params header))
                 ;; A quick hack to get presence of "stale"
                 ;; coded as TRUE instead of value-less param
                 ;; false value is coded by its absense
                 (params (if (alist-ref 'stale old)
                             (alist-update! 'stale 'TRUE old)
                             (alist-delete 'stale old))))
            (unparse-params params
                            `((domain . ,(lambda (u)
                                           (string-join
                                            (map uri->string u))))
                              (qop . ,(lambda (q)
                                        (string-join
                                         (map symbol->string q)
                                         ",")))
                              (algorithm . ,symbol->string))
                            separator: ", "
                            grammar: 'infix
                            keyword-unparser: symbol->string
                            value-unparser:
                            (lambda (x)
                              (if (eq? x 'TRUE)
                                  "TRUE"
                                  (quote-string (->string x))))))))
       header-contents))

(define (strict-transport-security-unparser header-contents)
  (map (lambda (header)   ; Should only be one header; the rest must be ignored!
         (unparse-params (get-value header) '()
                         grammar: 'infix
                         keyword-unparser: ; Ugly but neccessary
                         (lambda (k)
                           (if (eq? k 'includesubdomains)
                               "includeSubDomains"
                               (->string k)))))
       header-contents))