summaryrefslogtreecommitdiff
path: root/header-parsers.scm
diff options
context:
space:
mode:
authorPeter Bex <peter@more-magic.net>2018-06-22 22:22:24 +0200
committerPeter Bex <peter@more-magic.net>2018-06-22 22:22:24 +0200
commit90a1f7d47525cfffe928e9a89becf622bd85a8a1 (patch)
treeef5043b60d49425f702fd154ee3ba1088a68677c /header-parsers.scm
downloadintarweb-90a1f7d47525cfffe928e9a89becf622bd85a8a1.tar.gz
Initial CHICKEN 5 port of intarweb 1.72.0
Diffstat (limited to 'header-parsers.scm')
-rw-r--r--header-parsers.scm994
1 files changed, 994 insertions, 0 deletions
diff --git a/header-parsers.scm b/header-parsers.scm
new file mode 100644
index 0000000..89edeae
--- /dev/null
+++ b/header-parsers.scm
@@ -0,0 +1,994 @@
+;;;; Header value accessor procedures
+
+;; Get the raw contents of a header
+(define (header-contents name headers #!optional default)
+ (cond ((assq name (headers-v headers)) => cdr)
+ (else default)))
+
+;; Get all values of a header
+(define (header-values header-name headers)
+ (map (cut vector-ref <> 0) (header-contents header-name headers '())))
+
+;; Get the value of a header, assuming it has only one value
+(define (header-value header-name headers #!optional default)
+ (let ((contents (header-contents header-name headers '())))
+ (if (null? contents)
+ default
+ (get-value (car contents)))))
+
+;; Get the parameters of a header, assuming it has only one value
+(define (header-params header-name headers)
+ (let ((contents (header-contents header-name headers '())))
+ (if (null? contents)
+ '()
+ (get-params (car contents)))))
+
+;; Get a specific parameter of a header, assuming it has only one value
+(define (header-param param-name header-name headers #!optional default)
+ (let ((params (header-params header-name headers)))
+ (cond ((not (pair? params)) default)
+ ((assq param-name params) => cdr)
+ (else default))))
+
+;; Get the value from one header entry
+(define get-value (cut vector-ref <> 0))
+;; Fast local version
+(define-compiler-syntax get-value
+ (syntax-rules ()
+ ((_ header-entry)
+ (vector-ref header-entry 0))))
+
+;; Get all params from one header entry
+(define get-params (cut vector-ref <> 1))
+;; Fast local version
+(define-compiler-syntax get-params
+ (syntax-rules ()
+ ((_ header-entry)
+ (vector-ref header-entry 1))))
+
+;; Get one specific parameter from one header entry
+(define (get-param param contents #!optional (default #f))
+ (let ((params (vector-ref contents 1)))
+ (cond ((not (pair? params)) default)
+ ((assq param params) => cdr)
+ (else default))))
+
+(define (get-no-newline-value header)
+ (let ((v (get-value header)))
+ (when (string-index v #\newline)
+ ;; There's duplication with quote-string error here...
+ (signal-http-condition
+ 'raw-value
+ (conc "Unencoded newline in header contents! "
+ "Please encode the newline in a way "
+ "appropriate for this header")
+ (list string) 'unencoded-header 'value string))
+ v))
+
+;;;; Header parsers
+
+(define (single subparser #!optional (parameter-subparsers '()))
+ (lambda (contents)
+ (list ((with-params subparser parameter-subparsers) contents))))
+
+(define (multiple subparser #!optional (parameter-subparsers '()))
+ (lambda (contents)
+ (map (with-params subparser parameter-subparsers)
+ (split-multi-header contents))))
+
+;; RFC 2616 4.2 says multi-headers are a comma-separated list of tokens
+(define (split-multi-header value)
+ (let ((len (string-length value)))
+ (let loop ((result '())
+ (start-pos 0) ; Where the current header value starts
+ (search-pos 0)) ; Where the searching starts
+ (or (and-let* (((< search-pos len))
+ (pos (string-index value (char-set #\, #\") search-pos)))
+ (if (char=? #\, (string-ref value pos))
+ (loop (cons (string-copy value start-pos pos) result) (add1 pos) (add1 pos))
+ (let ((end-pos (escaped-string-end value (add1 pos) (char-set #\"))))
+ (loop result start-pos (add1 end-pos)))))
+ (reverse (cons (string-drop value start-pos) result))))))
+
+;; Remove all escape characters from the input, recognising "escaped escapes"
+(define (unescape str)
+ (let ((last-char (sub1 (string-length str))))
+ (let loop ((result "")
+ (start-pos 0))
+ (or (and-let* ((pos (string-index str #\\ start-pos)))
+ (if (= pos last-char)
+ (string-append result (string-copy str start-pos))
+ (loop (string-append result (string-copy str start-pos pos)
+ (string-copy str (add1 pos) (+ pos 2)))
+ (+ pos 2))))
+ (string-append result (string-copy str start-pos))))))
+
+;; Find a matching endpoint for a token, ignoring escaped copies of the token
+(define (escaped-string-end str start stop-char-set)
+ (let ((len (string-length str)))
+ (let loop ((start start))
+ (let ((pos (string-index str (char-set-adjoin stop-char-set #\\) start)))
+ (if pos
+ (if (char=? #\\ (string-ref str pos))
+ ;; Escaped matching closing symbol
+ (if (= len (add1 pos))
+ pos
+ (loop (+ pos 2)))
+ ;; Reached the matching closing symbol
+ pos)
+ len))))) ; No matching closing symbol? "Insert" it at the end
+
+;; Try to parse a token, starting at the provided offset, up until the
+;; char-set where we should stop. Returns two values: the token or #f if
+;; there is no token left, and the position on which the token ends.
+(define (parse-token value start-pos
+ #!optional
+ (stop-char-set (char-set-adjoin char-set:whitespace #\,)))
+ (if (>= start-pos (string-length value))
+ (values #f start-pos)
+ (let ((stop (char-set-adjoin stop-char-set #\")))
+ (let ((pos (string-index value stop start-pos)))
+ (if pos
+ (if (not (char=? #\" (string-ref value pos)))
+ (values (string-trim-both
+ value char-set:whitespace start-pos pos)
+ pos) ; Stop-char found, but no quoting
+ (let ((end-pos (escaped-string-end value (add1 pos)
+ (char-set #\"))))
+ ;; Found the double quote? Recurse on the remainder
+ (receive (rest final-pos)
+ (parse-token value (add1 end-pos) stop-char-set)
+ (values (string-append
+ (string-trim-both
+ value char-set:whitespace start-pos pos)
+ (if (= pos end-pos)
+ (unescape (string-copy value (add1 pos)))
+ (unescape (string-copy value (add1 pos) end-pos)))
+ (or rest ""))
+ final-pos))))
+ ;; Nothing found? Then the remainder of the string is the token
+ (values (string-trim-both
+ value char-set:whitespace start-pos)
+ (string-length value)))))))
+
+;; Comments are a bit like tokens, except they can be nested
+(define (parse-comment value start-pos)
+ (let* ((len (string-length value))
+ (nospace-pos (and (< start-pos len)
+ (string-skip value char-set:whitespace start-pos))))
+ (if (and nospace-pos (char=? (string-ref value nospace-pos) #\())
+ (let loop ((result "")
+ (start-pos (add1 nospace-pos)))
+ (if (>= start-pos len)
+ (values result len)
+ (let ((pos (string-index value (char-set #\" #\( #\)) start-pos)))
+ (if pos
+ (cond ((char=? #\( (string-ref value pos)) ; Nested comment
+ (receive (nested end-pos)
+ (parse-comment value pos)
+ (loop (sprintf "~A~A(~A)"
+ result
+ (string-copy value start-pos pos)
+ nested)
+ (add1 end-pos))))
+ ((char=? #\" (string-ref value pos))
+ (let lp ((end (add1 pos))
+ (c (string-ref value (add1 pos)))
+ (res '()))
+ (cond ((char=? #\" c)
+ (loop (string-append
+ result
+ (reverse-list->string res))
+ (add1 end)))
+ ((char=? #\\ c)
+ (lp (+ end 2)
+ (string-ref value (+ end 2))
+ (cons (string-ref value (add1 end))
+ res)))
+ (else
+ (lp (add1 end)
+ (string-ref value (add1 end))
+ (cons c res))))))
+ ;; Else it's a )
+ (else (values (conc result (string-copy value start-pos pos)) (add1 pos))))
+ ;; Nothing found? Then the remainder of the string is the token
+ (values (conc result (string-copy value start-pos))
+ (string-length value))))))
+ ;; No (? Then fail to match the 'comment'
+ (values #f start-pos))))
+
+(define (parse-params string start-pos param-subparsers #!optional (stop-set (char-set #\;)) (separator-or-stop-set (char-set #\; #\=)))
+ (let loop ((start-pos start-pos)
+ (params '()))
+ (unless separator-or-stop-set
+ (error "The parse-params API has changed a bit for performance reasons: if you pass the optional stop char set, you need to pass one more argument which is the same char-set extended with an equals-sign"))
+ (receive (attribute-name pos)
+ (parse-token string start-pos separator-or-stop-set)
+ (if attribute-name
+ (let ((attribute (http-name->symbol attribute-name)))
+ (if (and (< pos (string-length string))
+ (char=? (string-ref string pos) #\=))
+ (receive (value pos)
+ (parse-token string (add1 pos) stop-set)
+ ;; In case of no value ("foo="), use the empty string as value
+ (let ((value ((alist-ref attribute param-subparsers
+ eq? identity)
+ (or value ""))))
+ (loop (add1 pos) (cons (cons attribute value) params))))
+ ;; Missing value is interpreted as "present",
+ ;; so #t. If not present, it's #f when looking it up
+ (loop (add1 pos) (cons (cons attribute #t) params))))
+ (values (reverse params) pos)))))
+
+(define (parse-value+params string start-pos value-subparser param-subparsers)
+ (receive (value pos)
+ (parse-token string start-pos (char-set #\;))
+ (if (not value)
+ (values #f pos) ;; XXX this is wrong and not expected by the caller!
+ (receive (params pos)
+ (parse-params string (add1 pos) param-subparsers)
+ (values (vector (value-subparser value) params) pos)))))
+
+(define (with-params value-subparser parameter-subparsers)
+ (lambda (entry)
+ (receive (type+params pos)
+ (parse-value+params entry 0 value-subparser parameter-subparsers)
+ type+params)))
+
+(define (make-key/value-subparser key/value-subparsers)
+ (lambda (k/v)
+ ;; We're abusing parse-params here to read value
+ ;; instead of params. This is weird, but it works :)
+ (receive (key+value pos)
+ (parse-params k/v 0 key/value-subparsers)
+ (vector (car key+value) '())))) ;; There's only one key/value pair
+
+(foreign-declare "#include <locale.h>")
+
+(define-foreign-variable LC_TIME int)
+
+(define setlocale (foreign-lambda c-string setlocale int c-string))
+
+(define-syntax let-locale
+ (syntax-rules ()
+ ((let-locale ((cat val) ...) body ...)
+ (let ((backup '()))
+ (dynamic-wind
+ (lambda () (set! backup `((cat . ,(setlocale cat val)) ...)))
+ (lambda () body ...)
+ (lambda () (setlocale cat (alist-ref 'cat backup)) ...))))))
+
+(define (make-date->string-parser spec)
+ (let ((date-regex
+ (irregex
+ (string-translate*
+ spec
+ '((" " . " +") ; Any number of spaces is very permissive
+ ("%a" . "(Sun|Mon|Tue|Wed|Thu|Fri|Sat)")
+ ("%A" . "(Sunday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday)")
+ ("%d" . "[0-9]{1,2}")
+ ("%b" . "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)")
+ ("%y" . "[0-9]{1,2}")
+ ("%Y" . "[0-9]{4}")
+ ("%X" . "[0-9]{2}:[0-9]{2}:[0-9]{2}"))))))
+ (lambda (str)
+ (and (irregex-match date-regex str) ; Or irregex-search?
+ (let-locale ((LC_TIME "POSIX"))
+ (string->time str spec))))))
+
+(define rfc1123-string->time (make-date->string-parser "%a, %d %b %Y %X GMT"))
+
+;; This is a little more relaxed than strict rfc850 (it allows abbreviated
+;; weekdays) - for example Google Web Server outputs cookies in this format.
+(define rfc850-string->time
+ (disjoin (make-date->string-parser "%A, %d-%b-%y %X GMT")
+ (make-date->string-parser "%a, %d-%b-%Y %X GMT")))
+
+(define asctime-string->time (make-date->string-parser "%a %b %d %X %Y"))
+
+(define http-date-string->time
+ (disjoin rfc1123-string->time rfc850-string->time asctime-string->time))
+
+;; RFC 1123 updates RFC 822's datetime spec
+(define (rfc1123-subparser str)
+ (or (rfc1123-string->time str)
+ (signal-http-condition
+ 'rfc1123-subparser
+ "Error parsing RFC 1123 date/time" (list str)
+ 'rfc1123-subparser 'value str)))
+
+(define (rfc850-subparser str)
+ (or (rfc850-string->time str)
+ (signal-http-condition
+ 'rfc850-subparser
+ "Error parsing RFC850 date/time" (list str)
+ 'rfc850-subparser 'value str)))
+
+(define (asctime-subparser str)
+ (or (asctime-string->time str)
+ (signal-http-condition
+ 'asctime-subparser
+ "Error parsing asctime() date/time" (list str)
+ 'asctime-subparser 'value str)))
+
+;; rfc1123-date | rfc850-date | asctime-date
+(define (http-date-subparser str)
+ (or (http-date-string->time str)
+ (signal-http-condition
+ 'http-date-subparser
+ "Error parsing date/time" (list str)
+ 'http-date-subparser 'value str)))
+
+;; Change the accuracy of a number to 'digits' number of digits to the
+;; right of the decimal point.
+(define (chop-number num digits)
+ (let ((factor (expt 10 digits)))
+ (/ (round (* num factor)) factor)))
+
+(define (quality-subparser str)
+ ;; Anything that's not a number is seen as if the value is missing, hence 1.0
+ (let* ((num (or (string->number str) 1.0))
+ (imprecise (chop-number num 3)))
+ (max 0.0 (min 1.0 imprecise))))
+
+;; Just put all header strings in a list, so we can pass it on.
+;; Make no assumptions about the contents (only value, don't try to parse params)
+;; This is different from (multiple (without-params generic-header-parser))
+;; because this does not assume it can split up comma-separated values.
+;; It also will ensure that the value is raw,
+(define (unknown-header-parser contents)
+ (list (vector contents 'raw)))
+
+(define symbol-subparser
+ (compose string->symbol string-trim-both))
+
+(define symbol-subparser-ci
+ (compose string->symbol string-trim-both string-downcase))
+
+(define (natnum-subparser contents)
+ (let ((num (string->number contents)))
+ (if num (inexact->exact (max 0 (round num))) 0)))
+
+(define (host/port-subparser contents)
+ (let* ((idx (string-index-right contents #\:))
+ (portnum (and idx (string->number
+ (substring/shared contents (add1 idx))))))
+ (if (and idx portnum)
+ (cons (substring/shared contents 0 idx)
+ (inexact->exact (round portnum)))
+ (cons contents #f))))
+
+; base64 of 128 bit hex digest as per RFC1864 (eg, Content-md5)
+(define base64-subparser base64-decode)
+
+;; This is retarded. The websocket spec (RFC6455) explicitly says the
+;; product token must be compared case-insensitively. RFC2817,
+;; "upgrading to TLS within HTTP/1.1" doesn't mention anything about
+;; case sensitivity, but defines an all-uppercase token. This means
+;; that there're going to be servers which accept only "TLS/1.0"
+;; as-is. This all just means we can't add the convenience of
+;; downcasing and switching to a symbol, pushing the case-(in)sensitive
+;; comparison down the the user level, causing more bugs :(
+(define (product-subparser contents)
+ (let* ((idx (string-index contents #\/))
+ (version (and idx (substring/shared contents (add1 idx)))))
+ (if (and idx version)
+ (cons (substring/shared contents 0 idx) version)
+ (cons contents #f))))
+
+;; bytes <start>-<end>/<total>
+(define range-subparser
+ (let ((range-regex
+ (irregex '(seq "bytes" (+ space)
+ (=> start (+ digit)) "-" (=> end (+ digit))
+ "/" (=> total (+ digit))))))
+ (lambda (s)
+ (and-let* ((m (irregex-match range-regex s))
+ (start (string->number (irregex-match-substring m 'start)))
+ (end (string->number (irregex-match-substring m 'end)))
+ (total (string->number (irregex-match-substring m 'total))))
+ (list start end total)))))
+
+;; Accept *just* a filename, not a full path (simply strips directories)
+;; This matches the content-disposition recommendation in RFC2616, 19.5.1:
+;; "The receiving user agent SHOULD NOT respect any directory path
+;; information present in the filename-parm parameter, which is the only
+;; parameter believed to apply to HTTP implementations at this time. The
+;; filename SHOULD be treated as a terminal component only."
+;; This echoes RFC2183 (and RFC1806 which it supersedes), section 2.3:
+;; "The receiving MUA SHOULD NOT respect any directory path information
+;; that may seem to be present in the filename parameter. The filename
+;; should be treated as a terminal component only."
+(define (filename-subparser fn)
+ (let ((base-fn (pathname-strip-directory (string-trim-both fn))))
+ (and (not (member base-fn '("" "." "..")))
+ (not (string-index base-fn (char-set #\/ #\nul)))
+ base-fn)))
+
+;; [W/]<string>
+;; This is a full parser, because it needs to be able to distinguish
+;; between W/"foo" and "W/foo". If it's preprocessed by the tokenizer
+;; both get "normalised" to the same thing: W/foo
+;;
+;; XXX It could be a good idea if the single/multiple token parsers
+;; did not do anything to their contents. If the consuming parsers
+;; want tokens, they know how to have it. OTOH, it would mean much
+;; more code for all the parsers as they need to tokenize more...
+(define (etag-parser contents)
+ (let ((contents (string-trim-both contents)))
+ (list (vector
+ (if (string-prefix? "W/" contents)
+ `(weak . ,(parse-token contents 2 char-set:whitespace))
+ `(strong . ,(parse-token contents 0 char-set:whitespace)))
+ '()))))
+
+;; Used for both if-match and if-none-match
+;; This is either a wilcard ('*') or several entities
+(define (if-match-parser contents)
+ (let ((contents (string-trim-both contents)))
+ (if (string=? "*" contents)
+ (list (vector '* '()))
+ (let loop ((pos 0)
+ (etags '()))
+ (let ((weak (string-prefix? "W/" contents 0 2 pos)))
+ (receive (etag newpos)
+ (parse-token contents (+ pos (if weak 2 0)) char-set:whitespace)
+ (let ((newpos (string-skip contents char-set:whitespace newpos))
+ (value (and etag (vector (cons (if weak 'weak 'strong)
+ etag) '()))))
+ (if value
+ (if newpos
+ (loop newpos (cons value etags))
+ (reverse! (cons value etags)))
+ (reverse! etags)))))))))
+
+;; ( <product>[/<version>] [<comment>] )+
+;; This parser is a full parser because parse-token cannot handle
+;; comments yet... (if a ; is in a comment, it breaks down)
+(define software-parser
+ (let ((char-set:space-or-paren (char-set-union (char-set #\()
+ char-set:whitespace))
+ (char-set:slash-or-paren (char-set #\/ #\()))
+ (lambda (contents)
+ (let loop ((start-pos 0)
+ (products '()))
+ (let*-values (((product pos)
+ (parse-token contents start-pos
+ char-set:slash-or-paren))
+ ((version pos2)
+ (parse-token contents pos ; (add1 pos)
+ char-set:space-or-paren))
+ ((comment pos3) (parse-comment contents pos2))
+ ;; Ugh
+ ((real-version) (and version (not (string-null? version)) (string-trim version #\/))))
+ (if product
+ (loop pos3 (cons (list product real-version comment) products))
+ (list (vector (reverse products) '()))))))))
+
+;;;; MAJOR TODOs
+;; RFC1123 mailbox parser - just strings for now
+(define mailbox-subparser identity)
+
+;; Either an entity-tag or a http-date
+(define (if-range-parser contents)
+ (let ((http-date ((with-params http-date-string->time '()) contents)))
+ (if (get-value http-date)
+ (list http-date)
+ (etag-parser contents))))
+
+;; Either delta-seconds or http-date
+(define retry-after-subparser (disjoin http-date-subparser natnum-subparser))
+
+;; Tricky - see 2616 14.45
+;; We probably shouldn't try to do too much parsing here
+(define via-parser (multiple identity))
+
+;; Tricky - see 2616 14.46
+(define warning-parser split-multi-header)
+;;;; END MAJOR TODOs
+
+(define (key/value-subparser str)
+ (let ((idx (string-index str #\=)))
+ (cons (string->symbol (string-take str idx)) (string-drop str (add1 idx)))))
+
+;; The 'expires' header defined by the Netscape cookie spec contains
+;; an embedded comma. RFC 2109 cookies use Max-Age instead.
+(define old-style-cookie?
+ (let ((old-cookie-regex
+ (irregex '(seq bos (+ (~ #\= #\")) "=" (* (~ #\;)) ";" ;
+ (* any) (w/nocase "expires") (* space) "="))))
+ (lambda (cookie)
+ (irregex-search old-cookie-regex cookie))))
+
+(define (string->number-list str)
+ (map string->number (string-split str ",")))
+
+(define (relative-ref/path-only s)
+ (and-let* ((ref (uri-reference s))
+ ((not (uri-host ref)))
+ ((null? (uri-query ref)))
+ ((not (uri-fragment ref))))
+ ref))
+
+;; We're using http-date-subparser for 'expires' instead of rfc1123-subparser
+;; (which would be the correct thing to do) because several well-known web
+;; server software packages (tested: PHP and Rails) get it wrong. So we
+;; will go by the robustness principle and allow any kind of HTTP date.
+(define set-cookie-parser
+ (let ((param-subparsers `((expires . ,http-date-subparser)
+ (max-age . ,string->number)
+ (version . ,string->number)
+ (port . ,string->number-list)
+ (path . ,relative-ref/path-only)))
+ (name/value-parser (lambda (str)
+ (let ((idx (string-index str #\=)))
+ (cons (string-take str idx)
+ (string-drop str (add1 idx)))))))
+ (lambda (contents)
+ (if (old-style-cookie? contents)
+ (list ((with-params name/value-parser param-subparsers) contents))
+ (map (with-params name/value-parser param-subparsers)
+ (split-multi-header contents))))))
+
+(define cache-control-parser
+ (let ((splitter (lambda (str) ;; Is this correct?
+ (map (compose string->symbol string-trim-both)
+ (string-split str ",")))))
+ (lambda (contents)
+ (map
+ (make-key/value-subparser `((max-age . ,natnum-subparser)
+ (s-maxage . ,natnum-subparser)
+ (max-stale . ,natnum-subparser)
+ (min-fresh . ,natnum-subparser)
+ (private . ,splitter)
+ (no-cache . ,splitter)))
+ (split-multi-header contents)))))
+
+(define (strict-transport-security-parser contents)
+ ;; This is ridiculous; there are no parameters because everything
+ ;; is a parameter (or everything is a value, depending on your point
+ ;; of view). The header has no main value. For convenience and sanity
+ ;; we just return an alist as a single value.
+ (list (vector (parse-params contents 0 `((max-age . ,natnum-subparser))) '())))
+
+(define (basic-auth-param-subparser contents pos)
+ (receive (secret pos)
+ (parse-token contents pos (char-set #\,))
+ (let* ((decoded (base64-decode secret))
+ (colon-idx (string-index decoded #\:))
+ (user (string-take decoded colon-idx))
+ (pass (string-drop decoded (add1 colon-idx))))
+ (values `((username . ,user) (password . ,pass)) pos))))
+
+(define (digest-auth-param-subparser contents pos)
+ (parse-params contents pos
+ `((nc . ,(lambda (n) (string->number n 16)))
+ (uri . ,uri-reference)
+ (qop . ,symbol-subparser)
+ (algorithm . ,symbol-subparser-ci))
+ (char-set #\,) (char-set #\, #\=)))
+
+(define authorization-param-subparsers
+ (make-parameter `((basic . ,basic-auth-param-subparser)
+ (digest . ,digest-auth-param-subparser))))
+
+(define (authorization-parser contents)
+ (let loop ((pos 0)
+ (result '()))
+ (receive (authtype pos)
+ (parse-token contents pos char-set:whitespace)
+ (if (not authtype)
+ (reverse result)
+ (let ((authtype (http-name->symbol authtype))
+ (default-subparser (lambda (contents pos)
+ (parse-params contents pos '()))))
+ (receive (params pos)
+ ((alist-ref authtype (authorization-param-subparsers)
+ eq? default-subparser) contents (add1 pos))
+ (loop (add1 pos)
+ (cons (vector authtype params) result))))))))
+
+(define (authenticate-parser contents)
+ (let loop ((pos 0)
+ (result '()))
+ (receive (authtype pos)
+ (parse-token contents pos char-set:whitespace)
+ (if (not authtype)
+ (reverse result)
+ (let ((authtype (http-name->symbol authtype)))
+ (receive (params pos)
+ (parse-params contents pos
+ `((domain . ,(lambda (s)
+ (map uri-reference
+ (string-split s))))
+ (qop . ,(lambda (s)
+ (map (compose symbol-subparser
+ string-trim)
+ (string-split s ","))))
+ (algorithm . ,symbol-subparser-ci)
+ (stale . ,(lambda (s)
+ (string-ci=? (string-trim s)
+ "TRUE"))))
+ (char-set #\,) (char-set #\, #\=))
+ (loop (add1 pos) (cons (vector authtype params) result))))))))
+
+(define (pragma-parser contents)
+ (map (make-key/value-subparser `()) (split-multi-header contents)))
+
+(define (te-parser contents)
+ (map (make-key/value-subparser `((q . ,quality-subparser)))
+ (split-multi-header contents)))
+
+;; Cookie headers are also braindead: there can be several cookies in one header,
+;; separated by either commas or semicolons. The only way to distinguish a
+;; new cookie from a parameter of the current cookie is the dollar in front
+;; of all parameter names.
+;; Also, there's a $Version attribute that prepends all cookies, which is
+;; considered to apply to all cookies that follow.
+(define (cookie-parser contents)
+ ;; Local version of parse-params that stops when param without $ is seen
+ (define (read-params start-pos)
+ (let next-param ((start-pos start-pos)
+ (params '()))
+ (receive (attribute-name pos)
+ (parse-token contents start-pos (char-set #\; #\=))
+ (if (or (not attribute-name) ;; Still reading params?
+ (not (char=? (string-ref attribute-name 0) #\$)))
+ (values (reverse! params) start-pos)
+ (let ((attrib (http-name->symbol (string-drop attribute-name 1))))
+ (if (and (< pos (string-length contents))
+ (char=? (string-ref contents pos) #\=))
+ (receive (value pos)
+ (parse-token contents (add1 pos) (char-set #\;))
+ (let ((value (case attrib
+ ((version port) (string->number (or value "")))
+ ((path) (relative-ref/path-only (or value "")))
+ (else value))))
+ (next-param (add1 pos) (cons (cons attrib value) params))))
+ ;; Missing value is interpreted as "present", so #t
+ (next-param (add1 pos) (cons (cons attrib #t) params))))))))
+ (receive (global-params pos)
+ (read-params 0)
+ (let loop ((cookies '())
+ (pos pos))
+ (let*-values (((name pos) (parse-token contents pos (char-set #\= #\;)))
+ ((val pos) (parse-token contents (add1 pos) (char-set #\;))))
+ (if (or (not name) (not val))
+ (reverse! cookies)
+ (receive (local-params pos)
+ (read-params (add1 pos))
+ (loop (cons (vector (cons name val)
+ (append! local-params global-params))
+ cookies) (add1 pos))))))))
+
+;;; Unparsers ;;;
+(define (unparse-params params unparsers #!key
+ (separator "; ") (grammar 'prefix)
+ (keyword-unparser ->string)
+ (value-unparser unparse-token))
+ (let loop ((params params)
+ (results '()))
+ (if (null? params)
+ (string-join (reverse results) separator grammar)
+ (let* ((name (caar params))
+ (val (cdar params))
+ (str (case val
+ ;; #t means param is present (no value)
+ ((#t) (keyword-unparser name))
+ ;; #f means param is missing
+ ((#f) #f)
+ (else (let* ((unparser (assq name unparsers))
+ (unparsed-val (if unparser
+ ((cdr unparser) val)
+ val)))
+ (string-append (keyword-unparser name) "="
+ (value-unparser unparsed-val)))))))
+ (loop (cdr params) (if str (cons str results) results))))))
+
+(define must-be-quoted-chars (char-set-adjoin char-set:iso-control #\"))
+
+(define (quote-string string)
+ (reverse-list->string
+ (cons #\"
+ (string-fold (lambda (c result)
+ (cond ((char=? c #\newline)
+ (signal-http-condition
+ 'quote-string
+ (conc "Unencoded newline in header contents! "
+ "Please encode the newline in a way "
+ "appropriate for this header")
+ (list string) 'unencoded-header 'value string))
+ ((char-set-contains? must-be-quoted-chars c)
+ (cons c (cons #\\ result)))
+ (else (cons c result))))
+ '(#\")
+ string))))
+
+;; Unparse a value as token, converting it to a string first
+(define unparse-token
+ (let ((default-trigger-chars (char-set-union must-be-quoted-chars
+ (char-set #\= #\; #\,)
+ char-set:blank)))
+ (lambda (token #!optional separator-chars)
+ (let ((trigger-quoting-chars
+ (if separator-chars
+ (char-set-union must-be-quoted-chars separator-chars char-set:blank)
+ default-trigger-chars))
+ (token-string (->string token)))
+ (if (string-any trigger-quoting-chars token-string)
+ (quote-string token-string)
+ token-string)))))
+
+(define (unparse-etag etag)
+ (string-append
+ (if (eq? 'weak (car etag)) "W/" "")
+ (quote-string (cdr etag)))) ;; Etags are _always_ quoted
+
+;; There's no need to make a specific header unparser for every header type.
+;; Usually, the Scheme value representing a header can unambiguously be
+;; unparsed into a header just by checking its type.
+(define (default-header-unparser header-contents)
+ (let loop ((headers (reverse header-contents))
+ (result '()))
+ (if (null? headers)
+ (list (string-join result ", "))
+ (let* ((header (car headers))
+ (contents (get-value header))
+ (value (cond
+ ((pair? contents) ; alist?
+ (let ((attribute (symbol->http-name (car contents))))
+ (if (eq? (cdr contents) #t)
+ (unparse-token attribute)
+ (string-append attribute "="
+ (unparse-token (cdr contents))))))
+ ((uri-reference? contents)
+ (unparse-token (uri->string contents) (char-set)))
+ (else (unparse-token contents)))))
+ (loop (cdr headers)
+ (cons
+ (string-append
+ value (unparse-params (get-params header) '()))
+ result))))))
+
+;; RFC2616 19.5.1 says that the "filename" attribute _must_ be quoted.
+;; It's a bit annoying that our API currently can't specify for particular
+;; attributes that only those must be unparsed specially, so we quote _all_
+;; attributes (which, strictly speaking, is always allowed for tokens) unless
+;; otherwise specified by a hack (when the value is prefixed by RAW).
+;; This may be dangerous or wrong, if a server doesn't accept quoted "name"
+;; attributes, for example. Not too likely since names can contain spaces etc.
+(define (content-disposition-unparser header-contents)
+ (let* ((type (get-value (car header-contents)))
+ (RAW (list 'raw))
+ (unparser (lambda (x) (if (and (pair? x) (eq? RAW (car x)))
+ (cdr x)
+ (quote-string (->string x))))))
+ (list (conc (unparse-token type)
+ (unparse-params (get-params (car header-contents))
+ `((filename . ,pathname-strip-directory)
+ (size . ,(lambda (x) (cons RAW (number->string x))))
+ (creation-date . ,rfc1123-time->string)
+ (modification-date . ,rfc1123-time->string)
+ (read-date . ,rfc1123-time->string))
+ value-unparser: unparser)))))
+
+(define (etag-unparser header-contents)
+ (list (unparse-etag (get-value (car header-contents)))))
+
+(define (if-match-unparser header-contents)
+ (let loop ((headers (reverse header-contents))
+ (result '()))
+ (cond
+ ((null? headers) (list (string-join result ", ")))
+ ((eq? '* (get-value (car headers))) '("*")) ;; We're done. * means anything
+ (else (loop (cdr headers)
+ (cons (unparse-etag (get-value (car headers))) result))))))
+
+(define (host/port-unparser header-contents)
+ (let ((contents (get-value (car header-contents))))
+ ;; XXX: urlencode?
+ (if (cdr contents)
+ (list (conc (car contents) ":" (cdr contents)))
+ (list (car contents)))))
+
+;; Handled specially because cookie value is not an alist but a cons of strings
+(define (set-cookie-unparser header-contents)
+ (map (lambda (header)
+ (let* ((unparsed-params
+ (map (lambda (p)
+ (if (eq? (cdr p) #t)
+ (unparse-token (symbol->http-name (car p)))
+ (string-append
+ (unparse-token (symbol->http-name (car p)))
+ "="
+ (cond
+ ((and (eq? (car p) 'port) (pair? (cdr p)))
+ (string-join
+ (map number->string (cdr p)) ","))
+ ((and (eq? (car p) 'path) (cdr p))
+ (uri->string (cdr p)))
+ ((eq? (car p) 'expires)
+ (let-locale ((LC_TIME "POSIX"))
+ (time->string (cdr p) "%A, %d-%b-%y %X GMT")))
+ (else (unparse-token (cdr p)))))))
+ ;; Remove #f values
+ (filter (lambda (p) (cdr p)) (get-params header))))
+ (cookie (get-value header))
+ (unparsed-cookie (string-append (car cookie) "="
+ (unparse-token (cdr cookie)))))
+ (string-join (cons unparsed-cookie unparsed-params) "; ")))
+ header-contents))
+
+(define (cookie-unparser header-contents)
+ (let loop ((prefix "")
+ (headers (reverse header-contents))
+ (result '()))
+ (if (null? headers)
+ (list (conc prefix (string-join result "; ")))
+ (let* ((version (get-param 'version (car headers) #f))
+ (params (alist-delete 'version (get-params (car headers))))
+ (unparsed-params
+ (map (lambda (p)
+ (if (eq? (cdr p) #t)
+ (unparse-token (conc "$" (symbol->http-name (car p))))
+ (string-append
+ (unparse-token
+ (conc "$" (symbol->http-name (car p))))
+ "="
+ (cond
+ ((and (eq? (car p) 'port) (pair? (cdr p)))
+ (string-join
+ (map number->string (cdr p)) ","))
+ ((and (eq? (car p) 'path) (cdr p))
+ (uri->string (cdr p)))
+ (else (unparse-token (cdr p)))))))
+ ;; Remove #f values
+ (filter (lambda (p) (cdr p)) params)))
+ (cookie (get-value (car headers)))
+ (unparsed-cookie (string-append (car cookie) "="
+ (unparse-token (cdr cookie)))))
+ ;; Doing it like this means we can't unparse cookies of
+ ;; mixed versions...
+ (loop (if version (sprintf "$Version: ~A; " version) prefix)
+ (cdr headers)
+ (cons (string-join (cons unparsed-cookie unparsed-params) "; ")
+ result))))))
+
+(define (software-unparser header-contents)
+ (list
+ (string-join
+ (map (lambda (content)
+ (conc (unparse-token (first content))
+ (if (second content)
+ (conc "/" (unparse-token (second content)))
+ "")
+ (if (third content)
+ (conc " (" (third content) ")")
+ "")))
+ (get-value (car header-contents))))))
+
+(define (product-unparser header-contents)
+ (list
+ (string-join
+ (map (lambda (header)
+ (let* ((default-unparser ; Not great, but better than nothing
+ (lambda (params) (unparse-params params '())))
+ (product+version (get-value header)))
+ (conc (unparse-token (car product+version))
+ (if (cdr product+version)
+ (conc "/" (unparse-token (cdr product+version)))
+ ""))))
+ header-contents)
+ ", ")))
+
+(define (via-unparser header-contents)
+ (list (string-join (map get-value header-contents) ", ")))
+
+(define (rfc1123-unparser header-contents)
+ (list (rfc1123-time->string (get-value (car header-contents)))))
+
+(define-constant short-weekdays `#("Sun" "Mon" "Tue" "Wed" "Thu" "Fri" "Sat"))
+(define-constant short-months `#("Jan" "Feb" "Mar" "Apr" "May" "Jun" "Jul" "Aug" "Sep" "Oct" "Nov" "Dec"))
+
+(define (rfc1123-time->string time)
+ (let ((padded (lambda (n)
+ (if (fx< n 10)
+ (string-append "0" (number->string n))
+ (number->string n))))
+ (secs (vector-ref time 0))
+ (mins (vector-ref time 1))
+ (hours (vector-ref time 2))
+ (mday (vector-ref time 3))
+ (month (vector-ref time 4))
+ (year (vector-ref time 5))
+ (wday (vector-ref time 6)))
+ (string-append (vector-ref short-weekdays wday) ", "
+ (padded mday) " " (vector-ref short-months month) " "
+ (number->string (+ 1900 year)) " " (padded hours) ":"
+ (padded mins) ":" (padded secs) " GMT")))
+
+(define (basic-auth-param-subunparser params)
+ (let ((user (alist-ref 'username params))
+ (pass (alist-ref 'password params)))
+ (if (string-index user #\:)
+ (signal-http-condition
+ 'basic-auth-param-subunparser
+ "Colon detected in username. This is not supported by basic auth!"
+ (list user) 'username-with-colon 'value user)
+ (base64-encode (string-append user ":" pass)))))
+
+(define (digest-auth-param-subunparser params)
+ (unparse-params params
+ `((nc . ,identity) ;; see below
+ (uri . ,uri->string)
+ (qop . ,symbol->string)
+ (algorithm . ,symbol->string))
+ separator: ", "
+ grammar: 'infix
+ keyword-unparser: symbol->string
+ value-unparser:
+ ;; Nasty exception for "nc", which is an unquoted
+ ;; padded integer...
+ (lambda (x)
+ (if (number? x)
+ (string-pad (number->string x 16) 8 #\0)
+ (quote-string (->string x))))))
+
+(define authorization-param-subunparsers
+ (make-parameter `((basic . ,basic-auth-param-subunparser)
+ (digest . ,digest-auth-param-subunparser))))
+
+(define (authorization-unparser header-contents)
+ (map (lambda (header)
+ (let* ((default-unparser ; Not great, but better than nothing
+ (lambda (params) (unparse-params params '())))
+ (auth-scheme (get-value header))
+ (unparser (alist-ref auth-scheme
+ (authorization-param-subunparsers)
+ eq? default-unparser)))
+ (string-append
+ (symbol->http-name auth-scheme) " "
+ (->string (unparser (get-params header))))))
+ header-contents))
+
+(define (authenticate-unparser header-contents)
+ (map (lambda (header)
+ (string-append
+ (symbol->http-name (get-value header))
+ " "
+ (let* ((old (get-params header))
+ ;; A quick hack to get presence of "stale"
+ ;; coded as TRUE instead of value-less param
+ ;; false value is coded by its absense
+ (params (if (alist-ref 'stale old)
+ (alist-update! 'stale 'TRUE old)
+ (alist-delete 'stale old))))
+ (unparse-params params
+ `((domain . ,(lambda (u)
+ (string-join
+ (map uri->string u))))
+ (qop . ,(lambda (q)
+ (string-join
+ (map symbol->string q)
+ ",")))
+ (algorithm . ,symbol->string))
+ separator: ", "
+ grammar: 'infix
+ keyword-unparser: symbol->string
+ value-unparser:
+ (lambda (x)
+ (if (eq? x 'TRUE)
+ "TRUE"
+ (quote-string (->string x))))))))
+ header-contents))
+
+(define (strict-transport-security-unparser header-contents)
+ (map (lambda (header) ; Should only be one header; the rest must be ignored!
+ (unparse-params (get-value header) '()
+ grammar: 'infix
+ keyword-unparser: ; Ugly but neccessary
+ (lambda (k)
+ (if (eq? k 'includesubdomains)
+ "includeSubDomains"
+ (->string k)))))
+ header-contents))