diff options
author | Peter Bex <peter@more-magic.net> | 2018-06-22 22:22:24 +0200 |
---|---|---|
committer | Peter Bex <peter@more-magic.net> | 2018-06-22 22:22:24 +0200 |
commit | 90a1f7d47525cfffe928e9a89becf622bd85a8a1 (patch) | |
tree | ef5043b60d49425f702fd154ee3ba1088a68677c /header-parsers.scm | |
download | intarweb-90a1f7d47525cfffe928e9a89becf622bd85a8a1.tar.gz |
Initial CHICKEN 5 port of intarweb 1.72.0
Diffstat (limited to 'header-parsers.scm')
-rw-r--r-- | header-parsers.scm | 994 |
1 files changed, 994 insertions, 0 deletions
diff --git a/header-parsers.scm b/header-parsers.scm new file mode 100644 index 0000000..89edeae --- /dev/null +++ b/header-parsers.scm @@ -0,0 +1,994 @@ +;;;; Header value accessor procedures + +;; Get the raw contents of a header +(define (header-contents name headers #!optional default) + (cond ((assq name (headers-v headers)) => cdr) + (else default))) + +;; Get all values of a header +(define (header-values header-name headers) + (map (cut vector-ref <> 0) (header-contents header-name headers '()))) + +;; Get the value of a header, assuming it has only one value +(define (header-value header-name headers #!optional default) + (let ((contents (header-contents header-name headers '()))) + (if (null? contents) + default + (get-value (car contents))))) + +;; Get the parameters of a header, assuming it has only one value +(define (header-params header-name headers) + (let ((contents (header-contents header-name headers '()))) + (if (null? contents) + '() + (get-params (car contents))))) + +;; Get a specific parameter of a header, assuming it has only one value +(define (header-param param-name header-name headers #!optional default) + (let ((params (header-params header-name headers))) + (cond ((not (pair? params)) default) + ((assq param-name params) => cdr) + (else default)))) + +;; Get the value from one header entry +(define get-value (cut vector-ref <> 0)) +;; Fast local version +(define-compiler-syntax get-value + (syntax-rules () + ((_ header-entry) + (vector-ref header-entry 0)))) + +;; Get all params from one header entry +(define get-params (cut vector-ref <> 1)) +;; Fast local version +(define-compiler-syntax get-params + (syntax-rules () + ((_ header-entry) + (vector-ref header-entry 1)))) + +;; Get one specific parameter from one header entry +(define (get-param param contents #!optional (default #f)) + (let ((params (vector-ref contents 1))) + (cond ((not (pair? params)) default) + ((assq param params) => cdr) + (else default)))) + +(define (get-no-newline-value header) + (let ((v (get-value header))) + (when (string-index v #\newline) + ;; There's duplication with quote-string error here... + (signal-http-condition + 'raw-value + (conc "Unencoded newline in header contents! " + "Please encode the newline in a way " + "appropriate for this header") + (list string) 'unencoded-header 'value string)) + v)) + +;;;; Header parsers + +(define (single subparser #!optional (parameter-subparsers '())) + (lambda (contents) + (list ((with-params subparser parameter-subparsers) contents)))) + +(define (multiple subparser #!optional (parameter-subparsers '())) + (lambda (contents) + (map (with-params subparser parameter-subparsers) + (split-multi-header contents)))) + +;; RFC 2616 4.2 says multi-headers are a comma-separated list of tokens +(define (split-multi-header value) + (let ((len (string-length value))) + (let loop ((result '()) + (start-pos 0) ; Where the current header value starts + (search-pos 0)) ; Where the searching starts + (or (and-let* (((< search-pos len)) + (pos (string-index value (char-set #\, #\") search-pos))) + (if (char=? #\, (string-ref value pos)) + (loop (cons (string-copy value start-pos pos) result) (add1 pos) (add1 pos)) + (let ((end-pos (escaped-string-end value (add1 pos) (char-set #\")))) + (loop result start-pos (add1 end-pos))))) + (reverse (cons (string-drop value start-pos) result)))))) + +;; Remove all escape characters from the input, recognising "escaped escapes" +(define (unescape str) + (let ((last-char (sub1 (string-length str)))) + (let loop ((result "") + (start-pos 0)) + (or (and-let* ((pos (string-index str #\\ start-pos))) + (if (= pos last-char) + (string-append result (string-copy str start-pos)) + (loop (string-append result (string-copy str start-pos pos) + (string-copy str (add1 pos) (+ pos 2))) + (+ pos 2)))) + (string-append result (string-copy str start-pos)))))) + +;; Find a matching endpoint for a token, ignoring escaped copies of the token +(define (escaped-string-end str start stop-char-set) + (let ((len (string-length str))) + (let loop ((start start)) + (let ((pos (string-index str (char-set-adjoin stop-char-set #\\) start))) + (if pos + (if (char=? #\\ (string-ref str pos)) + ;; Escaped matching closing symbol + (if (= len (add1 pos)) + pos + (loop (+ pos 2))) + ;; Reached the matching closing symbol + pos) + len))))) ; No matching closing symbol? "Insert" it at the end + +;; Try to parse a token, starting at the provided offset, up until the +;; char-set where we should stop. Returns two values: the token or #f if +;; there is no token left, and the position on which the token ends. +(define (parse-token value start-pos + #!optional + (stop-char-set (char-set-adjoin char-set:whitespace #\,))) + (if (>= start-pos (string-length value)) + (values #f start-pos) + (let ((stop (char-set-adjoin stop-char-set #\"))) + (let ((pos (string-index value stop start-pos))) + (if pos + (if (not (char=? #\" (string-ref value pos))) + (values (string-trim-both + value char-set:whitespace start-pos pos) + pos) ; Stop-char found, but no quoting + (let ((end-pos (escaped-string-end value (add1 pos) + (char-set #\")))) + ;; Found the double quote? Recurse on the remainder + (receive (rest final-pos) + (parse-token value (add1 end-pos) stop-char-set) + (values (string-append + (string-trim-both + value char-set:whitespace start-pos pos) + (if (= pos end-pos) + (unescape (string-copy value (add1 pos))) + (unescape (string-copy value (add1 pos) end-pos))) + (or rest "")) + final-pos)))) + ;; Nothing found? Then the remainder of the string is the token + (values (string-trim-both + value char-set:whitespace start-pos) + (string-length value))))))) + +;; Comments are a bit like tokens, except they can be nested +(define (parse-comment value start-pos) + (let* ((len (string-length value)) + (nospace-pos (and (< start-pos len) + (string-skip value char-set:whitespace start-pos)))) + (if (and nospace-pos (char=? (string-ref value nospace-pos) #\()) + (let loop ((result "") + (start-pos (add1 nospace-pos))) + (if (>= start-pos len) + (values result len) + (let ((pos (string-index value (char-set #\" #\( #\)) start-pos))) + (if pos + (cond ((char=? #\( (string-ref value pos)) ; Nested comment + (receive (nested end-pos) + (parse-comment value pos) + (loop (sprintf "~A~A(~A)" + result + (string-copy value start-pos pos) + nested) + (add1 end-pos)))) + ((char=? #\" (string-ref value pos)) + (let lp ((end (add1 pos)) + (c (string-ref value (add1 pos))) + (res '())) + (cond ((char=? #\" c) + (loop (string-append + result + (reverse-list->string res)) + (add1 end))) + ((char=? #\\ c) + (lp (+ end 2) + (string-ref value (+ end 2)) + (cons (string-ref value (add1 end)) + res))) + (else + (lp (add1 end) + (string-ref value (add1 end)) + (cons c res)))))) + ;; Else it's a ) + (else (values (conc result (string-copy value start-pos pos)) (add1 pos)))) + ;; Nothing found? Then the remainder of the string is the token + (values (conc result (string-copy value start-pos)) + (string-length value)))))) + ;; No (? Then fail to match the 'comment' + (values #f start-pos)))) + +(define (parse-params string start-pos param-subparsers #!optional (stop-set (char-set #\;)) (separator-or-stop-set (char-set #\; #\=))) + (let loop ((start-pos start-pos) + (params '())) + (unless separator-or-stop-set + (error "The parse-params API has changed a bit for performance reasons: if you pass the optional stop char set, you need to pass one more argument which is the same char-set extended with an equals-sign")) + (receive (attribute-name pos) + (parse-token string start-pos separator-or-stop-set) + (if attribute-name + (let ((attribute (http-name->symbol attribute-name))) + (if (and (< pos (string-length string)) + (char=? (string-ref string pos) #\=)) + (receive (value pos) + (parse-token string (add1 pos) stop-set) + ;; In case of no value ("foo="), use the empty string as value + (let ((value ((alist-ref attribute param-subparsers + eq? identity) + (or value "")))) + (loop (add1 pos) (cons (cons attribute value) params)))) + ;; Missing value is interpreted as "present", + ;; so #t. If not present, it's #f when looking it up + (loop (add1 pos) (cons (cons attribute #t) params)))) + (values (reverse params) pos))))) + +(define (parse-value+params string start-pos value-subparser param-subparsers) + (receive (value pos) + (parse-token string start-pos (char-set #\;)) + (if (not value) + (values #f pos) ;; XXX this is wrong and not expected by the caller! + (receive (params pos) + (parse-params string (add1 pos) param-subparsers) + (values (vector (value-subparser value) params) pos))))) + +(define (with-params value-subparser parameter-subparsers) + (lambda (entry) + (receive (type+params pos) + (parse-value+params entry 0 value-subparser parameter-subparsers) + type+params))) + +(define (make-key/value-subparser key/value-subparsers) + (lambda (k/v) + ;; We're abusing parse-params here to read value + ;; instead of params. This is weird, but it works :) + (receive (key+value pos) + (parse-params k/v 0 key/value-subparsers) + (vector (car key+value) '())))) ;; There's only one key/value pair + +(foreign-declare "#include <locale.h>") + +(define-foreign-variable LC_TIME int) + +(define setlocale (foreign-lambda c-string setlocale int c-string)) + +(define-syntax let-locale + (syntax-rules () + ((let-locale ((cat val) ...) body ...) + (let ((backup '())) + (dynamic-wind + (lambda () (set! backup `((cat . ,(setlocale cat val)) ...))) + (lambda () body ...) + (lambda () (setlocale cat (alist-ref 'cat backup)) ...)))))) + +(define (make-date->string-parser spec) + (let ((date-regex + (irregex + (string-translate* + spec + '((" " . " +") ; Any number of spaces is very permissive + ("%a" . "(Sun|Mon|Tue|Wed|Thu|Fri|Sat)") + ("%A" . "(Sunday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday)") + ("%d" . "[0-9]{1,2}") + ("%b" . "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)") + ("%y" . "[0-9]{1,2}") + ("%Y" . "[0-9]{4}") + ("%X" . "[0-9]{2}:[0-9]{2}:[0-9]{2}")))))) + (lambda (str) + (and (irregex-match date-regex str) ; Or irregex-search? + (let-locale ((LC_TIME "POSIX")) + (string->time str spec)))))) + +(define rfc1123-string->time (make-date->string-parser "%a, %d %b %Y %X GMT")) + +;; This is a little more relaxed than strict rfc850 (it allows abbreviated +;; weekdays) - for example Google Web Server outputs cookies in this format. +(define rfc850-string->time + (disjoin (make-date->string-parser "%A, %d-%b-%y %X GMT") + (make-date->string-parser "%a, %d-%b-%Y %X GMT"))) + +(define asctime-string->time (make-date->string-parser "%a %b %d %X %Y")) + +(define http-date-string->time + (disjoin rfc1123-string->time rfc850-string->time asctime-string->time)) + +;; RFC 1123 updates RFC 822's datetime spec +(define (rfc1123-subparser str) + (or (rfc1123-string->time str) + (signal-http-condition + 'rfc1123-subparser + "Error parsing RFC 1123 date/time" (list str) + 'rfc1123-subparser 'value str))) + +(define (rfc850-subparser str) + (or (rfc850-string->time str) + (signal-http-condition + 'rfc850-subparser + "Error parsing RFC850 date/time" (list str) + 'rfc850-subparser 'value str))) + +(define (asctime-subparser str) + (or (asctime-string->time str) + (signal-http-condition + 'asctime-subparser + "Error parsing asctime() date/time" (list str) + 'asctime-subparser 'value str))) + +;; rfc1123-date | rfc850-date | asctime-date +(define (http-date-subparser str) + (or (http-date-string->time str) + (signal-http-condition + 'http-date-subparser + "Error parsing date/time" (list str) + 'http-date-subparser 'value str))) + +;; Change the accuracy of a number to 'digits' number of digits to the +;; right of the decimal point. +(define (chop-number num digits) + (let ((factor (expt 10 digits))) + (/ (round (* num factor)) factor))) + +(define (quality-subparser str) + ;; Anything that's not a number is seen as if the value is missing, hence 1.0 + (let* ((num (or (string->number str) 1.0)) + (imprecise (chop-number num 3))) + (max 0.0 (min 1.0 imprecise)))) + +;; Just put all header strings in a list, so we can pass it on. +;; Make no assumptions about the contents (only value, don't try to parse params) +;; This is different from (multiple (without-params generic-header-parser)) +;; because this does not assume it can split up comma-separated values. +;; It also will ensure that the value is raw, +(define (unknown-header-parser contents) + (list (vector contents 'raw))) + +(define symbol-subparser + (compose string->symbol string-trim-both)) + +(define symbol-subparser-ci + (compose string->symbol string-trim-both string-downcase)) + +(define (natnum-subparser contents) + (let ((num (string->number contents))) + (if num (inexact->exact (max 0 (round num))) 0))) + +(define (host/port-subparser contents) + (let* ((idx (string-index-right contents #\:)) + (portnum (and idx (string->number + (substring/shared contents (add1 idx)))))) + (if (and idx portnum) + (cons (substring/shared contents 0 idx) + (inexact->exact (round portnum))) + (cons contents #f)))) + +; base64 of 128 bit hex digest as per RFC1864 (eg, Content-md5) +(define base64-subparser base64-decode) + +;; This is retarded. The websocket spec (RFC6455) explicitly says the +;; product token must be compared case-insensitively. RFC2817, +;; "upgrading to TLS within HTTP/1.1" doesn't mention anything about +;; case sensitivity, but defines an all-uppercase token. This means +;; that there're going to be servers which accept only "TLS/1.0" +;; as-is. This all just means we can't add the convenience of +;; downcasing and switching to a symbol, pushing the case-(in)sensitive +;; comparison down the the user level, causing more bugs :( +(define (product-subparser contents) + (let* ((idx (string-index contents #\/)) + (version (and idx (substring/shared contents (add1 idx))))) + (if (and idx version) + (cons (substring/shared contents 0 idx) version) + (cons contents #f)))) + +;; bytes <start>-<end>/<total> +(define range-subparser + (let ((range-regex + (irregex '(seq "bytes" (+ space) + (=> start (+ digit)) "-" (=> end (+ digit)) + "/" (=> total (+ digit)))))) + (lambda (s) + (and-let* ((m (irregex-match range-regex s)) + (start (string->number (irregex-match-substring m 'start))) + (end (string->number (irregex-match-substring m 'end))) + (total (string->number (irregex-match-substring m 'total)))) + (list start end total))))) + +;; Accept *just* a filename, not a full path (simply strips directories) +;; This matches the content-disposition recommendation in RFC2616, 19.5.1: +;; "The receiving user agent SHOULD NOT respect any directory path +;; information present in the filename-parm parameter, which is the only +;; parameter believed to apply to HTTP implementations at this time. The +;; filename SHOULD be treated as a terminal component only." +;; This echoes RFC2183 (and RFC1806 which it supersedes), section 2.3: +;; "The receiving MUA SHOULD NOT respect any directory path information +;; that may seem to be present in the filename parameter. The filename +;; should be treated as a terminal component only." +(define (filename-subparser fn) + (let ((base-fn (pathname-strip-directory (string-trim-both fn)))) + (and (not (member base-fn '("" "." ".."))) + (not (string-index base-fn (char-set #\/ #\nul))) + base-fn))) + +;; [W/]<string> +;; This is a full parser, because it needs to be able to distinguish +;; between W/"foo" and "W/foo". If it's preprocessed by the tokenizer +;; both get "normalised" to the same thing: W/foo +;; +;; XXX It could be a good idea if the single/multiple token parsers +;; did not do anything to their contents. If the consuming parsers +;; want tokens, they know how to have it. OTOH, it would mean much +;; more code for all the parsers as they need to tokenize more... +(define (etag-parser contents) + (let ((contents (string-trim-both contents))) + (list (vector + (if (string-prefix? "W/" contents) + `(weak . ,(parse-token contents 2 char-set:whitespace)) + `(strong . ,(parse-token contents 0 char-set:whitespace))) + '())))) + +;; Used for both if-match and if-none-match +;; This is either a wilcard ('*') or several entities +(define (if-match-parser contents) + (let ((contents (string-trim-both contents))) + (if (string=? "*" contents) + (list (vector '* '())) + (let loop ((pos 0) + (etags '())) + (let ((weak (string-prefix? "W/" contents 0 2 pos))) + (receive (etag newpos) + (parse-token contents (+ pos (if weak 2 0)) char-set:whitespace) + (let ((newpos (string-skip contents char-set:whitespace newpos)) + (value (and etag (vector (cons (if weak 'weak 'strong) + etag) '())))) + (if value + (if newpos + (loop newpos (cons value etags)) + (reverse! (cons value etags))) + (reverse! etags))))))))) + +;; ( <product>[/<version>] [<comment>] )+ +;; This parser is a full parser because parse-token cannot handle +;; comments yet... (if a ; is in a comment, it breaks down) +(define software-parser + (let ((char-set:space-or-paren (char-set-union (char-set #\() + char-set:whitespace)) + (char-set:slash-or-paren (char-set #\/ #\())) + (lambda (contents) + (let loop ((start-pos 0) + (products '())) + (let*-values (((product pos) + (parse-token contents start-pos + char-set:slash-or-paren)) + ((version pos2) + (parse-token contents pos ; (add1 pos) + char-set:space-or-paren)) + ((comment pos3) (parse-comment contents pos2)) + ;; Ugh + ((real-version) (and version (not (string-null? version)) (string-trim version #\/)))) + (if product + (loop pos3 (cons (list product real-version comment) products)) + (list (vector (reverse products) '())))))))) + +;;;; MAJOR TODOs +;; RFC1123 mailbox parser - just strings for now +(define mailbox-subparser identity) + +;; Either an entity-tag or a http-date +(define (if-range-parser contents) + (let ((http-date ((with-params http-date-string->time '()) contents))) + (if (get-value http-date) + (list http-date) + (etag-parser contents)))) + +;; Either delta-seconds or http-date +(define retry-after-subparser (disjoin http-date-subparser natnum-subparser)) + +;; Tricky - see 2616 14.45 +;; We probably shouldn't try to do too much parsing here +(define via-parser (multiple identity)) + +;; Tricky - see 2616 14.46 +(define warning-parser split-multi-header) +;;;; END MAJOR TODOs + +(define (key/value-subparser str) + (let ((idx (string-index str #\=))) + (cons (string->symbol (string-take str idx)) (string-drop str (add1 idx))))) + +;; The 'expires' header defined by the Netscape cookie spec contains +;; an embedded comma. RFC 2109 cookies use Max-Age instead. +(define old-style-cookie? + (let ((old-cookie-regex + (irregex '(seq bos (+ (~ #\= #\")) "=" (* (~ #\;)) ";" ; + (* any) (w/nocase "expires") (* space) "=")))) + (lambda (cookie) + (irregex-search old-cookie-regex cookie)))) + +(define (string->number-list str) + (map string->number (string-split str ","))) + +(define (relative-ref/path-only s) + (and-let* ((ref (uri-reference s)) + ((not (uri-host ref))) + ((null? (uri-query ref))) + ((not (uri-fragment ref)))) + ref)) + +;; We're using http-date-subparser for 'expires' instead of rfc1123-subparser +;; (which would be the correct thing to do) because several well-known web +;; server software packages (tested: PHP and Rails) get it wrong. So we +;; will go by the robustness principle and allow any kind of HTTP date. +(define set-cookie-parser + (let ((param-subparsers `((expires . ,http-date-subparser) + (max-age . ,string->number) + (version . ,string->number) + (port . ,string->number-list) + (path . ,relative-ref/path-only))) + (name/value-parser (lambda (str) + (let ((idx (string-index str #\=))) + (cons (string-take str idx) + (string-drop str (add1 idx))))))) + (lambda (contents) + (if (old-style-cookie? contents) + (list ((with-params name/value-parser param-subparsers) contents)) + (map (with-params name/value-parser param-subparsers) + (split-multi-header contents)))))) + +(define cache-control-parser + (let ((splitter (lambda (str) ;; Is this correct? + (map (compose string->symbol string-trim-both) + (string-split str ","))))) + (lambda (contents) + (map + (make-key/value-subparser `((max-age . ,natnum-subparser) + (s-maxage . ,natnum-subparser) + (max-stale . ,natnum-subparser) + (min-fresh . ,natnum-subparser) + (private . ,splitter) + (no-cache . ,splitter))) + (split-multi-header contents))))) + +(define (strict-transport-security-parser contents) + ;; This is ridiculous; there are no parameters because everything + ;; is a parameter (or everything is a value, depending on your point + ;; of view). The header has no main value. For convenience and sanity + ;; we just return an alist as a single value. + (list (vector (parse-params contents 0 `((max-age . ,natnum-subparser))) '()))) + +(define (basic-auth-param-subparser contents pos) + (receive (secret pos) + (parse-token contents pos (char-set #\,)) + (let* ((decoded (base64-decode secret)) + (colon-idx (string-index decoded #\:)) + (user (string-take decoded colon-idx)) + (pass (string-drop decoded (add1 colon-idx)))) + (values `((username . ,user) (password . ,pass)) pos)))) + +(define (digest-auth-param-subparser contents pos) + (parse-params contents pos + `((nc . ,(lambda (n) (string->number n 16))) + (uri . ,uri-reference) + (qop . ,symbol-subparser) + (algorithm . ,symbol-subparser-ci)) + (char-set #\,) (char-set #\, #\=))) + +(define authorization-param-subparsers + (make-parameter `((basic . ,basic-auth-param-subparser) + (digest . ,digest-auth-param-subparser)))) + +(define (authorization-parser contents) + (let loop ((pos 0) + (result '())) + (receive (authtype pos) + (parse-token contents pos char-set:whitespace) + (if (not authtype) + (reverse result) + (let ((authtype (http-name->symbol authtype)) + (default-subparser (lambda (contents pos) + (parse-params contents pos '())))) + (receive (params pos) + ((alist-ref authtype (authorization-param-subparsers) + eq? default-subparser) contents (add1 pos)) + (loop (add1 pos) + (cons (vector authtype params) result)))))))) + +(define (authenticate-parser contents) + (let loop ((pos 0) + (result '())) + (receive (authtype pos) + (parse-token contents pos char-set:whitespace) + (if (not authtype) + (reverse result) + (let ((authtype (http-name->symbol authtype))) + (receive (params pos) + (parse-params contents pos + `((domain . ,(lambda (s) + (map uri-reference + (string-split s)))) + (qop . ,(lambda (s) + (map (compose symbol-subparser + string-trim) + (string-split s ",")))) + (algorithm . ,symbol-subparser-ci) + (stale . ,(lambda (s) + (string-ci=? (string-trim s) + "TRUE")))) + (char-set #\,) (char-set #\, #\=)) + (loop (add1 pos) (cons (vector authtype params) result)))))))) + +(define (pragma-parser contents) + (map (make-key/value-subparser `()) (split-multi-header contents))) + +(define (te-parser contents) + (map (make-key/value-subparser `((q . ,quality-subparser))) + (split-multi-header contents))) + +;; Cookie headers are also braindead: there can be several cookies in one header, +;; separated by either commas or semicolons. The only way to distinguish a +;; new cookie from a parameter of the current cookie is the dollar in front +;; of all parameter names. +;; Also, there's a $Version attribute that prepends all cookies, which is +;; considered to apply to all cookies that follow. +(define (cookie-parser contents) + ;; Local version of parse-params that stops when param without $ is seen + (define (read-params start-pos) + (let next-param ((start-pos start-pos) + (params '())) + (receive (attribute-name pos) + (parse-token contents start-pos (char-set #\; #\=)) + (if (or (not attribute-name) ;; Still reading params? + (not (char=? (string-ref attribute-name 0) #\$))) + (values (reverse! params) start-pos) + (let ((attrib (http-name->symbol (string-drop attribute-name 1)))) + (if (and (< pos (string-length contents)) + (char=? (string-ref contents pos) #\=)) + (receive (value pos) + (parse-token contents (add1 pos) (char-set #\;)) + (let ((value (case attrib + ((version port) (string->number (or value ""))) + ((path) (relative-ref/path-only (or value ""))) + (else value)))) + (next-param (add1 pos) (cons (cons attrib value) params)))) + ;; Missing value is interpreted as "present", so #t + (next-param (add1 pos) (cons (cons attrib #t) params)))))))) + (receive (global-params pos) + (read-params 0) + (let loop ((cookies '()) + (pos pos)) + (let*-values (((name pos) (parse-token contents pos (char-set #\= #\;))) + ((val pos) (parse-token contents (add1 pos) (char-set #\;)))) + (if (or (not name) (not val)) + (reverse! cookies) + (receive (local-params pos) + (read-params (add1 pos)) + (loop (cons (vector (cons name val) + (append! local-params global-params)) + cookies) (add1 pos)))))))) + +;;; Unparsers ;;; +(define (unparse-params params unparsers #!key + (separator "; ") (grammar 'prefix) + (keyword-unparser ->string) + (value-unparser unparse-token)) + (let loop ((params params) + (results '())) + (if (null? params) + (string-join (reverse results) separator grammar) + (let* ((name (caar params)) + (val (cdar params)) + (str (case val + ;; #t means param is present (no value) + ((#t) (keyword-unparser name)) + ;; #f means param is missing + ((#f) #f) + (else (let* ((unparser (assq name unparsers)) + (unparsed-val (if unparser + ((cdr unparser) val) + val))) + (string-append (keyword-unparser name) "=" + (value-unparser unparsed-val))))))) + (loop (cdr params) (if str (cons str results) results)))))) + +(define must-be-quoted-chars (char-set-adjoin char-set:iso-control #\")) + +(define (quote-string string) + (reverse-list->string + (cons #\" + (string-fold (lambda (c result) + (cond ((char=? c #\newline) + (signal-http-condition + 'quote-string + (conc "Unencoded newline in header contents! " + "Please encode the newline in a way " + "appropriate for this header") + (list string) 'unencoded-header 'value string)) + ((char-set-contains? must-be-quoted-chars c) + (cons c (cons #\\ result))) + (else (cons c result)))) + '(#\") + string)))) + +;; Unparse a value as token, converting it to a string first +(define unparse-token + (let ((default-trigger-chars (char-set-union must-be-quoted-chars + (char-set #\= #\; #\,) + char-set:blank))) + (lambda (token #!optional separator-chars) + (let ((trigger-quoting-chars + (if separator-chars + (char-set-union must-be-quoted-chars separator-chars char-set:blank) + default-trigger-chars)) + (token-string (->string token))) + (if (string-any trigger-quoting-chars token-string) + (quote-string token-string) + token-string))))) + +(define (unparse-etag etag) + (string-append + (if (eq? 'weak (car etag)) "W/" "") + (quote-string (cdr etag)))) ;; Etags are _always_ quoted + +;; There's no need to make a specific header unparser for every header type. +;; Usually, the Scheme value representing a header can unambiguously be +;; unparsed into a header just by checking its type. +(define (default-header-unparser header-contents) + (let loop ((headers (reverse header-contents)) + (result '())) + (if (null? headers) + (list (string-join result ", ")) + (let* ((header (car headers)) + (contents (get-value header)) + (value (cond + ((pair? contents) ; alist? + (let ((attribute (symbol->http-name (car contents)))) + (if (eq? (cdr contents) #t) + (unparse-token attribute) + (string-append attribute "=" + (unparse-token (cdr contents)))))) + ((uri-reference? contents) + (unparse-token (uri->string contents) (char-set))) + (else (unparse-token contents))))) + (loop (cdr headers) + (cons + (string-append + value (unparse-params (get-params header) '())) + result)))))) + +;; RFC2616 19.5.1 says that the "filename" attribute _must_ be quoted. +;; It's a bit annoying that our API currently can't specify for particular +;; attributes that only those must be unparsed specially, so we quote _all_ +;; attributes (which, strictly speaking, is always allowed for tokens) unless +;; otherwise specified by a hack (when the value is prefixed by RAW). +;; This may be dangerous or wrong, if a server doesn't accept quoted "name" +;; attributes, for example. Not too likely since names can contain spaces etc. +(define (content-disposition-unparser header-contents) + (let* ((type (get-value (car header-contents))) + (RAW (list 'raw)) + (unparser (lambda (x) (if (and (pair? x) (eq? RAW (car x))) + (cdr x) + (quote-string (->string x)))))) + (list (conc (unparse-token type) + (unparse-params (get-params (car header-contents)) + `((filename . ,pathname-strip-directory) + (size . ,(lambda (x) (cons RAW (number->string x)))) + (creation-date . ,rfc1123-time->string) + (modification-date . ,rfc1123-time->string) + (read-date . ,rfc1123-time->string)) + value-unparser: unparser))))) + +(define (etag-unparser header-contents) + (list (unparse-etag (get-value (car header-contents))))) + +(define (if-match-unparser header-contents) + (let loop ((headers (reverse header-contents)) + (result '())) + (cond + ((null? headers) (list (string-join result ", "))) + ((eq? '* (get-value (car headers))) '("*")) ;; We're done. * means anything + (else (loop (cdr headers) + (cons (unparse-etag (get-value (car headers))) result)))))) + +(define (host/port-unparser header-contents) + (let ((contents (get-value (car header-contents)))) + ;; XXX: urlencode? + (if (cdr contents) + (list (conc (car contents) ":" (cdr contents))) + (list (car contents))))) + +;; Handled specially because cookie value is not an alist but a cons of strings +(define (set-cookie-unparser header-contents) + (map (lambda (header) + (let* ((unparsed-params + (map (lambda (p) + (if (eq? (cdr p) #t) + (unparse-token (symbol->http-name (car p))) + (string-append + (unparse-token (symbol->http-name (car p))) + "=" + (cond + ((and (eq? (car p) 'port) (pair? (cdr p))) + (string-join + (map number->string (cdr p)) ",")) + ((and (eq? (car p) 'path) (cdr p)) + (uri->string (cdr p))) + ((eq? (car p) 'expires) + (let-locale ((LC_TIME "POSIX")) + (time->string (cdr p) "%A, %d-%b-%y %X GMT"))) + (else (unparse-token (cdr p))))))) + ;; Remove #f values + (filter (lambda (p) (cdr p)) (get-params header)))) + (cookie (get-value header)) + (unparsed-cookie (string-append (car cookie) "=" + (unparse-token (cdr cookie))))) + (string-join (cons unparsed-cookie unparsed-params) "; "))) + header-contents)) + +(define (cookie-unparser header-contents) + (let loop ((prefix "") + (headers (reverse header-contents)) + (result '())) + (if (null? headers) + (list (conc prefix (string-join result "; "))) + (let* ((version (get-param 'version (car headers) #f)) + (params (alist-delete 'version (get-params (car headers)))) + (unparsed-params + (map (lambda (p) + (if (eq? (cdr p) #t) + (unparse-token (conc "$" (symbol->http-name (car p)))) + (string-append + (unparse-token + (conc "$" (symbol->http-name (car p)))) + "=" + (cond + ((and (eq? (car p) 'port) (pair? (cdr p))) + (string-join + (map number->string (cdr p)) ",")) + ((and (eq? (car p) 'path) (cdr p)) + (uri->string (cdr p))) + (else (unparse-token (cdr p))))))) + ;; Remove #f values + (filter (lambda (p) (cdr p)) params))) + (cookie (get-value (car headers))) + (unparsed-cookie (string-append (car cookie) "=" + (unparse-token (cdr cookie))))) + ;; Doing it like this means we can't unparse cookies of + ;; mixed versions... + (loop (if version (sprintf "$Version: ~A; " version) prefix) + (cdr headers) + (cons (string-join (cons unparsed-cookie unparsed-params) "; ") + result)))))) + +(define (software-unparser header-contents) + (list + (string-join + (map (lambda (content) + (conc (unparse-token (first content)) + (if (second content) + (conc "/" (unparse-token (second content))) + "") + (if (third content) + (conc " (" (third content) ")") + ""))) + (get-value (car header-contents)))))) + +(define (product-unparser header-contents) + (list + (string-join + (map (lambda (header) + (let* ((default-unparser ; Not great, but better than nothing + (lambda (params) (unparse-params params '()))) + (product+version (get-value header))) + (conc (unparse-token (car product+version)) + (if (cdr product+version) + (conc "/" (unparse-token (cdr product+version))) + "")))) + header-contents) + ", "))) + +(define (via-unparser header-contents) + (list (string-join (map get-value header-contents) ", "))) + +(define (rfc1123-unparser header-contents) + (list (rfc1123-time->string (get-value (car header-contents))))) + +(define-constant short-weekdays `#("Sun" "Mon" "Tue" "Wed" "Thu" "Fri" "Sat")) +(define-constant short-months `#("Jan" "Feb" "Mar" "Apr" "May" "Jun" "Jul" "Aug" "Sep" "Oct" "Nov" "Dec")) + +(define (rfc1123-time->string time) + (let ((padded (lambda (n) + (if (fx< n 10) + (string-append "0" (number->string n)) + (number->string n)))) + (secs (vector-ref time 0)) + (mins (vector-ref time 1)) + (hours (vector-ref time 2)) + (mday (vector-ref time 3)) + (month (vector-ref time 4)) + (year (vector-ref time 5)) + (wday (vector-ref time 6))) + (string-append (vector-ref short-weekdays wday) ", " + (padded mday) " " (vector-ref short-months month) " " + (number->string (+ 1900 year)) " " (padded hours) ":" + (padded mins) ":" (padded secs) " GMT"))) + +(define (basic-auth-param-subunparser params) + (let ((user (alist-ref 'username params)) + (pass (alist-ref 'password params))) + (if (string-index user #\:) + (signal-http-condition + 'basic-auth-param-subunparser + "Colon detected in username. This is not supported by basic auth!" + (list user) 'username-with-colon 'value user) + (base64-encode (string-append user ":" pass))))) + +(define (digest-auth-param-subunparser params) + (unparse-params params + `((nc . ,identity) ;; see below + (uri . ,uri->string) + (qop . ,symbol->string) + (algorithm . ,symbol->string)) + separator: ", " + grammar: 'infix + keyword-unparser: symbol->string + value-unparser: + ;; Nasty exception for "nc", which is an unquoted + ;; padded integer... + (lambda (x) + (if (number? x) + (string-pad (number->string x 16) 8 #\0) + (quote-string (->string x)))))) + +(define authorization-param-subunparsers + (make-parameter `((basic . ,basic-auth-param-subunparser) + (digest . ,digest-auth-param-subunparser)))) + +(define (authorization-unparser header-contents) + (map (lambda (header) + (let* ((default-unparser ; Not great, but better than nothing + (lambda (params) (unparse-params params '()))) + (auth-scheme (get-value header)) + (unparser (alist-ref auth-scheme + (authorization-param-subunparsers) + eq? default-unparser))) + (string-append + (symbol->http-name auth-scheme) " " + (->string (unparser (get-params header)))))) + header-contents)) + +(define (authenticate-unparser header-contents) + (map (lambda (header) + (string-append + (symbol->http-name (get-value header)) + " " + (let* ((old (get-params header)) + ;; A quick hack to get presence of "stale" + ;; coded as TRUE instead of value-less param + ;; false value is coded by its absense + (params (if (alist-ref 'stale old) + (alist-update! 'stale 'TRUE old) + (alist-delete 'stale old)))) + (unparse-params params + `((domain . ,(lambda (u) + (string-join + (map uri->string u)))) + (qop . ,(lambda (q) + (string-join + (map symbol->string q) + ","))) + (algorithm . ,symbol->string)) + separator: ", " + grammar: 'infix + keyword-unparser: symbol->string + value-unparser: + (lambda (x) + (if (eq? x 'TRUE) + "TRUE" + (quote-string (->string x)))))))) + header-contents)) + +(define (strict-transport-security-unparser header-contents) + (map (lambda (header) ; Should only be one header; the rest must be ignored! + (unparse-params (get-value header) '() + grammar: 'infix + keyword-unparser: ; Ugly but neccessary + (lambda (k) + (if (eq? k 'includesubdomains) + "includeSubDomains" + (->string k))))) + header-contents)) |