diff options
| -rw-r--r-- | benchmarks/parsers.scm | 60 | ||||
| -rw-r--r-- | benchmarks/run.scm | 7 | ||||
| -rw-r--r-- | benchmarks/unparsers.scm | 54 | ||||
| -rw-r--r-- | header-parsers.scm | 994 | ||||
| -rw-r--r-- | intarweb.egg | 11 | ||||
| -rw-r--r-- | intarweb.scm | 1055 | ||||
| -rw-r--r-- | tests/run.scm | 1243 | 
7 files changed, 3424 insertions, 0 deletions
| diff --git a/benchmarks/parsers.scm b/benchmarks/parsers.scm new file mode 100644 index 0000000..e4612e2 --- /dev/null +++ b/benchmarks/parsers.scm @@ -0,0 +1,60 @@ +(import intarweb chicken.string chicken.time chicken.time.posix +        chicken.port srfi-13) + +(define (mk-headers . strs) +  (string-append (string-join strs "\r\n") "\r\n\r\n")) + +(begin (newline) +       (print "---  Request parsing ---") +       (begin (print "Parsing a minimal HTTP/1.0 request many times") +              (let* ((str (mk-headers "GET / HTTP/1.0" +                                      "Host: 127.0.0.1:8080" +                                      "User-Agent: ApacheBench/2.3")) +                     (p (open-input-string str))) +                (time (do ((i 0 (add1 i))) +                          ((= i  100000)) +                        (##sys#setslot p 10 0) ; rewind +                        (read-request p))))) + +       (begin (print "Parsing a realistic HTTP/1.1 request many times") +              (let* ((str (mk-headers "GET /foo HTTP/1.1" +                                      "Host: localhost:8080" +                                      "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:24.0) Gecko/20140722 Firefox/24.0 Iceweasel/24.7.0" +                                      "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" +                                      "Accept-Language: en-US,en;q=0.5" +                                      "Accept-Encoding: gzip, deflate" +                                      "Connection: keep-alive")) +                     (p (open-input-string str))) +                (time (do ((i 0 (add1 i))) +                          ((= i  100000)) +                        (##sys#setslot p 10 0) ; rewind +                        (read-request p)))))) + +(begin (newline) +       (print "---  Response parsing ---") +       (begin (print "Parsing a minimal HTTP/1.0 response many times") +              (let* ((str (mk-headers "HTTP/1.0 200 OK" +                                      "Content-Length: 10")) +                     (p (open-input-string str))) +                (time (do ((i 0 (add1 i))) +                          ((= i  100000)) +                        (##sys#setslot p 10 0) ; rewind +                        (read-response p))))) + +       (begin (newline) +              (print "Parsing a realistic HTTP/1.1 response many times") +              (let* ((str (mk-headers +                           "HTTP/1.1 404 Not Found" +                           "Date: Tue, 19 Aug 2014 19:14:24 GMT" +                           "Server: Apache" +                           "Vary: Accept-Encoding" +                           "Content-Encoding: gzip" +                           "Content-Length: 176" +                           "Keep-Alive: timeout=15, max=100" +                           "Connection: Keep-Alive" +                           "Content-Type: text/html; charset=iso-8859-1")) +                     (p (open-input-string str))) +                (time (do ((i 0 (add1 i))) +                          ((= i  100000)) +                        (##sys#setslot p 10 0) ; rewind +                        (read-response p)))))) diff --git a/benchmarks/run.scm b/benchmarks/run.scm new file mode 100644 index 0000000..c7d45c8 --- /dev/null +++ b/benchmarks/run.scm @@ -0,0 +1,7 @@ +(print "Unparsers:") +(print "==========\n") +(load "unparsers") + +(print "\nParsers:") +(print "==========\n") +(load "parsers") diff --git a/benchmarks/unparsers.scm b/benchmarks/unparsers.scm new file mode 100644 index 0000000..b8db0c8 --- /dev/null +++ b/benchmarks/unparsers.scm @@ -0,0 +1,54 @@ +(import chicken.port chicken.time chicken.time.posix intarweb uri-common) + +(define null-output-port +  (make-output-port void void)) + +(begin (newline) +       (print "---  Response unparsing ---") +       (begin (print "Unparsing a minimal HTTP/1.1 response many times") +              (let ((response (make-response port: null-output-port))) +                (time (do ((i 0 (add1 i))) +                          ((= i 100000)) +                        (write-response response))))) + +       (begin (print "Unparsing a realistic HTTP/1.1 response many times") +              (let ((response +                     (make-response +                      port: null-output-port +                      headers: (headers +                                `((content-type text/css) +                                  (etag (strong . "1234-0123456789")) +                                  (content-length 1234) +                                  (last-modified #(,(seconds->utc-time (current-seconds)) ())) +                                  (date #(,(seconds->utc-time (current-seconds)) ()))))))) +                (time (do ((i 0 (add1 i))) +                          ((= i 100000)) +                        (write-response response)))))) + +(begin (newline) +       (print "---  Request unparsing ---") +       (begin (print "Unparsing a minimal HTTP/1.1 request many times") +              (let ((request +                     (make-request port: null-output-port))) +                (time (do ((i 0 (add1 i))) +                          ((= i 100000)) +                        (write-request request))))) + +       (begin (print "Unparsing a realistic HTTP/1.1 request many times") +              (let ((request +                     (make-request +                      port: null-output-port +                      uri: (uri-reference "http://www.call-cc.org/test.example") +                      headers: (headers +                                `((user-agent (("Mozilla" "5.0" +                                                "X11; U; NetBSD amd64; en-US; rv:1.9.0.3") +                                               ("Gecko" "2008110501" #f) +                                               ("Minefield" "3.0.3" #f))) +                                  (host ("example.com" . 8080)) +                                  (accept text/html application/xhtml+xml +                                          #(application/xml ((q . 0.9))) #(*/* ((q . 0.8)))) +                                  (accept-language en-US #(en ((q . 0.5)))) +                                  (accept-encoding gzip deflate)))))) +                (time (do ((i 0 (add1 i))) +                          ((= i 100000)) +                        (write-request request)))))) diff --git a/header-parsers.scm b/header-parsers.scm new file mode 100644 index 0000000..89edeae --- /dev/null +++ b/header-parsers.scm @@ -0,0 +1,994 @@ +;;;; Header value accessor procedures + +;; Get the raw contents of a header +(define (header-contents name headers #!optional default) +  (cond ((assq name (headers-v headers)) => cdr) +        (else default))) + +;; Get all values of a header +(define (header-values header-name headers) +  (map (cut vector-ref <> 0) (header-contents header-name headers '()))) + +;; Get the value of a header, assuming it has only one value +(define (header-value header-name headers #!optional default) +  (let ((contents (header-contents header-name headers '()))) +    (if (null? contents) +        default +        (get-value (car contents))))) + +;; Get the parameters of a header, assuming it has only one value +(define (header-params header-name headers) +  (let ((contents (header-contents header-name headers '()))) +    (if (null? contents) +        '() +        (get-params (car contents))))) + +;; Get a specific parameter of a header, assuming it has only one value +(define (header-param param-name header-name headers #!optional default) +  (let ((params (header-params header-name headers))) +    (cond ((not (pair? params)) default) +          ((assq param-name params) => cdr) +          (else default)))) + +;; Get the value from one header entry +(define get-value (cut vector-ref <> 0)) +;; Fast local version +(define-compiler-syntax get-value +  (syntax-rules () +    ((_ header-entry) +     (vector-ref header-entry 0)))) + +;; Get all params from one header entry +(define get-params (cut vector-ref <> 1)) +;; Fast local version +(define-compiler-syntax get-params +  (syntax-rules () +    ((_ header-entry) +     (vector-ref header-entry 1)))) + +;; Get one specific parameter from one header entry +(define (get-param param contents #!optional (default #f)) +  (let ((params (vector-ref contents 1))) +    (cond ((not (pair? params)) default) +          ((assq param params) => cdr) +          (else default)))) + +(define (get-no-newline-value header) +  (let ((v (get-value header))) +    (when (string-index v #\newline) +      ;; There's duplication with quote-string error here... +      (signal-http-condition +       'raw-value +       (conc "Unencoded newline in header contents! " +             "Please encode the newline in a way " +             "appropriate for this header") +       (list string) 'unencoded-header 'value string)) +    v)) + +;;;; Header parsers + +(define (single subparser #!optional (parameter-subparsers '())) +  (lambda (contents) +    (list ((with-params subparser parameter-subparsers) contents)))) + +(define (multiple subparser #!optional (parameter-subparsers '())) +  (lambda (contents) +    (map (with-params subparser parameter-subparsers) +         (split-multi-header contents)))) + +;; RFC 2616 4.2 says multi-headers are a comma-separated list of tokens +(define (split-multi-header value) +  (let ((len (string-length value))) +    (let loop ((result '()) +               (start-pos 0)   ; Where the current header value starts +               (search-pos 0)) ; Where the searching starts +      (or (and-let* (((< search-pos len)) +                     (pos (string-index value (char-set #\, #\") search-pos))) +            (if (char=? #\, (string-ref value pos)) +                (loop (cons (string-copy value start-pos pos) result) (add1 pos) (add1 pos)) +                (let ((end-pos (escaped-string-end value (add1 pos) (char-set #\")))) +                  (loop result start-pos (add1 end-pos))))) +          (reverse (cons (string-drop value start-pos) result)))))) + +;; Remove all escape characters from the input, recognising "escaped escapes" +(define (unescape str) +  (let ((last-char (sub1 (string-length str)))) +    (let loop ((result "") +               (start-pos 0)) +      (or (and-let* ((pos (string-index str #\\ start-pos))) +            (if (= pos last-char) +                (string-append result (string-copy str start-pos)) +                (loop (string-append result (string-copy str start-pos pos) +                                     (string-copy str (add1 pos) (+ pos 2))) +                      (+ pos 2)))) +          (string-append result (string-copy str start-pos)))))) + +;; Find a matching endpoint for a token, ignoring escaped copies of the token +(define (escaped-string-end str start stop-char-set) +  (let ((len (string-length str))) +    (let loop ((start start)) +      (let ((pos (string-index str (char-set-adjoin stop-char-set #\\) start))) +        (if pos +            (if (char=? #\\ (string-ref str pos)) +                ;; Escaped matching closing symbol +                (if (= len (add1 pos)) +                    pos +                    (loop (+ pos 2))) +                ;; Reached the matching closing symbol +                pos) +            len))))) ; No matching closing symbol?  "Insert" it at the end + +;; Try to parse a token, starting at the provided offset, up until the +;; char-set where we should stop.  Returns two values: the token or #f if +;; there is no token left, and the position on which the token ends. +(define (parse-token value start-pos +                     #!optional +                     (stop-char-set (char-set-adjoin char-set:whitespace #\,))) +  (if (>= start-pos (string-length value)) +      (values #f start-pos) +      (let ((stop (char-set-adjoin stop-char-set #\"))) +        (let ((pos (string-index value stop start-pos))) +          (if pos +              (if (not (char=? #\" (string-ref value pos))) +                  (values (string-trim-both +                           value char-set:whitespace start-pos pos) +                          pos) ; Stop-char found, but no quoting +                  (let ((end-pos (escaped-string-end value (add1 pos) +                                                     (char-set #\")))) +                    ;; Found the double quote? Recurse on the remainder +                    (receive (rest final-pos) +                      (parse-token value (add1 end-pos) stop-char-set) +                      (values (string-append +                               (string-trim-both +                                value char-set:whitespace start-pos pos) +                               (if (= pos end-pos) +                                   (unescape (string-copy value (add1 pos))) +                                   (unescape (string-copy value (add1 pos) end-pos))) +                               (or rest "")) +                              final-pos)))) +              ;; Nothing found?  Then the remainder of the string is the token +              (values (string-trim-both +                       value char-set:whitespace start-pos) +                      (string-length value))))))) + +;; Comments are a bit like tokens, except they can be nested +(define (parse-comment value start-pos) +  (let* ((len (string-length value)) +         (nospace-pos (and (< start-pos len) +                           (string-skip value char-set:whitespace start-pos)))) +    (if (and nospace-pos (char=? (string-ref value nospace-pos) #\()) +        (let loop ((result "") +                   (start-pos (add1 nospace-pos))) +          (if (>= start-pos len) +              (values result len) +              (let ((pos (string-index value (char-set #\" #\( #\)) start-pos))) +                (if pos +                    (cond ((char=? #\( (string-ref value pos)) ; Nested comment +                           (receive (nested end-pos) +                               (parse-comment value pos) +                             (loop (sprintf "~A~A(~A)" +                                     result +                                     (string-copy value start-pos pos) +                                     nested) +                                   (add1 end-pos)))) +                          ((char=? #\" (string-ref value pos)) +                           (let lp ((end (add1 pos)) +                                    (c (string-ref value (add1 pos))) +                                    (res '())) +                             (cond ((char=? #\" c) +                                    (loop (string-append +                                           result +                                           (reverse-list->string res)) +                                          (add1 end))) +                                   ((char=? #\\ c) +                                    (lp (+ end 2) +                                        (string-ref value (+ end 2)) +                                        (cons (string-ref value (add1 end)) +                                              res))) +                                   (else +                                    (lp (add1 end) +                                        (string-ref value (add1 end)) +                                        (cons c res)))))) +                          ;; Else it's a ) +                          (else (values (conc result (string-copy value start-pos pos)) (add1 pos)))) +                    ;; Nothing found?  Then the remainder of the string is the token +                    (values (conc result (string-copy value start-pos)) +                            (string-length value)))))) +        ;; No (? Then fail to match the 'comment' +        (values #f start-pos)))) + +(define (parse-params string start-pos param-subparsers #!optional (stop-set (char-set #\;)) (separator-or-stop-set (char-set #\; #\=))) +  (let loop ((start-pos start-pos) +             (params '())) +    (unless separator-or-stop-set +      (error "The parse-params API has changed a bit for performance reasons: if you pass the optional stop char set, you need to pass one more argument which is the same char-set extended with an equals-sign")) +    (receive (attribute-name pos) +        (parse-token string start-pos separator-or-stop-set) +      (if attribute-name +          (let ((attribute (http-name->symbol attribute-name))) +            (if (and (< pos (string-length string)) +                     (char=? (string-ref string pos) #\=)) +                (receive (value pos) +                    (parse-token string (add1 pos) stop-set) +                  ;; In case of no value ("foo="), use the empty string as value +                  (let ((value ((alist-ref attribute param-subparsers +                                           eq? identity) +                                (or value "")))) +                    (loop (add1 pos) (cons (cons attribute value) params)))) +                ;; Missing value is interpreted as "present", +                ;; so #t. If not present, it's #f when looking it up +                (loop (add1 pos) (cons (cons attribute #t) params)))) +          (values (reverse params) pos))))) + +(define (parse-value+params string start-pos value-subparser param-subparsers) +  (receive (value pos) +    (parse-token string start-pos (char-set #\;)) +    (if (not value) +        (values #f pos) ;; XXX this is wrong and not expected by the caller! +        (receive (params pos) +          (parse-params string (add1 pos) param-subparsers) +          (values (vector (value-subparser value) params) pos))))) + +(define (with-params value-subparser parameter-subparsers) +  (lambda (entry) +    (receive (type+params pos) +      (parse-value+params entry 0 value-subparser parameter-subparsers) +      type+params))) + +(define (make-key/value-subparser key/value-subparsers) +  (lambda (k/v) +    ;; We're abusing parse-params here to read value +    ;; instead of params.  This is weird, but it works :) +    (receive (key+value pos) +      (parse-params k/v 0 key/value-subparsers) +      (vector (car key+value) '())))) ;; There's only one key/value pair + +(foreign-declare "#include <locale.h>") + +(define-foreign-variable LC_TIME int) + +(define setlocale (foreign-lambda c-string setlocale int c-string)) + +(define-syntax let-locale +  (syntax-rules () +    ((let-locale ((cat val) ...) body ...) +     (let ((backup '())) +       (dynamic-wind +           (lambda () (set! backup `((cat . ,(setlocale cat val)) ...))) +           (lambda () body ...) +           (lambda () (setlocale cat (alist-ref 'cat backup)) ...)))))) + +(define (make-date->string-parser spec) +  (let ((date-regex +         (irregex +          (string-translate* +           spec +           '((" "  . " +")   ; Any number of spaces is very permissive +             ("%a" . "(Sun|Mon|Tue|Wed|Thu|Fri|Sat)") +             ("%A" . "(Sunday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday)") +             ("%d" . "[0-9]{1,2}") +             ("%b" . "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)") +             ("%y" . "[0-9]{1,2}") +             ("%Y" . "[0-9]{4}") +             ("%X" . "[0-9]{2}:[0-9]{2}:[0-9]{2}")))))) +    (lambda (str) +      (and (irregex-match date-regex str) ; Or irregex-search? +           (let-locale ((LC_TIME "POSIX")) +                       (string->time str spec)))))) + +(define rfc1123-string->time (make-date->string-parser "%a, %d %b %Y %X GMT")) + +;; This is a little more relaxed than strict rfc850 (it allows abbreviated +;; weekdays) - for example Google Web Server outputs cookies in this format. +(define rfc850-string->time +  (disjoin (make-date->string-parser "%A, %d-%b-%y %X GMT") +           (make-date->string-parser "%a, %d-%b-%Y %X GMT"))) + +(define asctime-string->time (make-date->string-parser "%a %b %d %X %Y")) + +(define http-date-string->time +  (disjoin rfc1123-string->time rfc850-string->time asctime-string->time)) + +;; RFC 1123 updates RFC 822's datetime spec +(define (rfc1123-subparser str) +  (or (rfc1123-string->time str) +      (signal-http-condition +       'rfc1123-subparser +       "Error parsing RFC 1123 date/time" (list str) +       'rfc1123-subparser 'value str))) + +(define (rfc850-subparser str) +  (or (rfc850-string->time str) +      (signal-http-condition +       'rfc850-subparser +       "Error parsing RFC850 date/time" (list str) +       'rfc850-subparser 'value str))) + +(define (asctime-subparser str) +  (or (asctime-string->time str) +      (signal-http-condition +       'asctime-subparser +       "Error parsing asctime() date/time" (list str) +       'asctime-subparser 'value str))) + +;; rfc1123-date | rfc850-date | asctime-date +(define (http-date-subparser str) +  (or (http-date-string->time str) +      (signal-http-condition +       'http-date-subparser +       "Error parsing date/time" (list str) +       'http-date-subparser 'value str))) + +;; Change the accuracy of a number to 'digits' number of digits to the +;; right of the decimal point. +(define (chop-number num digits) +  (let ((factor (expt 10 digits))) +    (/ (round (* num factor)) factor))) + +(define (quality-subparser str) +  ;; Anything that's not a number is seen as if the value is missing, hence 1.0 +  (let* ((num       (or (string->number str) 1.0)) +         (imprecise (chop-number num 3))) +    (max 0.0 (min 1.0 imprecise)))) + +;; Just put all header strings in a list, so we can pass it on. +;; Make no assumptions about the contents (only value, don't try to parse params) +;; This is different from (multiple (without-params generic-header-parser)) +;; because this does not assume it can split up comma-separated values. +;; It also will ensure that the value is raw, +(define (unknown-header-parser contents) +  (list (vector contents 'raw))) + +(define symbol-subparser +  (compose string->symbol string-trim-both)) + +(define symbol-subparser-ci +  (compose string->symbol string-trim-both string-downcase)) + +(define (natnum-subparser contents) +  (let ((num (string->number contents))) +    (if num (inexact->exact (max 0 (round num))) 0))) + +(define (host/port-subparser contents) +  (let* ((idx (string-index-right contents #\:)) +         (portnum (and idx (string->number +                            (substring/shared contents (add1 idx)))))) +    (if (and idx portnum) +        (cons (substring/shared contents 0 idx) +              (inexact->exact (round portnum))) +        (cons contents #f)))) + +; base64 of 128 bit hex digest as per RFC1864 (eg, Content-md5) +(define base64-subparser base64-decode) + +;; This is retarded.  The websocket spec (RFC6455) explicitly says the +;; product token must be compared case-insensitively.  RFC2817, +;; "upgrading to TLS within HTTP/1.1" doesn't mention anything about +;; case sensitivity, but defines an all-uppercase token.  This means +;; that there're going to be servers which accept only "TLS/1.0" +;; as-is.  This all just means we can't add the convenience of +;; downcasing and switching to a symbol, pushing the case-(in)sensitive +;; comparison down the the user level, causing more bugs :( +(define (product-subparser contents) +  (let* ((idx (string-index contents #\/)) +         (version (and idx (substring/shared contents (add1 idx))))) +    (if (and idx version) +        (cons (substring/shared contents 0 idx) version) +        (cons contents #f)))) + +;; bytes <start>-<end>/<total> +(define range-subparser +  (let ((range-regex +         (irregex '(seq "bytes" (+ space) +                        (=> start (+ digit)) "-" (=> end (+ digit)) +                        "/" (=> total (+ digit)))))) +    (lambda (s) +      (and-let* ((m (irregex-match range-regex s)) +                 (start (string->number (irregex-match-substring m 'start))) +                 (end (string->number (irregex-match-substring m 'end))) +                 (total (string->number (irregex-match-substring m 'total)))) +        (list start end total))))) + +;; Accept *just* a filename, not a full path (simply strips directories) +;; This matches the content-disposition recommendation in RFC2616, 19.5.1: +;; "The receiving user agent SHOULD NOT respect any directory path +;;  information present in the filename-parm parameter, which is the only +;;  parameter believed to apply to HTTP implementations at this time. The +;;  filename SHOULD be treated as a terminal component only." +;; This echoes RFC2183 (and RFC1806 which it supersedes), section 2.3: +;; "The receiving MUA SHOULD NOT respect any directory path information +;;  that may seem to be present in the filename parameter.  The filename +;;  should be treated as a terminal component only." +(define (filename-subparser fn) +  (let ((base-fn (pathname-strip-directory (string-trim-both fn)))) +    (and (not (member base-fn '("" "." ".."))) +         (not (string-index base-fn (char-set #\/ #\nul))) +         base-fn))) + +;; [W/]<string> +;; This is a full parser, because it needs to be able to distinguish +;; between W/"foo" and "W/foo". If it's preprocessed by the tokenizer +;; both get "normalised" to the same thing: W/foo +;; +;; XXX It could be a good idea if the single/multiple token parsers +;; did not do anything to their contents.  If the consuming parsers +;; want tokens, they know how to have it. OTOH, it would mean much +;; more code for all the parsers as they need to tokenize more... +(define (etag-parser contents) +  (let ((contents (string-trim-both contents))) +    (list (vector +           (if (string-prefix? "W/" contents) +               `(weak . ,(parse-token contents 2 char-set:whitespace)) +               `(strong . ,(parse-token contents 0 char-set:whitespace))) +           '())))) + +;; Used for both if-match and if-none-match +;; This is either a wilcard ('*') or several entities +(define (if-match-parser contents) +  (let ((contents (string-trim-both contents))) +    (if (string=? "*" contents) +        (list (vector '* '())) +        (let loop ((pos 0) +                   (etags '())) +          (let ((weak (string-prefix? "W/" contents 0 2 pos))) +            (receive (etag newpos) +              (parse-token contents (+ pos (if weak 2 0)) char-set:whitespace) +              (let ((newpos (string-skip contents char-set:whitespace newpos)) +                    (value (and etag (vector (cons (if weak 'weak 'strong) +                                                   etag) '())))) +                (if value +                    (if newpos +                        (loop newpos (cons value etags)) +                        (reverse! (cons value etags))) +                    (reverse! etags))))))))) + +;; ( <product>[/<version>] [<comment>] )+ +;; This parser is a full parser because parse-token cannot handle +;; comments yet... (if a ; is in a comment, it breaks down) +(define software-parser +  (let ((char-set:space-or-paren (char-set-union (char-set #\() +                                                 char-set:whitespace)) +        (char-set:slash-or-paren (char-set #\/ #\())) +   (lambda (contents) +     (let loop ((start-pos 0) +                (products '())) +       (let*-values (((product pos) +                      (parse-token contents start-pos +                                   char-set:slash-or-paren)) +                     ((version pos2) +                      (parse-token contents pos ; (add1 pos) +                                   char-set:space-or-paren)) +                     ((comment pos3) (parse-comment contents pos2)) +                     ;; Ugh +                     ((real-version) (and version (not (string-null? version)) (string-trim version #\/)))) +         (if product +             (loop pos3 (cons (list product real-version comment) products)) +             (list (vector (reverse products) '())))))))) + +;;;; MAJOR TODOs +;; RFC1123 mailbox parser - just strings for now +(define mailbox-subparser identity) + +;; Either an entity-tag or a http-date +(define (if-range-parser contents) +  (let ((http-date ((with-params http-date-string->time '()) contents))) +    (if (get-value http-date) +        (list http-date) +        (etag-parser contents)))) + +;; Either delta-seconds or http-date +(define retry-after-subparser (disjoin http-date-subparser natnum-subparser)) + +;; Tricky - see 2616 14.45 +;; We probably shouldn't try to do too much parsing here +(define via-parser (multiple identity)) + +;; Tricky - see 2616 14.46 +(define warning-parser split-multi-header) +;;;; END MAJOR TODOs + +(define (key/value-subparser str) +  (let ((idx (string-index str #\=))) +    (cons (string->symbol (string-take str idx)) (string-drop str (add1 idx))))) + +;; The 'expires' header defined by the Netscape cookie spec contains +;; an embedded comma.  RFC 2109 cookies use Max-Age instead. +(define old-style-cookie? +  (let ((old-cookie-regex +         (irregex '(seq bos (+ (~ #\= #\")) "=" (* (~ #\;)) ";" ; +                        (* any) (w/nocase "expires") (* space) "=")))) +   (lambda (cookie) +     (irregex-search old-cookie-regex cookie)))) + +(define (string->number-list str) +  (map string->number (string-split str ","))) + +(define (relative-ref/path-only s) +  (and-let* ((ref (uri-reference s)) +             ((not (uri-host ref))) +             ((null? (uri-query ref))) +             ((not (uri-fragment ref)))) +    ref)) + +;; We're using http-date-subparser for 'expires' instead of rfc1123-subparser +;; (which would be the correct thing to do) because several well-known web +;; server software packages (tested: PHP and Rails) get it wrong.  So we +;; will go by the robustness principle and allow any kind of HTTP date. +(define set-cookie-parser +  (let ((param-subparsers `((expires . ,http-date-subparser) +                            (max-age . ,string->number) +                            (version . ,string->number) +                            (port    . ,string->number-list) +                            (path    . ,relative-ref/path-only))) +        (name/value-parser (lambda (str) +                             (let ((idx (string-index str #\=))) +                               (cons (string-take str idx) +                                     (string-drop str (add1 idx))))))) +    (lambda (contents) +      (if (old-style-cookie? contents) +          (list ((with-params name/value-parser param-subparsers) contents)) +          (map (with-params name/value-parser param-subparsers) +               (split-multi-header contents)))))) + +(define cache-control-parser +  (let ((splitter (lambda (str) ;; Is this correct? +                    (map (compose string->symbol string-trim-both) +                         (string-split str ","))))) +    (lambda (contents) +      (map +       (make-key/value-subparser `((max-age . ,natnum-subparser) +                                   (s-maxage . ,natnum-subparser) +                                   (max-stale . ,natnum-subparser) +                                   (min-fresh . ,natnum-subparser) +                                   (private . ,splitter) +                                   (no-cache . ,splitter))) +       (split-multi-header contents))))) + +(define (strict-transport-security-parser contents) +  ;; This is ridiculous; there are no parameters because everything +  ;; is a parameter (or everything is a value, depending on your point +  ;; of view).  The header has no main value.  For convenience and sanity +  ;; we just return an alist as a single value. +  (list (vector (parse-params contents 0 `((max-age . ,natnum-subparser))) '()))) + +(define (basic-auth-param-subparser contents pos) +   (receive (secret pos) +     (parse-token contents pos (char-set #\,)) +     (let* ((decoded (base64-decode secret)) +            (colon-idx (string-index decoded #\:)) +            (user (string-take decoded colon-idx)) +            (pass (string-drop decoded (add1 colon-idx)))) +       (values `((username . ,user) (password . ,pass)) pos)))) + +(define (digest-auth-param-subparser contents pos) +  (parse-params contents pos +                `((nc . ,(lambda (n) (string->number n 16))) +                  (uri . ,uri-reference) +                  (qop . ,symbol-subparser) +                  (algorithm . ,symbol-subparser-ci)) +                (char-set #\,) (char-set #\, #\=))) + +(define authorization-param-subparsers +  (make-parameter `((basic . ,basic-auth-param-subparser) +                    (digest . ,digest-auth-param-subparser)))) + +(define (authorization-parser contents) +  (let loop ((pos 0) +             (result '())) +    (receive (authtype pos) +      (parse-token contents pos char-set:whitespace) +      (if (not authtype) +          (reverse result) +          (let ((authtype (http-name->symbol authtype)) +                (default-subparser (lambda (contents pos) +                                     (parse-params contents pos '())))) +            (receive (params pos) +              ((alist-ref authtype (authorization-param-subparsers) +                          eq? default-subparser) contents (add1 pos)) +              (loop (add1 pos) +                    (cons (vector authtype params) result)))))))) + +(define (authenticate-parser contents) +  (let loop ((pos 0) +             (result '())) +    (receive (authtype pos) +      (parse-token contents pos char-set:whitespace) +      (if (not authtype) +          (reverse result) +          (let ((authtype (http-name->symbol authtype))) +            (receive (params pos) +              (parse-params contents pos +                            `((domain . ,(lambda (s) +                                           (map uri-reference +                                                (string-split s)))) +                              (qop . ,(lambda (s) +                                        (map (compose symbol-subparser +                                                      string-trim) +                                             (string-split s ",")))) +                              (algorithm . ,symbol-subparser-ci) +                              (stale . ,(lambda (s) +                                          (string-ci=? (string-trim s) +                                                       "TRUE")))) +                            (char-set #\,) (char-set #\, #\=)) +              (loop (add1 pos) (cons (vector authtype params) result)))))))) + +(define (pragma-parser contents) +  (map (make-key/value-subparser `()) (split-multi-header contents))) + +(define (te-parser contents) +  (map (make-key/value-subparser `((q . ,quality-subparser))) +       (split-multi-header contents))) + +;; Cookie headers are also braindead: there can be several cookies in one header, +;; separated by either commas or semicolons. The only way to distinguish a +;; new cookie from a parameter of the current cookie is the dollar in front +;; of all parameter names. +;; Also, there's a $Version attribute that prepends all cookies, which is +;; considered to apply to all cookies that follow. +(define (cookie-parser contents) +  ;; Local version of parse-params that stops when param without $ is seen +  (define (read-params start-pos) +    (let next-param ((start-pos start-pos) +                     (params '())) +      (receive (attribute-name pos) +        (parse-token contents start-pos (char-set #\; #\=)) +        (if (or (not attribute-name) ;; Still reading params? +                (not (char=? (string-ref attribute-name 0) #\$))) +            (values (reverse! params) start-pos) +            (let ((attrib (http-name->symbol (string-drop attribute-name 1)))) +              (if (and (< pos (string-length contents)) +                       (char=? (string-ref contents pos) #\=)) +                  (receive (value pos) +                    (parse-token contents (add1 pos) (char-set #\;)) +                    (let ((value (case attrib +                                   ((version port) (string->number (or value ""))) +                                   ((path) (relative-ref/path-only (or value ""))) +                                   (else value)))) +                      (next-param (add1 pos) (cons (cons attrib value) params)))) +                  ;; Missing value is interpreted as "present", so #t +                  (next-param (add1 pos) (cons (cons attrib #t) params)))))))) +  (receive (global-params pos) +    (read-params 0) +    (let loop ((cookies '()) +               (pos pos)) +      (let*-values (((name pos) (parse-token contents pos (char-set #\= #\;))) +                    ((val pos) (parse-token contents (add1 pos) (char-set #\;)))) +        (if (or (not name) (not val)) +            (reverse! cookies) +            (receive (local-params pos) +              (read-params (add1 pos)) +              (loop (cons (vector (cons name val) +                                  (append! local-params global-params)) +                          cookies) (add1 pos)))))))) + +;;; Unparsers ;;; +(define (unparse-params params unparsers #!key +                        (separator "; ") (grammar 'prefix) +                        (keyword-unparser ->string) +                        (value-unparser unparse-token)) +  (let loop ((params params) +             (results '())) +    (if (null? params) +        (string-join (reverse results) separator grammar) +        (let* ((name (caar params)) +               (val (cdar params)) +               (str (case val +                      ;; #t means param is present (no value) +                      ((#t) (keyword-unparser name)) +                      ;; #f means param is missing +                      ((#f) #f) +                      (else (let* ((unparser (assq name unparsers)) +                                   (unparsed-val (if unparser +                                                     ((cdr unparser) val) +                                                     val))) +                              (string-append (keyword-unparser name) "=" +                                             (value-unparser unparsed-val))))))) +          (loop (cdr params) (if str (cons str results) results)))))) + +(define must-be-quoted-chars (char-set-adjoin char-set:iso-control #\")) + +(define (quote-string string) +  (reverse-list->string +   (cons #\" +         (string-fold (lambda (c result) +                        (cond ((char=? c #\newline) +                               (signal-http-condition +                                'quote-string +                                (conc "Unencoded newline in header contents! " +                                      "Please encode the newline in a way " +                                      "appropriate for this header") +                                (list string) 'unencoded-header 'value string)) +                              ((char-set-contains? must-be-quoted-chars c) +                               (cons c (cons #\\ result))) +                              (else (cons c result)))) +                      '(#\") +                      string)))) + +;; Unparse a value as token, converting it to a string first +(define unparse-token +  (let ((default-trigger-chars (char-set-union must-be-quoted-chars +                                               (char-set #\= #\; #\,) +                                               char-set:blank))) +    (lambda (token #!optional separator-chars) +      (let ((trigger-quoting-chars +             (if separator-chars +                 (char-set-union must-be-quoted-chars separator-chars char-set:blank) +                 default-trigger-chars)) +            (token-string (->string token))) +        (if (string-any trigger-quoting-chars token-string) +            (quote-string token-string) +            token-string))))) + +(define (unparse-etag etag) +  (string-append +   (if (eq? 'weak (car etag)) "W/" "") +   (quote-string (cdr etag)))) ;; Etags are _always_ quoted + +;; There's no need to make a specific header unparser for every header type. +;; Usually, the Scheme value representing a header can unambiguously be +;; unparsed into a header just by checking its type. +(define (default-header-unparser header-contents) +  (let loop ((headers (reverse header-contents)) +             (result '())) +    (if (null? headers) +        (list (string-join result ", ")) +        (let* ((header (car headers)) +               (contents (get-value header)) +               (value (cond +                       ((pair? contents) ; alist? +                        (let ((attribute (symbol->http-name (car contents)))) +                          (if (eq? (cdr contents) #t) +                              (unparse-token attribute) +                              (string-append attribute "=" +                                             (unparse-token (cdr contents)))))) +                       ((uri-reference? contents) +                        (unparse-token (uri->string contents) (char-set))) +                       (else (unparse-token contents))))) +          (loop (cdr headers) +                (cons +                 (string-append +                  value (unparse-params (get-params header) '())) +                 result)))))) + +;; RFC2616 19.5.1 says that the "filename" attribute _must_ be quoted. +;; It's a bit annoying that our API currently can't specify for particular +;; attributes that only those must be unparsed specially, so we quote _all_ +;; attributes (which, strictly speaking, is always allowed for tokens) unless +;; otherwise specified by a hack (when the value is prefixed by RAW). +;; This may be dangerous or wrong, if a server doesn't accept quoted "name" +;; attributes, for example.  Not too likely since names can contain spaces etc. +(define (content-disposition-unparser header-contents) +  (let* ((type (get-value (car header-contents))) +         (RAW (list 'raw)) +         (unparser (lambda (x) (if (and (pair? x) (eq? RAW (car x))) +                                   (cdr x) +                                   (quote-string (->string x)))))) +    (list (conc (unparse-token type) +                (unparse-params (get-params (car header-contents)) +                                `((filename . ,pathname-strip-directory) +                                  (size . ,(lambda (x) (cons RAW (number->string x)))) +                                  (creation-date . ,rfc1123-time->string) +                                  (modification-date . ,rfc1123-time->string) +                                  (read-date . ,rfc1123-time->string)) +                                value-unparser: unparser))))) + +(define (etag-unparser header-contents) +  (list (unparse-etag (get-value (car header-contents))))) + +(define (if-match-unparser header-contents) +  (let loop ((headers (reverse header-contents)) +             (result '())) +    (cond +     ((null? headers) (list (string-join result ", "))) +     ((eq? '* (get-value (car headers))) '("*")) ;; We're done. * means anything +     (else (loop (cdr headers) +                 (cons (unparse-etag (get-value (car headers))) result)))))) + +(define (host/port-unparser header-contents) +  (let ((contents (get-value (car header-contents)))) +    ;; XXX: urlencode? +    (if (cdr contents) +        (list (conc (car contents) ":" (cdr contents))) +        (list (car contents))))) + +;; Handled specially because cookie value is not an alist but a cons of strings +(define (set-cookie-unparser header-contents) +  (map (lambda (header) +         (let* ((unparsed-params +                 (map (lambda (p) +                        (if (eq? (cdr p) #t) +                            (unparse-token (symbol->http-name (car p))) +                            (string-append +                              (unparse-token (symbol->http-name (car p))) +                              "=" +                              (cond +                               ((and (eq? (car p) 'port) (pair? (cdr p))) +                                (string-join +                                 (map number->string (cdr p)) ",")) +                               ((and (eq? (car p) 'path) (cdr p)) +                                (uri->string (cdr p))) +                               ((eq? (car p) 'expires) +                                (let-locale ((LC_TIME "POSIX")) +                                            (time->string (cdr p) "%A, %d-%b-%y %X GMT"))) +                               (else (unparse-token (cdr p))))))) +                      ;; Remove #f values +                      (filter (lambda (p) (cdr p)) (get-params header)))) +                (cookie (get-value header)) +                (unparsed-cookie (string-append (car cookie) "=" +                                                (unparse-token (cdr cookie))))) +           (string-join (cons unparsed-cookie unparsed-params) "; "))) +       header-contents)) + +(define (cookie-unparser header-contents) +  (let loop ((prefix "") +             (headers (reverse header-contents)) +             (result '())) +    (if (null? headers) +        (list (conc prefix (string-join result "; "))) +        (let* ((version (get-param 'version (car headers) #f)) +               (params (alist-delete 'version (get-params (car headers)))) +               (unparsed-params +                (map (lambda (p) +                       (if (eq? (cdr p) #t) +                           (unparse-token (conc "$" (symbol->http-name (car p)))) +                           (string-append +                            (unparse-token +                             (conc "$" (symbol->http-name (car p)))) +                            "=" +                            (cond +                             ((and (eq? (car p) 'port) (pair? (cdr p))) +                              (string-join +                               (map number->string (cdr p)) ",")) +                             ((and (eq? (car p) 'path) (cdr p)) +                              (uri->string (cdr p))) +                             (else (unparse-token (cdr p))))))) +                     ;; Remove #f values +                     (filter (lambda (p) (cdr p)) params))) +               (cookie (get-value (car headers))) +               (unparsed-cookie (string-append (car cookie) "=" +                                               (unparse-token (cdr cookie))))) +          ;; Doing it like this means we can't unparse cookies of +          ;; mixed versions... +          (loop (if version (sprintf "$Version: ~A; " version) prefix) +                (cdr headers) +                (cons (string-join (cons unparsed-cookie unparsed-params) "; ") +                      result)))))) + +(define (software-unparser header-contents) +  (list +   (string-join +    (map (lambda (content) +           (conc (unparse-token (first content)) +                 (if (second content) +                     (conc "/" (unparse-token (second content))) +                     "") +                 (if (third content) +                     (conc " (" (third content) ")") +                     ""))) +         (get-value (car header-contents)))))) + +(define (product-unparser header-contents) +  (list +   (string-join +    (map (lambda (header) +           (let* ((default-unparser ; Not great, but better than nothing +                    (lambda (params) (unparse-params params '()))) +                  (product+version (get-value header))) +             (conc (unparse-token (car product+version)) +                   (if (cdr product+version) +                       (conc "/" (unparse-token (cdr product+version))) +                       "")))) +         header-contents) +    ", "))) + +(define (via-unparser header-contents) +  (list (string-join (map get-value header-contents) ", "))) + +(define (rfc1123-unparser header-contents) +  (list (rfc1123-time->string (get-value (car header-contents))))) + +(define-constant short-weekdays `#("Sun" "Mon" "Tue" "Wed" "Thu" "Fri" "Sat")) +(define-constant short-months `#("Jan" "Feb" "Mar" "Apr" "May" "Jun" "Jul" "Aug" "Sep" "Oct" "Nov" "Dec")) + +(define (rfc1123-time->string time) +  (let ((padded (lambda (n) +                  (if (fx< n 10) +                      (string-append "0" (number->string n)) +                      (number->string n)))) +        (secs (vector-ref time 0)) +        (mins (vector-ref time 1)) +        (hours (vector-ref time 2)) +        (mday (vector-ref time 3)) +        (month (vector-ref time 4)) +        (year (vector-ref time 5)) +        (wday (vector-ref time 6))) +    (string-append (vector-ref short-weekdays wday) ", " +                   (padded mday) " " (vector-ref short-months month) " " +                   (number->string (+ 1900 year)) " " (padded hours) ":" +                   (padded mins) ":" (padded secs) " GMT"))) + +(define (basic-auth-param-subunparser params) +  (let ((user (alist-ref 'username params)) +        (pass (alist-ref 'password params))) +    (if (string-index user #\:) +        (signal-http-condition +         'basic-auth-param-subunparser +         "Colon detected in username. This is not supported by basic auth!" +         (list user) 'username-with-colon 'value user) +        (base64-encode (string-append user ":" pass))))) + +(define (digest-auth-param-subunparser params) +  (unparse-params params +                  `((nc . ,identity) ;; see below +                    (uri . ,uri->string) +                    (qop . ,symbol->string) +                    (algorithm . ,symbol->string)) +                  separator: ", " +                  grammar: 'infix +                  keyword-unparser: symbol->string +                  value-unparser: +                  ;; Nasty exception for "nc", which is an unquoted +                  ;; padded integer... +                  (lambda (x) +                    (if (number? x) +                        (string-pad (number->string x 16) 8 #\0) +                        (quote-string (->string x)))))) + +(define authorization-param-subunparsers +  (make-parameter `((basic . ,basic-auth-param-subunparser) +                    (digest . ,digest-auth-param-subunparser)))) + +(define (authorization-unparser header-contents) +  (map (lambda (header) +         (let* ((default-unparser        ; Not great, but better than nothing +                  (lambda (params) (unparse-params params '()))) +                (auth-scheme (get-value header)) +                (unparser (alist-ref auth-scheme +                                     (authorization-param-subunparsers) +                                     eq? default-unparser))) +           (string-append +            (symbol->http-name auth-scheme) " " +            (->string (unparser (get-params header)))))) +       header-contents)) + +(define (authenticate-unparser header-contents) +  (map (lambda (header) +         (string-append +          (symbol->http-name (get-value header)) +          " " +          (let* ((old (get-params header)) +                 ;; A quick hack to get presence of "stale" +                 ;; coded as TRUE instead of value-less param +                 ;; false value is coded by its absense +                 (params (if (alist-ref 'stale old) +                             (alist-update! 'stale 'TRUE old) +                             (alist-delete 'stale old)))) +            (unparse-params params +                            `((domain . ,(lambda (u) +                                           (string-join +                                            (map uri->string u)))) +                              (qop . ,(lambda (q) +                                        (string-join +                                         (map symbol->string q) +                                         ","))) +                              (algorithm . ,symbol->string)) +                            separator: ", " +                            grammar: 'infix +                            keyword-unparser: symbol->string +                            value-unparser: +                            (lambda (x) +                              (if (eq? x 'TRUE) +                                  "TRUE" +                                  (quote-string (->string x)))))))) +       header-contents)) + +(define (strict-transport-security-unparser header-contents) +  (map (lambda (header)   ; Should only be one header; the rest must be ignored! +         (unparse-params (get-value header) '() +                         grammar: 'infix +                         keyword-unparser: ; Ugly but neccessary +                         (lambda (k) +                           (if (eq? k 'includesubdomains) +                               "includeSubDomains" +                               (->string k))))) +       header-contents)) diff --git a/intarweb.egg b/intarweb.egg new file mode 100644 index 0000000..43eb73d --- /dev/null +++ b/intarweb.egg @@ -0,0 +1,11 @@ +;; -*- Scheme -*- +((synopsis "A more convenient HTTP library") + (author "Peter Bex") + (category web) + (license "BSD") + (dependencies srfi-1 srfi-13 srfi-14 defstruct uri-common base64) + (test-dependencies test srfi-18) + (components (extension intarweb +                        (csc-options "-O3") +                        (source-dependencies "intarweb.scm" +                                             "header-parsers.scm")))) diff --git a/intarweb.scm b/intarweb.scm new file mode 100644 index 0000000..3df4690 --- /dev/null +++ b/intarweb.scm @@ -0,0 +1,1055 @@ +;;; +;;; Intarweb is an improved HTTP library for Chicken +;;; +;; Copyright (c) 2008-2018, Peter Bex +;; All rights reserved. +;; +;; Redistribution and use in source and binary forms, with or without +;; modification, are permitted provided that the following conditions +;; are met: +;; +;; 1. Redistributions of source code must retain the above copyright +;;    notice, this list of conditions and the following disclaimer. +;; 2. Redistributions in binary form must reproduce the above copyright +;;    notice, this list of conditions and the following disclaimer in the +;;    documentation and/or other materials provided with the distribution. +;; 3. Neither the name of the author nor the names of its +;;    contributors may be used to endorse or promote products derived +;;    from this software without specific prior written permission. +;; +;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +;; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +;; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +;; FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +;; COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +;; INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +;; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +;; HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +;; STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +;; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +;; OF THE POSSIBILITY OF SUCH DAMAGE. + +;; TODO: Support RFC5987?  Seems awfully messy though (need to pull in iconv?) +;; We could use http://www.greenbytes.de/tech/tc2231/ in the testsuite. +;; Look at that URI's toplevel directory for more HTTP/URI-related testcases! + +(module intarweb +  (http-line-limit http-header-limit http-urlencoded-request-data-limit +   replace-header-contents replace-header-contents! remove-header remove-header! +   update-header-contents update-header-contents! headers single-headers +   headers? headers->list http-name->symbol symbol->http-name +   header-parsers header-unparsers unparse-header unparse-headers read-headers +   safe-methods safe? idempotent-methods idempotent? keep-alive? response-class +   etag=? etag=-weakly? etag-matches? etag-matches-weakly? +    +   make-request request? request-major request-major-set! +   request-minor request-minor-set! +   request-method request-method-set! request-uri request-uri-set! +   request-headers request-headers-set! request-port request-port-set! +   update-request set-request! request-has-message-body? + +   request-parsers read-request request-unparsers write-request +   finish-request-body http-0.9-request-parser http-1.x-request-parser +   http-0.9-request-unparser http-1.0-request-unparser http-1.x-request-unparser +   header-parse-error-handler +   read-urlencoded-request-data +    +   make-response response? response-major response-major-set! +   response-minor response-minor-set! +   response-code response-code-set! response-reason response-reason-set! +   response-status response-status-set! response-headers response-headers-set! +   response-port response-port-set! update-response set-response! +   response-has-message-body-for-request? +    +   write-response response-parsers response-unparsers read-response +   finish-response-body http-0.9-response-parser http-0.9-response-unparser +   http-1.0-response-parser http-1.0-response-unparser +   http-1.x-response-parser http-1.x-response-unparser +   http-status-codes http-status->code&reason + +   ;; http-header-parsers +   header-contents header-values header-value header-params header-param +   get-value get-params get-param + +   split-multi-header parse-token parse-comment +   parse-params parse-value+params unparse-params +   multiple single make-key/value-subparser +    +   rfc1123-string->time rfc850-string->time asctime-string->time +   http-date-string->time +   rfc1123-subparser rfc850-subparser  asctime-subparser http-date-subparser +   product-subparser quality-subparser unknown-header-parser +   filename-subparser symbol-subparser symbol-subparser-ci natnum-subparser +   host/port-subparser base64-subparser range-subparser filename-subparser +   etag-parser software-parser mailbox-subparser +   if-range-parser retry-after-subparser via-parser warning-parser +   key/value-subparser set-cookie-parser cache-control-parser pragma-parser +   te-parser cookie-parser strict-transport-security-parser +    +   must-be-quoted-chars quote-string unparse-token +   default-header-unparser etag-unparser host/port-unparser +   product-unparser software-unparser rfc1123-unparser cookie-unparser +   strict-transport-security-unparser + +   ;; Subparsers/subunparsers +   authorization-param-subparsers +   basic-auth-param-subparser digest-auth-param-subparser +    +   authorization-param-subunparsers +   basic-auth-param-subunparser digest-auth-param-subunparser +   ) + +(import scheme (chicken base) (chicken foreign) (chicken irregex) +        (chicken format) (chicken io) (chicken string) +        (chicken time posix) (chicken pathname) (chicken fixnum) +        (chicken condition) (chicken port) (chicken syntax) +        srfi-1 srfi-13 srfi-14 base64 uri-common defstruct) + +;; The below can all be #f if you want no limit (not recommended!) +(define http-line-limit (make-parameter 4096)) +(define http-header-limit (make-parameter 64)) +(define http-urlencoded-request-data-limit (make-parameter (* 4 1024 1024))) + +(define (read-urlencoded-request-data +         request #!optional (max-length (http-urlencoded-request-data-limit))) +  (let* ((p (request-port request)) +         (len (header-value 'content-length (request-headers request))) +         ;; For simplicity's sake, we don't allow exactly the max request limit +         (limit (if (and len max-length) +                    (min len max-length) +                    (or max-length len))) +         (data (read-string limit (request-port request)))) +    (if (and (not (eof-object? data)) max-length (= max-length (string-length data))) +        (signal-http-condition +         'read-urlencoded-request-data +         "Max allowed URLencoded request size exceeded" +         (list request max-length) +         'urlencoded-request-data-limit-exceeded +         'contents data 'limit limit) +        (form-urldecode data)))) + +(define (raise-line-limit-exceeded-error line limit port) +  (let ((safe-line-prefix +         (if (< limit 128) +             (sprintf "~A[..and more (was limited to ~A)..]" line limit) +             (sprintf "~A[..~A+ more chars (was limited to ~A)..]" +               (substring line 0 128) (- limit 128) limit)))) +    (signal-http-condition +     'safe-read-line +     "Max allowed line length exceeded" +     (list port safe-line-prefix) +     'line-limit-exceeded 'contents line 'limit limit))) + +(define (safe-read-line p) +  (let* ((limit (http-line-limit)) +         (line (read-line p (http-line-limit)))) +    (if (and (not (eof-object? line)) limit (= limit (string-length line))) +        (raise-line-limit-exceeded-error line limit p) +        line))) + +;; Make headers a new type, to force the use of the HEADERS procedure +;; and ensure only proper header values are passed to all procedures +;; that deal with headers. +(define-record headers v) + +(define-record-printer (headers h out) +  (fprintf out "#(headers: ~S)" (headers-v h))) + +(define headers->list headers-v) + +(define (remove-header! name headers) +  (let loop ((h (headers-v headers))) +    (cond +     ((null? h) headers) +     ((eq? name (caar h)) +      (set-cdr! h (cdr h)) +      headers) +     (else (loop (cdr h)))))) + +(define (remove-header name headers) +  (make-headers +   (let loop ((h (headers-v headers))) +     (cond +      ((null? h) h) +      ((eq? name (caar h)) (loop (cdr h))) +      (else (cons (car h) (loop (cdr h)))))))) + +;; Check that the header values are valid vectors, and that if there +;; is a raw value, there is only one value at all. +(define (check-header-values loc name contents) +  (let lp ((mode 'unknown) (todo contents)) +    (let ((head (car todo))) +      (if (not (and (vector? head) (= 2 (vector-length head)))) +          (signal-http-condition +           loc "header values must be vectors of length 2" +           (list name contents) 'header-value) +          (let ((type (if (eq? (get-params head) 'raw) 'raw 'cooked))) +            (unless (or (eq? mode 'unknown) (eq? mode type)) +              (signal-http-condition +               loc "When using raw headers, all values must be raw" +               (list name contents) 'header-value) +              (lp type (cdr todo)))))))) + +;; XXX: Do we need these replace procedures in the exports list?  It +;; looks like we can use update everywhere. +(define (replace-header-contents! name contents headers) +  (check-header-values 'replace-header-contents! name contents) +  (let loop ((h (headers-v headers))) +    (cond +     ((null? h) +      (headers-v-set! +       headers (cons (cons name contents) (headers-v headers))) +      headers) +     ((eq? name (caar h)) +      (set-cdr! (car h) contents) +      headers) +     (else (loop (cdr h)))))) + +(define (replace-header-contents name contents headers) +  (check-header-values 'replace-header-contents! name contents) +  (make-headers +   (let loop ((h (headers-v headers))) +     (cond +      ((null? h) (cons (cons name contents) h)) +      ((eq? name (caar h)) +       (cons (cons (caar h) contents) (cdr h))) +      (else (cons (car h) (loop (cdr h)))))))) + +(define (make-updater replacer) +  (lambda (name contents headers) +    (let ((old (header-contents name headers '()))) +      (replacer name +                (if (member name (single-headers)) +                    (list (last contents)) +                    (append old contents)) +                headers)))) + +(define update-header-contents  (make-updater replace-header-contents)) +(define update-header-contents! (make-updater replace-header-contents!)) + +(define http-name->symbol (compose string->symbol string-downcase!)) +(define symbol->http-name (compose string-titlecase symbol->string)) + +;; Make a header set from a literal expression by folding in the headers +;; with any previous ones +(define (headers headers-to-be #!optional (old-headers (make-headers '()))) +  (fold (lambda (h new-headers) +          (update-header-contents +           (car h) +           (map (lambda (v) +                  (if (vector? v) v (vector v '()))) ; normalize to vector +                (cdr h)) +           new-headers)) +        old-headers +        headers-to-be)) + +(define (normalized-uri str) +  (and-let* ((uri (uri-reference str))) +    (uri-normalize-path-segments uri))) + +(include "header-parsers") ; Also includes header unparsers + +;; Any unknown headers are considered to be multi-headers, always +(define single-headers +  (make-parameter '(accept-ranges age authorization content-disposition +                    content-length content-location content-md5 content-type +                    date etag expect expires host if-modified-since +                    if-unmodified-since last-modified location max-forwards +                    proxy-authorization range referer retry-after server +                    transfer-encoding user-agent www-authenticate))) + +(define string->http-method string->symbol) +(define http-method->string symbol->string) + +;; Make an output port automatically "chunked" +(define (chunked-output-port port) +  (let ((chunked-port +         (make-output-port (lambda (s)        ; write +                             (let ((len (string-length s))) +                               (unless (zero? len) +                                 (fprintf port "~X\r\n~A\r\n" len s)))) +                           (lambda ()         ; close +                             (close-output-port port)) +                           (lambda ()         ; flush +                             (flush-output port))))) +    ;; first "reserved" slot +    ;; Slot 7 should probably stay 'custom +    (##sys#setslot chunked-port 10 'chunked-output-port) +    ;; second "reserved" slot +    (##sys#setslot chunked-port 11 port) +    chunked-port)) + +;; Make an input port automatically "chunked" +(define (chunked-input-port port) +  (let* ((chunk-length 0) +         (position 0) +         (check-position (lambda () +                           (when (and position (>= position chunk-length)) +                             (unless (eq? chunk-length 0) +                               (safe-read-line port)) ; Read \r\n data trailer +                             (let ((line (safe-read-line port))) +                               (if (eof-object? line) +                                   (set! position #f) +                                   (begin +                                     (set! chunk-length (string->number line 16)) +                                     (cond +                                      ((not chunk-length) (set! position #f)) +                                      ((zero? chunk-length) ; Read final data trailer +                                       (safe-read-line port) +                                       (set! position #f)) +                                      (else (set! position 0)))))))))) +    (make-input-port (lambda ()         ; read-char +                       (check-position) +                       (if position +                           (let ((char (read-char port))) +                             (unless (eof-object? char) +                               (set! position (add1 position))) +                             char) +                           #!eof)) +                     (lambda ()         ; ready? +                       (check-position) +                       (or (not position) (char-ready? port))) +                     (lambda ()         ; close +                       (close-input-port port)) +                     (lambda ()         ; peek-char +                       (check-position) +                       (if position +                           (peek-char port) +                           #!eof)) +                     (lambda (p bytes buf off) ; read-string! +                       (let lp ((todo bytes) +                                (total-bytes-read 0) +                                (off off)) +                         (check-position) +                         (if (or (not position) (= todo 0)) +                             total-bytes-read +                             (let* ((n (min todo (- chunk-length position))) +                                    (bytes-read (read-string! n buf port off))) +                               (set! position (+ position bytes-read)) +                               (lp (- todo bytes-read) +                                   (+ total-bytes-read bytes-read) +                                   (+ off bytes-read))))))))) +;; TODO: Note that in the above, read-line is not currently +;; implemented.  It is *extremely* tricky to correctly maintain the +;; port position when all \r *AND/OR* \n characters get chopped off +;; the line-string.  It can be done by maintaining our own extra +;; buffer, but that complicates all the procedures here enormously, +;; including read-line itself. + +;; RFC2616, Section 4.3: "The presence of a message-body in a request +;; is signaled by the inclusion of a Content-Length or Transfer-Encoding +;; header field in the request's message-headers." +;; We don't check the method since "a server SHOULD read and forward the +;; a message-body on any request", even it shouldn't be sent for that method. +;; +;; Because HTTP/1.0 has no official definition of when a message body +;; is present, we'll assume it's always present, unless there is no +;; content-length and we have a keep-alive connection. +(define request-has-message-body? +  (make-parameter +   (lambda (req) +     (let ((headers (request-headers req))) +       (if (and (= 1 (request-major req)) (= 0 (request-minor req))) +           (not (eq? 'keep-alive (header-contents 'connection headers))) +           (or (header-contents 'content-length headers) +               (header-contents 'transfer-encoding headers))))))) + +;; RFC2616, Section 4.3: "For response messages, whether or not a +;; message-body is included with a message is dependent on both the +;; request method and the response status code (section 6.1.1)." +(define response-has-message-body-for-request? +  (make-parameter +   (lambda (resp req) +     (not (or (= (response-class resp) 100) +              (memv (response-code resp) '(204 304)) +              (eq? 'HEAD (request-method req))))))) + +;; OPTIONS and TRACE are not explicitly mentioned in section 9.1.1, +;; but section 9.1.2 says they SHOULD NOT have side-effects by +;; definition, which means they are safe, as well. +(define safe-methods +  (make-parameter '(GET HEAD OPTIONS TRACE))) + +;; RFC2616, Section 9.1.1 +(define (safe? obj) +  (let ((method (if (request? obj) (request-method obj) obj))) +    (not (not (member method (safe-methods)))))) + +(define idempotent-methods +  (make-parameter '(GET HEAD PUT DELETE OPTIONS TRACE))) + +;; RFC2616, Section 9.1.2 +(define (idempotent? obj) +  (let ((method (if (request? obj) (request-method obj) obj))) +    (not (not (member method (idempotent-methods)))))) + +(define (keep-alive? obj) +  (let ((major (if (request? obj) (request-major obj) (response-major obj))) +        (minor (if (request? obj) (request-minor obj) (response-minor obj))) +        (con   (header-value 'connection (if (request? obj) +                                             (request-headers obj) +                                             (response-headers obj))))) +   (if (and (= major 1) (> minor 0)) +       (not (eq? con 'close)) +       ;; RFC 2068, section 19.7.1 (see also RFC 2616, section 19.6.2) +       (eq? con 'keep-alive)))) + +(define (etag=? a b) +  (and (not (eq? 'weak (car a))) +       (eq? (car a) (car b)) +       (string=? (cdr a) (cdr b)))) + +(define (etag=-weakly? a b) +  (and (eq? (car a) (car b)) +       (string=? (cdr a) (cdr b)))) + +(define (etag-matches? etag matchlist) +  (any (lambda (m) (or (eq? m '*) (etag=? etag m))) matchlist)) + +(define (etag-matches-weakly? etag matchlist) +  (any (lambda (m) (or (eq? m '*) (etag=-weakly? etag m))) matchlist)) + +;;;;;;;;;;;;;;;;;;;;;;;;; +;;;; Request parsing ;;;; +;;;;;;;;;;;;;;;;;;;;;;;;; + +;; This includes parsers for all RFC-defined headers +(define header-parsers +  (make-parameter +   `((accept . ,(multiple symbol-subparser-ci +                          `((q . ,quality-subparser)))) +     (accept-charset . ,(multiple symbol-subparser-ci +                                  `((q . ,quality-subparser)))) +     (accept-encoding . ,(multiple symbol-subparser-ci +                                   `((q . ,quality-subparser)))) +     (accept-language . ,(multiple symbol-subparser-ci +                                   `((q . ,quality-subparser)))) +     (accept-ranges . ,(single symbol-subparser-ci)) +     (age . ,(single natnum-subparser)) +     (allow . ,(multiple symbol-subparser)) +     (authorization . ,authorization-parser) +     (cache-control . ,cache-control-parser) +     (connection . ,(multiple symbol-subparser-ci)) +     (content-encoding . ,(multiple symbol-subparser-ci)) +     (content-language . ,(multiple symbol-subparser-ci)) +     (content-length . ,(single natnum-subparser)) +     (content-location . ,(single normalized-uri)) +     (content-md5 . ,(single base64-subparser)) +     (content-range . ,(single range-subparser)) +     (content-type . ,(single symbol-subparser-ci +                              `((charset . ,symbol-subparser-ci)))) +     (date . ,(single http-date-subparser)) +     (etag . ,etag-parser) +     (expect . ,(single (make-key/value-subparser '()))) +     (expires . ,(single http-date-subparser)) +     (from . ,(multiple mailbox-subparser)) +     (host . ,(single host/port-subparser)) +     (if-match . ,if-match-parser) +     (if-modified-since . ,(single http-date-subparser)) +     (if-none-match . ,if-match-parser) +     (if-range . ,if-range-parser) +     (if-unmodified-since . ,(single http-date-subparser)) +     (last-modified . ,(single http-date-subparser)) +     (location . ,(single normalized-uri)) +     (max-forwards . ,(single natnum-subparser)) +     (pragma . ,pragma-parser) +     (proxy-authenticate . ,authenticate-parser) +     (proxy-authorization . ,authorization-parser) +     (range . ,(multiple range-subparser)) +     (referer . ,(single normalized-uri)) +     (retry-after . ,(single retry-after-subparser)) +     (server . ,software-parser) +     (te . ,te-parser) +     (trailer . ,(multiple symbol-subparser-ci)) +     (transfer-encoding . ,(single symbol-subparser-ci)) +     (upgrade . ,(multiple product-subparser)) +     (user-agent . ,software-parser) +     (vary . ,(multiple symbol-subparser-ci)) +     (via . ,via-parser) +     (warning . ,warning-parser) +     (www-authenticate . ,authenticate-parser) +     ;; RFC 2183 +     (content-disposition . ,(single symbol-subparser-ci +                                     `((filename . ,filename-subparser) +                                       (creation-date . ,rfc1123-subparser) +                                       (modification-date . ,rfc1123-subparser) +                                       (read-date . ,rfc1123-subparser) +                                       (size . ,natnum-subparser)))) +     ;; RFC 2109 +     (set-cookie . ,set-cookie-parser) +     (cookie . ,cookie-parser) +     ;; +     ;; TODO: RFC 2965? +     ;; +     ;; RFC 6797 +     (strict-transport-security . ,strict-transport-security-parser) +     ;; Nonstandard but common headers +     (x-forwarded-for . ,(multiple identity)) +     ))) + +(define header-parse-error-handler ;; ignore errors +  (make-parameter (lambda (header-name contents headers exn) headers))) + +;; The parser is supposed to return a list of header values for its header +(define (parse-header name contents) +  (let* ((default unknown-header-parser) +         (parser (alist-ref name (header-parsers) eq? default))) +    (parser contents))) + +(define (parse-header-line line headers) +  (or +   (and-let* ((colon-idx   (string-index line #\:)) +              (header-name (http-name->symbol (string-take line colon-idx))) +              (contents    (string-trim-both (string-drop line (add1 colon-idx))))) +     (handle-exceptions +      exn +      ((header-parse-error-handler) header-name contents headers exn) +      (update-header-contents! +       header-name (parse-header header-name contents) headers))) +   (signal-http-condition +    'parse-header-line "Bad header line" (list line) +    'header-error 'contents line))) + +;; XXXX: Bottleneck? +(define (read-headers port) +  (if (eof-object? (peek-char port))    ; Yeah, so sue me +      (make-headers '()) +      (let ((header-limit (http-header-limit)) +            (line-limit (http-line-limit))) +        (let lp ((c (read-char port)) +                 (ln '()) +                 (headers (make-headers '())) +                 (hc 0) +                 (len 0)) +          (cond ((eqv? len line-limit) +                 (raise-line-limit-exceeded-error +                  (reverse-list->string ln) line-limit port)) +                ((eof-object? c) +                 (if (null? ln) +                     headers +                     (parse-header-line (reverse-list->string ln) headers))) +                ;; Only accept CRLF (we're not this strict everywhere...) +                ((and (eqv? c #\return) (eqv? (peek-char port) #\newline)) +                 (read-char port)       ; Consume and discard NL +                 (if (null? ln)         ; Nothing came before: end of headers +                     headers +                     (let ((pc (peek-char port))) +                       (if (and (not (eof-object? pc)) +                                (or (eqv? pc #\space) (eqv? pc #\tab))) +                           ;; If the next line starts with whitespace, +                           ;; it's a continuation line of the same +                           ;; header.  See section 2.2 of RFC 2616. +                           (let skip ((pc (read-char port)) (len len) (ln ln)) +                             (if (and (not (eqv? len line-limit)) +                                      (or (eqv? pc #\space) (eqv? pc #\tab))) +                                 (skip (read-char port) (add1 len) (cons pc ln)) +                                 (lp pc ln headers hc len))) +                           (let* ((ln (reverse-list->string ln)) +                                  (headers (parse-header-line ln headers)) +                                  (hc (add1 hc))) +                             (when (eqv? hc header-limit) +                               (signal-http-condition +                                'read-headers +                                "Max allowed header count exceeded" +                                (list port) +                                'header-limit-exceeded +                                'contents ln +                                'headers headers +                                'limit header-limit)) +                             (lp (read-char port) '() headers hc 0)))))) +                ((eqv? c #\") +                 (let lp2 ((c2 (read-char port)) +                           (ln (cons c ln)) +                           (len len)) +                   (cond ((or (eqv? 0 len) (eof-object? c2)) +                          (lp c2 ln headers hc len)) +                         ((eqv? c2 #\") +                          (lp (read-char port) (cons c2 ln) +                              headers hc (add1 len))) +                         ((eqv? c2 #\\) +                          (let ((c3 (read-char port)) +                                (len len)) +                            (if (or (eof-object? c3) (eqv? 0 len)) +                                (lp c3 (cons c2 ln) headers hc len) +                                (lp2 (read-char port) +                                     (cons c3 (cons c2 ln)) +                                     (add1 len))))) +                         (else +                          (lp2 (read-char port) (cons c2 ln) (add1 len)))))) +                (else +                 (lp (read-char port) (cons c ln) headers hc (add1 len)))))))) + +(define (signal-http-condition loc msg args type . more-info) +  (signal (make-composite-condition +           (make-property-condition 'http) +           (apply make-property-condition type more-info) +           (make-property-condition +            'exn 'location loc 'message msg 'arguments args)))) + +(defstruct request +  (method 'GET) uri (major 1) (minor 1) (headers (make-headers '())) port) + +;; Perhaps we should have header parsers indexed by version or +;; something like that, so you can define the maximum version. Useful +;; for when expecting a response. Then we group request/response parsers +;; together, as with request/response unparsers. +(define http-0.9-request-parser +  (let ((req (irregex '(seq (w/nocase "GET") (+ space) (=> uri (* any)))))) +    (lambda (line in) +      (and-let* ((m (irregex-match req line)) +                 (uri (normalized-uri (irregex-match-substring m 'uri)))) +        (make-request method: 'GET uri: uri +                      major: 0 minor: 9 port: in))))) + +;; Might want to reuse this elsewhere +(define token-sre '(+ (~ "()<>@,;:\\\"/[]?={}\t "))) + +;; XXX This actually parses anything >= HTTP/1.0 +(define http-1.x-request-parser +  (let ((req (irregex `(seq (=> method ,token-sre) (+ space) +                            (=> uri (+ (~ blank))) ; uri-common handles details +                            (+ space) (w/nocase "HTTP/") +                            (=> major (+ digit)) "." (=> minor (+ digit)))))) +    (lambda (line in) +      (and-let* ((m (irregex-match req line)) +                 (uri-string (irregex-match-substring m 'uri)) +                 (major (string->number (irregex-match-substring m 'major))) +                 (minor (string->number (irregex-match-substring m 'minor))) +                 (method (string->http-method (irregex-match-substring m 'method))) +                 (headers (read-headers in))) +        (let* ((wildcard (string=? uri-string "*")) +               (uri (and (not wildcard) (normalized-uri uri-string))) +               ;; HTTP/1.0 has no chunking +               (port (if (and (or (> major 1) (>= minor 1)) +                              (memq 'chunked +                                    (header-values +                                     'transfer-encoding headers))) +                         (chunked-input-port in) +                         in))) +          ;; HTTP/1.1 allows several "things" as "URI" (RFC2616, 5.1.2): +          ;; Request-URI = "*" | absoluteURI | abs_path | authority +          ;; +          ;; HTTP/1.0, URIs are more limited (RFC1945, 5.1.2): +          ;; Request-URI = absoluteURI | abs_path +          ;; +          ;; Currently, a plain authority is not accepted.  This would +          ;; require deep changes in the representation of request +          ;; objects.  It is only used in CONNECT requests, so +          ;; currently not much of a problem.  If we want to support +          ;; this, we'd need a separate object type and expose a +          ;; parser from uri-generic/uri-common for just authority. +          (and (or (and wildcard (or (> major 1) (>= minor 1))) +                   (and uri (or (absolute-uri? uri) +                                (and (uri-path-absolute? uri) +                                     (not (uri-host uri)))))) +               (make-request method: method uri: uri +                             major: major minor: minor +                             headers: headers +                             port: port))))))) + +(define request-parsers   ; order matters here +  (make-parameter (list http-1.x-request-parser))) + +(define (read-request inport) +  (let ((line (safe-read-line inport))) +    (and (not (eof-object? line)) +         ;; Try each parser in turn to process the request-line. +         ;; A parser returns either #f or a request object +         (let loop ((parsers (request-parsers))) +           (if (null? parsers) +               (signal-http-condition +                'read-request "Unknown protocol line" (list line) +                'unknown-protocol-line 'line line) +               (or ((car parsers) line inport) (loop (cdr parsers)))))))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;; Request unparsing ;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define header-unparsers +  (make-parameter +   `((content-disposition . ,content-disposition-unparser) +     (date . ,rfc1123-unparser) +     (etag . ,etag-unparser) +     (expires . ,rfc1123-unparser) +     (host . ,host/port-unparser) +     (if-match . ,if-match-unparser) +     (if-modified-since . ,rfc1123-unparser) +     (if-none-match . ,if-match-unparser) +     (if-unmodified-since . ,rfc1123-unparser) +     (last-modified . ,rfc1123-unparser) +     (user-agent . ,software-unparser) +     (server . ,software-unparser) +     (upgrade . ,product-unparser) +     (cookie . ,cookie-unparser) +     (set-cookie . ,set-cookie-unparser) +     (authorization . ,authorization-unparser) +     (www-authenticate . ,authenticate-unparser) +     (proxy-authorization . ,authorization-unparser) +     (proxy-authenticate . ,authenticate-unparser) +     (via . ,via-unparser) +     ;; RFC 6797 +     (strict-transport-security . ,strict-transport-security-unparser)))) + +(define (unparse-header header-name header-value) +  (cond ((and (not (null? header-value)) +              (eq? 'raw (get-params (car header-value)))) +         (map get-no-newline-value header-value)) +        ((assq header-name (header-unparsers)) +         => (lambda (unparser) ((cdr unparser) header-value))) +        (else (default-header-unparser header-value)))) + +(define (unparse-headers headers out) +  (let ((unparsers (header-unparsers))) ; Don't access parameter for each header +    (for-each +     (lambda (h) +       (let* ((name (car h)) +              (name-s (symbol->http-name name)) +              (contents (cdr h)) +              (unparse (cond ((assq name unparsers) => cdr) ; inlined for perf +                             (else default-header-unparser)))) +         (handle-exceptions exn +             (if ((condition-predicate 'http) exn) +                 (signal exn) ;; Do not tamper with our own custom errors +                 (let* ((none "(no error message provided in original exn)") +                        (msg ((condition-property-accessor +                               'exn 'message none) exn)) +                        (loc ((condition-property-accessor +                               'exn 'location #f) exn)) +                        (args ((condition-property-accessor +                                'exn 'arguments '()) exn))) +                   (signal-http-condition +                    'unparse-headers +                    (sprintf "could not unparse ~S header ~S: ~A~A" +                      name-s contents (if loc (sprintf "(~A) " loc) "") msg) +                    args +                    'unparse-error +                    'header-name name +                    'header-value contents +                    'unparser unparse +                    'original-exn exn))) +           (let ((lines (if (and (not (null? contents)) +                                 (eq? 'raw (get-params (car contents)))) +                            (map get-no-newline-value contents) +                            (unparse contents)))) +             (for-each (lambda (value) +                         ;; Verify there's no \r\n or \r or \n in value? +                         (display (string-append name-s ": " value "\r\n") out)) +                       lines))))) +     (headers-v headers)))) + +;; Use string-append and display rather than fprintf so the line gets +;; written in one burst.  This supposedly avoids a strange race +;; condition, see #800.  We use string-append instead of sprintf for +;; performance reasons.  This is not exported, and our callers compare +;; request-major and request-minor so we can assume they're numbers. +(define (write-request-line request) +  (let ((uri (request-uri request))) +    (display (string-append +              (http-method->string (request-method request)) +              " " (if uri (uri->string uri) "*") " HTTP/" +              (number->string (request-major request)) "." +              (number->string (request-minor request)) "\r\n") +             (request-port request)))) + +(define (http-0.9-request-unparser request) +  (display (string-append "GET " (uri->string (request-uri request)) "\r\n") +           (request-port request)) +  request) + +(define (http-1.0-request-unparser request) +  (and-let* (((= (request-major request) 1)) +             ((= (request-minor request) 0)) +             (o (request-port request))) +    (write-request-line request) +    (unparse-headers (request-headers request) o) +    (display "\r\n" o) +    request)) + +;; XXX This actually unparses anything >= HTTP/1.1 +(define (http-1.x-request-unparser request) +  (and-let* (((or (> (request-major request) 1) +                  (and (= (request-major request) 1) +                       (> (request-minor request) 0)))) +             (o (request-port request))) +    (write-request-line request) +    (unparse-headers (request-headers request) o) +    (display "\r\n" o) +    (if (memq 'chunked (header-values 'transfer-encoding +                                      (request-headers request))) +        (update-request request +                        port: (chunked-output-port (request-port request))) +        request))) + +(define request-unparsers  ; order matters here +  (make-parameter (list http-1.x-request-unparser http-1.0-request-unparser))) + +(define (write-request request) +  ;; Try each unparser in turn to write the request-line. +  ;; An unparser returns either #f or a new request object. +  (let loop ((unparsers (request-unparsers))) +    (if (null? unparsers) +        (let ((major (request-major request)) +              (minor (request-minor request))) +          (signal-http-condition +           'write-request +           "Unknown protocol" (list (conc major "." minor)) +           'unknown-protocol 'major major 'minor minor)) +        (or ((car unparsers) request) (loop (cdr unparsers)))))) + +;; Required for chunked requests.  This is a bit of a hack! +(define (finish-request-body request) +  (when (and (memq 'chunked (header-values 'transfer-encoding +                                           (request-headers request))) +             (eq? (##sys#slot (request-port request) 10) 'chunked-output-port)) +    (display "0\r\n\r\n" (##sys#slot (request-port request) 11))) +  request) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;; Response unparsing ;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(defstruct response +  (code 200) (reason "OK") (major 1) (minor 1) (headers (make-headers '())) port) + +(define make-response +  (let ((old-make-response make-response)) +    (lambda (#!rest args #!key status code reason) +      (let ((resp (apply old-make-response args))) +        (when (and status (not code) (not reason)) +          (response-status-set! resp status)) +        resp)))) + +(define update-response +  (let ((old-update-response update-response)) +    (lambda (resp #!rest args #!key status code reason) +      (let ((resp (apply old-update-response resp args))) +        (when (and status (not code) (not reason)) +          (response-status-set! resp status)) +        resp)))) + +(define (response-status-set! resp status) +  (receive (code reason) (http-status->code&reason status) +    (response-code-set! resp code) +    (response-reason-set! resp reason) +    resp)) + +(define (response-class obj) +  (let ((code (if (response? obj) (response-code obj) obj))) +    (- code (modulo code 100)))) + +(define (response-status obj) +  (let* ((c (if (response? obj) (response-code obj) obj)) +         (s (find (lambda (x) (= (cadr x) c)) (http-status-codes)))) +    (if s +        (car s) +        (signal-http-condition +         'response-status "Unknown status code" (list c) +         'unknown-code 'code c)))) + +(define (http-status->code&reason status) +  (let ((s (alist-ref status (http-status-codes)))) +    (unless s +      (signal-http-condition +       'http-status->code&reason +       ;; haha, status symbol ;) +       "Unknown response status symbol" +       (list status) 'unknown-status 'status status)) +    (values (car s) (cdr s)))) + +;; List of HTTP status codes based on: +;; http://www.iana.org/assignments/http-status-codes/http-status-codes.xml +(define http-status-codes +  (make-parameter  +   `((continue . (100 . "Continue")) +     (switching-protocols . (101 . "Switching Protocols")) +     (processing . (102 . "Processing")) +     (ok . (200 . "OK")) +     (created . (201 . "Created")) +     (accepted . (202 . "Accepted")) +     (non-authoritative-information . (203 . "Non-Authoritative Information")) +     (no-content . (204 . "No Content")) +     (reset-content . (205 . "Reset Content")) +     (partial-content . (206 . "Partial Content")) +     (multi-status . (207 . "Multi-Status")) +     (already-reported . (208 . "Already Reported")) +     (im-used . (226 . "IM Used")) +     (multiple-choices . (300 . "Multiple Choices")) +     (moved-permanently . (301 . "Moved Permanently")) +     (found . (302 . "Found")) +     (see-other . (303 . "See Other")) +     (not-modified . (304 . "Not Modified")) +     (use-proxy . (305 . "Use Proxy")) +     (temporary-redirect . (307 . "Temporary Redirect")) +     (bad-request . (400 . "Bad Request")) +     (unauthorized . (401 . "Unauthorized")) +     (payment-required . (402 . "Payment Required")) +     (forbidden . (403 . "Forbidden")) +     (not-found . (404 . "Not Found")) +     (method-not-allowed . (405 . "Method Not Allowed")) +     (not-acceptable . (406 . "Not Acceptable")) +     (proxy-authentication-required . (407 . "Proxy Authentication Required")) +     (request-time-out . (408 . "Request Time-out")) +     (conflict . (409 . "Conflict")) +     (gone . (410 . "Gone")) +     (length-required . (411 . "Length Required")) +     (precondition-failed . (412 . "Precondition Failed")) +     (request-entity-too-large . (413 . "Request Entity Too Large")) +     (request-uri-too-large . (414 . "Request-URI Too Large")) +     (unsupported-media-type . (415 . "Unsupported Media Type")) +     (requested-range-not-satisfiable . (416 . "Requested Range Not Satisfiable")) +     (expectation-failed . (417 . "Expectation Failed")) +     (unprocessable-entity . (422 . "Unprocessable Entity")) +     (locked . (423 . "Locked")) +     (failed-dependency . (424 . "Failed Dependency")) +     (upgrade-required . (426 . "Upgrade Required")) +     (precondition-required . (428 . "Precondition Required")) +     (too-many-requests . (429 . "Too Many Requests")) +     (request-header-fields-too-large . (431 . "Request Header Fields Too Large")) +     (internal-server-error . (500 . "Internal Server Error")) +     (not-implemented . (501 . "Not Implemented")) +     (bad-gateway . (502 . "Bad Gateway")) +     (service-unavailable . (503 . "Service Unavailable")) +     (gateway-time-out . (504 . "Gateway Time-out")) +     (http-version-not-supported . (505 . "HTTP Version Not Supported")) +     (insufficient-storage . (507 . "Insufficient Storage")) +     (loop-detected . (508 . "Loop Detected")) +     (not-extended . (510 . "Not Extended")) +     (network-authentication-required . (511 . "Network Authentication Required"))))) + +(define (http-0.9-response-unparser response) +  response) ;; The response-body will just follow + +;; See notes at write-request-line +(define (write-response-line response) +  (display (string-append +            "HTTP/" +            (number->string (response-major response)) "." +            (number->string (response-minor response)) " " +            (->string (response-code response)) " " +            (->string (response-reason response)) "\r\n") +           (response-port response))) + +(define (http-1.0-response-unparser response) +  (and-let* (((= (response-major response) 1)) +             ((= (response-minor response) 0)) +             (o (response-port response))) +    (write-response-line response) +    (unparse-headers (response-headers response) o) +    (display "\r\n" o) +    response)) + +;; XXX This actually unparses anything >= HTTP/1.1 +(define (http-1.x-response-unparser response) +  (and-let* (((or (> (response-major response) 1) +                  (and (= (response-major response) 1) +                       (> (response-minor response) 0)))) +             (o (response-port response))) +    (write-response-line response) +    (unparse-headers (response-headers response) o) +    (display "\r\n" o) +    (if (memq 'chunked (header-values 'transfer-encoding +                                      (response-headers response))) +        (update-response response +                         port: (chunked-output-port (response-port response))) +        response))) + +(define response-unparsers +  (make-parameter (list http-1.x-response-unparser http-1.0-response-unparser))) + +(define (write-response response) +  ;; Try each unparser in turn to write the response-line. +  ;; An unparser returns either #f or a new response object. +  (let loop ((unparsers (response-unparsers))) +    (if (null? unparsers) +        (let ((major (response-major response)) +              (minor (response-minor response))) +          (signal-http-condition +           'write-response +           "Unknown protocol" (list (conc major "." minor)) +           'unknown-protocol 'major major 'minor minor)) +        (or ((car unparsers) response) (loop (cdr unparsers)))))) + +;; Required for chunked requests.  This is a bit of a hack! +(define (finish-response-body response) +  (when (and (memq 'chunked (header-values 'transfer-encoding +                                           (response-headers response))) +             (eq? (##sys#slot (response-port response) 10) 'chunked-output-port)) +    (display "0\r\n\r\n" (##sys#slot (response-port response) 11))) +  response) + +;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;; Response parsing ;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define http-1.x-response-parser +  (let ((resp (irregex '(seq (w/nocase "HTTP/") +                             (=> major (+ digit)) "." (=> minor (+ digit)) +                             ;; Could use '(= 3 digit) for status-code, but +                             ;; that's currently not compilable +                             (+ space) (=> status-code digit digit digit) +                             (+ space) (=> reason-phrase (* nonl)))))) +    (lambda (line in) +      (and-let* ((m (irregex-match resp line)) +                 (code (string->number (irregex-match-substring m 'status-code))) +                 (major (string->number (irregex-match-substring m 'major))) +                 (minor (string->number (irregex-match-substring m 'minor))) +                 ((or (> major 1) (and (= major 1) (> minor 0)))) +                 (reason (irregex-match-substring m 'reason-phrase)) +                 (h (read-headers in)) +                 (port (if (memq 'chunked (header-values 'transfer-encoding h)) +                           (chunked-input-port in) +                           in))) +        (make-response code: code reason: reason +                       major: major minor: minor +                       headers: h +                       port: port))))) + +(define http-1.0-response-parser +  (let ((resp (irregex '(seq (w/nocase "HTTP/1.0") +                             ;; Could use '(= 3 digit) for status-code, but +                             ;; that's currently not compilable +                             (+ space) (=> status-code digit digit digit) +                             (+ space) (=> reason-phrase (* nonl)))))) +    (lambda (line in) +      (and-let* ((m (irregex-match resp line)) +                 (code (string->number (irregex-match-substring m 'status-code))) +                 (reason (irregex-match-substring m 'reason-phrase)) +                 (h (read-headers in))) +        ;; HTTP/1.0 has no chunking +        (make-response code: code reason: reason +                       major: 1 minor: 0 +                       headers: h +                       port: in))))) + +;; You can't "detect" a 0.9 response, because there is no response line. +;; It will simply output the body directly, so we will just assume that +;; if we can't recognise the output string, we just got a 0.9 response. +(define (http-0.9-response-parser line in) +  (make-response code: 200 reason: "OK" +                 major: 0 minor: 9 +                 ;; XXX This is wrong, it re-inserts \r\n, while it may have +                 ;; been a \n only. To work around this, we'd have to write +                 ;; a custom (safe-)read-line procedure. +                 ;; However, it does not matter much because HTTP 0.9 is only +                 ;; defined to ever return text/html, no binary or any other +                 ;; content type. +                 port: (call-with-input-string (string-append line "\r\n") +                         (lambda (str) +                           (make-concatenated-port str in))))) + +(define response-parsers ;; order matters here +  (make-parameter (list http-1.x-response-parser http-1.0-response-parser))) + +(define (read-response inport) +  (let ((line (safe-read-line inport))) +    (and (not (eof-object? line)) +         (let loop ((parsers (response-parsers))) +           (if (null? parsers) +               (signal-http-condition +                'read-response "Unknown protocol line" (list line) +                'unknown-protocol-line 'line line) +               (or ((car parsers) line inport) (loop (cdr parsers)))))))) + +) diff --git a/tests/run.scm b/tests/run.scm new file mode 100644 index 0000000..e2d0b70 --- /dev/null +++ b/tests/run.scm @@ -0,0 +1,1243 @@ +(import scheme chicken.base chicken.port +        chicken.condition chicken.time.posix srfi-1 srfi-18  +        test uri-common intarweb chicken.io chicken.format) + +;; Below, there are specific tests for when these do have a value +(http-header-limit #f) +(http-line-limit #f) +(http-urlencoded-request-data-limit #f) + +(define-syntax test-error* +  (syntax-rules () +    ((_ ?msg (?error-type ...) ?expr) +     (let-syntax ((expression: +                   (syntax-rules () +                     ((_ ?expr) +                      (condition-case (begin ?expr "<no error thrown>") +                                      ((?error-type ...) '(?error-type ...)) +                                      (exn () (##sys#slot exn 1))))))) +       (test ?msg '(?error-type ...) (expression: ?expr)))) +    ((_ ?msg ?error-type ?expr) +     (test-error* ?msg (?error-type) ?expr)) +    ((_ ?error-type ?expr) +     (test-error* (sprintf "~S" '?expr) ?error-type ?expr)))) + +(header-parse-error-handler (lambda (header-name contents header exn) +                              (raise exn))) + +(define (test-read-headers str) +  (call-with-input-string str read-headers)) + +(test-begin "intarweb") +(test-group "headers" +  (test-group "single headers" +   (parameterize ((single-headers '(foo qux)) +                  (header-parsers `((foo . ,(single identity)) +                                    (qux . ,(single identity))))) +     (let ((headers (test-read-headers "foo: bar\r\nqux:\t   \tmooh\t   \r\n\r\n"))) +       (test "Basic test" +             '("bar") (header-values 'foo headers)) +       ;; RFC 2616 4.2 +       (test "Extra spaces are ignored" +             '("mooh") (header-values 'qux headers))) +     (let ((headers (test-read-headers "foo: bar\r\n qux: mooh\r\nquux: mumble\r\n\r\n"))) +       ;; RFC 2616 2.2 +       (test "Continuation chars" +             '("bar qux: mooh") (header-values 'foo headers))) +     ;; Not in RFC but common behaviour - also, robustness principle +     (let ((headers (test-read-headers "foo: bar\r\nfoo: qux\r\n"))) +       (test "Multiple headers for singular header types discarded" +             '("qux") (header-values 'foo headers))))) +  ;; All this RFC 2616 4.2 +  (test-group "multi-headers" +   (parameterize ((header-parsers `((foo . ,(multiple identity))))) +     (let ((headers (test-read-headers "foo: bar\r\nfoo: qux\r\nquux: mumble\r\n\r\n"))) +       (test "Multiple headers" +             '("bar" "qux") (header-values 'foo headers))) +     (let ((headers (test-read-headers "Foo: bar\r\nFoO: qux\r\nquux: mumble\r\n\r\n"))) +       (test "Multiple headers: case insensitivity" +             '("bar" "qux") (header-values 'foo headers))) +     (let ((headers (test-read-headers "foo: bar, qux\r\nquux: mumble\r\n\r\n"))) +       (test "Comma-separated headers" +             '("bar" "qux") (header-values 'foo headers))) +     (let ((headers (test-read-headers "foo: \"ba\\\"r, qux\"\r\nfoo: mooh\r\n\r\n"))) +       (test "Quoted headers" +             '("ba\"r, qux" "mooh") (header-values 'foo headers)))) +   ;; RFC 2616 4.5 +   ;; "Unrecognized header fields are treated as entity-header fields." +   ;; +   ;; RFC 2616 7.1 +   ;; "Unrecognized header fields SHOULD be ignored by the recipient and MUST be +   ;;  forwarded by transparent proxies." +   (let ((headers (test-read-headers "unknown: foo, bar\r\nunknown: blah\r\n\r\n"))) +     (test "Unknown headers are not parsed and put into lists" +           '("foo, bar" "blah") (header-values 'unknown headers)) +     (test "Unknown headers get raw instead of a parameter list" +           'raw (header-params 'unknown headers)))) +  (test-group "miscellaneous header stuff" +    (parameterize ((header-parsers `((foo . ,(multiple identity)) +                                     (bar . ,(lambda x (error "bad header"))))) +                   (http-header-limit 2)) +      (test-error "Missing header contents" (test-read-headers "foo\r\n\r\n")) +      (test-error "Bad header w/ handler" (test-read-headers "bar: x\r\n\r\n")) +      (parameterize ((header-parse-error-handler (lambda (n c h exn) h))) +       (test "Bad header w/o handler" #t (headers? (test-read-headers "bar: x\r\n\r\n")))) +      ;; RFC 2616 2.2 +      ;; "The backslash character ("\") MAY be used as a single-character +      ;; quoting mechanism only within quoted-string and comment constructs." +      ;;     quoted-pair = "\" CHAR +      ;; CHAR implies any char, *including* CR/LF. This is clarified by RFC 822, +      ;; on which RFC 2616 is based. +      ;; Apparently, even \CRLF is allowed (as opposed to \CR\LF) +      (test "Embedded newlines" +            '("bar\r\nqux") +            ;; It's unclear whether we should interpret the "\r\n" as EOL +            ;; in "\\\r\n", or whether it should be seen as an embedded \r +            ;; followed by a \n (which is then interpreted as a literal \n?) +            (header-values 'foo (test-read-headers "Foo: \"bar\\\r\\\nqux\""))) +      (test-error "Too many headers is an error" +                  (test-read-headers "foo: bar\r\nfoo: qux\r\nfoo: hoohoo\r\n"))))) + +(test-group "specialized header parsers" +  (test-group "host/port" +    (test "Hostname and port" +          '(("foo.example.com" . 8080)) +          (header-values 'host (test-read-headers "Host: foo.example.com:8080"))) +    (test "Hostname, no port" +          '(("foo.example.com" . #f)) +          (header-values 'host (test-read-headers "Host: foo.example.com")))) +  (test-group "quality parameter" +   (let* ((headers (test-read-headers "Accept: text/plain; Q=0.5, text/html, text/plain; q=0.123456, application/pdf; q=1.2345, text/xml; q=-0.234, text/whatever; q=")) +          (accept  (header-contents 'accept headers))) +     ;; RFC 2616 3.6: "All transfer-coding values are case insensitive". +     ;; This includes the parameter name (attribute) and value. +     (test "quality value (case-insensitive)" +           0.5 (get-param 'q (first accept) 1.0)) +     (test "quality encoding value" +           'text/plain (get-value (first accept))) +     (test "quality values have only three digits" +           0.123 (get-param 'q (third accept) 1.0)) +     (test "quality values maximum is 1.0" +           1.0 (get-param 'q (fourth accept) 1.0)) +     (test "quality values minimum is 0.0" +           0.0 (get-param 'q (fifth accept) 1.0)) +     (test "missing quality value ok" +           1.0 (get-param 'q (sixth accept) 1.0)))) +  (test-group "charset parameter" +   (let* ((headers (test-read-headers "Content-Type: text/PLAIN; charset=ISO-8859-1")) +          (content-type (header-contents 'content-type headers))) +     (test "content-type value is lowercase symbol" +           'text/plain (get-value (car content-type))) +     ;; RFC 2616 3.4: "HTTP character sets are identified by +     ;; case-insensitive tokens. The complete set of tokens is defined +     ;; by the IANA Character Set registry." +     (test "content-type charset is lowercase symbol" +           'iso-8859-1 (get-param 'charset (car content-type))))) + +  (test-group "symbol-parser-ci" +    (let* ((headers (test-read-headers "Accept-Ranges: FoO"))) +      (test "Case-insensitive" +            '(foo) (header-values 'accept-ranges headers)))) +   +  (test-group "symbol-parser" +    (let* ((headers (test-read-headers "Allow: FoO, foo"))) +      (test "Case-sensitive" +            '(FoO foo) (header-values 'allow headers)))) + +  (test-group "natnum-subparser" +    (parameterize ((single-headers '(foo bar qux mooh)) +                   (header-parsers `((foo . ,(single natnum-subparser)) +                                     (bar . ,(single natnum-subparser)) +                                     (qux . ,(single natnum-subparser)) +                                     (mooh . ,(single natnum-subparser))))) +     (let ((headers (test-read-headers "Foo: 10\r\nBar: abc\r\nQux: -10\r\nMooh: 1.6"))) +       (test "Simple test" +             10 (header-value 'foo headers)) +       (test "No number defaults to 0" +             0 (header-value 'bar headers)) +       (test "No negative numbers" +             0 (header-value 'qux headers)) +       ;; This is a "feature" in the interest of the robustness principle +       (test "Rounding of real numbers" +             2 (header-value 'mooh headers))))) + +  (test-group "cache-control-parser" +    (let ((headers (test-read-headers "Cache-control: max-age=10, private"))) +      (test "max-age is a number" +            '(max-age . 10) (assq 'max-age (header-values 'cache-control headers))) +      (test "private without value" +            '(private . #t) (assq 'private (header-values 'cache-control headers)))) +    (let ((headers (test-read-headers "Cache-control: private=\"accept-encoding, accept-ranges\"\r\nCache-control: must-revalidate"))) +      (test "private with values" +            '(private . (accept-encoding accept-ranges)) +            (assq 'private (header-values 'cache-control headers))) +      (test "Acts like a multi-header" +            '(must-revalidate . #t) (assq 'must-revalidate (header-values 'cache-control headers))))) + +  (test-group "authorization-parser" +    (test-group "basic auth" +     (let ((headers (test-read-headers "Authorization: Basic QWxpIEJhYmE6b3BlbiBzZXNhbWU=\r\n"))) +       (test "basic" +             'basic +             (header-value 'authorization headers)) +       (test "username" +             "Ali Baba" +             (header-param 'username 'authorization headers)) +       (test "password" +             "open sesame" +             (header-param 'password 'authorization headers)))) +    (test-group "digest auth" +      (let ((headers (test-read-headers "Authorization: Digest username=\"Mufasa\", realm=\"testrealm@host.com\", nonce=\"dcd98b7102dd2f0e8b11d0f600bfb0c093\", uri=\"/dir/index.html\", qop=auth, nc=00000001, cnonce=\"0a4f113b\", response=\"6629fae49393a05397450978507c4ef1\", opaque=\"5ccc069c403ebaf9f0171e9517f40e41\", algorithm=MD5"))) +        (test "digest" +              'digest +              (header-value 'authorization headers)) +        (test "realm" +              "testrealm@host.com" +              (header-param 'realm 'authorization headers)) +        (test "nonce" +              "dcd98b7102dd2f0e8b11d0f600bfb0c093" +              (header-param 'nonce 'authorization headers)) +        (test "username" +              "Mufasa" +              (header-param 'username 'authorization headers)) +        (test "qop" +              'auth +              (header-param 'qop 'authorization headers)) +        (test "digest uri" +              "/dir/index.html" +              (uri->string (header-param 'uri 'authorization headers))) +        (test "nonce count" +              1 +              (header-param 'nc 'authorization headers)) +        (test "cnonce" +              "0a4f113b" +              (header-param 'cnonce 'authorization headers)) +        (test "response" +              "6629fae49393a05397450978507c4ef1" +              (header-param 'response 'authorization headers)) +        (test "opaque" +              "5ccc069c403ebaf9f0171e9517f40e41" +              (header-param 'opaque 'authorization headers)) +        (test "algorithm" +              'md5 +              (header-param 'algorithm 'authorization headers)))) +    (test-group "custom authorization scheme" +      (parameterize ((authorization-param-subparsers +                      `((custom . ,(lambda (contents pos) +                                     (receive (c p) +                                       (parse-token contents pos) +                                       (values `((contents . ,(http-name->symbol c))) p)))) +                        . ,(authorization-param-subparsers)))) +        (let ((headers (test-read-headers "Authorization: Custom Security-through-obscurity"))) +          (test "Custom" +                'custom +                (header-value 'authorization headers)) +          (test "Custom contents" +                'security-through-obscurity +                (header-param 'contents 'authorization headers)))))) +   +  (test-group "authenticate parser" +    (test-group "basic auth" +      (let ((headers (test-read-headers "WWW-Authenticate: Basic realm=\"WallyWorld\""))) +        (test "basic" +              'basic +              (header-value 'www-authenticate headers)) +        (test "realm" +              "WallyWorld" +              (header-param 'realm 'www-authenticate headers)))) +    (test-group "digest auth" +      (let ((headers (test-read-headers "WWW-Authenticate: Digest realm=\"testrealm@host.com\", qop=\"auth, auth-int\", nonce=\"dcd98b7102dd2f0e8b11d0f600bfb0c093\", opaque=\"5ccc069c403ebaf9f0171e9517f40e41\""))) +        (test "digest" +              'digest +              (header-value 'www-authenticate headers)) +        (test "realm" +              "testrealm@host.com" +              (header-param 'realm 'www-authenticate headers)) +        (test "qop" +              '(auth auth-int) +              (header-param 'qop 'www-authenticate headers)) +        (test "nonce" +              "dcd98b7102dd2f0e8b11d0f600bfb0c093" +              (header-param 'nonce 'www-authenticate headers)) +        (test "opaque" +              "5ccc069c403ebaf9f0171e9517f40e41" +              (header-param 'opaque 'www-authenticate headers)) +        (test "missing stale value" +              #f +              (header-param 'stale 'www-authenticate headers))) +      (let ((headers (test-read-headers "WWW-Authenticate: Digest domain=\"/example http://foo.com/bar\", stale=TRUE"))) +        (test "domains" +              '("/example" "http://foo.com/bar") +              (map uri->string +                   (header-param 'domain 'www-authenticate headers))) +        (test "stale" +              #t +              (header-param 'stale 'www-authenticate headers))) +      (let ((headers (test-read-headers "WWW-Authenticate: Digest stale=whatever"))) +        (test "non-true stale value" +              #f +              (header-param 'stale 'www-authenticate headers))))) +   +  (test-group "pragma-parser" +    (let ((headers (test-read-headers "Pragma: custom-value=10, no-cache"))) +      (test "value" +            '(custom-value . "10") +            (assq 'custom-value (header-values 'pragma headers))) +      (test "no value" +            '(no-cache . #t) (assq 'no-cache (header-values 'pragma headers)))) +    (let ((headers (test-read-headers "Cache-control: private=\"accept-encoding, accept-ranges\"\r\nCache-control: must-revalidate"))) +      (test "private with values" +            '(private . (accept-encoding accept-ranges)) +            (assq 'private (header-values 'cache-control headers))) +      (test "Acts like a multi-header" +            '(must-revalidate . #t) (assq 'must-revalidate (header-values 'cache-control headers))))) + +  ;; RFC 2616, 14.15  &  RFC 1864 (Base64) +  (test-group "base64-parser" +    (let ((headers (test-read-headers "Content-md5: Q2hlY2sgSW50ZWdyaXR5IQ=="))) +      (test "md5 is base64-decoded" +            "Check Integrity!" +            (header-value 'content-md5 headers)))) + +  (test-group "range-parser" +    (let ((headers (test-read-headers "content-range: bytes 500-999/1234"))) +      (test "Simple range" +            '(500 999 1234) +            (header-value 'content-range headers)))) + +  (test-group "content-disposition" +    (let ((headers (test-read-headers "Content-Disposition: attachment; filename=dir/foo.jpg"))) +      (test "Attachment with filename parameter containing directory" +            `(attachment (filename . "foo.jpg")) +            (cons (header-value  'content-disposition headers) +                  (header-params 'content-disposition headers)))) +    (let ((headers (test-read-headers "Content-Disposition: inline; filename=foo.jpg; creation-date=Sun, 06 Nov 1994 08:49:37 GMT"))) +      (test "Inline with filename and (not quoted) creation-date parameter" +            `(inline +              (filename . "foo.jpg") +              (creation-date . ,(utc-time->seconds '#(37 49 08 06 10 94 0 310 #f 0)))) +            (cons (header-value  'content-disposition headers) +                  (map (lambda (x) +                         (if (vector? (cdr x)) +                             (cons (car x) (utc-time->seconds (cdr x))) +                             x)) +                       (header-params 'content-disposition headers))))) +    (let ((headers (test-read-headers "Content-Disposition: inline; read-date=\"Sun, 06 Nov 1994 08:49:37 GMT\"; size=100"))) +      (test "Inline with size and (quoted) read-date parameter" +            `(inline +              (read-date . ,(utc-time->seconds '#(37 49 08 06 10 94 0 310 #f 0))) +              (size . 100)) +            (cons (header-value  'content-disposition headers) +                  (map (lambda (x) +                         (if (vector? (cdr x)) +                             (cons (car x) (utc-time->seconds (cdr x))) +                             x)) +                       (header-params 'content-disposition headers)))))) + +  (test-group "normalized-uri" +    (let ((headers (test-read-headers "Location: http://example.com/foo"))) +      (test "Uri" +            (uri-reference "http://example.com/foo") +            (header-value 'location headers))) +    (let ((headers (test-read-headers "Location: http://example.com/foo/../bar"))) +     (test "Auto-normalization" +           (uri-reference "http://example.com/bar") +           (header-value 'location headers)))) + +  (test-group "etag-parser" +    (let ((headers (test-read-headers "Etag: \"foo\""))) +      (test "Strong tag" +            '(strong . "foo") +            (header-value 'etag headers))) +    (let ((headers (test-read-headers "Etag: W/\"bar\""))) +      (test "Weak tag" +            '(weak . "bar") +            (header-value 'etag headers))) +    (let ((headers (test-read-headers "Etag: \"\""))) +      (test "Empty tag" +            '(strong . "") +            (header-value 'etag headers))) +    (let ((headers (test-read-headers "Etag: \"W/bar\""))) +        (test "Strong tag, containing W/ prefix" +              '(strong . "W/bar") +              (header-value 'etag headers)))) + +  (test-group "if-match parser" +    (let ((headers (test-read-headers "If-match: foo"))) +      (test "Strong etag" +            '(strong . "foo") +            (header-value 'if-match headers))) +    (let ((headers (test-read-headers "If-match: W/foo"))) +      (test "Weak etag" +            '(weak . "foo") +            (header-value 'if-match headers))) +    (let ((headers (test-read-headers "If-match: W/foo bar"))) +      (test "Multiple etags" +            '((weak . "foo") (strong . "bar")) +            (header-values 'if-match headers))) +    (let ((headers (test-read-headers "If-match: *"))) +      (test "Wildcard" +            '* +            (header-value 'if-match headers)))) + +  (test-group "http-date-parser" +    (let ((headers (test-read-headers "Date: Sun, 06 Nov 1994 08:49:37 GMT"))) +      (test "RFC1123 time" +            (utc-time->seconds '#(37 49 08 06 10 94 0 310 #f 0)) +            (utc-time->seconds (header-value 'date headers)))) +    (let ((headers (test-read-headers "Date: Sunday, 06-Nov-94 08:49:37 GMT"))) +      (test "RFC850 time" +            (utc-time->seconds '#(37 49 08 06 10 94 0 310 #f 0)) +            (utc-time->seconds (header-value 'date headers)))) +    (let ((headers (test-read-headers "Date: Sun Nov  6 08:49:37 1994"))) +      (test "asctime time" +            (utc-time->seconds '#(37 49 08 06 10 94 0 310 #f 0)) +            (utc-time->seconds (header-value 'date headers))))) + +  ;; This seems a little excessive.. Maybe find a way to reduce the number +  ;; of cases and still have a good representative test? +  (test-group "If-Range parser" +    (let ((headers (test-read-headers "If-Range: Sun, 06 Nov 1994 08:49:37 GMT"))) +      (test "RFC1123 time" +            (utc-time->seconds '#(37 49 08 06 10 94 0 310 #f 0)) +            (utc-time->seconds (header-value 'if-range headers)))) +    (let ((headers (test-read-headers "If-Range: Sunday, 06-Nov-94 08:49:37 GMT"))) +      (test "RFC850 time" +            (utc-time->seconds '#(37 49 08 06 10 94 0 310 #f 0)) +            (utc-time->seconds (header-value 'if-range headers)))) +    (let ((headers (test-read-headers "If-Range: Sun Nov  6 08:49:37 1994"))) +      (test "asctime time" +            (utc-time->seconds '#(37 49 08 06 10 94 0 310 #f 0)) +            (utc-time->seconds (header-value 'if-range headers)))) +    (let ((headers (test-read-headers "If-Range: \"foo\""))) +      (test "Strong Etag" +            '(strong . "foo") +            (header-value 'if-range headers))) +    (let ((headers (test-read-headers "If-Range: W/\"bar\""))) +      (test "Weak Etag" +            '(weak . "bar") +            (header-value 'if-range headers))) +    (let ((headers (test-read-headers "If-Range: \"\""))) +      (test "Empty Etag" +            '(strong . "") +            (header-value 'if-range headers))) +    (let ((headers (test-read-headers "If-Range: \"W/bar\""))) +        (test "Strong Etag, containing W/ prefix" +              '(strong . "W/bar") +              (header-value 'if-range headers)))    ) + +  (test-group "via parser" +    (let ((headers (test-read-headers "Via: 1.1"))) +      (test "simple" +            '("1.1") +            (header-values 'via headers))) +    (let ((headers (test-read-headers "Via: 1.1 foo:80 (comment)"))) +      (test "complex" +            '("1.1 foo:80 (comment)") +            (header-values 'via headers))) +    (let ((headers (test-read-headers "Via: 1.1 foo"))) +      (test "one hop" +            '("1.1 foo") +            (header-values 'via headers))) +    (let ((headers (test-read-headers "Via: 1.1 foo, 1.0 bar"))) +      (test "two hops" +            '("1.1 foo" "1.0 bar") +            (header-values 'via headers)))) + +  (test-group "product parser" +    (test "Simple product" +          '("websocket" . #f) +          (header-value 'upgrade (test-read-headers "Upgrade: websocket\r\n"))) +    (test "Product with version" +          '("TLS" . "1.0") +          (header-value 'upgrade (test-read-headers "Upgrade: TLS/1.0\r\n")))) + +  (test-group "software parser" +    (test "Simple product" +          '(("Mozilla" "5.0" #f)) +          (header-value 'user-agent (test-read-headers "User-Agent: Mozilla/5.0\r\n"))) +    (test "Product with comment" +          '(("Mozilla" #f "foo")) +          (header-value 'user-agent (test-read-headers "User-Agent: Mozilla (foo)\r\n")))     +    (test "Realistic product (comments, semicolons)" +          '(("Mozilla" "5.0" "X11; U; NetBSD amd64; en-US; rv:1.9.0.3") ("Gecko" "2008110501" #f) ("Minefield" "3.0.3" #f)) +          (header-value 'user-agent (test-read-headers "User-Agent: Mozilla/5.0 (X11; U; NetBSD amd64; en-US; rv:1.9.0.3) Gecko/2008110501 Minefield/3.0.3\r\n"))) +    ;; Reported by Peter Danenberg; Google Drive returns this header +    (test "Realistic product (quoted comment)" +          '(("UploadServer" #f "Built on May 4 2015 17:31:43 (1430785903)")) +          (header-value 'server (test-read-headers "Server: UploadServer (\"Built on May 4 2015 17:31:43 (1430785903)\")\r\n")))) + +  (test-group "Set-Cookie parser" +    (let* ((headers (test-read-headers "Set-Cookie: foo=\"bar\""))) +      (test "Simple name/value pair" +            '("foo" . "bar") +            (get-value (first (header-contents 'set-cookie headers))))) +    (let* ((headers (test-read-headers "Set-Cookie: foo=qux\r\nSet-Cookie: Foo=\"bar\""))) +      ;; XXX: Should intarweb remove these, or should the user code handle this? +      ;; What if interacting with actual broken code on the other side? +      (test "Multiple cookies with same name (CI) are all kept" +            '(("foo" . "qux") ("Foo" . "bar")) +            (map get-value (header-contents 'set-cookie headers)))) +    (let* ((headers (test-read-headers "Set-Cookie: Foo=bar"))) +      (test "Cookie names preserve case" +            '("Foo" . "bar") +            (get-value (first (header-contents 'set-cookie headers))))) +    (let ((headers (test-read-headers "Set-Cookie: foo=bar=qux; max-age=10"))) +      (test "Cookie with = signs" +            '("foo" . "bar=qux") +            (get-value (first (header-contents 'set-cookie headers))))) +    (let* ((headers (test-read-headers "Set-Cookie: foo=bar; Comment=\"Hi, there!\", qux=mooh\r\nSet-Cookie: mumble=mutter\r\n"))) +      (test "Comment" +            "Hi, there!" +            (get-param 'comment +                       (first (header-contents 'set-cookie headers)))) +      (test "Multiple cookies in one header" +            '("qux" . "mooh") +            (get-value (second (header-contents 'set-cookie headers)))) +      (test "Multiple cookies in multiple headers" +            '("mumble" . "mutter") +            (get-value (third (header-contents 'set-cookie headers)))) +      (test "Missing \"secure\" value" +            #f +            (get-param 'secure +                       (third (header-contents 'set-cookie headers))))) +    (let* ((headers (test-read-headers "Set-Cookie: foo=; expires=Sunday, 20-Jul-08 15:23:42 GMT; secure; path = / ; Port=80,8080"))) +      (test "Missing value" +            '("foo" . "") +            (get-value (first (header-contents 'set-cookie headers)))) +      (test "Old-style cookie expires value" +            (utc-time->seconds '#(42 23 15 20 6 108 0 309 #f 0)) +            (utc-time->seconds +             (get-param 'expires +                        (first (header-contents 'set-cookie headers))))) +      (test "Secure value" +            #t +            (get-param 'secure +                       (first (header-contents 'set-cookie headers)))) +      (test "Path" +            (uri-reference "/") +            (get-param 'path +                       (first (header-contents 'set-cookie headers)))) +      (test "Port numbers" +            '(80 8080) +            (get-param 'port +                       (first (header-contents 'set-cookie headers))))) +    (let* ((headers (test-read-headers "Set-Cookie: foo=; expires=Sun, 20 Jul 2008 15:23:42 GMT; secure; path = / "))) +      (test "Noncompliant syntax cookie expiry value (rfc1123)" +            (utc-time->seconds '#(42 23 15 20 6 108 0 309 #f 0)) +            (utc-time->seconds +             (get-param 'expires +                        (first (header-contents 'set-cookie headers)))))) +    (let* ((headers (test-read-headers "Set-Cookie: foo=; expires=Sun, 20-Jul-2008 15:23:42 GMT; secure; path = / "))) +      (test "Noncompliant syntax cookie expiry value (rfc850-like, abbrev day)" +            (utc-time->seconds '#(42 23 15 20 6 108 0 309 #f 0)) +            (utc-time->seconds +             (get-param 'expires +                        (first (header-contents 'set-cookie headers))))))) +   +  (test-group "cookie-parser" +    (let* ((headers (test-read-headers "Cookie: Foo=bar; $Path=/; qux=mooh; $unknown=something"))) +      (test "Multiple cookies in the same header" +            '(("Foo" . "bar") . ("qux" . "mooh")) +            (cons +             (get-value (first  (header-contents 'cookie headers))) +             (get-value (second (header-contents 'cookie headers))))) +      (test "Parameters of cookies (spaces stripped)" +            (uri-reference "/") +            (get-param 'path (first (header-contents 'cookie headers)))) +      (test "Parameters of cookies" +            "something" +            (get-param 'unknown (second (header-contents 'cookie headers))))) +    (let* ((headers (test-read-headers "Cookie: $Version=\"1\"; Foo=bar; $Path=/; qux=mooh; $unknown=something"))) +      (test "Version string is used for all cookies" +            (cons 1 1) +            (cons +             (get-param 'version (first (header-contents 'cookie headers))) +             (get-param 'version (second (header-contents 'cookie headers))))))) + +  (test-group "strict-transport-security-parser" +    (let ((headers (test-read-headers "Strict-Transport-Security: max-age=10; includeSubDomains"))) +      (test "max-age is a number" +            '(max-age . 10) +            (assq 'max-age (header-value 'strict-transport-security headers))) +      (test "includeSubDomains without value" +            '(includesubdomains . #t) +            (assq 'includesubdomains (header-value 'strict-transport-security headers))))) + +  (test-group "headers" +    (test "Simple test" +          `(bar qux) +          (header-values 'foo (headers `((foo bar qux))))) +    (test "Multi headers are folded" +          `(bar qux) +          (header-values 'foo (headers `((foo bar) +                                         (foo qux))))) +    (test "Single headers are unique" +          `(qux) +          (header-values 'foo (parameterize ((single-headers '(foo))) +                                (headers `((foo bar) +                                           (foo qux)))))) +    (test "Extra single headers are ignored" +          `(qux) +          (header-values 'foo (parameterize ((single-headers '(foo))) +                                (headers `((foo bar qux)))))) +    (test "Parameters" +          `((bar . qux)) +          (get-params +           (car (header-contents 'foo (headers `((foo #(mooh ((bar . qux)))))))))) +    (test "Multi headers are folded into old headers" +          `(bar qux) +          (header-values 'foo (headers `((foo qux)) +                                       (headers `((foo bar)))))))) + +(define (test-unparse-headers h) +  (call-with-output-string +   (lambda (o) +     (unparse-headers (headers h) o)))) + +(test-group "unparsers" +  (test-group "default unparser" +    (test "String" +          "Foo: bar\r\n" +          (test-unparse-headers `((foo "bar")))) +    (test "Multiple strings" +          "Foo: bar, qux\r\n" +          (test-unparse-headers `((foo "bar" "qux")))) +    (test "Auto-quoting on commas and whitespace" +          "Foo: \"bar, qux\", \"mooh blah\"\r\n" +          (test-unparse-headers `((foo "bar, qux" "mooh blah")))) +    ;; RFC 2616 2.2 +    (test "Escaping quotes" +          "Foo: \"bar \\\" qux\", mooh\r\n" +          (test-unparse-headers `((foo "bar \" qux" "mooh")))) +    (test "Escaping control characters" +          "Foo: \"bar\\\r\\\x01qux\"\r\n" +          (test-unparse-headers `((foo "bar\r\x01qux")))) +    ;; Unfortunately, there are no or very few HTTP implementations +    ;; which understand that newlines can be escaped with a backslash +    ;; in a quoted string. That's why we don't allow it. +    ;; The user is expected to escape the newlines according to the type +    ;; of header (URLencoding, removing the newlines from cookies, etc) +    (test-error* "Embedded newlines throw an error" +                 (exn http unencoded-header) +                 (test-unparse-headers `((foo "bar\n\x01qux")))) +    (test "Alist" +          "Foo: Bar=qux, Mooh=mumble\r\n" +          (test-unparse-headers `((foo (bar . qux) (mooh . mumble))))) +    (test "Alist with escapes" +          "Foo: Bar=qux, Mooh=\"mum, ble\"\r\n" +          (test-unparse-headers `((foo (bar . "qux") (mooh . "mum, ble"))))) +    (test "URI" +          "Foo: http://foo.com/bar;xyz?a=b\r\n" +          (test-unparse-headers `((foo ,(uri-reference "http://foo.com/bar;xyz?a=b"))))) +    (test "Parameters" +          "Foo: bar; qux=mooh; mumble=mutter; blah\r\n" +          (test-unparse-headers `((foo #(bar ((qux . mooh) +                                              (mumble . mutter) +                                              (blah . #t) +                                              (feh . #f))))))) +    (test "Raw headers are unparsed as-is" +          "Foo: bla bla; whatever \"ohai\"\r\n" +          (test-unparse-headers `((foo #("bla bla; whatever \"ohai\"" raw))))) +    (test "Raw headers are unparsed as-is for known headers, too" +          "Etag: \"hi there\r\n" ;; unclosed quote is intentional here +          (test-unparse-headers `((etag #("\"hi there" raw))))) +    (test-error* "Embedded newlines in raw headers also throw an error" +                 (exn http unencoded-header) +                 (test-unparse-headers `((foo #("bar\n\x01qux" raw)))))) +  (test-group "etag unparser" +    (test "Weak tag" +          "Etag: W/\"blah\"\r\n" +          (test-unparse-headers `((etag (weak . "blah"))))) +    (test "Strong tag" +          "Etag: \"blah\"\r\n" +          (test-unparse-headers `((etag (strong . "blah"))))) +    (test "Strong tag starting with W/" +          "Etag: \"W/blah\"\r\n" +          (test-unparse-headers `((etag (strong . "W/blah")))))) +  (test-group "if-match unparser" +    (test "List of etags" +          "If-Match: \"foo\", \"bar\", W/\"qux\"\r\n" +          (test-unparse-headers +           `((if-match (strong . "foo") (strong . "bar") (weak . "qux"))))) +    (test "Wildcard" +          "If-Match: *\r\n" +          (test-unparse-headers +           `((if-match (strong . "foo") * (weak . "qux")))))) +  ;; http-dates are all deserialized as rfc1123 +  (test-group "date/time unparser" +    (test "RFC1123 time" +          "If-Modified-Since: Sun, 06 Nov 1994 08:49:37 GMT\r\n" +          ;; Having to specify a vector here twice is sucky and counter-intuitive +          (test-unparse-headers +           `((if-modified-since #(#(37 49 08 06 10 94 0 310 #f 0) ())))))) +  (test-group "host/port unparser" +    (test "No port specified" +          "Host: foo.example.com\r\n" +          (test-unparse-headers `((host ("foo.example.com" . #f))))) +    (test "Different port" +          "Host: foo.example.com:8080\r\n" +          (test-unparse-headers `((host ("foo.example.com" . 8080)))))) +  (test-group "product unparser" +    (test "Products without version" +          "Upgrade: websocket, foo\r\n" +          (test-unparse-headers `((upgrade ("websocket" . #f) ("foo" . #f))))) +    (test "Products with version" +          "Upgrade: TLS/1.0, bar/2\r\n" +          (test-unparse-headers `((upgrade ("TLS" . "1.0") ("bar" . "2")))))) +  (test-group "software unparser" +    (test "Product with comments" +          "User-Agent: Mozilla (X11) Gecko/2008110501\r\n" +          (test-unparse-headers `((user-agent (("Mozilla" #f "X11") ("Gecko" "2008110501" #f)))))) +    (test "Realistic product" +          "User-Agent: Mozilla/5.0 (X11; U; NetBSD amd64; en-US; rv:1.9.0.3) Gecko/2008110501 Minefield/3.0.3\r\n" +          (test-unparse-headers `((user-agent (("Mozilla" "5.0" "X11; U; NetBSD amd64; en-US; rv:1.9.0.3") ("Gecko" "2008110501" #f) ("Minefield" "3.0.3" #f))))))) +  (test-group "cookie unparser" +    (test "Basic cookie" +          "Cookie: foo=bar; $Path=/; Qux=mooh; $Unknown=something\r\n" +          (test-unparse-headers `((cookie #(("foo" . "bar") +                                            ((path . ,(uri-reference "/")))) +                                          #(("Qux" . "mooh") +                                            ((unknown . "something"))))))) +    (test "Port list" +          "Cookie: Foo=bar; $Port=80,8080\r\n" +          (test-unparse-headers `((cookie #(("Foo" . "bar") +                                            ((port . (80 8080)))))))) +    (test "#t or #f values" +          "Cookie: Foo=bar; $Port\r\n" +          (test-unparse-headers `((cookie #(("Foo" . "bar") +                                            ((port . #t) (domain . #f)))))))) +  (test-group "Set-Cookie unparser" +    (test "Simple name/value pair" +          "Set-Cookie: foo=\"bar with space\"\r\n" +          (test-unparse-headers `((set-cookie ("foo" . "bar with space"))))) +    ;; XXX: Should intarweb remove these, or should the user code handle this? +    ;; What if interacting with actual broken code on the other side? +    (test "Multiple cookies with same name (CI) are all written" +          "Set-Cookie: foo=qux\r\nSet-Cookie: Foo=bar\r\n" +          (test-unparse-headers `((set-cookie ("foo" . "qux") ("Foo" . "bar"))))) +    (test "Cookie names preserve case" +          "Set-Cookie: Foo=bar\r\n" +          (test-unparse-headers `((set-cookie ("Foo" . "bar"))))) +    (test "Cookie with = signs" +          "Set-Cookie: foo=\"bar=qux\"; Max-Age=10\r\n" +          (test-unparse-headers `((set-cookie #(("foo" . "bar=qux") ((max-age . 10))))))) +    (test "Comment" +          "Set-Cookie: foo=bar; Comment=\"Hi, there!\"\r\n" +          (test-unparse-headers `((set-cookie #(("foo" . "bar") +                                                ((comment . "Hi, there!"))))))) +    (test "Old-style cookie expires value" +          "Set-Cookie: foo=; Expires=Sunday, 20-Jul-08 15:23:42 GMT\r\n" +          (test-unparse-headers `((set-cookie #(("foo" . "") +                                                ((expires . #(42 23 15 20 6 108 0 309 #f 0))))))))     +    (test "Secure (true)" +          "Set-Cookie: foo=bar; Secure\r\n" +          (test-unparse-headers `((set-cookie #(("foo" . "bar") +                                                ((secure . #t))))))) +    (test "Secure (false)" +          "Set-Cookie: foo=bar\r\n" +          (test-unparse-headers `((set-cookie #(("foo" . "bar") +                                                ((secure . #f))))))) + +    (test "Path" +          "Set-Cookie: foo=bar; Path=/blah\r\n" +          (test-unparse-headers `((set-cookie #(("foo" . "bar") +                                                ((path . ,(uri-reference "/blah")) +                                                 (secure . #f))))))))   +  (test-group "authorization unparser" +    (test "Basic auth" +          "Authorization: Basic QWxpIEJhYmE6b3BlbiBzZXNhbWU=\r\n" +          (test-unparse-headers +           `((authorization #(basic +                              ((username . "Ali Baba") +                               (password . "open sesame"))))))) +    (test-error* "Basic auth with colon in username" +                 (exn http username-with-colon) +                 (test-unparse-headers +                  `((authorization #(basic +                                     ((username . "foo:bar") +                                      (password . "qux"))))))) +    (test "Digest auth" +          "Authorization: Digest username=\"Mufasa\", realm=\"testrealm@host.com\", nonce=\"dcd98b7102dd2f0e8b11d0f600bfb0c093\", uri=\"/dir/index.html\", qop=\"auth\", cnonce=\"0a4f113b\", response=\"6629fae49393a05397450978507c4ef1\", opaque=\"5ccc069c403ebaf9f0171e9517f40e41\", nc=00000001, algorithm=\"md5\"\r\n" +          (test-unparse-headers +           `((authorization #(digest +                              ((username . "Mufasa") +                               (realm . "testrealm@host.com") +                               (nonce . "dcd98b7102dd2f0e8b11d0f600bfb0c093") +                               (uri . ,(uri-reference "/dir/index.html")) +                               (qop . auth) +                               (cnonce . "0a4f113b") +                               (response . "6629fae49393a05397450978507c4ef1") +                               (opaque . "5ccc069c403ebaf9f0171e9517f40e41") +                               (nc . 1) +                               (algorithm . md5))))))) +    (test "Custom auth" +          "Authorization: Custom some-random-contents\r\n" +          (parameterize ((authorization-param-subunparsers +                          `((custom . ,(lambda (params) +                                         (alist-ref 'contents params))) +                            . ,(authorization-param-subparsers)))) +            (test-unparse-headers +             `((authorization #(custom ((contents . some-random-contents))))))))) + +  (test-group "authenticate unparser" +    (test-group "basic auth" +      (test "basic" +            "Www-Authenticate: Basic realm=\"WallyWorld\"\r\n" +            (test-unparse-headers +             `((www-authenticate #(basic +                                   ((realm . "WallyWorld")))))))) +    (test-group "digest auth" +      (test "digest" +            "Www-Authenticate: Digest realm=\"testrealm@host.com\", qop=\"auth,auth-int\", nonce=\"dcd98b7102dd2f0e8b11d0f600bfb0c093\", opaque=\"5ccc069c403ebaf9f0171e9517f40e41\"\r\n" +            (test-unparse-headers +             `((www-authenticate #(digest +                                   ((realm . "testrealm@host.com") +                                    (qop . (auth auth-int)) +                                    (nonce . "dcd98b7102dd2f0e8b11d0f600bfb0c093") +                                    (opaque . "5ccc069c403ebaf9f0171e9517f40e41"))))))) +      (test "domains" +            "Www-Authenticate: Digest domain=\"/example http://foo.com/bar\"\r\n" +            (test-unparse-headers +             `((www-authenticate #(digest +                                   ((domain . (,(uri-reference "/example") +                                               ,(uri-reference "http://foo.com/bar"))))))))) +      (test "stale" +            "Www-Authenticate: Digest realm=\"foo\", stale=TRUE\r\n" +            (test-unparse-headers +             `((www-authenticate #(digest +                                   ((realm . "foo") +                                    (stale . #t))))))) +      (test "stale present but false" +            "Www-Authenticate: Digest realm=\"foo\"\r\n" +            (test-unparse-headers +             `((www-authenticate #(digest +                                   ((realm . "foo") +                                    (stale . #f))))))))) +  (test-group "content-disposition unparser" +    (test "Attributes are always fully quoted and filenames stripped" +          "Content-Disposition: form-data; name=\"foo\"; filename=\"a b c\"\r\n" +          (test-unparse-headers `((content-disposition +                                   #(form-data ((name . foo) +                                                (filename . "blabla/a b c"))))))) +    (test "Size and dates are recognised correctly" +          "Content-Disposition: inline; size=20; creation-date=\"Sun, 06 Nov 1994 08:49:37 GMT\"\r\n" +          (test-unparse-headers `((content-disposition +                                   #(inline ((size . 20) +                                             (creation-date . #(37 49 08 06 10 94 0 310 #f 0))))))))) + +  (test-group "strict-transport-security unparser" +    (test "Silly capitalization is honored, even if unneccessary" +          "Strict-Transport-Security: max-age=10; includeSubDomains\r\n" +          (test-unparse-headers `((strict-transport-security +                                   ((max-age . 10) +                                    (includesubdomains . #t)))))))) + +(define (test-read-request str) +  (call-with-input-string str read-request)) + +(test-group "reading of requests" +  (parameterize ((request-parsers `(,(lambda (line in) +                                       (and (string=? line "foo") 'foo)) +                                    ,(lambda (line in) +                                       (and (string=? line "bar") 'bar))))) +    (test-error* (exn http unknown-protocol-line) (test-read-request "qux")) +    (test #f (test-read-request "")) +    (test 'foo (test-read-request "foo")) +    (test 'bar (test-read-request "bar"))) +  ;; Even though we officially "should" support HTTP/0.9, we disable it +  ;; by default because there are security implications of just outputting +  ;; responses for random resources that might be under attacker control. +  (test-group "HTTP/0.9" +    (test-error* "By default, HTTP/0.9 is disabled" +                 (exn http unknown-protocol-line) +                 (test-read-request "GET /path/../to/stuff?arg1=val1&arg2=val2\r\n")) +    (parameterize ((request-parsers (list http-1.x-request-parser http-0.9-request-parser))) +     (let ((req (test-read-request "GET /path/../to/stuff?arg1=val1&arg2=val2\r\n"))) +       (test 0 (request-major req)) +       (test 9 (request-minor req)) +       (test 'GET (request-method req)) +       ;; Path-normalized URI (dots removed) +       (test (uri-reference "/to/stuff?arg1=val1&arg2=val2") (request-uri req)) +       (test (headers '()) (request-headers req))) +     ;; RFC 1945 5.0 does not mention case-sensitivity for the method in HTTP/0.9. +     ;; It only mentions it in the context of HTTP/1.x (section 5.1.1). +     ;; We obey the BNF syntax rule in 2.1: +     ;;     "literal" - Quotation marks surround literal text. +     ;;                 Unless stated otherwise, the text is case-insensitive. +     ;; Section 4.1 defines: +     ;;     Simple-Request  = "GET" SP Request-URI CRLF +     (test "Method is case-insensitive" 'GET (request-method (test-read-request "geT /path\r\n"))) +     (test-error "0.9 only knows GET" (test-read-request "PUT /path")))) +  (test-group "HTTP/1.0" +    (test-error "Asterisk is not allowed for HTTP/1.0" +                (request-uri (test-read-request "OPTIONS * HTTP/1.0\r\n"))) +    (let ((req (test-read-request "GET /path/to/stuff?arg1=val1&arg2=val2 HTTP/1.0\r\n\r\n"))) +      (test 1 (request-major req)) +      (test 0 (request-minor req)) +      (test 'GET (request-method req)) +      (test (uri-reference "/path/to/stuff?arg1=val1&arg2=val2") (request-uri req)) +      (test (headers '()) (request-headers req))) +    (test 'PUT (request-method (test-read-request "PUT /path HTTP/1.0\r\n"))) +    (let ((req (test-read-request "POST / HTTP/1.0\r\nTransfer-Encoding: chunked\r\n\r\n3\r\nfoo\r\na\r\n1234567890\r\n"))) +      (test "Chunking ignored" +            "3\r\nfoo\r\na\r\n1234567890\r\n" +            (read-string #f (request-port req))))) +  (test-group "HTTP/1.1" ; No need to test all things we test for 1.0 +    (test "Asterisk is treated specially and returns #f uri" +          #f (request-uri (test-read-request "OPTIONS * HTTP/1.1\r\n"))) +   (let ((req (test-read-request "GET /path/to/stuff?arg1=val1&arg2=val2 HTTP/1.1\r\n\r\n"))) +     (test 1 (request-major req)) +     (test 1 (request-minor req))) +   (test 'PUT (request-method (test-read-request "PUT /path HTTP/1.1\r\n\r\n"))) +   ; RFC 2616 5.1.1 +   (test "Method is case-sensitive" 'geT (request-method (test-read-request "geT /path HTTP/1.1\r\n\r\n"))) +   ; RFC 2616 3.1 + case-insensitivity BNF rule +   (test "Protocol is case-insensitive" '1 (request-minor (test-read-request "GET /path htTP/1.1\r\n\r\n"))) +   ;; TODO: Test chunking +   (test-error "Request line limit exceeded gives error" +               (parameterize ((http-line-limit 5)) +                 (test-read-request "GET /path HTTP/1.1\r\n\r\n"))) +   (test "Reading request body" +         '((abc . "def") (ghi . "jkl")) +         (let ((req (test-read-request +                     "GET / HTTP/1.1\r\nContent-Length: 15\r\n\r\nabc=def;ghi=jkl"))) +           (read-urlencoded-request-data req))) +   (test "Reading request body with bigger limit" +         '((abc . "def")) +         (let ((req (test-read-request +                     "GET / HTTP/1.1\r\nContent-Length: 7\r\n\r\nabc=def"))) +           ;; Test for 8, since 7 would error +           (parameterize ((http-urlencoded-request-data-limit 8)) +             (read-urlencoded-request-data req)))) +   (test-error "Request body limit exceeded gives error" +               (let ((req (test-read-request +                           "GET / HTTP/1.1\r\nContent-Length: 7\r\n\r\nabc=def"))) +                 ;; This errors when the limit is hit, not when it is exceeded +                 (parameterize ((http-urlencoded-request-data-limit 7)) +                   (read-urlencoded-request-data req)))) +   (let ((req (test-read-request "POST / HTTP/1.1\r\nTransfer-Encoding: chunked\r\n\r\n3\r\nfoo\r\na\r\n1234567890\r\n0\r\n\r\n"))) +      (test "Chunking" +            "foo1234567890" +            (read-string #f (request-port req))))) +  (test-group "Invalid protocols" +    (test-error "Total cruft is unrecognised" +                (test-read-request "whatever\r\n")) +    (test-error "Invalid URI also causes protocol not to be recognised" +                (test-read-request "GET //path HTTP/1.0\r\n")))) + +(define (test-write-request req . outputs) +  (call-with-output-string +    (lambda (out) +      (request-port-set! req out) +      (let ((r (write-request req))) +       (for-each (lambda (output) +		   (display output (request-port r))) +		 outputs) +       (finish-request-body r))))) + +(test-group "writing of requests" +  ;; This can also be called Simple-Request as per RFC 1945 4.1 +  ;; RFC 2616 19.6 also states we should recognise 0.9 requests, but if +  ;; we understand those we should also be able to generate them because +  ;; a 0.9 server does not understand 1.x requests. +  (test-group "HTTP/0.9" +    (let ((req (make-request major: 0 minor: 9 method: 'GET +                             uri: (uri-reference "/foo/bar.html")))) +      (test-error* "By default, HTTP/0.9 is disabled" +                   (exn http unknown-protocol) +                   (test-write-request req)) +      (parameterize ((request-unparsers (list http-1.x-request-unparser +                                              http-1.0-request-unparser +                                              http-0.9-request-unparser))) +        (test "Always empty headers" +              "GET /foo/bar.html\r\n" +              (test-write-request (update-request req +                                                  headers: +                                                  (headers `((foo bar)))))) +        (test "Always GET" +              "GET /foo/bar.html\r\n" +              (test-write-request (update-request req method: 'POST)))))) +  (test-group "HTTP/1.0" +    (let ((req (make-request major: 1 minor: 0 +                             method: 'GET +                             uri: (uri-reference "/foo/bar.html")))) +      (test "Headers" +            "GET /foo/bar.html HTTP/1.0\r\nFoo: bar\r\n\r\ntest" +            (test-write-request +             (update-request req +                             headers: (headers `((foo bar)))) +             "test")) +      (test "Chunking ignored" +            "GET /foo/bar.html HTTP/1.0\r\nTransfer-Encoding: chunked\r\n\r\nfoobar" +            (test-write-request +             (update-request req +                             headers: (headers `((transfer-encoding chunked)))) +             "foo" "" "bar")))) +  (test-group "HTTP/1.1" +    (let ((req (make-request major: 1 minor: 1 +                             method: 'GET +                             uri: (uri-reference "/foo/bar.html")))) +      (test "Headers" +            "GET /foo/bar.html HTTP/1.1\r\nFoo: bar\r\n\r\ntest" +            (test-write-request +             (update-request req +                             headers: (headers `((foo bar)))) +             "test")) +      (test "Chunking" +            "GET /foo/bar.html HTTP/1.1\r\nTransfer-Encoding: chunked\r\n\r\n3\r\nfoo\r\na\r\n1234567890\r\n0\r\n\r\n" +            (test-write-request +             (update-request req +                             headers: (headers `((transfer-encoding chunked)))) +             "foo" "" "1234567890")) +      (test "OPTIONS-type asterisk if no URI" +            "OPTIONS * HTTP/1.1\r\n\r\n" +            (test-write-request +             (update-request req method: 'OPTIONS uri: #f)))))) + +(define (test-read-response input-string) +  (call-with-input-string input-string read-response)) + +(test-group "reading of responses" +  (test-group "HTTP/1.1" +    (let ((res (test-read-response "HTTP/1.1 303 See other\r\nFoo: bar\r\n\r\nContents"))) +      (test "Version detection" +            '(1 . 1) +            (cons (response-major res) (response-minor res))) +      (test "Status" +            '(see-other 303 "See other") +            (list (response-status res) (response-code res) (response-reason res))) +      (test "Headers" +            '("bar") +            (header-values 'foo (response-headers res))) +      (test "Contents" +            "Contents" +            (read-string #f (response-port res)))) +    (test-error* (exn http unknown-protocol-line) (test-read-response "qux")) +    (test #f (test-read-request "")) +    (test-error "Response line limit exceeded gives error" +                (parameterize ((http-line-limit 5)) +                  (test-read-response "HTTP/1.1 200 OK\r\n\r\n"))) +    (let ((res (test-read-response "HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n3\r\nfoo\r\na\r\n1234567890\r\n0\r\n\r\n"))) +      (test "Chunking" +            "foo1234567890" +            (read-string #f (response-port res)))) +    ;; Reported by "sz0ka" via IRC +    (let ((res (test-read-response "HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n5\r\nfoo\r\n\r\n0\r\n\r\n"))) +      (test "First read of chunked port returns first line" +            "foo" +            (read-line (response-port res))) +      (test "Peek-char returns EOF" +            #!eof +            (peek-char (response-port res))) +      (test "Read-char also returns EOF" +            #!eof +            (read-char (response-port res))))) +  (test-group "HTTP/1.0" +    (let ((res (test-read-response "HTTP/1.0 303 See other\r\nFoo: bar\r\n\r\nContents"))) +      (test "Version detection" +            '(1 . 0) +            (cons (response-major res) (response-minor res))) +      (test "Status" +            '(303 . "See other") +            (cons (response-code res) (response-reason res))) +      (test "Headers" +            '("bar") +            (header-values 'foo (response-headers res))) +      (test "Contents" +            "Contents" +            (read-string #f (response-port res)))) +    (let ((res (test-read-response "HTTP/1.0 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n3\r\nfoo\r\na\r\n1234567890\r\n"))) +      (test "Chunking ignored" +            "3\r\nfoo\r\na\r\n1234567890\r\n" +            (read-string #f (response-port res))))) +  (test-group "HTTP/0.9" +    (test-error* "By default, HTTP/0.9 is disabled" +                 (exn http unknown-protocol-line) +                 (test-read-response "Doesn't matter what's here\r\nLine 2")) +    (parameterize ((response-parsers (list http-1.x-response-parser +                                           http-1.0-response-parser +                                           http-0.9-response-parser))) +      (let ((res (test-read-response "Doesn't matter what's here\r\nLine 2"))) +        (test "Always OK status" +              '(200 . "OK") +              (cons (response-code res) (response-reason res))) +        (test "Version detection; fallback to 0.9" +              '(0 . 9) +              (cons (response-major res) (response-minor res))) +        (test "No headers" +              (headers '()) (response-headers res)) +        (test "Contents" +              "Doesn't matter what's here\r\nLine 2" +              (read-string #f (response-port res))))))) + +(define (test-write-response res . outputs) +  (call-with-output-string +    (lambda (out) +      (response-port-set! res out) +      (let ((r (write-response res))) +       (for-each (lambda (output) +		   (display output (response-port r))) +		 outputs) +       (finish-response-body r))))) + +(test-group "writing of responses" +  (test-group "HTTP/0.9" +    (let ((res (make-response major: 0 minor: 9 +                              code: 200 reason: "OK"))) +      (test-error* "By default, HTTP/0.9 is disabled" +                   (exn http unknown-protocol) +                   (test-write-response res "These are the contents\r\n")) +      (parameterize ((response-unparsers (list http-1.x-response-unparser +                                               http-1.0-response-unparser +                                               http-0.9-response-unparser))) +        (test "Headers ignored" +              "These are the contents\r\n" +              (test-write-response +               (update-response res headers: (headers `((foo bar)))) +               "These are the contents\r\n"))))) +  (test-group "HTTP/1.0" +    (let ((res (make-response major: 1 minor: 0 +                              code: 200 reason: "OK"))) +      (test "Headers used" +            "HTTP/1.0 200 OK\r\nFoo: bar\r\n\r\nThese are the contents\r\n" +            (test-write-response +             (update-response res headers: (headers `((foo bar)))) +             "These are the contents\r\n")) +      (test "Status code" +            "HTTP/1.0 303 See other\r\n\r\nThese are the contents\r\n" +            (test-write-response +             (update-response res code: 303 reason: "See other") +             "These are the contents\r\n")) +      (test "Chunking ignored" +            "HTTP/1.0 200 OK\r\nTransfer-Encoding: chunked\r\n\r\nfoo1234567890" +            (test-write-response +             (update-response +              res +              headers: (headers `((transfer-encoding chunked)))) +             "foo" "1234567890")))) +  (test-group "HTTP/1.1" +   (let ((res (make-response major: 1 minor: 1 +                             code: 200 reason: "OK"))) +     (test "Headers used" +           "HTTP/1.1 200 OK\r\nFoo: bar\r\n\r\nThese are the contents\r\n" +            (test-write-response +             (update-response res headers: (headers `((foo bar)))) +             "These are the contents\r\n")) +     (test "Status code" +           "HTTP/1.1 303 See other\r\n\r\nThese are the contents\r\n" +           (test-write-response +            (update-response res code: 303 reason: "See other") +            "These are the contents\r\n")) +     (test "Chunking" +           "HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n3\r\nfoo\r\na\r\n1234567890\r\n0\r\n\r\n" +           (test-write-response +            (update-response +             res +             headers: (headers `((transfer-encoding chunked)))) +            "foo" "1234567890")))) +  (test-group "status" +    (let ((res (make-response major: 1 minor: 1))) +      (test "reason and code are looked up by symbol properly" +            "HTTP/1.1 409 Conflict\r\n\r\ntest" +            (test-write-response (update-response res status: 'conflict) +                                 "test")) +      (test-error "an error is raised for unknown status codes" +                  (update-response res status: 'unknown)) +      (test "any status can be used when code and reason are given directly" +            "HTTP/1.1 999 No Way\r\n\r\ntest" +            (test-write-response  +             (update-response res code: 999 reason: "No Way") +             "test")) +      (test "defaults can be parameterized" +            "HTTP/1.1 999 Say What\r\n\r\ntest" +            (parameterize ((http-status-codes +                            (alist-cons 'say-what (cons 999 "Say What") +                                        (http-status-codes)))) +              (test-write-response (update-response res status: 'say-what) +                                   "test")))))) + +(test-group "etag comparison procedures" +  (test-group "weak comparison" +    (test-assert "Strong etag does not match list not containing it" +                 (not (etag-matches-weakly? +                       '(strong . "xyz") `((strong . "blabla"))))) +    (test-assert "Weak etag does not match list not containing it" +                 (not (etag-matches-weakly? +                       '(weak . "xyz") `((weak . "blabla"))))) +    (test-assert "Weak etag matches list containing it" +                 (etag-matches-weakly? +                  '(weak . "xyz") `((strong . "blabla") (weak . "xyz")))) +    (test-assert "Strong etag matches list containing it" +                 (etag-matches-weakly? +                  '(strong . "xyz") `((strong . "blabla") (strong . "xyz")))) +    (test-assert "Weak etag does not match list containing same tag but strong" +                 (not (etag-matches-weakly? +                       '(weak . "xyz") `((strong . "blabla") (strong . "xyz"))))) +    (test-assert "Strong etag does not match list containing same tag but weak" +                 (not (etag-matches-weakly? +                       '(strong . "xyz") `((strong . "blabla") (weak . "xyz"))))) +    (test-assert "Weak etag matches list containing wildcard" +                 (etag-matches-weakly? +                  '(weak . "xyz") `((strong . "blabla") *))) +    (test-assert "Strong etag matches list containing wildcard" +                 (etag-matches-weakly? +                  '(strong . "xyz") `((strong . "blabla") *)))) +  (test-group "strong comparison" +    (test-assert "Strong etag does not match list not containing it" +                 (not (etag-matches? +                       '(strong . "xyz") `((strong . "blabla"))))) +    (test-assert "Weak etag does not match list not containing it" +                 (not (etag-matches? +                       '(weak . "xyz") `((weak . "blabla"))))) +    (test-assert "Weak etag does *not* match list containing it" +                 (not (etag-matches? +                       '(weak . "xyz") `((strong . "blabla") (weak . "xyz"))))) +    (test-assert "Strong etag matches list containing it" +                 (etag-matches? +                  '(strong . "xyz") `((strong . "blabla") (strong . "xyz")))) +    (test-assert "Weak etag does not match list containing same tag but strong" +                 (not (etag-matches? +                       '(weak . "xyz") `((strong . "blabla") (strong . "xyz"))))) +    (test-assert "Strong etag does not match list containing same tag but weak" +                 (not (etag-matches? +                       '(strong . "xyz") `((strong . "blabla") (weak . "xyz"))))) +    (test-assert "Weak etag matches list containing wildcard" +                 (etag-matches? +                  '(weak . "xyz") `((strong . "blabla") *))) +    (test-assert "Strong etag matches list containing wildcard" +                 (etag-matches? +                  '(strong . "xyz") `((strong . "blabla") *))))) + + +;; We don't expose chunked-output-port/chunked-input-port.  Maybe we should? +;; To work around this, prepend some stuff and parse some headers +(define (chunked-inport string) +  (let ((res (test-read-response +              (string-append +               "HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n" +               string)))) +    (response-port res))) + +(test-group "Chunked ports" +  (let ((s "5\r\nab\ncd\r\n2\r\n\nx\r\n0\r\nDO NOT WANT")) +    (test "read-lines" '("ab" "cd" "x") (read-lines (chunked-inport s))) +    (let ((p (chunked-inport s))) +      (test "read-char" #\a (read-char p)) +      (test "peek-char" #\b (peek-char p)) +      (test "partial read" "b\n" (read-string 2 p)) +      (test "short read" "c" (read-string 1 p)) +      (test "read across chunk boundaries" "d\nx" (read-string 3 p)) +      (test "read at eof" #!eof (read-string 1 p))) +    (test "read beyond chunked port size" +          "ab\ncd\nx" (read-string 10 (chunked-inport s))))) + +(test-end) + +(unless (zero? (test-failure-count)) (exit 1)) + +;; TODO: +;; - Fix the parsing system so it's not so broken (more comfortable combinators) +;; - Test malformed headers +;; - Add parsing capability for quoted-pairs inside tokens and comments +;; - Rethink the auto-chunking stuff. Maybe this should be done at a higher level | 
