diff options
-rw-r--r-- | benchmarks/parsers.scm | 60 | ||||
-rw-r--r-- | benchmarks/run.scm | 7 | ||||
-rw-r--r-- | benchmarks/unparsers.scm | 54 | ||||
-rw-r--r-- | header-parsers.scm | 994 | ||||
-rw-r--r-- | intarweb.egg | 11 | ||||
-rw-r--r-- | intarweb.scm | 1055 | ||||
-rw-r--r-- | tests/run.scm | 1243 |
7 files changed, 3424 insertions, 0 deletions
diff --git a/benchmarks/parsers.scm b/benchmarks/parsers.scm new file mode 100644 index 0000000..e4612e2 --- /dev/null +++ b/benchmarks/parsers.scm @@ -0,0 +1,60 @@ +(import intarweb chicken.string chicken.time chicken.time.posix + chicken.port srfi-13) + +(define (mk-headers . strs) + (string-append (string-join strs "\r\n") "\r\n\r\n")) + +(begin (newline) + (print "--- Request parsing ---") + (begin (print "Parsing a minimal HTTP/1.0 request many times") + (let* ((str (mk-headers "GET / HTTP/1.0" + "Host: 127.0.0.1:8080" + "User-Agent: ApacheBench/2.3")) + (p (open-input-string str))) + (time (do ((i 0 (add1 i))) + ((= i 100000)) + (##sys#setslot p 10 0) ; rewind + (read-request p))))) + + (begin (print "Parsing a realistic HTTP/1.1 request many times") + (let* ((str (mk-headers "GET /foo HTTP/1.1" + "Host: localhost:8080" + "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:24.0) Gecko/20140722 Firefox/24.0 Iceweasel/24.7.0" + "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" + "Accept-Language: en-US,en;q=0.5" + "Accept-Encoding: gzip, deflate" + "Connection: keep-alive")) + (p (open-input-string str))) + (time (do ((i 0 (add1 i))) + ((= i 100000)) + (##sys#setslot p 10 0) ; rewind + (read-request p)))))) + +(begin (newline) + (print "--- Response parsing ---") + (begin (print "Parsing a minimal HTTP/1.0 response many times") + (let* ((str (mk-headers "HTTP/1.0 200 OK" + "Content-Length: 10")) + (p (open-input-string str))) + (time (do ((i 0 (add1 i))) + ((= i 100000)) + (##sys#setslot p 10 0) ; rewind + (read-response p))))) + + (begin (newline) + (print "Parsing a realistic HTTP/1.1 response many times") + (let* ((str (mk-headers + "HTTP/1.1 404 Not Found" + "Date: Tue, 19 Aug 2014 19:14:24 GMT" + "Server: Apache" + "Vary: Accept-Encoding" + "Content-Encoding: gzip" + "Content-Length: 176" + "Keep-Alive: timeout=15, max=100" + "Connection: Keep-Alive" + "Content-Type: text/html; charset=iso-8859-1")) + (p (open-input-string str))) + (time (do ((i 0 (add1 i))) + ((= i 100000)) + (##sys#setslot p 10 0) ; rewind + (read-response p)))))) diff --git a/benchmarks/run.scm b/benchmarks/run.scm new file mode 100644 index 0000000..c7d45c8 --- /dev/null +++ b/benchmarks/run.scm @@ -0,0 +1,7 @@ +(print "Unparsers:") +(print "==========\n") +(load "unparsers") + +(print "\nParsers:") +(print "==========\n") +(load "parsers") diff --git a/benchmarks/unparsers.scm b/benchmarks/unparsers.scm new file mode 100644 index 0000000..b8db0c8 --- /dev/null +++ b/benchmarks/unparsers.scm @@ -0,0 +1,54 @@ +(import chicken.port chicken.time chicken.time.posix intarweb uri-common) + +(define null-output-port + (make-output-port void void)) + +(begin (newline) + (print "--- Response unparsing ---") + (begin (print "Unparsing a minimal HTTP/1.1 response many times") + (let ((response (make-response port: null-output-port))) + (time (do ((i 0 (add1 i))) + ((= i 100000)) + (write-response response))))) + + (begin (print "Unparsing a realistic HTTP/1.1 response many times") + (let ((response + (make-response + port: null-output-port + headers: (headers + `((content-type text/css) + (etag (strong . "1234-0123456789")) + (content-length 1234) + (last-modified #(,(seconds->utc-time (current-seconds)) ())) + (date #(,(seconds->utc-time (current-seconds)) ()))))))) + (time (do ((i 0 (add1 i))) + ((= i 100000)) + (write-response response)))))) + +(begin (newline) + (print "--- Request unparsing ---") + (begin (print "Unparsing a minimal HTTP/1.1 request many times") + (let ((request + (make-request port: null-output-port))) + (time (do ((i 0 (add1 i))) + ((= i 100000)) + (write-request request))))) + + (begin (print "Unparsing a realistic HTTP/1.1 request many times") + (let ((request + (make-request + port: null-output-port + uri: (uri-reference "http://www.call-cc.org/test.example") + headers: (headers + `((user-agent (("Mozilla" "5.0" + "X11; U; NetBSD amd64; en-US; rv:1.9.0.3") + ("Gecko" "2008110501" #f) + ("Minefield" "3.0.3" #f))) + (host ("example.com" . 8080)) + (accept text/html application/xhtml+xml + #(application/xml ((q . 0.9))) #(*/* ((q . 0.8)))) + (accept-language en-US #(en ((q . 0.5)))) + (accept-encoding gzip deflate)))))) + (time (do ((i 0 (add1 i))) + ((= i 100000)) + (write-request request)))))) diff --git a/header-parsers.scm b/header-parsers.scm new file mode 100644 index 0000000..89edeae --- /dev/null +++ b/header-parsers.scm @@ -0,0 +1,994 @@ +;;;; Header value accessor procedures + +;; Get the raw contents of a header +(define (header-contents name headers #!optional default) + (cond ((assq name (headers-v headers)) => cdr) + (else default))) + +;; Get all values of a header +(define (header-values header-name headers) + (map (cut vector-ref <> 0) (header-contents header-name headers '()))) + +;; Get the value of a header, assuming it has only one value +(define (header-value header-name headers #!optional default) + (let ((contents (header-contents header-name headers '()))) + (if (null? contents) + default + (get-value (car contents))))) + +;; Get the parameters of a header, assuming it has only one value +(define (header-params header-name headers) + (let ((contents (header-contents header-name headers '()))) + (if (null? contents) + '() + (get-params (car contents))))) + +;; Get a specific parameter of a header, assuming it has only one value +(define (header-param param-name header-name headers #!optional default) + (let ((params (header-params header-name headers))) + (cond ((not (pair? params)) default) + ((assq param-name params) => cdr) + (else default)))) + +;; Get the value from one header entry +(define get-value (cut vector-ref <> 0)) +;; Fast local version +(define-compiler-syntax get-value + (syntax-rules () + ((_ header-entry) + (vector-ref header-entry 0)))) + +;; Get all params from one header entry +(define get-params (cut vector-ref <> 1)) +;; Fast local version +(define-compiler-syntax get-params + (syntax-rules () + ((_ header-entry) + (vector-ref header-entry 1)))) + +;; Get one specific parameter from one header entry +(define (get-param param contents #!optional (default #f)) + (let ((params (vector-ref contents 1))) + (cond ((not (pair? params)) default) + ((assq param params) => cdr) + (else default)))) + +(define (get-no-newline-value header) + (let ((v (get-value header))) + (when (string-index v #\newline) + ;; There's duplication with quote-string error here... + (signal-http-condition + 'raw-value + (conc "Unencoded newline in header contents! " + "Please encode the newline in a way " + "appropriate for this header") + (list string) 'unencoded-header 'value string)) + v)) + +;;;; Header parsers + +(define (single subparser #!optional (parameter-subparsers '())) + (lambda (contents) + (list ((with-params subparser parameter-subparsers) contents)))) + +(define (multiple subparser #!optional (parameter-subparsers '())) + (lambda (contents) + (map (with-params subparser parameter-subparsers) + (split-multi-header contents)))) + +;; RFC 2616 4.2 says multi-headers are a comma-separated list of tokens +(define (split-multi-header value) + (let ((len (string-length value))) + (let loop ((result '()) + (start-pos 0) ; Where the current header value starts + (search-pos 0)) ; Where the searching starts + (or (and-let* (((< search-pos len)) + (pos (string-index value (char-set #\, #\") search-pos))) + (if (char=? #\, (string-ref value pos)) + (loop (cons (string-copy value start-pos pos) result) (add1 pos) (add1 pos)) + (let ((end-pos (escaped-string-end value (add1 pos) (char-set #\")))) + (loop result start-pos (add1 end-pos))))) + (reverse (cons (string-drop value start-pos) result)))))) + +;; Remove all escape characters from the input, recognising "escaped escapes" +(define (unescape str) + (let ((last-char (sub1 (string-length str)))) + (let loop ((result "") + (start-pos 0)) + (or (and-let* ((pos (string-index str #\\ start-pos))) + (if (= pos last-char) + (string-append result (string-copy str start-pos)) + (loop (string-append result (string-copy str start-pos pos) + (string-copy str (add1 pos) (+ pos 2))) + (+ pos 2)))) + (string-append result (string-copy str start-pos)))))) + +;; Find a matching endpoint for a token, ignoring escaped copies of the token +(define (escaped-string-end str start stop-char-set) + (let ((len (string-length str))) + (let loop ((start start)) + (let ((pos (string-index str (char-set-adjoin stop-char-set #\\) start))) + (if pos + (if (char=? #\\ (string-ref str pos)) + ;; Escaped matching closing symbol + (if (= len (add1 pos)) + pos + (loop (+ pos 2))) + ;; Reached the matching closing symbol + pos) + len))))) ; No matching closing symbol? "Insert" it at the end + +;; Try to parse a token, starting at the provided offset, up until the +;; char-set where we should stop. Returns two values: the token or #f if +;; there is no token left, and the position on which the token ends. +(define (parse-token value start-pos + #!optional + (stop-char-set (char-set-adjoin char-set:whitespace #\,))) + (if (>= start-pos (string-length value)) + (values #f start-pos) + (let ((stop (char-set-adjoin stop-char-set #\"))) + (let ((pos (string-index value stop start-pos))) + (if pos + (if (not (char=? #\" (string-ref value pos))) + (values (string-trim-both + value char-set:whitespace start-pos pos) + pos) ; Stop-char found, but no quoting + (let ((end-pos (escaped-string-end value (add1 pos) + (char-set #\")))) + ;; Found the double quote? Recurse on the remainder + (receive (rest final-pos) + (parse-token value (add1 end-pos) stop-char-set) + (values (string-append + (string-trim-both + value char-set:whitespace start-pos pos) + (if (= pos end-pos) + (unescape (string-copy value (add1 pos))) + (unescape (string-copy value (add1 pos) end-pos))) + (or rest "")) + final-pos)))) + ;; Nothing found? Then the remainder of the string is the token + (values (string-trim-both + value char-set:whitespace start-pos) + (string-length value))))))) + +;; Comments are a bit like tokens, except they can be nested +(define (parse-comment value start-pos) + (let* ((len (string-length value)) + (nospace-pos (and (< start-pos len) + (string-skip value char-set:whitespace start-pos)))) + (if (and nospace-pos (char=? (string-ref value nospace-pos) #\()) + (let loop ((result "") + (start-pos (add1 nospace-pos))) + (if (>= start-pos len) + (values result len) + (let ((pos (string-index value (char-set #\" #\( #\)) start-pos))) + (if pos + (cond ((char=? #\( (string-ref value pos)) ; Nested comment + (receive (nested end-pos) + (parse-comment value pos) + (loop (sprintf "~A~A(~A)" + result + (string-copy value start-pos pos) + nested) + (add1 end-pos)))) + ((char=? #\" (string-ref value pos)) + (let lp ((end (add1 pos)) + (c (string-ref value (add1 pos))) + (res '())) + (cond ((char=? #\" c) + (loop (string-append + result + (reverse-list->string res)) + (add1 end))) + ((char=? #\\ c) + (lp (+ end 2) + (string-ref value (+ end 2)) + (cons (string-ref value (add1 end)) + res))) + (else + (lp (add1 end) + (string-ref value (add1 end)) + (cons c res)))))) + ;; Else it's a ) + (else (values (conc result (string-copy value start-pos pos)) (add1 pos)))) + ;; Nothing found? Then the remainder of the string is the token + (values (conc result (string-copy value start-pos)) + (string-length value)))))) + ;; No (? Then fail to match the 'comment' + (values #f start-pos)))) + +(define (parse-params string start-pos param-subparsers #!optional (stop-set (char-set #\;)) (separator-or-stop-set (char-set #\; #\=))) + (let loop ((start-pos start-pos) + (params '())) + (unless separator-or-stop-set + (error "The parse-params API has changed a bit for performance reasons: if you pass the optional stop char set, you need to pass one more argument which is the same char-set extended with an equals-sign")) + (receive (attribute-name pos) + (parse-token string start-pos separator-or-stop-set) + (if attribute-name + (let ((attribute (http-name->symbol attribute-name))) + (if (and (< pos (string-length string)) + (char=? (string-ref string pos) #\=)) + (receive (value pos) + (parse-token string (add1 pos) stop-set) + ;; In case of no value ("foo="), use the empty string as value + (let ((value ((alist-ref attribute param-subparsers + eq? identity) + (or value "")))) + (loop (add1 pos) (cons (cons attribute value) params)))) + ;; Missing value is interpreted as "present", + ;; so #t. If not present, it's #f when looking it up + (loop (add1 pos) (cons (cons attribute #t) params)))) + (values (reverse params) pos))))) + +(define (parse-value+params string start-pos value-subparser param-subparsers) + (receive (value pos) + (parse-token string start-pos (char-set #\;)) + (if (not value) + (values #f pos) ;; XXX this is wrong and not expected by the caller! + (receive (params pos) + (parse-params string (add1 pos) param-subparsers) + (values (vector (value-subparser value) params) pos))))) + +(define (with-params value-subparser parameter-subparsers) + (lambda (entry) + (receive (type+params pos) + (parse-value+params entry 0 value-subparser parameter-subparsers) + type+params))) + +(define (make-key/value-subparser key/value-subparsers) + (lambda (k/v) + ;; We're abusing parse-params here to read value + ;; instead of params. This is weird, but it works :) + (receive (key+value pos) + (parse-params k/v 0 key/value-subparsers) + (vector (car key+value) '())))) ;; There's only one key/value pair + +(foreign-declare "#include <locale.h>") + +(define-foreign-variable LC_TIME int) + +(define setlocale (foreign-lambda c-string setlocale int c-string)) + +(define-syntax let-locale + (syntax-rules () + ((let-locale ((cat val) ...) body ...) + (let ((backup '())) + (dynamic-wind + (lambda () (set! backup `((cat . ,(setlocale cat val)) ...))) + (lambda () body ...) + (lambda () (setlocale cat (alist-ref 'cat backup)) ...)))))) + +(define (make-date->string-parser spec) + (let ((date-regex + (irregex + (string-translate* + spec + '((" " . " +") ; Any number of spaces is very permissive + ("%a" . "(Sun|Mon|Tue|Wed|Thu|Fri|Sat)") + ("%A" . "(Sunday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday)") + ("%d" . "[0-9]{1,2}") + ("%b" . "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)") + ("%y" . "[0-9]{1,2}") + ("%Y" . "[0-9]{4}") + ("%X" . "[0-9]{2}:[0-9]{2}:[0-9]{2}")))))) + (lambda (str) + (and (irregex-match date-regex str) ; Or irregex-search? + (let-locale ((LC_TIME "POSIX")) + (string->time str spec)))))) + +(define rfc1123-string->time (make-date->string-parser "%a, %d %b %Y %X GMT")) + +;; This is a little more relaxed than strict rfc850 (it allows abbreviated +;; weekdays) - for example Google Web Server outputs cookies in this format. +(define rfc850-string->time + (disjoin (make-date->string-parser "%A, %d-%b-%y %X GMT") + (make-date->string-parser "%a, %d-%b-%Y %X GMT"))) + +(define asctime-string->time (make-date->string-parser "%a %b %d %X %Y")) + +(define http-date-string->time + (disjoin rfc1123-string->time rfc850-string->time asctime-string->time)) + +;; RFC 1123 updates RFC 822's datetime spec +(define (rfc1123-subparser str) + (or (rfc1123-string->time str) + (signal-http-condition + 'rfc1123-subparser + "Error parsing RFC 1123 date/time" (list str) + 'rfc1123-subparser 'value str))) + +(define (rfc850-subparser str) + (or (rfc850-string->time str) + (signal-http-condition + 'rfc850-subparser + "Error parsing RFC850 date/time" (list str) + 'rfc850-subparser 'value str))) + +(define (asctime-subparser str) + (or (asctime-string->time str) + (signal-http-condition + 'asctime-subparser + "Error parsing asctime() date/time" (list str) + 'asctime-subparser 'value str))) + +;; rfc1123-date | rfc850-date | asctime-date +(define (http-date-subparser str) + (or (http-date-string->time str) + (signal-http-condition + 'http-date-subparser + "Error parsing date/time" (list str) + 'http-date-subparser 'value str))) + +;; Change the accuracy of a number to 'digits' number of digits to the +;; right of the decimal point. +(define (chop-number num digits) + (let ((factor (expt 10 digits))) + (/ (round (* num factor)) factor))) + +(define (quality-subparser str) + ;; Anything that's not a number is seen as if the value is missing, hence 1.0 + (let* ((num (or (string->number str) 1.0)) + (imprecise (chop-number num 3))) + (max 0.0 (min 1.0 imprecise)))) + +;; Just put all header strings in a list, so we can pass it on. +;; Make no assumptions about the contents (only value, don't try to parse params) +;; This is different from (multiple (without-params generic-header-parser)) +;; because this does not assume it can split up comma-separated values. +;; It also will ensure that the value is raw, +(define (unknown-header-parser contents) + (list (vector contents 'raw))) + +(define symbol-subparser + (compose string->symbol string-trim-both)) + +(define symbol-subparser-ci + (compose string->symbol string-trim-both string-downcase)) + +(define (natnum-subparser contents) + (let ((num (string->number contents))) + (if num (inexact->exact (max 0 (round num))) 0))) + +(define (host/port-subparser contents) + (let* ((idx (string-index-right contents #\:)) + (portnum (and idx (string->number + (substring/shared contents (add1 idx)))))) + (if (and idx portnum) + (cons (substring/shared contents 0 idx) + (inexact->exact (round portnum))) + (cons contents #f)))) + +; base64 of 128 bit hex digest as per RFC1864 (eg, Content-md5) +(define base64-subparser base64-decode) + +;; This is retarded. The websocket spec (RFC6455) explicitly says the +;; product token must be compared case-insensitively. RFC2817, +;; "upgrading to TLS within HTTP/1.1" doesn't mention anything about +;; case sensitivity, but defines an all-uppercase token. This means +;; that there're going to be servers which accept only "TLS/1.0" +;; as-is. This all just means we can't add the convenience of +;; downcasing and switching to a symbol, pushing the case-(in)sensitive +;; comparison down the the user level, causing more bugs :( +(define (product-subparser contents) + (let* ((idx (string-index contents #\/)) + (version (and idx (substring/shared contents (add1 idx))))) + (if (and idx version) + (cons (substring/shared contents 0 idx) version) + (cons contents #f)))) + +;; bytes <start>-<end>/<total> +(define range-subparser + (let ((range-regex + (irregex '(seq "bytes" (+ space) + (=> start (+ digit)) "-" (=> end (+ digit)) + "/" (=> total (+ digit)))))) + (lambda (s) + (and-let* ((m (irregex-match range-regex s)) + (start (string->number (irregex-match-substring m 'start))) + (end (string->number (irregex-match-substring m 'end))) + (total (string->number (irregex-match-substring m 'total)))) + (list start end total))))) + +;; Accept *just* a filename, not a full path (simply strips directories) +;; This matches the content-disposition recommendation in RFC2616, 19.5.1: +;; "The receiving user agent SHOULD NOT respect any directory path +;; information present in the filename-parm parameter, which is the only +;; parameter believed to apply to HTTP implementations at this time. The +;; filename SHOULD be treated as a terminal component only." +;; This echoes RFC2183 (and RFC1806 which it supersedes), section 2.3: +;; "The receiving MUA SHOULD NOT respect any directory path information +;; that may seem to be present in the filename parameter. The filename +;; should be treated as a terminal component only." +(define (filename-subparser fn) + (let ((base-fn (pathname-strip-directory (string-trim-both fn)))) + (and (not (member base-fn '("" "." ".."))) + (not (string-index base-fn (char-set #\/ #\nul))) + base-fn))) + +;; [W/]<string> +;; This is a full parser, because it needs to be able to distinguish +;; between W/"foo" and "W/foo". If it's preprocessed by the tokenizer +;; both get "normalised" to the same thing: W/foo +;; +;; XXX It could be a good idea if the single/multiple token parsers +;; did not do anything to their contents. If the consuming parsers +;; want tokens, they know how to have it. OTOH, it would mean much +;; more code for all the parsers as they need to tokenize more... +(define (etag-parser contents) + (let ((contents (string-trim-both contents))) + (list (vector + (if (string-prefix? "W/" contents) + `(weak . ,(parse-token contents 2 char-set:whitespace)) + `(strong . ,(parse-token contents 0 char-set:whitespace))) + '())))) + +;; Used for both if-match and if-none-match +;; This is either a wilcard ('*') or several entities +(define (if-match-parser contents) + (let ((contents (string-trim-both contents))) + (if (string=? "*" contents) + (list (vector '* '())) + (let loop ((pos 0) + (etags '())) + (let ((weak (string-prefix? "W/" contents 0 2 pos))) + (receive (etag newpos) + (parse-token contents (+ pos (if weak 2 0)) char-set:whitespace) + (let ((newpos (string-skip contents char-set:whitespace newpos)) + (value (and etag (vector (cons (if weak 'weak 'strong) + etag) '())))) + (if value + (if newpos + (loop newpos (cons value etags)) + (reverse! (cons value etags))) + (reverse! etags))))))))) + +;; ( <product>[/<version>] [<comment>] )+ +;; This parser is a full parser because parse-token cannot handle +;; comments yet... (if a ; is in a comment, it breaks down) +(define software-parser + (let ((char-set:space-or-paren (char-set-union (char-set #\() + char-set:whitespace)) + (char-set:slash-or-paren (char-set #\/ #\())) + (lambda (contents) + (let loop ((start-pos 0) + (products '())) + (let*-values (((product pos) + (parse-token contents start-pos + char-set:slash-or-paren)) + ((version pos2) + (parse-token contents pos ; (add1 pos) + char-set:space-or-paren)) + ((comment pos3) (parse-comment contents pos2)) + ;; Ugh + ((real-version) (and version (not (string-null? version)) (string-trim version #\/)))) + (if product + (loop pos3 (cons (list product real-version comment) products)) + (list (vector (reverse products) '())))))))) + +;;;; MAJOR TODOs +;; RFC1123 mailbox parser - just strings for now +(define mailbox-subparser identity) + +;; Either an entity-tag or a http-date +(define (if-range-parser contents) + (let ((http-date ((with-params http-date-string->time '()) contents))) + (if (get-value http-date) + (list http-date) + (etag-parser contents)))) + +;; Either delta-seconds or http-date +(define retry-after-subparser (disjoin http-date-subparser natnum-subparser)) + +;; Tricky - see 2616 14.45 +;; We probably shouldn't try to do too much parsing here +(define via-parser (multiple identity)) + +;; Tricky - see 2616 14.46 +(define warning-parser split-multi-header) +;;;; END MAJOR TODOs + +(define (key/value-subparser str) + (let ((idx (string-index str #\=))) + (cons (string->symbol (string-take str idx)) (string-drop str (add1 idx))))) + +;; The 'expires' header defined by the Netscape cookie spec contains +;; an embedded comma. RFC 2109 cookies use Max-Age instead. +(define old-style-cookie? + (let ((old-cookie-regex + (irregex '(seq bos (+ (~ #\= #\")) "=" (* (~ #\;)) ";" ; + (* any) (w/nocase "expires") (* space) "=")))) + (lambda (cookie) + (irregex-search old-cookie-regex cookie)))) + +(define (string->number-list str) + (map string->number (string-split str ","))) + +(define (relative-ref/path-only s) + (and-let* ((ref (uri-reference s)) + ((not (uri-host ref))) + ((null? (uri-query ref))) + ((not (uri-fragment ref)))) + ref)) + +;; We're using http-date-subparser for 'expires' instead of rfc1123-subparser +;; (which would be the correct thing to do) because several well-known web +;; server software packages (tested: PHP and Rails) get it wrong. So we +;; will go by the robustness principle and allow any kind of HTTP date. +(define set-cookie-parser + (let ((param-subparsers `((expires . ,http-date-subparser) + (max-age . ,string->number) + (version . ,string->number) + (port . ,string->number-list) + (path . ,relative-ref/path-only))) + (name/value-parser (lambda (str) + (let ((idx (string-index str #\=))) + (cons (string-take str idx) + (string-drop str (add1 idx))))))) + (lambda (contents) + (if (old-style-cookie? contents) + (list ((with-params name/value-parser param-subparsers) contents)) + (map (with-params name/value-parser param-subparsers) + (split-multi-header contents)))))) + +(define cache-control-parser + (let ((splitter (lambda (str) ;; Is this correct? + (map (compose string->symbol string-trim-both) + (string-split str ","))))) + (lambda (contents) + (map + (make-key/value-subparser `((max-age . ,natnum-subparser) + (s-maxage . ,natnum-subparser) + (max-stale . ,natnum-subparser) + (min-fresh . ,natnum-subparser) + (private . ,splitter) + (no-cache . ,splitter))) + (split-multi-header contents))))) + +(define (strict-transport-security-parser contents) + ;; This is ridiculous; there are no parameters because everything + ;; is a parameter (or everything is a value, depending on your point + ;; of view). The header has no main value. For convenience and sanity + ;; we just return an alist as a single value. + (list (vector (parse-params contents 0 `((max-age . ,natnum-subparser))) '()))) + +(define (basic-auth-param-subparser contents pos) + (receive (secret pos) + (parse-token contents pos (char-set #\,)) + (let* ((decoded (base64-decode secret)) + (colon-idx (string-index decoded #\:)) + (user (string-take decoded colon-idx)) + (pass (string-drop decoded (add1 colon-idx)))) + (values `((username . ,user) (password . ,pass)) pos)))) + +(define (digest-auth-param-subparser contents pos) + (parse-params contents pos + `((nc . ,(lambda (n) (string->number n 16))) + (uri . ,uri-reference) + (qop . ,symbol-subparser) + (algorithm . ,symbol-subparser-ci)) + (char-set #\,) (char-set #\, #\=))) + +(define authorization-param-subparsers + (make-parameter `((basic . ,basic-auth-param-subparser) + (digest . ,digest-auth-param-subparser)))) + +(define (authorization-parser contents) + (let loop ((pos 0) + (result '())) + (receive (authtype pos) + (parse-token contents pos char-set:whitespace) + (if (not authtype) + (reverse result) + (let ((authtype (http-name->symbol authtype)) + (default-subparser (lambda (contents pos) + (parse-params contents pos '())))) + (receive (params pos) + ((alist-ref authtype (authorization-param-subparsers) + eq? default-subparser) contents (add1 pos)) + (loop (add1 pos) + (cons (vector authtype params) result)))))))) + +(define (authenticate-parser contents) + (let loop ((pos 0) + (result '())) + (receive (authtype pos) + (parse-token contents pos char-set:whitespace) + (if (not authtype) + (reverse result) + (let ((authtype (http-name->symbol authtype))) + (receive (params pos) + (parse-params contents pos + `((domain . ,(lambda (s) + (map uri-reference + (string-split s)))) + (qop . ,(lambda (s) + (map (compose symbol-subparser + string-trim) + (string-split s ",")))) + (algorithm . ,symbol-subparser-ci) + (stale . ,(lambda (s) + (string-ci=? (string-trim s) + "TRUE")))) + (char-set #\,) (char-set #\, #\=)) + (loop (add1 pos) (cons (vector authtype params) result)))))))) + +(define (pragma-parser contents) + (map (make-key/value-subparser `()) (split-multi-header contents))) + +(define (te-parser contents) + (map (make-key/value-subparser `((q . ,quality-subparser))) + (split-multi-header contents))) + +;; Cookie headers are also braindead: there can be several cookies in one header, +;; separated by either commas or semicolons. The only way to distinguish a +;; new cookie from a parameter of the current cookie is the dollar in front +;; of all parameter names. +;; Also, there's a $Version attribute that prepends all cookies, which is +;; considered to apply to all cookies that follow. +(define (cookie-parser contents) + ;; Local version of parse-params that stops when param without $ is seen + (define (read-params start-pos) + (let next-param ((start-pos start-pos) + (params '())) + (receive (attribute-name pos) + (parse-token contents start-pos (char-set #\; #\=)) + (if (or (not attribute-name) ;; Still reading params? + (not (char=? (string-ref attribute-name 0) #\$))) + (values (reverse! params) start-pos) + (let ((attrib (http-name->symbol (string-drop attribute-name 1)))) + (if (and (< pos (string-length contents)) + (char=? (string-ref contents pos) #\=)) + (receive (value pos) + (parse-token contents (add1 pos) (char-set #\;)) + (let ((value (case attrib + ((version port) (string->number (or value ""))) + ((path) (relative-ref/path-only (or value ""))) + (else value)))) + (next-param (add1 pos) (cons (cons attrib value) params)))) + ;; Missing value is interpreted as "present", so #t + (next-param (add1 pos) (cons (cons attrib #t) params)))))))) + (receive (global-params pos) + (read-params 0) + (let loop ((cookies '()) + (pos pos)) + (let*-values (((name pos) (parse-token contents pos (char-set #\= #\;))) + ((val pos) (parse-token contents (add1 pos) (char-set #\;)))) + (if (or (not name) (not val)) + (reverse! cookies) + (receive (local-params pos) + (read-params (add1 pos)) + (loop (cons (vector (cons name val) + (append! local-params global-params)) + cookies) (add1 pos)))))))) + +;;; Unparsers ;;; +(define (unparse-params params unparsers #!key + (separator "; ") (grammar 'prefix) + (keyword-unparser ->string) + (value-unparser unparse-token)) + (let loop ((params params) + (results '())) + (if (null? params) + (string-join (reverse results) separator grammar) + (let* ((name (caar params)) + (val (cdar params)) + (str (case val + ;; #t means param is present (no value) + ((#t) (keyword-unparser name)) + ;; #f means param is missing + ((#f) #f) + (else (let* ((unparser (assq name unparsers)) + (unparsed-val (if unparser + ((cdr unparser) val) + val))) + (string-append (keyword-unparser name) "=" + (value-unparser unparsed-val))))))) + (loop (cdr params) (if str (cons str results) results)))))) + +(define must-be-quoted-chars (char-set-adjoin char-set:iso-control #\")) + +(define (quote-string string) + (reverse-list->string + (cons #\" + (string-fold (lambda (c result) + (cond ((char=? c #\newline) + (signal-http-condition + 'quote-string + (conc "Unencoded newline in header contents! " + "Please encode the newline in a way " + "appropriate for this header") + (list string) 'unencoded-header 'value string)) + ((char-set-contains? must-be-quoted-chars c) + (cons c (cons #\\ result))) + (else (cons c result)))) + '(#\") + string)))) + +;; Unparse a value as token, converting it to a string first +(define unparse-token + (let ((default-trigger-chars (char-set-union must-be-quoted-chars + (char-set #\= #\; #\,) + char-set:blank))) + (lambda (token #!optional separator-chars) + (let ((trigger-quoting-chars + (if separator-chars + (char-set-union must-be-quoted-chars separator-chars char-set:blank) + default-trigger-chars)) + (token-string (->string token))) + (if (string-any trigger-quoting-chars token-string) + (quote-string token-string) + token-string))))) + +(define (unparse-etag etag) + (string-append + (if (eq? 'weak (car etag)) "W/" "") + (quote-string (cdr etag)))) ;; Etags are _always_ quoted + +;; There's no need to make a specific header unparser for every header type. +;; Usually, the Scheme value representing a header can unambiguously be +;; unparsed into a header just by checking its type. +(define (default-header-unparser header-contents) + (let loop ((headers (reverse header-contents)) + (result '())) + (if (null? headers) + (list (string-join result ", ")) + (let* ((header (car headers)) + (contents (get-value header)) + (value (cond + ((pair? contents) ; alist? + (let ((attribute (symbol->http-name (car contents)))) + (if (eq? (cdr contents) #t) + (unparse-token attribute) + (string-append attribute "=" + (unparse-token (cdr contents)))))) + ((uri-reference? contents) + (unparse-token (uri->string contents) (char-set))) + (else (unparse-token contents))))) + (loop (cdr headers) + (cons + (string-append + value (unparse-params (get-params header) '())) + result)))))) + +;; RFC2616 19.5.1 says that the "filename" attribute _must_ be quoted. +;; It's a bit annoying that our API currently can't specify for particular +;; attributes that only those must be unparsed specially, so we quote _all_ +;; attributes (which, strictly speaking, is always allowed for tokens) unless +;; otherwise specified by a hack (when the value is prefixed by RAW). +;; This may be dangerous or wrong, if a server doesn't accept quoted "name" +;; attributes, for example. Not too likely since names can contain spaces etc. +(define (content-disposition-unparser header-contents) + (let* ((type (get-value (car header-contents))) + (RAW (list 'raw)) + (unparser (lambda (x) (if (and (pair? x) (eq? RAW (car x))) + (cdr x) + (quote-string (->string x)))))) + (list (conc (unparse-token type) + (unparse-params (get-params (car header-contents)) + `((filename . ,pathname-strip-directory) + (size . ,(lambda (x) (cons RAW (number->string x)))) + (creation-date . ,rfc1123-time->string) + (modification-date . ,rfc1123-time->string) + (read-date . ,rfc1123-time->string)) + value-unparser: unparser))))) + +(define (etag-unparser header-contents) + (list (unparse-etag (get-value (car header-contents))))) + +(define (if-match-unparser header-contents) + (let loop ((headers (reverse header-contents)) + (result '())) + (cond + ((null? headers) (list (string-join result ", "))) + ((eq? '* (get-value (car headers))) '("*")) ;; We're done. * means anything + (else (loop (cdr headers) + (cons (unparse-etag (get-value (car headers))) result)))))) + +(define (host/port-unparser header-contents) + (let ((contents (get-value (car header-contents)))) + ;; XXX: urlencode? + (if (cdr contents) + (list (conc (car contents) ":" (cdr contents))) + (list (car contents))))) + +;; Handled specially because cookie value is not an alist but a cons of strings +(define (set-cookie-unparser header-contents) + (map (lambda (header) + (let* ((unparsed-params + (map (lambda (p) + (if (eq? (cdr p) #t) + (unparse-token (symbol->http-name (car p))) + (string-append + (unparse-token (symbol->http-name (car p))) + "=" + (cond + ((and (eq? (car p) 'port) (pair? (cdr p))) + (string-join + (map number->string (cdr p)) ",")) + ((and (eq? (car p) 'path) (cdr p)) + (uri->string (cdr p))) + ((eq? (car p) 'expires) + (let-locale ((LC_TIME "POSIX")) + (time->string (cdr p) "%A, %d-%b-%y %X GMT"))) + (else (unparse-token (cdr p))))))) + ;; Remove #f values + (filter (lambda (p) (cdr p)) (get-params header)))) + (cookie (get-value header)) + (unparsed-cookie (string-append (car cookie) "=" + (unparse-token (cdr cookie))))) + (string-join (cons unparsed-cookie unparsed-params) "; "))) + header-contents)) + +(define (cookie-unparser header-contents) + (let loop ((prefix "") + (headers (reverse header-contents)) + (result '())) + (if (null? headers) + (list (conc prefix (string-join result "; "))) + (let* ((version (get-param 'version (car headers) #f)) + (params (alist-delete 'version (get-params (car headers)))) + (unparsed-params + (map (lambda (p) + (if (eq? (cdr p) #t) + (unparse-token (conc "$" (symbol->http-name (car p)))) + (string-append + (unparse-token + (conc "$" (symbol->http-name (car p)))) + "=" + (cond + ((and (eq? (car p) 'port) (pair? (cdr p))) + (string-join + (map number->string (cdr p)) ",")) + ((and (eq? (car p) 'path) (cdr p)) + (uri->string (cdr p))) + (else (unparse-token (cdr p))))))) + ;; Remove #f values + (filter (lambda (p) (cdr p)) params))) + (cookie (get-value (car headers))) + (unparsed-cookie (string-append (car cookie) "=" + (unparse-token (cdr cookie))))) + ;; Doing it like this means we can't unparse cookies of + ;; mixed versions... + (loop (if version (sprintf "$Version: ~A; " version) prefix) + (cdr headers) + (cons (string-join (cons unparsed-cookie unparsed-params) "; ") + result)))))) + +(define (software-unparser header-contents) + (list + (string-join + (map (lambda (content) + (conc (unparse-token (first content)) + (if (second content) + (conc "/" (unparse-token (second content))) + "") + (if (third content) + (conc " (" (third content) ")") + ""))) + (get-value (car header-contents)))))) + +(define (product-unparser header-contents) + (list + (string-join + (map (lambda (header) + (let* ((default-unparser ; Not great, but better than nothing + (lambda (params) (unparse-params params '()))) + (product+version (get-value header))) + (conc (unparse-token (car product+version)) + (if (cdr product+version) + (conc "/" (unparse-token (cdr product+version))) + "")))) + header-contents) + ", "))) + +(define (via-unparser header-contents) + (list (string-join (map get-value header-contents) ", "))) + +(define (rfc1123-unparser header-contents) + (list (rfc1123-time->string (get-value (car header-contents))))) + +(define-constant short-weekdays `#("Sun" "Mon" "Tue" "Wed" "Thu" "Fri" "Sat")) +(define-constant short-months `#("Jan" "Feb" "Mar" "Apr" "May" "Jun" "Jul" "Aug" "Sep" "Oct" "Nov" "Dec")) + +(define (rfc1123-time->string time) + (let ((padded (lambda (n) + (if (fx< n 10) + (string-append "0" (number->string n)) + (number->string n)))) + (secs (vector-ref time 0)) + (mins (vector-ref time 1)) + (hours (vector-ref time 2)) + (mday (vector-ref time 3)) + (month (vector-ref time 4)) + (year (vector-ref time 5)) + (wday (vector-ref time 6))) + (string-append (vector-ref short-weekdays wday) ", " + (padded mday) " " (vector-ref short-months month) " " + (number->string (+ 1900 year)) " " (padded hours) ":" + (padded mins) ":" (padded secs) " GMT"))) + +(define (basic-auth-param-subunparser params) + (let ((user (alist-ref 'username params)) + (pass (alist-ref 'password params))) + (if (string-index user #\:) + (signal-http-condition + 'basic-auth-param-subunparser + "Colon detected in username. This is not supported by basic auth!" + (list user) 'username-with-colon 'value user) + (base64-encode (string-append user ":" pass))))) + +(define (digest-auth-param-subunparser params) + (unparse-params params + `((nc . ,identity) ;; see below + (uri . ,uri->string) + (qop . ,symbol->string) + (algorithm . ,symbol->string)) + separator: ", " + grammar: 'infix + keyword-unparser: symbol->string + value-unparser: + ;; Nasty exception for "nc", which is an unquoted + ;; padded integer... + (lambda (x) + (if (number? x) + (string-pad (number->string x 16) 8 #\0) + (quote-string (->string x)))))) + +(define authorization-param-subunparsers + (make-parameter `((basic . ,basic-auth-param-subunparser) + (digest . ,digest-auth-param-subunparser)))) + +(define (authorization-unparser header-contents) + (map (lambda (header) + (let* ((default-unparser ; Not great, but better than nothing + (lambda (params) (unparse-params params '()))) + (auth-scheme (get-value header)) + (unparser (alist-ref auth-scheme + (authorization-param-subunparsers) + eq? default-unparser))) + (string-append + (symbol->http-name auth-scheme) " " + (->string (unparser (get-params header)))))) + header-contents)) + +(define (authenticate-unparser header-contents) + (map (lambda (header) + (string-append + (symbol->http-name (get-value header)) + " " + (let* ((old (get-params header)) + ;; A quick hack to get presence of "stale" + ;; coded as TRUE instead of value-less param + ;; false value is coded by its absense + (params (if (alist-ref 'stale old) + (alist-update! 'stale 'TRUE old) + (alist-delete 'stale old)))) + (unparse-params params + `((domain . ,(lambda (u) + (string-join + (map uri->string u)))) + (qop . ,(lambda (q) + (string-join + (map symbol->string q) + ","))) + (algorithm . ,symbol->string)) + separator: ", " + grammar: 'infix + keyword-unparser: symbol->string + value-unparser: + (lambda (x) + (if (eq? x 'TRUE) + "TRUE" + (quote-string (->string x)))))))) + header-contents)) + +(define (strict-transport-security-unparser header-contents) + (map (lambda (header) ; Should only be one header; the rest must be ignored! + (unparse-params (get-value header) '() + grammar: 'infix + keyword-unparser: ; Ugly but neccessary + (lambda (k) + (if (eq? k 'includesubdomains) + "includeSubDomains" + (->string k))))) + header-contents)) diff --git a/intarweb.egg b/intarweb.egg new file mode 100644 index 0000000..43eb73d --- /dev/null +++ b/intarweb.egg @@ -0,0 +1,11 @@ +;; -*- Scheme -*- +((synopsis "A more convenient HTTP library") + (author "Peter Bex") + (category web) + (license "BSD") + (dependencies srfi-1 srfi-13 srfi-14 defstruct uri-common base64) + (test-dependencies test srfi-18) + (components (extension intarweb + (csc-options "-O3") + (source-dependencies "intarweb.scm" + "header-parsers.scm")))) diff --git a/intarweb.scm b/intarweb.scm new file mode 100644 index 0000000..3df4690 --- /dev/null +++ b/intarweb.scm @@ -0,0 +1,1055 @@ +;;; +;;; Intarweb is an improved HTTP library for Chicken +;;; +;; Copyright (c) 2008-2018, Peter Bex +;; All rights reserved. +;; +;; Redistribution and use in source and binary forms, with or without +;; modification, are permitted provided that the following conditions +;; are met: +;; +;; 1. Redistributions of source code must retain the above copyright +;; notice, this list of conditions and the following disclaimer. +;; 2. Redistributions in binary form must reproduce the above copyright +;; notice, this list of conditions and the following disclaimer in the +;; documentation and/or other materials provided with the distribution. +;; 3. Neither the name of the author nor the names of its +;; contributors may be used to endorse or promote products derived +;; from this software without specific prior written permission. +;; +;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +;; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +;; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +;; FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +;; COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +;; INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +;; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +;; HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +;; STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +;; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +;; OF THE POSSIBILITY OF SUCH DAMAGE. + +;; TODO: Support RFC5987? Seems awfully messy though (need to pull in iconv?) +;; We could use http://www.greenbytes.de/tech/tc2231/ in the testsuite. +;; Look at that URI's toplevel directory for more HTTP/URI-related testcases! + +(module intarweb + (http-line-limit http-header-limit http-urlencoded-request-data-limit + replace-header-contents replace-header-contents! remove-header remove-header! + update-header-contents update-header-contents! headers single-headers + headers? headers->list http-name->symbol symbol->http-name + header-parsers header-unparsers unparse-header unparse-headers read-headers + safe-methods safe? idempotent-methods idempotent? keep-alive? response-class + etag=? etag=-weakly? etag-matches? etag-matches-weakly? + + make-request request? request-major request-major-set! + request-minor request-minor-set! + request-method request-method-set! request-uri request-uri-set! + request-headers request-headers-set! request-port request-port-set! + update-request set-request! request-has-message-body? + + request-parsers read-request request-unparsers write-request + finish-request-body http-0.9-request-parser http-1.x-request-parser + http-0.9-request-unparser http-1.0-request-unparser http-1.x-request-unparser + header-parse-error-handler + read-urlencoded-request-data + + make-response response? response-major response-major-set! + response-minor response-minor-set! + response-code response-code-set! response-reason response-reason-set! + response-status response-status-set! response-headers response-headers-set! + response-port response-port-set! update-response set-response! + response-has-message-body-for-request? + + write-response response-parsers response-unparsers read-response + finish-response-body http-0.9-response-parser http-0.9-response-unparser + http-1.0-response-parser http-1.0-response-unparser + http-1.x-response-parser http-1.x-response-unparser + http-status-codes http-status->code&reason + + ;; http-header-parsers + header-contents header-values header-value header-params header-param + get-value get-params get-param + + split-multi-header parse-token parse-comment + parse-params parse-value+params unparse-params + multiple single make-key/value-subparser + + rfc1123-string->time rfc850-string->time asctime-string->time + http-date-string->time + rfc1123-subparser rfc850-subparser asctime-subparser http-date-subparser + product-subparser quality-subparser unknown-header-parser + filename-subparser symbol-subparser symbol-subparser-ci natnum-subparser + host/port-subparser base64-subparser range-subparser filename-subparser + etag-parser software-parser mailbox-subparser + if-range-parser retry-after-subparser via-parser warning-parser + key/value-subparser set-cookie-parser cache-control-parser pragma-parser + te-parser cookie-parser strict-transport-security-parser + + must-be-quoted-chars quote-string unparse-token + default-header-unparser etag-unparser host/port-unparser + product-unparser software-unparser rfc1123-unparser cookie-unparser + strict-transport-security-unparser + + ;; Subparsers/subunparsers + authorization-param-subparsers + basic-auth-param-subparser digest-auth-param-subparser + + authorization-param-subunparsers + basic-auth-param-subunparser digest-auth-param-subunparser + ) + +(import scheme (chicken base) (chicken foreign) (chicken irregex) + (chicken format) (chicken io) (chicken string) + (chicken time posix) (chicken pathname) (chicken fixnum) + (chicken condition) (chicken port) (chicken syntax) + srfi-1 srfi-13 srfi-14 base64 uri-common defstruct) + +;; The below can all be #f if you want no limit (not recommended!) +(define http-line-limit (make-parameter 4096)) +(define http-header-limit (make-parameter 64)) +(define http-urlencoded-request-data-limit (make-parameter (* 4 1024 1024))) + +(define (read-urlencoded-request-data + request #!optional (max-length (http-urlencoded-request-data-limit))) + (let* ((p (request-port request)) + (len (header-value 'content-length (request-headers request))) + ;; For simplicity's sake, we don't allow exactly the max request limit + (limit (if (and len max-length) + (min len max-length) + (or max-length len))) + (data (read-string limit (request-port request)))) + (if (and (not (eof-object? data)) max-length (= max-length (string-length data))) + (signal-http-condition + 'read-urlencoded-request-data + "Max allowed URLencoded request size exceeded" + (list request max-length) + 'urlencoded-request-data-limit-exceeded + 'contents data 'limit limit) + (form-urldecode data)))) + +(define (raise-line-limit-exceeded-error line limit port) + (let ((safe-line-prefix + (if (< limit 128) + (sprintf "~A[..and more (was limited to ~A)..]" line limit) + (sprintf "~A[..~A+ more chars (was limited to ~A)..]" + (substring line 0 128) (- limit 128) limit)))) + (signal-http-condition + 'safe-read-line + "Max allowed line length exceeded" + (list port safe-line-prefix) + 'line-limit-exceeded 'contents line 'limit limit))) + +(define (safe-read-line p) + (let* ((limit (http-line-limit)) + (line (read-line p (http-line-limit)))) + (if (and (not (eof-object? line)) limit (= limit (string-length line))) + (raise-line-limit-exceeded-error line limit p) + line))) + +;; Make headers a new type, to force the use of the HEADERS procedure +;; and ensure only proper header values are passed to all procedures +;; that deal with headers. +(define-record headers v) + +(define-record-printer (headers h out) + (fprintf out "#(headers: ~S)" (headers-v h))) + +(define headers->list headers-v) + +(define (remove-header! name headers) + (let loop ((h (headers-v headers))) + (cond + ((null? h) headers) + ((eq? name (caar h)) + (set-cdr! h (cdr h)) + headers) + (else (loop (cdr h)))))) + +(define (remove-header name headers) + (make-headers + (let loop ((h (headers-v headers))) + (cond + ((null? h) h) + ((eq? name (caar h)) (loop (cdr h))) + (else (cons (car h) (loop (cdr h)))))))) + +;; Check that the header values are valid vectors, and that if there +;; is a raw value, there is only one value at all. +(define (check-header-values loc name contents) + (let lp ((mode 'unknown) (todo contents)) + (let ((head (car todo))) + (if (not (and (vector? head) (= 2 (vector-length head)))) + (signal-http-condition + loc "header values must be vectors of length 2" + (list name contents) 'header-value) + (let ((type (if (eq? (get-params head) 'raw) 'raw 'cooked))) + (unless (or (eq? mode 'unknown) (eq? mode type)) + (signal-http-condition + loc "When using raw headers, all values must be raw" + (list name contents) 'header-value) + (lp type (cdr todo)))))))) + +;; XXX: Do we need these replace procedures in the exports list? It +;; looks like we can use update everywhere. +(define (replace-header-contents! name contents headers) + (check-header-values 'replace-header-contents! name contents) + (let loop ((h (headers-v headers))) + (cond + ((null? h) + (headers-v-set! + headers (cons (cons name contents) (headers-v headers))) + headers) + ((eq? name (caar h)) + (set-cdr! (car h) contents) + headers) + (else (loop (cdr h)))))) + +(define (replace-header-contents name contents headers) + (check-header-values 'replace-header-contents! name contents) + (make-headers + (let loop ((h (headers-v headers))) + (cond + ((null? h) (cons (cons name contents) h)) + ((eq? name (caar h)) + (cons (cons (caar h) contents) (cdr h))) + (else (cons (car h) (loop (cdr h)))))))) + +(define (make-updater replacer) + (lambda (name contents headers) + (let ((old (header-contents name headers '()))) + (replacer name + (if (member name (single-headers)) + (list (last contents)) + (append old contents)) + headers)))) + +(define update-header-contents (make-updater replace-header-contents)) +(define update-header-contents! (make-updater replace-header-contents!)) + +(define http-name->symbol (compose string->symbol string-downcase!)) +(define symbol->http-name (compose string-titlecase symbol->string)) + +;; Make a header set from a literal expression by folding in the headers +;; with any previous ones +(define (headers headers-to-be #!optional (old-headers (make-headers '()))) + (fold (lambda (h new-headers) + (update-header-contents + (car h) + (map (lambda (v) + (if (vector? v) v (vector v '()))) ; normalize to vector + (cdr h)) + new-headers)) + old-headers + headers-to-be)) + +(define (normalized-uri str) + (and-let* ((uri (uri-reference str))) + (uri-normalize-path-segments uri))) + +(include "header-parsers") ; Also includes header unparsers + +;; Any unknown headers are considered to be multi-headers, always +(define single-headers + (make-parameter '(accept-ranges age authorization content-disposition + content-length content-location content-md5 content-type + date etag expect expires host if-modified-since + if-unmodified-since last-modified location max-forwards + proxy-authorization range referer retry-after server + transfer-encoding user-agent www-authenticate))) + +(define string->http-method string->symbol) +(define http-method->string symbol->string) + +;; Make an output port automatically "chunked" +(define (chunked-output-port port) + (let ((chunked-port + (make-output-port (lambda (s) ; write + (let ((len (string-length s))) + (unless (zero? len) + (fprintf port "~X\r\n~A\r\n" len s)))) + (lambda () ; close + (close-output-port port)) + (lambda () ; flush + (flush-output port))))) + ;; first "reserved" slot + ;; Slot 7 should probably stay 'custom + (##sys#setslot chunked-port 10 'chunked-output-port) + ;; second "reserved" slot + (##sys#setslot chunked-port 11 port) + chunked-port)) + +;; Make an input port automatically "chunked" +(define (chunked-input-port port) + (let* ((chunk-length 0) + (position 0) + (check-position (lambda () + (when (and position (>= position chunk-length)) + (unless (eq? chunk-length 0) + (safe-read-line port)) ; Read \r\n data trailer + (let ((line (safe-read-line port))) + (if (eof-object? line) + (set! position #f) + (begin + (set! chunk-length (string->number line 16)) + (cond + ((not chunk-length) (set! position #f)) + ((zero? chunk-length) ; Read final data trailer + (safe-read-line port) + (set! position #f)) + (else (set! position 0)))))))))) + (make-input-port (lambda () ; read-char + (check-position) + (if position + (let ((char (read-char port))) + (unless (eof-object? char) + (set! position (add1 position))) + char) + #!eof)) + (lambda () ; ready? + (check-position) + (or (not position) (char-ready? port))) + (lambda () ; close + (close-input-port port)) + (lambda () ; peek-char + (check-position) + (if position + (peek-char port) + #!eof)) + (lambda (p bytes buf off) ; read-string! + (let lp ((todo bytes) + (total-bytes-read 0) + (off off)) + (check-position) + (if (or (not position) (= todo 0)) + total-bytes-read + (let* ((n (min todo (- chunk-length position))) + (bytes-read (read-string! n buf port off))) + (set! position (+ position bytes-read)) + (lp (- todo bytes-read) + (+ total-bytes-read bytes-read) + (+ off bytes-read))))))))) +;; TODO: Note that in the above, read-line is not currently +;; implemented. It is *extremely* tricky to correctly maintain the +;; port position when all \r *AND/OR* \n characters get chopped off +;; the line-string. It can be done by maintaining our own extra +;; buffer, but that complicates all the procedures here enormously, +;; including read-line itself. + +;; RFC2616, Section 4.3: "The presence of a message-body in a request +;; is signaled by the inclusion of a Content-Length or Transfer-Encoding +;; header field in the request's message-headers." +;; We don't check the method since "a server SHOULD read and forward the +;; a message-body on any request", even it shouldn't be sent for that method. +;; +;; Because HTTP/1.0 has no official definition of when a message body +;; is present, we'll assume it's always present, unless there is no +;; content-length and we have a keep-alive connection. +(define request-has-message-body? + (make-parameter + (lambda (req) + (let ((headers (request-headers req))) + (if (and (= 1 (request-major req)) (= 0 (request-minor req))) + (not (eq? 'keep-alive (header-contents 'connection headers))) + (or (header-contents 'content-length headers) + (header-contents 'transfer-encoding headers))))))) + +;; RFC2616, Section 4.3: "For response messages, whether or not a +;; message-body is included with a message is dependent on both the +;; request method and the response status code (section 6.1.1)." +(define response-has-message-body-for-request? + (make-parameter + (lambda (resp req) + (not (or (= (response-class resp) 100) + (memv (response-code resp) '(204 304)) + (eq? 'HEAD (request-method req))))))) + +;; OPTIONS and TRACE are not explicitly mentioned in section 9.1.1, +;; but section 9.1.2 says they SHOULD NOT have side-effects by +;; definition, which means they are safe, as well. +(define safe-methods + (make-parameter '(GET HEAD OPTIONS TRACE))) + +;; RFC2616, Section 9.1.1 +(define (safe? obj) + (let ((method (if (request? obj) (request-method obj) obj))) + (not (not (member method (safe-methods)))))) + +(define idempotent-methods + (make-parameter '(GET HEAD PUT DELETE OPTIONS TRACE))) + +;; RFC2616, Section 9.1.2 +(define (idempotent? obj) + (let ((method (if (request? obj) (request-method obj) obj))) + (not (not (member method (idempotent-methods)))))) + +(define (keep-alive? obj) + (let ((major (if (request? obj) (request-major obj) (response-major obj))) + (minor (if (request? obj) (request-minor obj) (response-minor obj))) + (con (header-value 'connection (if (request? obj) + (request-headers obj) + (response-headers obj))))) + (if (and (= major 1) (> minor 0)) + (not (eq? con 'close)) + ;; RFC 2068, section 19.7.1 (see also RFC 2616, section 19.6.2) + (eq? con 'keep-alive)))) + +(define (etag=? a b) + (and (not (eq? 'weak (car a))) + (eq? (car a) (car b)) + (string=? (cdr a) (cdr b)))) + +(define (etag=-weakly? a b) + (and (eq? (car a) (car b)) + (string=? (cdr a) (cdr b)))) + +(define (etag-matches? etag matchlist) + (any (lambda (m) (or (eq? m '*) (etag=? etag m))) matchlist)) + +(define (etag-matches-weakly? etag matchlist) + (any (lambda (m) (or (eq? m '*) (etag=-weakly? etag m))) matchlist)) + +;;;;;;;;;;;;;;;;;;;;;;;;; +;;;; Request parsing ;;;; +;;;;;;;;;;;;;;;;;;;;;;;;; + +;; This includes parsers for all RFC-defined headers +(define header-parsers + (make-parameter + `((accept . ,(multiple symbol-subparser-ci + `((q . ,quality-subparser)))) + (accept-charset . ,(multiple symbol-subparser-ci + `((q . ,quality-subparser)))) + (accept-encoding . ,(multiple symbol-subparser-ci + `((q . ,quality-subparser)))) + (accept-language . ,(multiple symbol-subparser-ci + `((q . ,quality-subparser)))) + (accept-ranges . ,(single symbol-subparser-ci)) + (age . ,(single natnum-subparser)) + (allow . ,(multiple symbol-subparser)) + (authorization . ,authorization-parser) + (cache-control . ,cache-control-parser) + (connection . ,(multiple symbol-subparser-ci)) + (content-encoding . ,(multiple symbol-subparser-ci)) + (content-language . ,(multiple symbol-subparser-ci)) + (content-length . ,(single natnum-subparser)) + (content-location . ,(single normalized-uri)) + (content-md5 . ,(single base64-subparser)) + (content-range . ,(single range-subparser)) + (content-type . ,(single symbol-subparser-ci + `((charset . ,symbol-subparser-ci)))) + (date . ,(single http-date-subparser)) + (etag . ,etag-parser) + (expect . ,(single (make-key/value-subparser '()))) + (expires . ,(single http-date-subparser)) + (from . ,(multiple mailbox-subparser)) + (host . ,(single host/port-subparser)) + (if-match . ,if-match-parser) + (if-modified-since . ,(single http-date-subparser)) + (if-none-match . ,if-match-parser) + (if-range . ,if-range-parser) + (if-unmodified-since . ,(single http-date-subparser)) + (last-modified . ,(single http-date-subparser)) + (location . ,(single normalized-uri)) + (max-forwards . ,(single natnum-subparser)) + (pragma . ,pragma-parser) + (proxy-authenticate . ,authenticate-parser) + (proxy-authorization . ,authorization-parser) + (range . ,(multiple range-subparser)) + (referer . ,(single normalized-uri)) + (retry-after . ,(single retry-after-subparser)) + (server . ,software-parser) + (te . ,te-parser) + (trailer . ,(multiple symbol-subparser-ci)) + (transfer-encoding . ,(single symbol-subparser-ci)) + (upgrade . ,(multiple product-subparser)) + (user-agent . ,software-parser) + (vary . ,(multiple symbol-subparser-ci)) + (via . ,via-parser) + (warning . ,warning-parser) + (www-authenticate . ,authenticate-parser) + ;; RFC 2183 + (content-disposition . ,(single symbol-subparser-ci + `((filename . ,filename-subparser) + (creation-date . ,rfc1123-subparser) + (modification-date . ,rfc1123-subparser) + (read-date . ,rfc1123-subparser) + (size . ,natnum-subparser)))) + ;; RFC 2109 + (set-cookie . ,set-cookie-parser) + (cookie . ,cookie-parser) + ;; + ;; TODO: RFC 2965? + ;; + ;; RFC 6797 + (strict-transport-security . ,strict-transport-security-parser) + ;; Nonstandard but common headers + (x-forwarded-for . ,(multiple identity)) + ))) + +(define header-parse-error-handler ;; ignore errors + (make-parameter (lambda (header-name contents headers exn) headers))) + +;; The parser is supposed to return a list of header values for its header +(define (parse-header name contents) + (let* ((default unknown-header-parser) + (parser (alist-ref name (header-parsers) eq? default))) + (parser contents))) + +(define (parse-header-line line headers) + (or + (and-let* ((colon-idx (string-index line #\:)) + (header-name (http-name->symbol (string-take line colon-idx))) + (contents (string-trim-both (string-drop line (add1 colon-idx))))) + (handle-exceptions + exn + ((header-parse-error-handler) header-name contents headers exn) + (update-header-contents! + header-name (parse-header header-name contents) headers))) + (signal-http-condition + 'parse-header-line "Bad header line" (list line) + 'header-error 'contents line))) + +;; XXXX: Bottleneck? +(define (read-headers port) + (if (eof-object? (peek-char port)) ; Yeah, so sue me + (make-headers '()) + (let ((header-limit (http-header-limit)) + (line-limit (http-line-limit))) + (let lp ((c (read-char port)) + (ln '()) + (headers (make-headers '())) + (hc 0) + (len 0)) + (cond ((eqv? len line-limit) + (raise-line-limit-exceeded-error + (reverse-list->string ln) line-limit port)) + ((eof-object? c) + (if (null? ln) + headers + (parse-header-line (reverse-list->string ln) headers))) + ;; Only accept CRLF (we're not this strict everywhere...) + ((and (eqv? c #\return) (eqv? (peek-char port) #\newline)) + (read-char port) ; Consume and discard NL + (if (null? ln) ; Nothing came before: end of headers + headers + (let ((pc (peek-char port))) + (if (and (not (eof-object? pc)) + (or (eqv? pc #\space) (eqv? pc #\tab))) + ;; If the next line starts with whitespace, + ;; it's a continuation line of the same + ;; header. See section 2.2 of RFC 2616. + (let skip ((pc (read-char port)) (len len) (ln ln)) + (if (and (not (eqv? len line-limit)) + (or (eqv? pc #\space) (eqv? pc #\tab))) + (skip (read-char port) (add1 len) (cons pc ln)) + (lp pc ln headers hc len))) + (let* ((ln (reverse-list->string ln)) + (headers (parse-header-line ln headers)) + (hc (add1 hc))) + (when (eqv? hc header-limit) + (signal-http-condition + 'read-headers + "Max allowed header count exceeded" + (list port) + 'header-limit-exceeded + 'contents ln + 'headers headers + 'limit header-limit)) + (lp (read-char port) '() headers hc 0)))))) + ((eqv? c #\") + (let lp2 ((c2 (read-char port)) + (ln (cons c ln)) + (len len)) + (cond ((or (eqv? 0 len) (eof-object? c2)) + (lp c2 ln headers hc len)) + ((eqv? c2 #\") + (lp (read-char port) (cons c2 ln) + headers hc (add1 len))) + ((eqv? c2 #\\) + (let ((c3 (read-char port)) + (len len)) + (if (or (eof-object? c3) (eqv? 0 len)) + (lp c3 (cons c2 ln) headers hc len) + (lp2 (read-char port) + (cons c3 (cons c2 ln)) + (add1 len))))) + (else + (lp2 (read-char port) (cons c2 ln) (add1 len)))))) + (else + (lp (read-char port) (cons c ln) headers hc (add1 len)))))))) + +(define (signal-http-condition loc msg args type . more-info) + (signal (make-composite-condition + (make-property-condition 'http) + (apply make-property-condition type more-info) + (make-property-condition + 'exn 'location loc 'message msg 'arguments args)))) + +(defstruct request + (method 'GET) uri (major 1) (minor 1) (headers (make-headers '())) port) + +;; Perhaps we should have header parsers indexed by version or +;; something like that, so you can define the maximum version. Useful +;; for when expecting a response. Then we group request/response parsers +;; together, as with request/response unparsers. +(define http-0.9-request-parser + (let ((req (irregex '(seq (w/nocase "GET") (+ space) (=> uri (* any)))))) + (lambda (line in) + (and-let* ((m (irregex-match req line)) + (uri (normalized-uri (irregex-match-substring m 'uri)))) + (make-request method: 'GET uri: uri + major: 0 minor: 9 port: in))))) + +;; Might want to reuse this elsewhere +(define token-sre '(+ (~ "()<>@,;:\\\"/[]?={}\t "))) + +;; XXX This actually parses anything >= HTTP/1.0 +(define http-1.x-request-parser + (let ((req (irregex `(seq (=> method ,token-sre) (+ space) + (=> uri (+ (~ blank))) ; uri-common handles details + (+ space) (w/nocase "HTTP/") + (=> major (+ digit)) "." (=> minor (+ digit)))))) + (lambda (line in) + (and-let* ((m (irregex-match req line)) + (uri-string (irregex-match-substring m 'uri)) + (major (string->number (irregex-match-substring m 'major))) + (minor (string->number (irregex-match-substring m 'minor))) + (method (string->http-method (irregex-match-substring m 'method))) + (headers (read-headers in))) + (let* ((wildcard (string=? uri-string "*")) + (uri (and (not wildcard) (normalized-uri uri-string))) + ;; HTTP/1.0 has no chunking + (port (if (and (or (> major 1) (>= minor 1)) + (memq 'chunked + (header-values + 'transfer-encoding headers))) + (chunked-input-port in) + in))) + ;; HTTP/1.1 allows several "things" as "URI" (RFC2616, 5.1.2): + ;; Request-URI = "*" | absoluteURI | abs_path | authority + ;; + ;; HTTP/1.0, URIs are more limited (RFC1945, 5.1.2): + ;; Request-URI = absoluteURI | abs_path + ;; + ;; Currently, a plain authority is not accepted. This would + ;; require deep changes in the representation of request + ;; objects. It is only used in CONNECT requests, so + ;; currently not much of a problem. If we want to support + ;; this, we'd need a separate object type and expose a + ;; parser from uri-generic/uri-common for just authority. + (and (or (and wildcard (or (> major 1) (>= minor 1))) + (and uri (or (absolute-uri? uri) + (and (uri-path-absolute? uri) + (not (uri-host uri)))))) + (make-request method: method uri: uri + major: major minor: minor + headers: headers + port: port))))))) + +(define request-parsers ; order matters here + (make-parameter (list http-1.x-request-parser))) + +(define (read-request inport) + (let ((line (safe-read-line inport))) + (and (not (eof-object? line)) + ;; Try each parser in turn to process the request-line. + ;; A parser returns either #f or a request object + (let loop ((parsers (request-parsers))) + (if (null? parsers) + (signal-http-condition + 'read-request "Unknown protocol line" (list line) + 'unknown-protocol-line 'line line) + (or ((car parsers) line inport) (loop (cdr parsers)))))))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;; Request unparsing ;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define header-unparsers + (make-parameter + `((content-disposition . ,content-disposition-unparser) + (date . ,rfc1123-unparser) + (etag . ,etag-unparser) + (expires . ,rfc1123-unparser) + (host . ,host/port-unparser) + (if-match . ,if-match-unparser) + (if-modified-since . ,rfc1123-unparser) + (if-none-match . ,if-match-unparser) + (if-unmodified-since . ,rfc1123-unparser) + (last-modified . ,rfc1123-unparser) + (user-agent . ,software-unparser) + (server . ,software-unparser) + (upgrade . ,product-unparser) + (cookie . ,cookie-unparser) + (set-cookie . ,set-cookie-unparser) + (authorization . ,authorization-unparser) + (www-authenticate . ,authenticate-unparser) + (proxy-authorization . ,authorization-unparser) + (proxy-authenticate . ,authenticate-unparser) + (via . ,via-unparser) + ;; RFC 6797 + (strict-transport-security . ,strict-transport-security-unparser)))) + +(define (unparse-header header-name header-value) + (cond ((and (not (null? header-value)) + (eq? 'raw (get-params (car header-value)))) + (map get-no-newline-value header-value)) + ((assq header-name (header-unparsers)) + => (lambda (unparser) ((cdr unparser) header-value))) + (else (default-header-unparser header-value)))) + +(define (unparse-headers headers out) + (let ((unparsers (header-unparsers))) ; Don't access parameter for each header + (for-each + (lambda (h) + (let* ((name (car h)) + (name-s (symbol->http-name name)) + (contents (cdr h)) + (unparse (cond ((assq name unparsers) => cdr) ; inlined for perf + (else default-header-unparser)))) + (handle-exceptions exn + (if ((condition-predicate 'http) exn) + (signal exn) ;; Do not tamper with our own custom errors + (let* ((none "(no error message provided in original exn)") + (msg ((condition-property-accessor + 'exn 'message none) exn)) + (loc ((condition-property-accessor + 'exn 'location #f) exn)) + (args ((condition-property-accessor + 'exn 'arguments '()) exn))) + (signal-http-condition + 'unparse-headers + (sprintf "could not unparse ~S header ~S: ~A~A" + name-s contents (if loc (sprintf "(~A) " loc) "") msg) + args + 'unparse-error + 'header-name name + 'header-value contents + 'unparser unparse + 'original-exn exn))) + (let ((lines (if (and (not (null? contents)) + (eq? 'raw (get-params (car contents)))) + (map get-no-newline-value contents) + (unparse contents)))) + (for-each (lambda (value) + ;; Verify there's no \r\n or \r or \n in value? + (display (string-append name-s ": " value "\r\n") out)) + lines))))) + (headers-v headers)))) + +;; Use string-append and display rather than fprintf so the line gets +;; written in one burst. This supposedly avoids a strange race +;; condition, see #800. We use string-append instead of sprintf for +;; performance reasons. This is not exported, and our callers compare +;; request-major and request-minor so we can assume they're numbers. +(define (write-request-line request) + (let ((uri (request-uri request))) + (display (string-append + (http-method->string (request-method request)) + " " (if uri (uri->string uri) "*") " HTTP/" + (number->string (request-major request)) "." + (number->string (request-minor request)) "\r\n") + (request-port request)))) + +(define (http-0.9-request-unparser request) + (display (string-append "GET " (uri->string (request-uri request)) "\r\n") + (request-port request)) + request) + +(define (http-1.0-request-unparser request) + (and-let* (((= (request-major request) 1)) + ((= (request-minor request) 0)) + (o (request-port request))) + (write-request-line request) + (unparse-headers (request-headers request) o) + (display "\r\n" o) + request)) + +;; XXX This actually unparses anything >= HTTP/1.1 +(define (http-1.x-request-unparser request) + (and-let* (((or (> (request-major request) 1) + (and (= (request-major request) 1) + (> (request-minor request) 0)))) + (o (request-port request))) + (write-request-line request) + (unparse-headers (request-headers request) o) + (display "\r\n" o) + (if (memq 'chunked (header-values 'transfer-encoding + (request-headers request))) + (update-request request + port: (chunked-output-port (request-port request))) + request))) + +(define request-unparsers ; order matters here + (make-parameter (list http-1.x-request-unparser http-1.0-request-unparser))) + +(define (write-request request) + ;; Try each unparser in turn to write the request-line. + ;; An unparser returns either #f or a new request object. + (let loop ((unparsers (request-unparsers))) + (if (null? unparsers) + (let ((major (request-major request)) + (minor (request-minor request))) + (signal-http-condition + 'write-request + "Unknown protocol" (list (conc major "." minor)) + 'unknown-protocol 'major major 'minor minor)) + (or ((car unparsers) request) (loop (cdr unparsers)))))) + +;; Required for chunked requests. This is a bit of a hack! +(define (finish-request-body request) + (when (and (memq 'chunked (header-values 'transfer-encoding + (request-headers request))) + (eq? (##sys#slot (request-port request) 10) 'chunked-output-port)) + (display "0\r\n\r\n" (##sys#slot (request-port request) 11))) + request) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;; Response unparsing ;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(defstruct response + (code 200) (reason "OK") (major 1) (minor 1) (headers (make-headers '())) port) + +(define make-response + (let ((old-make-response make-response)) + (lambda (#!rest args #!key status code reason) + (let ((resp (apply old-make-response args))) + (when (and status (not code) (not reason)) + (response-status-set! resp status)) + resp)))) + +(define update-response + (let ((old-update-response update-response)) + (lambda (resp #!rest args #!key status code reason) + (let ((resp (apply old-update-response resp args))) + (when (and status (not code) (not reason)) + (response-status-set! resp status)) + resp)))) + +(define (response-status-set! resp status) + (receive (code reason) (http-status->code&reason status) + (response-code-set! resp code) + (response-reason-set! resp reason) + resp)) + +(define (response-class obj) + (let ((code (if (response? obj) (response-code obj) obj))) + (- code (modulo code 100)))) + +(define (response-status obj) + (let* ((c (if (response? obj) (response-code obj) obj)) + (s (find (lambda (x) (= (cadr x) c)) (http-status-codes)))) + (if s + (car s) + (signal-http-condition + 'response-status "Unknown status code" (list c) + 'unknown-code 'code c)))) + +(define (http-status->code&reason status) + (let ((s (alist-ref status (http-status-codes)))) + (unless s + (signal-http-condition + 'http-status->code&reason + ;; haha, status symbol ;) + "Unknown response status symbol" + (list status) 'unknown-status 'status status)) + (values (car s) (cdr s)))) + +;; List of HTTP status codes based on: +;; http://www.iana.org/assignments/http-status-codes/http-status-codes.xml +(define http-status-codes + (make-parameter + `((continue . (100 . "Continue")) + (switching-protocols . (101 . "Switching Protocols")) + (processing . (102 . "Processing")) + (ok . (200 . "OK")) + (created . (201 . "Created")) + (accepted . (202 . "Accepted")) + (non-authoritative-information . (203 . "Non-Authoritative Information")) + (no-content . (204 . "No Content")) + (reset-content . (205 . "Reset Content")) + (partial-content . (206 . "Partial Content")) + (multi-status . (207 . "Multi-Status")) + (already-reported . (208 . "Already Reported")) + (im-used . (226 . "IM Used")) + (multiple-choices . (300 . "Multiple Choices")) + (moved-permanently . (301 . "Moved Permanently")) + (found . (302 . "Found")) + (see-other . (303 . "See Other")) + (not-modified . (304 . "Not Modified")) + (use-proxy . (305 . "Use Proxy")) + (temporary-redirect . (307 . "Temporary Redirect")) + (bad-request . (400 . "Bad Request")) + (unauthorized . (401 . "Unauthorized")) + (payment-required . (402 . "Payment Required")) + (forbidden . (403 . "Forbidden")) + (not-found . (404 . "Not Found")) + (method-not-allowed . (405 . "Method Not Allowed")) + (not-acceptable . (406 . "Not Acceptable")) + (proxy-authentication-required . (407 . "Proxy Authentication Required")) + (request-time-out . (408 . "Request Time-out")) + (conflict . (409 . "Conflict")) + (gone . (410 . "Gone")) + (length-required . (411 . "Length Required")) + (precondition-failed . (412 . "Precondition Failed")) + (request-entity-too-large . (413 . "Request Entity Too Large")) + (request-uri-too-large . (414 . "Request-URI Too Large")) + (unsupported-media-type . (415 . "Unsupported Media Type")) + (requested-range-not-satisfiable . (416 . "Requested Range Not Satisfiable")) + (expectation-failed . (417 . "Expectation Failed")) + (unprocessable-entity . (422 . "Unprocessable Entity")) + (locked . (423 . "Locked")) + (failed-dependency . (424 . "Failed Dependency")) + (upgrade-required . (426 . "Upgrade Required")) + (precondition-required . (428 . "Precondition Required")) + (too-many-requests . (429 . "Too Many Requests")) + (request-header-fields-too-large . (431 . "Request Header Fields Too Large")) + (internal-server-error . (500 . "Internal Server Error")) + (not-implemented . (501 . "Not Implemented")) + (bad-gateway . (502 . "Bad Gateway")) + (service-unavailable . (503 . "Service Unavailable")) + (gateway-time-out . (504 . "Gateway Time-out")) + (http-version-not-supported . (505 . "HTTP Version Not Supported")) + (insufficient-storage . (507 . "Insufficient Storage")) + (loop-detected . (508 . "Loop Detected")) + (not-extended . (510 . "Not Extended")) + (network-authentication-required . (511 . "Network Authentication Required"))))) + +(define (http-0.9-response-unparser response) + response) ;; The response-body will just follow + +;; See notes at write-request-line +(define (write-response-line response) + (display (string-append + "HTTP/" + (number->string (response-major response)) "." + (number->string (response-minor response)) " " + (->string (response-code response)) " " + (->string (response-reason response)) "\r\n") + (response-port response))) + +(define (http-1.0-response-unparser response) + (and-let* (((= (response-major response) 1)) + ((= (response-minor response) 0)) + (o (response-port response))) + (write-response-line response) + (unparse-headers (response-headers response) o) + (display "\r\n" o) + response)) + +;; XXX This actually unparses anything >= HTTP/1.1 +(define (http-1.x-response-unparser response) + (and-let* (((or (> (response-major response) 1) + (and (= (response-major response) 1) + (> (response-minor response) 0)))) + (o (response-port response))) + (write-response-line response) + (unparse-headers (response-headers response) o) + (display "\r\n" o) + (if (memq 'chunked (header-values 'transfer-encoding + (response-headers response))) + (update-response response + port: (chunked-output-port (response-port response))) + response))) + +(define response-unparsers + (make-parameter (list http-1.x-response-unparser http-1.0-response-unparser))) + +(define (write-response response) + ;; Try each unparser in turn to write the response-line. + ;; An unparser returns either #f or a new response object. + (let loop ((unparsers (response-unparsers))) + (if (null? unparsers) + (let ((major (response-major response)) + (minor (response-minor response))) + (signal-http-condition + 'write-response + "Unknown protocol" (list (conc major "." minor)) + 'unknown-protocol 'major major 'minor minor)) + (or ((car unparsers) response) (loop (cdr unparsers)))))) + +;; Required for chunked requests. This is a bit of a hack! +(define (finish-response-body response) + (when (and (memq 'chunked (header-values 'transfer-encoding + (response-headers response))) + (eq? (##sys#slot (response-port response) 10) 'chunked-output-port)) + (display "0\r\n\r\n" (##sys#slot (response-port response) 11))) + response) + +;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;; Response parsing ;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define http-1.x-response-parser + (let ((resp (irregex '(seq (w/nocase "HTTP/") + (=> major (+ digit)) "." (=> minor (+ digit)) + ;; Could use '(= 3 digit) for status-code, but + ;; that's currently not compilable + (+ space) (=> status-code digit digit digit) + (+ space) (=> reason-phrase (* nonl)))))) + (lambda (line in) + (and-let* ((m (irregex-match resp line)) + (code (string->number (irregex-match-substring m 'status-code))) + (major (string->number (irregex-match-substring m 'major))) + (minor (string->number (irregex-match-substring m 'minor))) + ((or (> major 1) (and (= major 1) (> minor 0)))) + (reason (irregex-match-substring m 'reason-phrase)) + (h (read-headers in)) + (port (if (memq 'chunked (header-values 'transfer-encoding h)) + (chunked-input-port in) + in))) + (make-response code: code reason: reason + major: major minor: minor + headers: h + port: port))))) + +(define http-1.0-response-parser + (let ((resp (irregex '(seq (w/nocase "HTTP/1.0") + ;; Could use '(= 3 digit) for status-code, but + ;; that's currently not compilable + (+ space) (=> status-code digit digit digit) + (+ space) (=> reason-phrase (* nonl)))))) + (lambda (line in) + (and-let* ((m (irregex-match resp line)) + (code (string->number (irregex-match-substring m 'status-code))) + (reason (irregex-match-substring m 'reason-phrase)) + (h (read-headers in))) + ;; HTTP/1.0 has no chunking + (make-response code: code reason: reason + major: 1 minor: 0 + headers: h + port: in))))) + +;; You can't "detect" a 0.9 response, because there is no response line. +;; It will simply output the body directly, so we will just assume that +;; if we can't recognise the output string, we just got a 0.9 response. +(define (http-0.9-response-parser line in) + (make-response code: 200 reason: "OK" + major: 0 minor: 9 + ;; XXX This is wrong, it re-inserts \r\n, while it may have + ;; been a \n only. To work around this, we'd have to write + ;; a custom (safe-)read-line procedure. + ;; However, it does not matter much because HTTP 0.9 is only + ;; defined to ever return text/html, no binary or any other + ;; content type. + port: (call-with-input-string (string-append line "\r\n") + (lambda (str) + (make-concatenated-port str in))))) + +(define response-parsers ;; order matters here + (make-parameter (list http-1.x-response-parser http-1.0-response-parser))) + +(define (read-response inport) + (let ((line (safe-read-line inport))) + (and (not (eof-object? line)) + (let loop ((parsers (response-parsers))) + (if (null? parsers) + (signal-http-condition + 'read-response "Unknown protocol line" (list line) + 'unknown-protocol-line 'line line) + (or ((car parsers) line inport) (loop (cdr parsers)))))))) + +) diff --git a/tests/run.scm b/tests/run.scm new file mode 100644 index 0000000..e2d0b70 --- /dev/null +++ b/tests/run.scm @@ -0,0 +1,1243 @@ +(import scheme chicken.base chicken.port + chicken.condition chicken.time.posix srfi-1 srfi-18 + test uri-common intarweb chicken.io chicken.format) + +;; Below, there are specific tests for when these do have a value +(http-header-limit #f) +(http-line-limit #f) +(http-urlencoded-request-data-limit #f) + +(define-syntax test-error* + (syntax-rules () + ((_ ?msg (?error-type ...) ?expr) + (let-syntax ((expression: + (syntax-rules () + ((_ ?expr) + (condition-case (begin ?expr "<no error thrown>") + ((?error-type ...) '(?error-type ...)) + (exn () (##sys#slot exn 1))))))) + (test ?msg '(?error-type ...) (expression: ?expr)))) + ((_ ?msg ?error-type ?expr) + (test-error* ?msg (?error-type) ?expr)) + ((_ ?error-type ?expr) + (test-error* (sprintf "~S" '?expr) ?error-type ?expr)))) + +(header-parse-error-handler (lambda (header-name contents header exn) + (raise exn))) + +(define (test-read-headers str) + (call-with-input-string str read-headers)) + +(test-begin "intarweb") +(test-group "headers" + (test-group "single headers" + (parameterize ((single-headers '(foo qux)) + (header-parsers `((foo . ,(single identity)) + (qux . ,(single identity))))) + (let ((headers (test-read-headers "foo: bar\r\nqux:\t \tmooh\t \r\n\r\n"))) + (test "Basic test" + '("bar") (header-values 'foo headers)) + ;; RFC 2616 4.2 + (test "Extra spaces are ignored" + '("mooh") (header-values 'qux headers))) + (let ((headers (test-read-headers "foo: bar\r\n qux: mooh\r\nquux: mumble\r\n\r\n"))) + ;; RFC 2616 2.2 + (test "Continuation chars" + '("bar qux: mooh") (header-values 'foo headers))) + ;; Not in RFC but common behaviour - also, robustness principle + (let ((headers (test-read-headers "foo: bar\r\nfoo: qux\r\n"))) + (test "Multiple headers for singular header types discarded" + '("qux") (header-values 'foo headers))))) + ;; All this RFC 2616 4.2 + (test-group "multi-headers" + (parameterize ((header-parsers `((foo . ,(multiple identity))))) + (let ((headers (test-read-headers "foo: bar\r\nfoo: qux\r\nquux: mumble\r\n\r\n"))) + (test "Multiple headers" + '("bar" "qux") (header-values 'foo headers))) + (let ((headers (test-read-headers "Foo: bar\r\nFoO: qux\r\nquux: mumble\r\n\r\n"))) + (test "Multiple headers: case insensitivity" + '("bar" "qux") (header-values 'foo headers))) + (let ((headers (test-read-headers "foo: bar, qux\r\nquux: mumble\r\n\r\n"))) + (test "Comma-separated headers" + '("bar" "qux") (header-values 'foo headers))) + (let ((headers (test-read-headers "foo: \"ba\\\"r, qux\"\r\nfoo: mooh\r\n\r\n"))) + (test "Quoted headers" + '("ba\"r, qux" "mooh") (header-values 'foo headers)))) + ;; RFC 2616 4.5 + ;; "Unrecognized header fields are treated as entity-header fields." + ;; + ;; RFC 2616 7.1 + ;; "Unrecognized header fields SHOULD be ignored by the recipient and MUST be + ;; forwarded by transparent proxies." + (let ((headers (test-read-headers "unknown: foo, bar\r\nunknown: blah\r\n\r\n"))) + (test "Unknown headers are not parsed and put into lists" + '("foo, bar" "blah") (header-values 'unknown headers)) + (test "Unknown headers get raw instead of a parameter list" + 'raw (header-params 'unknown headers)))) + (test-group "miscellaneous header stuff" + (parameterize ((header-parsers `((foo . ,(multiple identity)) + (bar . ,(lambda x (error "bad header"))))) + (http-header-limit 2)) + (test-error "Missing header contents" (test-read-headers "foo\r\n\r\n")) + (test-error "Bad header w/ handler" (test-read-headers "bar: x\r\n\r\n")) + (parameterize ((header-parse-error-handler (lambda (n c h exn) h))) + (test "Bad header w/o handler" #t (headers? (test-read-headers "bar: x\r\n\r\n")))) + ;; RFC 2616 2.2 + ;; "The backslash character ("\") MAY be used as a single-character + ;; quoting mechanism only within quoted-string and comment constructs." + ;; quoted-pair = "\" CHAR + ;; CHAR implies any char, *including* CR/LF. This is clarified by RFC 822, + ;; on which RFC 2616 is based. + ;; Apparently, even \CRLF is allowed (as opposed to \CR\LF) + (test "Embedded newlines" + '("bar\r\nqux") + ;; It's unclear whether we should interpret the "\r\n" as EOL + ;; in "\\\r\n", or whether it should be seen as an embedded \r + ;; followed by a \n (which is then interpreted as a literal \n?) + (header-values 'foo (test-read-headers "Foo: \"bar\\\r\\\nqux\""))) + (test-error "Too many headers is an error" + (test-read-headers "foo: bar\r\nfoo: qux\r\nfoo: hoohoo\r\n"))))) + +(test-group "specialized header parsers" + (test-group "host/port" + (test "Hostname and port" + '(("foo.example.com" . 8080)) + (header-values 'host (test-read-headers "Host: foo.example.com:8080"))) + (test "Hostname, no port" + '(("foo.example.com" . #f)) + (header-values 'host (test-read-headers "Host: foo.example.com")))) + (test-group "quality parameter" + (let* ((headers (test-read-headers "Accept: text/plain; Q=0.5, text/html, text/plain; q=0.123456, application/pdf; q=1.2345, text/xml; q=-0.234, text/whatever; q=")) + (accept (header-contents 'accept headers))) + ;; RFC 2616 3.6: "All transfer-coding values are case insensitive". + ;; This includes the parameter name (attribute) and value. + (test "quality value (case-insensitive)" + 0.5 (get-param 'q (first accept) 1.0)) + (test "quality encoding value" + 'text/plain (get-value (first accept))) + (test "quality values have only three digits" + 0.123 (get-param 'q (third accept) 1.0)) + (test "quality values maximum is 1.0" + 1.0 (get-param 'q (fourth accept) 1.0)) + (test "quality values minimum is 0.0" + 0.0 (get-param 'q (fifth accept) 1.0)) + (test "missing quality value ok" + 1.0 (get-param 'q (sixth accept) 1.0)))) + (test-group "charset parameter" + (let* ((headers (test-read-headers "Content-Type: text/PLAIN; charset=ISO-8859-1")) + (content-type (header-contents 'content-type headers))) + (test "content-type value is lowercase symbol" + 'text/plain (get-value (car content-type))) + ;; RFC 2616 3.4: "HTTP character sets are identified by + ;; case-insensitive tokens. The complete set of tokens is defined + ;; by the IANA Character Set registry." + (test "content-type charset is lowercase symbol" + 'iso-8859-1 (get-param 'charset (car content-type))))) + + (test-group "symbol-parser-ci" + (let* ((headers (test-read-headers "Accept-Ranges: FoO"))) + (test "Case-insensitive" + '(foo) (header-values 'accept-ranges headers)))) + + (test-group "symbol-parser" + (let* ((headers (test-read-headers "Allow: FoO, foo"))) + (test "Case-sensitive" + '(FoO foo) (header-values 'allow headers)))) + + (test-group "natnum-subparser" + (parameterize ((single-headers '(foo bar qux mooh)) + (header-parsers `((foo . ,(single natnum-subparser)) + (bar . ,(single natnum-subparser)) + (qux . ,(single natnum-subparser)) + (mooh . ,(single natnum-subparser))))) + (let ((headers (test-read-headers "Foo: 10\r\nBar: abc\r\nQux: -10\r\nMooh: 1.6"))) + (test "Simple test" + 10 (header-value 'foo headers)) + (test "No number defaults to 0" + 0 (header-value 'bar headers)) + (test "No negative numbers" + 0 (header-value 'qux headers)) + ;; This is a "feature" in the interest of the robustness principle + (test "Rounding of real numbers" + 2 (header-value 'mooh headers))))) + + (test-group "cache-control-parser" + (let ((headers (test-read-headers "Cache-control: max-age=10, private"))) + (test "max-age is a number" + '(max-age . 10) (assq 'max-age (header-values 'cache-control headers))) + (test "private without value" + '(private . #t) (assq 'private (header-values 'cache-control headers)))) + (let ((headers (test-read-headers "Cache-control: private=\"accept-encoding, accept-ranges\"\r\nCache-control: must-revalidate"))) + (test "private with values" + '(private . (accept-encoding accept-ranges)) + (assq 'private (header-values 'cache-control headers))) + (test "Acts like a multi-header" + '(must-revalidate . #t) (assq 'must-revalidate (header-values 'cache-control headers))))) + + (test-group "authorization-parser" + (test-group "basic auth" + (let ((headers (test-read-headers "Authorization: Basic QWxpIEJhYmE6b3BlbiBzZXNhbWU=\r\n"))) + (test "basic" + 'basic + (header-value 'authorization headers)) + (test "username" + "Ali Baba" + (header-param 'username 'authorization headers)) + (test "password" + "open sesame" + (header-param 'password 'authorization headers)))) + (test-group "digest auth" + (let ((headers (test-read-headers "Authorization: Digest username=\"Mufasa\", realm=\"testrealm@host.com\", nonce=\"dcd98b7102dd2f0e8b11d0f600bfb0c093\", uri=\"/dir/index.html\", qop=auth, nc=00000001, cnonce=\"0a4f113b\", response=\"6629fae49393a05397450978507c4ef1\", opaque=\"5ccc069c403ebaf9f0171e9517f40e41\", algorithm=MD5"))) + (test "digest" + 'digest + (header-value 'authorization headers)) + (test "realm" + "testrealm@host.com" + (header-param 'realm 'authorization headers)) + (test "nonce" + "dcd98b7102dd2f0e8b11d0f600bfb0c093" + (header-param 'nonce 'authorization headers)) + (test "username" + "Mufasa" + (header-param 'username 'authorization headers)) + (test "qop" + 'auth + (header-param 'qop 'authorization headers)) + (test "digest uri" + "/dir/index.html" + (uri->string (header-param 'uri 'authorization headers))) + (test "nonce count" + 1 + (header-param 'nc 'authorization headers)) + (test "cnonce" + "0a4f113b" + (header-param 'cnonce 'authorization headers)) + (test "response" + "6629fae49393a05397450978507c4ef1" + (header-param 'response 'authorization headers)) + (test "opaque" + "5ccc069c403ebaf9f0171e9517f40e41" + (header-param 'opaque 'authorization headers)) + (test "algorithm" + 'md5 + (header-param 'algorithm 'authorization headers)))) + (test-group "custom authorization scheme" + (parameterize ((authorization-param-subparsers + `((custom . ,(lambda (contents pos) + (receive (c p) + (parse-token contents pos) + (values `((contents . ,(http-name->symbol c))) p)))) + . ,(authorization-param-subparsers)))) + (let ((headers (test-read-headers "Authorization: Custom Security-through-obscurity"))) + (test "Custom" + 'custom + (header-value 'authorization headers)) + (test "Custom contents" + 'security-through-obscurity + (header-param 'contents 'authorization headers)))))) + + (test-group "authenticate parser" + (test-group "basic auth" + (let ((headers (test-read-headers "WWW-Authenticate: Basic realm=\"WallyWorld\""))) + (test "basic" + 'basic + (header-value 'www-authenticate headers)) + (test "realm" + "WallyWorld" + (header-param 'realm 'www-authenticate headers)))) + (test-group "digest auth" + (let ((headers (test-read-headers "WWW-Authenticate: Digest realm=\"testrealm@host.com\", qop=\"auth, auth-int\", nonce=\"dcd98b7102dd2f0e8b11d0f600bfb0c093\", opaque=\"5ccc069c403ebaf9f0171e9517f40e41\""))) + (test "digest" + 'digest + (header-value 'www-authenticate headers)) + (test "realm" + "testrealm@host.com" + (header-param 'realm 'www-authenticate headers)) + (test "qop" + '(auth auth-int) + (header-param 'qop 'www-authenticate headers)) + (test "nonce" + "dcd98b7102dd2f0e8b11d0f600bfb0c093" + (header-param 'nonce 'www-authenticate headers)) + (test "opaque" + "5ccc069c403ebaf9f0171e9517f40e41" + (header-param 'opaque 'www-authenticate headers)) + (test "missing stale value" + #f + (header-param 'stale 'www-authenticate headers))) + (let ((headers (test-read-headers "WWW-Authenticate: Digest domain=\"/example http://foo.com/bar\", stale=TRUE"))) + (test "domains" + '("/example" "http://foo.com/bar") + (map uri->string + (header-param 'domain 'www-authenticate headers))) + (test "stale" + #t + (header-param 'stale 'www-authenticate headers))) + (let ((headers (test-read-headers "WWW-Authenticate: Digest stale=whatever"))) + (test "non-true stale value" + #f + (header-param 'stale 'www-authenticate headers))))) + + (test-group "pragma-parser" + (let ((headers (test-read-headers "Pragma: custom-value=10, no-cache"))) + (test "value" + '(custom-value . "10") + (assq 'custom-value (header-values 'pragma headers))) + (test "no value" + '(no-cache . #t) (assq 'no-cache (header-values 'pragma headers)))) + (let ((headers (test-read-headers "Cache-control: private=\"accept-encoding, accept-ranges\"\r\nCache-control: must-revalidate"))) + (test "private with values" + '(private . (accept-encoding accept-ranges)) + (assq 'private (header-values 'cache-control headers))) + (test "Acts like a multi-header" + '(must-revalidate . #t) (assq 'must-revalidate (header-values 'cache-control headers))))) + + ;; RFC 2616, 14.15 & RFC 1864 (Base64) + (test-group "base64-parser" + (let ((headers (test-read-headers "Content-md5: Q2hlY2sgSW50ZWdyaXR5IQ=="))) + (test "md5 is base64-decoded" + "Check Integrity!" + (header-value 'content-md5 headers)))) + + (test-group "range-parser" + (let ((headers (test-read-headers "content-range: bytes 500-999/1234"))) + (test "Simple range" + '(500 999 1234) + (header-value 'content-range headers)))) + + (test-group "content-disposition" + (let ((headers (test-read-headers "Content-Disposition: attachment; filename=dir/foo.jpg"))) + (test "Attachment with filename parameter containing directory" + `(attachment (filename . "foo.jpg")) + (cons (header-value 'content-disposition headers) + (header-params 'content-disposition headers)))) + (let ((headers (test-read-headers "Content-Disposition: inline; filename=foo.jpg; creation-date=Sun, 06 Nov 1994 08:49:37 GMT"))) + (test "Inline with filename and (not quoted) creation-date parameter" + `(inline + (filename . "foo.jpg") + (creation-date . ,(utc-time->seconds '#(37 49 08 06 10 94 0 310 #f 0)))) + (cons (header-value 'content-disposition headers) + (map (lambda (x) + (if (vector? (cdr x)) + (cons (car x) (utc-time->seconds (cdr x))) + x)) + (header-params 'content-disposition headers))))) + (let ((headers (test-read-headers "Content-Disposition: inline; read-date=\"Sun, 06 Nov 1994 08:49:37 GMT\"; size=100"))) + (test "Inline with size and (quoted) read-date parameter" + `(inline + (read-date . ,(utc-time->seconds '#(37 49 08 06 10 94 0 310 #f 0))) + (size . 100)) + (cons (header-value 'content-disposition headers) + (map (lambda (x) + (if (vector? (cdr x)) + (cons (car x) (utc-time->seconds (cdr x))) + x)) + (header-params 'content-disposition headers)))))) + + (test-group "normalized-uri" + (let ((headers (test-read-headers "Location: http://example.com/foo"))) + (test "Uri" + (uri-reference "http://example.com/foo") + (header-value 'location headers))) + (let ((headers (test-read-headers "Location: http://example.com/foo/../bar"))) + (test "Auto-normalization" + (uri-reference "http://example.com/bar") + (header-value 'location headers)))) + + (test-group "etag-parser" + (let ((headers (test-read-headers "Etag: \"foo\""))) + (test "Strong tag" + '(strong . "foo") + (header-value 'etag headers))) + (let ((headers (test-read-headers "Etag: W/\"bar\""))) + (test "Weak tag" + '(weak . "bar") + (header-value 'etag headers))) + (let ((headers (test-read-headers "Etag: \"\""))) + (test "Empty tag" + '(strong . "") + (header-value 'etag headers))) + (let ((headers (test-read-headers "Etag: \"W/bar\""))) + (test "Strong tag, containing W/ prefix" + '(strong . "W/bar") + (header-value 'etag headers)))) + + (test-group "if-match parser" + (let ((headers (test-read-headers "If-match: foo"))) + (test "Strong etag" + '(strong . "foo") + (header-value 'if-match headers))) + (let ((headers (test-read-headers "If-match: W/foo"))) + (test "Weak etag" + '(weak . "foo") + (header-value 'if-match headers))) + (let ((headers (test-read-headers "If-match: W/foo bar"))) + (test "Multiple etags" + '((weak . "foo") (strong . "bar")) + (header-values 'if-match headers))) + (let ((headers (test-read-headers "If-match: *"))) + (test "Wildcard" + '* + (header-value 'if-match headers)))) + + (test-group "http-date-parser" + (let ((headers (test-read-headers "Date: Sun, 06 Nov 1994 08:49:37 GMT"))) + (test "RFC1123 time" + (utc-time->seconds '#(37 49 08 06 10 94 0 310 #f 0)) + (utc-time->seconds (header-value 'date headers)))) + (let ((headers (test-read-headers "Date: Sunday, 06-Nov-94 08:49:37 GMT"))) + (test "RFC850 time" + (utc-time->seconds '#(37 49 08 06 10 94 0 310 #f 0)) + (utc-time->seconds (header-value 'date headers)))) + (let ((headers (test-read-headers "Date: Sun Nov 6 08:49:37 1994"))) + (test "asctime time" + (utc-time->seconds '#(37 49 08 06 10 94 0 310 #f 0)) + (utc-time->seconds (header-value 'date headers))))) + + ;; This seems a little excessive.. Maybe find a way to reduce the number + ;; of cases and still have a good representative test? + (test-group "If-Range parser" + (let ((headers (test-read-headers "If-Range: Sun, 06 Nov 1994 08:49:37 GMT"))) + (test "RFC1123 time" + (utc-time->seconds '#(37 49 08 06 10 94 0 310 #f 0)) + (utc-time->seconds (header-value 'if-range headers)))) + (let ((headers (test-read-headers "If-Range: Sunday, 06-Nov-94 08:49:37 GMT"))) + (test "RFC850 time" + (utc-time->seconds '#(37 49 08 06 10 94 0 310 #f 0)) + (utc-time->seconds (header-value 'if-range headers)))) + (let ((headers (test-read-headers "If-Range: Sun Nov 6 08:49:37 1994"))) + (test "asctime time" + (utc-time->seconds '#(37 49 08 06 10 94 0 310 #f 0)) + (utc-time->seconds (header-value 'if-range headers)))) + (let ((headers (test-read-headers "If-Range: \"foo\""))) + (test "Strong Etag" + '(strong . "foo") + (header-value 'if-range headers))) + (let ((headers (test-read-headers "If-Range: W/\"bar\""))) + (test "Weak Etag" + '(weak . "bar") + (header-value 'if-range headers))) + (let ((headers (test-read-headers "If-Range: \"\""))) + (test "Empty Etag" + '(strong . "") + (header-value 'if-range headers))) + (let ((headers (test-read-headers "If-Range: \"W/bar\""))) + (test "Strong Etag, containing W/ prefix" + '(strong . "W/bar") + (header-value 'if-range headers))) ) + + (test-group "via parser" + (let ((headers (test-read-headers "Via: 1.1"))) + (test "simple" + '("1.1") + (header-values 'via headers))) + (let ((headers (test-read-headers "Via: 1.1 foo:80 (comment)"))) + (test "complex" + '("1.1 foo:80 (comment)") + (header-values 'via headers))) + (let ((headers (test-read-headers "Via: 1.1 foo"))) + (test "one hop" + '("1.1 foo") + (header-values 'via headers))) + (let ((headers (test-read-headers "Via: 1.1 foo, 1.0 bar"))) + (test "two hops" + '("1.1 foo" "1.0 bar") + (header-values 'via headers)))) + + (test-group "product parser" + (test "Simple product" + '("websocket" . #f) + (header-value 'upgrade (test-read-headers "Upgrade: websocket\r\n"))) + (test "Product with version" + '("TLS" . "1.0") + (header-value 'upgrade (test-read-headers "Upgrade: TLS/1.0\r\n")))) + + (test-group "software parser" + (test "Simple product" + '(("Mozilla" "5.0" #f)) + (header-value 'user-agent (test-read-headers "User-Agent: Mozilla/5.0\r\n"))) + (test "Product with comment" + '(("Mozilla" #f "foo")) + (header-value 'user-agent (test-read-headers "User-Agent: Mozilla (foo)\r\n"))) + (test "Realistic product (comments, semicolons)" + '(("Mozilla" "5.0" "X11; U; NetBSD amd64; en-US; rv:1.9.0.3") ("Gecko" "2008110501" #f) ("Minefield" "3.0.3" #f)) + (header-value 'user-agent (test-read-headers "User-Agent: Mozilla/5.0 (X11; U; NetBSD amd64; en-US; rv:1.9.0.3) Gecko/2008110501 Minefield/3.0.3\r\n"))) + ;; Reported by Peter Danenberg; Google Drive returns this header + (test "Realistic product (quoted comment)" + '(("UploadServer" #f "Built on May 4 2015 17:31:43 (1430785903)")) + (header-value 'server (test-read-headers "Server: UploadServer (\"Built on May 4 2015 17:31:43 (1430785903)\")\r\n")))) + + (test-group "Set-Cookie parser" + (let* ((headers (test-read-headers "Set-Cookie: foo=\"bar\""))) + (test "Simple name/value pair" + '("foo" . "bar") + (get-value (first (header-contents 'set-cookie headers))))) + (let* ((headers (test-read-headers "Set-Cookie: foo=qux\r\nSet-Cookie: Foo=\"bar\""))) + ;; XXX: Should intarweb remove these, or should the user code handle this? + ;; What if interacting with actual broken code on the other side? + (test "Multiple cookies with same name (CI) are all kept" + '(("foo" . "qux") ("Foo" . "bar")) + (map get-value (header-contents 'set-cookie headers)))) + (let* ((headers (test-read-headers "Set-Cookie: Foo=bar"))) + (test "Cookie names preserve case" + '("Foo" . "bar") + (get-value (first (header-contents 'set-cookie headers))))) + (let ((headers (test-read-headers "Set-Cookie: foo=bar=qux; max-age=10"))) + (test "Cookie with = signs" + '("foo" . "bar=qux") + (get-value (first (header-contents 'set-cookie headers))))) + (let* ((headers (test-read-headers "Set-Cookie: foo=bar; Comment=\"Hi, there!\", qux=mooh\r\nSet-Cookie: mumble=mutter\r\n"))) + (test "Comment" + "Hi, there!" + (get-param 'comment + (first (header-contents 'set-cookie headers)))) + (test "Multiple cookies in one header" + '("qux" . "mooh") + (get-value (second (header-contents 'set-cookie headers)))) + (test "Multiple cookies in multiple headers" + '("mumble" . "mutter") + (get-value (third (header-contents 'set-cookie headers)))) + (test "Missing \"secure\" value" + #f + (get-param 'secure + (third (header-contents 'set-cookie headers))))) + (let* ((headers (test-read-headers "Set-Cookie: foo=; expires=Sunday, 20-Jul-08 15:23:42 GMT; secure; path = / ; Port=80,8080"))) + (test "Missing value" + '("foo" . "") + (get-value (first (header-contents 'set-cookie headers)))) + (test "Old-style cookie expires value" + (utc-time->seconds '#(42 23 15 20 6 108 0 309 #f 0)) + (utc-time->seconds + (get-param 'expires + (first (header-contents 'set-cookie headers))))) + (test "Secure value" + #t + (get-param 'secure + (first (header-contents 'set-cookie headers)))) + (test "Path" + (uri-reference "/") + (get-param 'path + (first (header-contents 'set-cookie headers)))) + (test "Port numbers" + '(80 8080) + (get-param 'port + (first (header-contents 'set-cookie headers))))) + (let* ((headers (test-read-headers "Set-Cookie: foo=; expires=Sun, 20 Jul 2008 15:23:42 GMT; secure; path = / "))) + (test "Noncompliant syntax cookie expiry value (rfc1123)" + (utc-time->seconds '#(42 23 15 20 6 108 0 309 #f 0)) + (utc-time->seconds + (get-param 'expires + (first (header-contents 'set-cookie headers)))))) + (let* ((headers (test-read-headers "Set-Cookie: foo=; expires=Sun, 20-Jul-2008 15:23:42 GMT; secure; path = / "))) + (test "Noncompliant syntax cookie expiry value (rfc850-like, abbrev day)" + (utc-time->seconds '#(42 23 15 20 6 108 0 309 #f 0)) + (utc-time->seconds + (get-param 'expires + (first (header-contents 'set-cookie headers))))))) + + (test-group "cookie-parser" + (let* ((headers (test-read-headers "Cookie: Foo=bar; $Path=/; qux=mooh; $unknown=something"))) + (test "Multiple cookies in the same header" + '(("Foo" . "bar") . ("qux" . "mooh")) + (cons + (get-value (first (header-contents 'cookie headers))) + (get-value (second (header-contents 'cookie headers))))) + (test "Parameters of cookies (spaces stripped)" + (uri-reference "/") + (get-param 'path (first (header-contents 'cookie headers)))) + (test "Parameters of cookies" + "something" + (get-param 'unknown (second (header-contents 'cookie headers))))) + (let* ((headers (test-read-headers "Cookie: $Version=\"1\"; Foo=bar; $Path=/; qux=mooh; $unknown=something"))) + (test "Version string is used for all cookies" + (cons 1 1) + (cons + (get-param 'version (first (header-contents 'cookie headers))) + (get-param 'version (second (header-contents 'cookie headers))))))) + + (test-group "strict-transport-security-parser" + (let ((headers (test-read-headers "Strict-Transport-Security: max-age=10; includeSubDomains"))) + (test "max-age is a number" + '(max-age . 10) + (assq 'max-age (header-value 'strict-transport-security headers))) + (test "includeSubDomains without value" + '(includesubdomains . #t) + (assq 'includesubdomains (header-value 'strict-transport-security headers))))) + + (test-group "headers" + (test "Simple test" + `(bar qux) + (header-values 'foo (headers `((foo bar qux))))) + (test "Multi headers are folded" + `(bar qux) + (header-values 'foo (headers `((foo bar) + (foo qux))))) + (test "Single headers are unique" + `(qux) + (header-values 'foo (parameterize ((single-headers '(foo))) + (headers `((foo bar) + (foo qux)))))) + (test "Extra single headers are ignored" + `(qux) + (header-values 'foo (parameterize ((single-headers '(foo))) + (headers `((foo bar qux)))))) + (test "Parameters" + `((bar . qux)) + (get-params + (car (header-contents 'foo (headers `((foo #(mooh ((bar . qux)))))))))) + (test "Multi headers are folded into old headers" + `(bar qux) + (header-values 'foo (headers `((foo qux)) + (headers `((foo bar)))))))) + +(define (test-unparse-headers h) + (call-with-output-string + (lambda (o) + (unparse-headers (headers h) o)))) + +(test-group "unparsers" + (test-group "default unparser" + (test "String" + "Foo: bar\r\n" + (test-unparse-headers `((foo "bar")))) + (test "Multiple strings" + "Foo: bar, qux\r\n" + (test-unparse-headers `((foo "bar" "qux")))) + (test "Auto-quoting on commas and whitespace" + "Foo: \"bar, qux\", \"mooh blah\"\r\n" + (test-unparse-headers `((foo "bar, qux" "mooh blah")))) + ;; RFC 2616 2.2 + (test "Escaping quotes" + "Foo: \"bar \\\" qux\", mooh\r\n" + (test-unparse-headers `((foo "bar \" qux" "mooh")))) + (test "Escaping control characters" + "Foo: \"bar\\\r\\\x01qux\"\r\n" + (test-unparse-headers `((foo "bar\r\x01qux")))) + ;; Unfortunately, there are no or very few HTTP implementations + ;; which understand that newlines can be escaped with a backslash + ;; in a quoted string. That's why we don't allow it. + ;; The user is expected to escape the newlines according to the type + ;; of header (URLencoding, removing the newlines from cookies, etc) + (test-error* "Embedded newlines throw an error" + (exn http unencoded-header) + (test-unparse-headers `((foo "bar\n\x01qux")))) + (test "Alist" + "Foo: Bar=qux, Mooh=mumble\r\n" + (test-unparse-headers `((foo (bar . qux) (mooh . mumble))))) + (test "Alist with escapes" + "Foo: Bar=qux, Mooh=\"mum, ble\"\r\n" + (test-unparse-headers `((foo (bar . "qux") (mooh . "mum, ble"))))) + (test "URI" + "Foo: http://foo.com/bar;xyz?a=b\r\n" + (test-unparse-headers `((foo ,(uri-reference "http://foo.com/bar;xyz?a=b"))))) + (test "Parameters" + "Foo: bar; qux=mooh; mumble=mutter; blah\r\n" + (test-unparse-headers `((foo #(bar ((qux . mooh) + (mumble . mutter) + (blah . #t) + (feh . #f))))))) + (test "Raw headers are unparsed as-is" + "Foo: bla bla; whatever \"ohai\"\r\n" + (test-unparse-headers `((foo #("bla bla; whatever \"ohai\"" raw))))) + (test "Raw headers are unparsed as-is for known headers, too" + "Etag: \"hi there\r\n" ;; unclosed quote is intentional here + (test-unparse-headers `((etag #("\"hi there" raw))))) + (test-error* "Embedded newlines in raw headers also throw an error" + (exn http unencoded-header) + (test-unparse-headers `((foo #("bar\n\x01qux" raw)))))) + (test-group "etag unparser" + (test "Weak tag" + "Etag: W/\"blah\"\r\n" + (test-unparse-headers `((etag (weak . "blah"))))) + (test "Strong tag" + "Etag: \"blah\"\r\n" + (test-unparse-headers `((etag (strong . "blah"))))) + (test "Strong tag starting with W/" + "Etag: \"W/blah\"\r\n" + (test-unparse-headers `((etag (strong . "W/blah")))))) + (test-group "if-match unparser" + (test "List of etags" + "If-Match: \"foo\", \"bar\", W/\"qux\"\r\n" + (test-unparse-headers + `((if-match (strong . "foo") (strong . "bar") (weak . "qux"))))) + (test "Wildcard" + "If-Match: *\r\n" + (test-unparse-headers + `((if-match (strong . "foo") * (weak . "qux")))))) + ;; http-dates are all deserialized as rfc1123 + (test-group "date/time unparser" + (test "RFC1123 time" + "If-Modified-Since: Sun, 06 Nov 1994 08:49:37 GMT\r\n" + ;; Having to specify a vector here twice is sucky and counter-intuitive + (test-unparse-headers + `((if-modified-since #(#(37 49 08 06 10 94 0 310 #f 0) ())))))) + (test-group "host/port unparser" + (test "No port specified" + "Host: foo.example.com\r\n" + (test-unparse-headers `((host ("foo.example.com" . #f))))) + (test "Different port" + "Host: foo.example.com:8080\r\n" + (test-unparse-headers `((host ("foo.example.com" . 8080)))))) + (test-group "product unparser" + (test "Products without version" + "Upgrade: websocket, foo\r\n" + (test-unparse-headers `((upgrade ("websocket" . #f) ("foo" . #f))))) + (test "Products with version" + "Upgrade: TLS/1.0, bar/2\r\n" + (test-unparse-headers `((upgrade ("TLS" . "1.0") ("bar" . "2")))))) + (test-group "software unparser" + (test "Product with comments" + "User-Agent: Mozilla (X11) Gecko/2008110501\r\n" + (test-unparse-headers `((user-agent (("Mozilla" #f "X11") ("Gecko" "2008110501" #f)))))) + (test "Realistic product" + "User-Agent: Mozilla/5.0 (X11; U; NetBSD amd64; en-US; rv:1.9.0.3) Gecko/2008110501 Minefield/3.0.3\r\n" + (test-unparse-headers `((user-agent (("Mozilla" "5.0" "X11; U; NetBSD amd64; en-US; rv:1.9.0.3") ("Gecko" "2008110501" #f) ("Minefield" "3.0.3" #f))))))) + (test-group "cookie unparser" + (test "Basic cookie" + "Cookie: foo=bar; $Path=/; Qux=mooh; $Unknown=something\r\n" + (test-unparse-headers `((cookie #(("foo" . "bar") + ((path . ,(uri-reference "/")))) + #(("Qux" . "mooh") + ((unknown . "something"))))))) + (test "Port list" + "Cookie: Foo=bar; $Port=80,8080\r\n" + (test-unparse-headers `((cookie #(("Foo" . "bar") + ((port . (80 8080)))))))) + (test "#t or #f values" + "Cookie: Foo=bar; $Port\r\n" + (test-unparse-headers `((cookie #(("Foo" . "bar") + ((port . #t) (domain . #f)))))))) + (test-group "Set-Cookie unparser" + (test "Simple name/value pair" + "Set-Cookie: foo=\"bar with space\"\r\n" + (test-unparse-headers `((set-cookie ("foo" . "bar with space"))))) + ;; XXX: Should intarweb remove these, or should the user code handle this? + ;; What if interacting with actual broken code on the other side? + (test "Multiple cookies with same name (CI) are all written" + "Set-Cookie: foo=qux\r\nSet-Cookie: Foo=bar\r\n" + (test-unparse-headers `((set-cookie ("foo" . "qux") ("Foo" . "bar"))))) + (test "Cookie names preserve case" + "Set-Cookie: Foo=bar\r\n" + (test-unparse-headers `((set-cookie ("Foo" . "bar"))))) + (test "Cookie with = signs" + "Set-Cookie: foo=\"bar=qux\"; Max-Age=10\r\n" + (test-unparse-headers `((set-cookie #(("foo" . "bar=qux") ((max-age . 10))))))) + (test "Comment" + "Set-Cookie: foo=bar; Comment=\"Hi, there!\"\r\n" + (test-unparse-headers `((set-cookie #(("foo" . "bar") + ((comment . "Hi, there!"))))))) + (test "Old-style cookie expires value" + "Set-Cookie: foo=; Expires=Sunday, 20-Jul-08 15:23:42 GMT\r\n" + (test-unparse-headers `((set-cookie #(("foo" . "") + ((expires . #(42 23 15 20 6 108 0 309 #f 0)))))))) + (test "Secure (true)" + "Set-Cookie: foo=bar; Secure\r\n" + (test-unparse-headers `((set-cookie #(("foo" . "bar") + ((secure . #t))))))) + (test "Secure (false)" + "Set-Cookie: foo=bar\r\n" + (test-unparse-headers `((set-cookie #(("foo" . "bar") + ((secure . #f))))))) + + (test "Path" + "Set-Cookie: foo=bar; Path=/blah\r\n" + (test-unparse-headers `((set-cookie #(("foo" . "bar") + ((path . ,(uri-reference "/blah")) + (secure . #f)))))))) + (test-group "authorization unparser" + (test "Basic auth" + "Authorization: Basic QWxpIEJhYmE6b3BlbiBzZXNhbWU=\r\n" + (test-unparse-headers + `((authorization #(basic + ((username . "Ali Baba") + (password . "open sesame"))))))) + (test-error* "Basic auth with colon in username" + (exn http username-with-colon) + (test-unparse-headers + `((authorization #(basic + ((username . "foo:bar") + (password . "qux"))))))) + (test "Digest auth" + "Authorization: Digest username=\"Mufasa\", realm=\"testrealm@host.com\", nonce=\"dcd98b7102dd2f0e8b11d0f600bfb0c093\", uri=\"/dir/index.html\", qop=\"auth\", cnonce=\"0a4f113b\", response=\"6629fae49393a05397450978507c4ef1\", opaque=\"5ccc069c403ebaf9f0171e9517f40e41\", nc=00000001, algorithm=\"md5\"\r\n" + (test-unparse-headers + `((authorization #(digest + ((username . "Mufasa") + (realm . "testrealm@host.com") + (nonce . "dcd98b7102dd2f0e8b11d0f600bfb0c093") + (uri . ,(uri-reference "/dir/index.html")) + (qop . auth) + (cnonce . "0a4f113b") + (response . "6629fae49393a05397450978507c4ef1") + (opaque . "5ccc069c403ebaf9f0171e9517f40e41") + (nc . 1) + (algorithm . md5))))))) + (test "Custom auth" + "Authorization: Custom some-random-contents\r\n" + (parameterize ((authorization-param-subunparsers + `((custom . ,(lambda (params) + (alist-ref 'contents params))) + . ,(authorization-param-subparsers)))) + (test-unparse-headers + `((authorization #(custom ((contents . some-random-contents))))))))) + + (test-group "authenticate unparser" + (test-group "basic auth" + (test "basic" + "Www-Authenticate: Basic realm=\"WallyWorld\"\r\n" + (test-unparse-headers + `((www-authenticate #(basic + ((realm . "WallyWorld")))))))) + (test-group "digest auth" + (test "digest" + "Www-Authenticate: Digest realm=\"testrealm@host.com\", qop=\"auth,auth-int\", nonce=\"dcd98b7102dd2f0e8b11d0f600bfb0c093\", opaque=\"5ccc069c403ebaf9f0171e9517f40e41\"\r\n" + (test-unparse-headers + `((www-authenticate #(digest + ((realm . "testrealm@host.com") + (qop . (auth auth-int)) + (nonce . "dcd98b7102dd2f0e8b11d0f600bfb0c093") + (opaque . "5ccc069c403ebaf9f0171e9517f40e41"))))))) + (test "domains" + "Www-Authenticate: Digest domain=\"/example http://foo.com/bar\"\r\n" + (test-unparse-headers + `((www-authenticate #(digest + ((domain . (,(uri-reference "/example") + ,(uri-reference "http://foo.com/bar"))))))))) + (test "stale" + "Www-Authenticate: Digest realm=\"foo\", stale=TRUE\r\n" + (test-unparse-headers + `((www-authenticate #(digest + ((realm . "foo") + (stale . #t))))))) + (test "stale present but false" + "Www-Authenticate: Digest realm=\"foo\"\r\n" + (test-unparse-headers + `((www-authenticate #(digest + ((realm . "foo") + (stale . #f))))))))) + (test-group "content-disposition unparser" + (test "Attributes are always fully quoted and filenames stripped" + "Content-Disposition: form-data; name=\"foo\"; filename=\"a b c\"\r\n" + (test-unparse-headers `((content-disposition + #(form-data ((name . foo) + (filename . "blabla/a b c"))))))) + (test "Size and dates are recognised correctly" + "Content-Disposition: inline; size=20; creation-date=\"Sun, 06 Nov 1994 08:49:37 GMT\"\r\n" + (test-unparse-headers `((content-disposition + #(inline ((size . 20) + (creation-date . #(37 49 08 06 10 94 0 310 #f 0))))))))) + + (test-group "strict-transport-security unparser" + (test "Silly capitalization is honored, even if unneccessary" + "Strict-Transport-Security: max-age=10; includeSubDomains\r\n" + (test-unparse-headers `((strict-transport-security + ((max-age . 10) + (includesubdomains . #t)))))))) + +(define (test-read-request str) + (call-with-input-string str read-request)) + +(test-group "reading of requests" + (parameterize ((request-parsers `(,(lambda (line in) + (and (string=? line "foo") 'foo)) + ,(lambda (line in) + (and (string=? line "bar") 'bar))))) + (test-error* (exn http unknown-protocol-line) (test-read-request "qux")) + (test #f (test-read-request "")) + (test 'foo (test-read-request "foo")) + (test 'bar (test-read-request "bar"))) + ;; Even though we officially "should" support HTTP/0.9, we disable it + ;; by default because there are security implications of just outputting + ;; responses for random resources that might be under attacker control. + (test-group "HTTP/0.9" + (test-error* "By default, HTTP/0.9 is disabled" + (exn http unknown-protocol-line) + (test-read-request "GET /path/../to/stuff?arg1=val1&arg2=val2\r\n")) + (parameterize ((request-parsers (list http-1.x-request-parser http-0.9-request-parser))) + (let ((req (test-read-request "GET /path/../to/stuff?arg1=val1&arg2=val2\r\n"))) + (test 0 (request-major req)) + (test 9 (request-minor req)) + (test 'GET (request-method req)) + ;; Path-normalized URI (dots removed) + (test (uri-reference "/to/stuff?arg1=val1&arg2=val2") (request-uri req)) + (test (headers '()) (request-headers req))) + ;; RFC 1945 5.0 does not mention case-sensitivity for the method in HTTP/0.9. + ;; It only mentions it in the context of HTTP/1.x (section 5.1.1). + ;; We obey the BNF syntax rule in 2.1: + ;; "literal" - Quotation marks surround literal text. + ;; Unless stated otherwise, the text is case-insensitive. + ;; Section 4.1 defines: + ;; Simple-Request = "GET" SP Request-URI CRLF + (test "Method is case-insensitive" 'GET (request-method (test-read-request "geT /path\r\n"))) + (test-error "0.9 only knows GET" (test-read-request "PUT /path")))) + (test-group "HTTP/1.0" + (test-error "Asterisk is not allowed for HTTP/1.0" + (request-uri (test-read-request "OPTIONS * HTTP/1.0\r\n"))) + (let ((req (test-read-request "GET /path/to/stuff?arg1=val1&arg2=val2 HTTP/1.0\r\n\r\n"))) + (test 1 (request-major req)) + (test 0 (request-minor req)) + (test 'GET (request-method req)) + (test (uri-reference "/path/to/stuff?arg1=val1&arg2=val2") (request-uri req)) + (test (headers '()) (request-headers req))) + (test 'PUT (request-method (test-read-request "PUT /path HTTP/1.0\r\n"))) + (let ((req (test-read-request "POST / HTTP/1.0\r\nTransfer-Encoding: chunked\r\n\r\n3\r\nfoo\r\na\r\n1234567890\r\n"))) + (test "Chunking ignored" + "3\r\nfoo\r\na\r\n1234567890\r\n" + (read-string #f (request-port req))))) + (test-group "HTTP/1.1" ; No need to test all things we test for 1.0 + (test "Asterisk is treated specially and returns #f uri" + #f (request-uri (test-read-request "OPTIONS * HTTP/1.1\r\n"))) + (let ((req (test-read-request "GET /path/to/stuff?arg1=val1&arg2=val2 HTTP/1.1\r\n\r\n"))) + (test 1 (request-major req)) + (test 1 (request-minor req))) + (test 'PUT (request-method (test-read-request "PUT /path HTTP/1.1\r\n\r\n"))) + ; RFC 2616 5.1.1 + (test "Method is case-sensitive" 'geT (request-method (test-read-request "geT /path HTTP/1.1\r\n\r\n"))) + ; RFC 2616 3.1 + case-insensitivity BNF rule + (test "Protocol is case-insensitive" '1 (request-minor (test-read-request "GET /path htTP/1.1\r\n\r\n"))) + ;; TODO: Test chunking + (test-error "Request line limit exceeded gives error" + (parameterize ((http-line-limit 5)) + (test-read-request "GET /path HTTP/1.1\r\n\r\n"))) + (test "Reading request body" + '((abc . "def") (ghi . "jkl")) + (let ((req (test-read-request + "GET / HTTP/1.1\r\nContent-Length: 15\r\n\r\nabc=def;ghi=jkl"))) + (read-urlencoded-request-data req))) + (test "Reading request body with bigger limit" + '((abc . "def")) + (let ((req (test-read-request + "GET / HTTP/1.1\r\nContent-Length: 7\r\n\r\nabc=def"))) + ;; Test for 8, since 7 would error + (parameterize ((http-urlencoded-request-data-limit 8)) + (read-urlencoded-request-data req)))) + (test-error "Request body limit exceeded gives error" + (let ((req (test-read-request + "GET / HTTP/1.1\r\nContent-Length: 7\r\n\r\nabc=def"))) + ;; This errors when the limit is hit, not when it is exceeded + (parameterize ((http-urlencoded-request-data-limit 7)) + (read-urlencoded-request-data req)))) + (let ((req (test-read-request "POST / HTTP/1.1\r\nTransfer-Encoding: chunked\r\n\r\n3\r\nfoo\r\na\r\n1234567890\r\n0\r\n\r\n"))) + (test "Chunking" + "foo1234567890" + (read-string #f (request-port req))))) + (test-group "Invalid protocols" + (test-error "Total cruft is unrecognised" + (test-read-request "whatever\r\n")) + (test-error "Invalid URI also causes protocol not to be recognised" + (test-read-request "GET //path HTTP/1.0\r\n")))) + +(define (test-write-request req . outputs) + (call-with-output-string + (lambda (out) + (request-port-set! req out) + (let ((r (write-request req))) + (for-each (lambda (output) + (display output (request-port r))) + outputs) + (finish-request-body r))))) + +(test-group "writing of requests" + ;; This can also be called Simple-Request as per RFC 1945 4.1 + ;; RFC 2616 19.6 also states we should recognise 0.9 requests, but if + ;; we understand those we should also be able to generate them because + ;; a 0.9 server does not understand 1.x requests. + (test-group "HTTP/0.9" + (let ((req (make-request major: 0 minor: 9 method: 'GET + uri: (uri-reference "/foo/bar.html")))) + (test-error* "By default, HTTP/0.9 is disabled" + (exn http unknown-protocol) + (test-write-request req)) + (parameterize ((request-unparsers (list http-1.x-request-unparser + http-1.0-request-unparser + http-0.9-request-unparser))) + (test "Always empty headers" + "GET /foo/bar.html\r\n" + (test-write-request (update-request req + headers: + (headers `((foo bar)))))) + (test "Always GET" + "GET /foo/bar.html\r\n" + (test-write-request (update-request req method: 'POST)))))) + (test-group "HTTP/1.0" + (let ((req (make-request major: 1 minor: 0 + method: 'GET + uri: (uri-reference "/foo/bar.html")))) + (test "Headers" + "GET /foo/bar.html HTTP/1.0\r\nFoo: bar\r\n\r\ntest" + (test-write-request + (update-request req + headers: (headers `((foo bar)))) + "test")) + (test "Chunking ignored" + "GET /foo/bar.html HTTP/1.0\r\nTransfer-Encoding: chunked\r\n\r\nfoobar" + (test-write-request + (update-request req + headers: (headers `((transfer-encoding chunked)))) + "foo" "" "bar")))) + (test-group "HTTP/1.1" + (let ((req (make-request major: 1 minor: 1 + method: 'GET + uri: (uri-reference "/foo/bar.html")))) + (test "Headers" + "GET /foo/bar.html HTTP/1.1\r\nFoo: bar\r\n\r\ntest" + (test-write-request + (update-request req + headers: (headers `((foo bar)))) + "test")) + (test "Chunking" + "GET /foo/bar.html HTTP/1.1\r\nTransfer-Encoding: chunked\r\n\r\n3\r\nfoo\r\na\r\n1234567890\r\n0\r\n\r\n" + (test-write-request + (update-request req + headers: (headers `((transfer-encoding chunked)))) + "foo" "" "1234567890")) + (test "OPTIONS-type asterisk if no URI" + "OPTIONS * HTTP/1.1\r\n\r\n" + (test-write-request + (update-request req method: 'OPTIONS uri: #f)))))) + +(define (test-read-response input-string) + (call-with-input-string input-string read-response)) + +(test-group "reading of responses" + (test-group "HTTP/1.1" + (let ((res (test-read-response "HTTP/1.1 303 See other\r\nFoo: bar\r\n\r\nContents"))) + (test "Version detection" + '(1 . 1) + (cons (response-major res) (response-minor res))) + (test "Status" + '(see-other 303 "See other") + (list (response-status res) (response-code res) (response-reason res))) + (test "Headers" + '("bar") + (header-values 'foo (response-headers res))) + (test "Contents" + "Contents" + (read-string #f (response-port res)))) + (test-error* (exn http unknown-protocol-line) (test-read-response "qux")) + (test #f (test-read-request "")) + (test-error "Response line limit exceeded gives error" + (parameterize ((http-line-limit 5)) + (test-read-response "HTTP/1.1 200 OK\r\n\r\n"))) + (let ((res (test-read-response "HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n3\r\nfoo\r\na\r\n1234567890\r\n0\r\n\r\n"))) + (test "Chunking" + "foo1234567890" + (read-string #f (response-port res)))) + ;; Reported by "sz0ka" via IRC + (let ((res (test-read-response "HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n5\r\nfoo\r\n\r\n0\r\n\r\n"))) + (test "First read of chunked port returns first line" + "foo" + (read-line (response-port res))) + (test "Peek-char returns EOF" + #!eof + (peek-char (response-port res))) + (test "Read-char also returns EOF" + #!eof + (read-char (response-port res))))) + (test-group "HTTP/1.0" + (let ((res (test-read-response "HTTP/1.0 303 See other\r\nFoo: bar\r\n\r\nContents"))) + (test "Version detection" + '(1 . 0) + (cons (response-major res) (response-minor res))) + (test "Status" + '(303 . "See other") + (cons (response-code res) (response-reason res))) + (test "Headers" + '("bar") + (header-values 'foo (response-headers res))) + (test "Contents" + "Contents" + (read-string #f (response-port res)))) + (let ((res (test-read-response "HTTP/1.0 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n3\r\nfoo\r\na\r\n1234567890\r\n"))) + (test "Chunking ignored" + "3\r\nfoo\r\na\r\n1234567890\r\n" + (read-string #f (response-port res))))) + (test-group "HTTP/0.9" + (test-error* "By default, HTTP/0.9 is disabled" + (exn http unknown-protocol-line) + (test-read-response "Doesn't matter what's here\r\nLine 2")) + (parameterize ((response-parsers (list http-1.x-response-parser + http-1.0-response-parser + http-0.9-response-parser))) + (let ((res (test-read-response "Doesn't matter what's here\r\nLine 2"))) + (test "Always OK status" + '(200 . "OK") + (cons (response-code res) (response-reason res))) + (test "Version detection; fallback to 0.9" + '(0 . 9) + (cons (response-major res) (response-minor res))) + (test "No headers" + (headers '()) (response-headers res)) + (test "Contents" + "Doesn't matter what's here\r\nLine 2" + (read-string #f (response-port res))))))) + +(define (test-write-response res . outputs) + (call-with-output-string + (lambda (out) + (response-port-set! res out) + (let ((r (write-response res))) + (for-each (lambda (output) + (display output (response-port r))) + outputs) + (finish-response-body r))))) + +(test-group "writing of responses" + (test-group "HTTP/0.9" + (let ((res (make-response major: 0 minor: 9 + code: 200 reason: "OK"))) + (test-error* "By default, HTTP/0.9 is disabled" + (exn http unknown-protocol) + (test-write-response res "These are the contents\r\n")) + (parameterize ((response-unparsers (list http-1.x-response-unparser + http-1.0-response-unparser + http-0.9-response-unparser))) + (test "Headers ignored" + "These are the contents\r\n" + (test-write-response + (update-response res headers: (headers `((foo bar)))) + "These are the contents\r\n"))))) + (test-group "HTTP/1.0" + (let ((res (make-response major: 1 minor: 0 + code: 200 reason: "OK"))) + (test "Headers used" + "HTTP/1.0 200 OK\r\nFoo: bar\r\n\r\nThese are the contents\r\n" + (test-write-response + (update-response res headers: (headers `((foo bar)))) + "These are the contents\r\n")) + (test "Status code" + "HTTP/1.0 303 See other\r\n\r\nThese are the contents\r\n" + (test-write-response + (update-response res code: 303 reason: "See other") + "These are the contents\r\n")) + (test "Chunking ignored" + "HTTP/1.0 200 OK\r\nTransfer-Encoding: chunked\r\n\r\nfoo1234567890" + (test-write-response + (update-response + res + headers: (headers `((transfer-encoding chunked)))) + "foo" "1234567890")))) + (test-group "HTTP/1.1" + (let ((res (make-response major: 1 minor: 1 + code: 200 reason: "OK"))) + (test "Headers used" + "HTTP/1.1 200 OK\r\nFoo: bar\r\n\r\nThese are the contents\r\n" + (test-write-response + (update-response res headers: (headers `((foo bar)))) + "These are the contents\r\n")) + (test "Status code" + "HTTP/1.1 303 See other\r\n\r\nThese are the contents\r\n" + (test-write-response + (update-response res code: 303 reason: "See other") + "These are the contents\r\n")) + (test "Chunking" + "HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n3\r\nfoo\r\na\r\n1234567890\r\n0\r\n\r\n" + (test-write-response + (update-response + res + headers: (headers `((transfer-encoding chunked)))) + "foo" "1234567890")))) + (test-group "status" + (let ((res (make-response major: 1 minor: 1))) + (test "reason and code are looked up by symbol properly" + "HTTP/1.1 409 Conflict\r\n\r\ntest" + (test-write-response (update-response res status: 'conflict) + "test")) + (test-error "an error is raised for unknown status codes" + (update-response res status: 'unknown)) + (test "any status can be used when code and reason are given directly" + "HTTP/1.1 999 No Way\r\n\r\ntest" + (test-write-response + (update-response res code: 999 reason: "No Way") + "test")) + (test "defaults can be parameterized" + "HTTP/1.1 999 Say What\r\n\r\ntest" + (parameterize ((http-status-codes + (alist-cons 'say-what (cons 999 "Say What") + (http-status-codes)))) + (test-write-response (update-response res status: 'say-what) + "test")))))) + +(test-group "etag comparison procedures" + (test-group "weak comparison" + (test-assert "Strong etag does not match list not containing it" + (not (etag-matches-weakly? + '(strong . "xyz") `((strong . "blabla"))))) + (test-assert "Weak etag does not match list not containing it" + (not (etag-matches-weakly? + '(weak . "xyz") `((weak . "blabla"))))) + (test-assert "Weak etag matches list containing it" + (etag-matches-weakly? + '(weak . "xyz") `((strong . "blabla") (weak . "xyz")))) + (test-assert "Strong etag matches list containing it" + (etag-matches-weakly? + '(strong . "xyz") `((strong . "blabla") (strong . "xyz")))) + (test-assert "Weak etag does not match list containing same tag but strong" + (not (etag-matches-weakly? + '(weak . "xyz") `((strong . "blabla") (strong . "xyz"))))) + (test-assert "Strong etag does not match list containing same tag but weak" + (not (etag-matches-weakly? + '(strong . "xyz") `((strong . "blabla") (weak . "xyz"))))) + (test-assert "Weak etag matches list containing wildcard" + (etag-matches-weakly? + '(weak . "xyz") `((strong . "blabla") *))) + (test-assert "Strong etag matches list containing wildcard" + (etag-matches-weakly? + '(strong . "xyz") `((strong . "blabla") *)))) + (test-group "strong comparison" + (test-assert "Strong etag does not match list not containing it" + (not (etag-matches? + '(strong . "xyz") `((strong . "blabla"))))) + (test-assert "Weak etag does not match list not containing it" + (not (etag-matches? + '(weak . "xyz") `((weak . "blabla"))))) + (test-assert "Weak etag does *not* match list containing it" + (not (etag-matches? + '(weak . "xyz") `((strong . "blabla") (weak . "xyz"))))) + (test-assert "Strong etag matches list containing it" + (etag-matches? + '(strong . "xyz") `((strong . "blabla") (strong . "xyz")))) + (test-assert "Weak etag does not match list containing same tag but strong" + (not (etag-matches? + '(weak . "xyz") `((strong . "blabla") (strong . "xyz"))))) + (test-assert "Strong etag does not match list containing same tag but weak" + (not (etag-matches? + '(strong . "xyz") `((strong . "blabla") (weak . "xyz"))))) + (test-assert "Weak etag matches list containing wildcard" + (etag-matches? + '(weak . "xyz") `((strong . "blabla") *))) + (test-assert "Strong etag matches list containing wildcard" + (etag-matches? + '(strong . "xyz") `((strong . "blabla") *))))) + + +;; We don't expose chunked-output-port/chunked-input-port. Maybe we should? +;; To work around this, prepend some stuff and parse some headers +(define (chunked-inport string) + (let ((res (test-read-response + (string-append + "HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n" + string)))) + (response-port res))) + +(test-group "Chunked ports" + (let ((s "5\r\nab\ncd\r\n2\r\n\nx\r\n0\r\nDO NOT WANT")) + (test "read-lines" '("ab" "cd" "x") (read-lines (chunked-inport s))) + (let ((p (chunked-inport s))) + (test "read-char" #\a (read-char p)) + (test "peek-char" #\b (peek-char p)) + (test "partial read" "b\n" (read-string 2 p)) + (test "short read" "c" (read-string 1 p)) + (test "read across chunk boundaries" "d\nx" (read-string 3 p)) + (test "read at eof" #!eof (read-string 1 p))) + (test "read beyond chunked port size" + "ab\ncd\nx" (read-string 10 (chunked-inport s))))) + +(test-end) + +(unless (zero? (test-failure-count)) (exit 1)) + +;; TODO: +;; - Fix the parsing system so it's not so broken (more comfortable combinators) +;; - Test malformed headers +;; - Add parsing capability for quoted-pairs inside tokens and comments +;; - Rethink the auto-chunking stuff. Maybe this should be done at a higher level |