From 90a1f7d47525cfffe928e9a89becf622bd85a8a1 Mon Sep 17 00:00:00 2001 From: Peter Bex Date: Fri, 22 Jun 2018 22:22:24 +0200 Subject: Initial CHICKEN 5 port of intarweb 1.7 --- intarweb.scm | 1055 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1055 insertions(+) create mode 100644 intarweb.scm (limited to 'intarweb.scm') diff --git a/intarweb.scm b/intarweb.scm new file mode 100644 index 0000000..3df4690 --- /dev/null +++ b/intarweb.scm @@ -0,0 +1,1055 @@ +;;; +;;; Intarweb is an improved HTTP library for Chicken +;;; +;; Copyright (c) 2008-2018, Peter Bex +;; All rights reserved. +;; +;; Redistribution and use in source and binary forms, with or without +;; modification, are permitted provided that the following conditions +;; are met: +;; +;; 1. Redistributions of source code must retain the above copyright +;; notice, this list of conditions and the following disclaimer. +;; 2. Redistributions in binary form must reproduce the above copyright +;; notice, this list of conditions and the following disclaimer in the +;; documentation and/or other materials provided with the distribution. +;; 3. Neither the name of the author nor the names of its +;; contributors may be used to endorse or promote products derived +;; from this software without specific prior written permission. +;; +;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +;; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +;; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +;; FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +;; COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +;; INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +;; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +;; HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +;; STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +;; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +;; OF THE POSSIBILITY OF SUCH DAMAGE. + +;; TODO: Support RFC5987? Seems awfully messy though (need to pull in iconv?) +;; We could use http://www.greenbytes.de/tech/tc2231/ in the testsuite. +;; Look at that URI's toplevel directory for more HTTP/URI-related testcases! + +(module intarweb + (http-line-limit http-header-limit http-urlencoded-request-data-limit + replace-header-contents replace-header-contents! remove-header remove-header! + update-header-contents update-header-contents! headers single-headers + headers? headers->list http-name->symbol symbol->http-name + header-parsers header-unparsers unparse-header unparse-headers read-headers + safe-methods safe? idempotent-methods idempotent? keep-alive? response-class + etag=? etag=-weakly? etag-matches? etag-matches-weakly? + + make-request request? request-major request-major-set! + request-minor request-minor-set! + request-method request-method-set! request-uri request-uri-set! + request-headers request-headers-set! request-port request-port-set! + update-request set-request! request-has-message-body? + + request-parsers read-request request-unparsers write-request + finish-request-body http-0.9-request-parser http-1.x-request-parser + http-0.9-request-unparser http-1.0-request-unparser http-1.x-request-unparser + header-parse-error-handler + read-urlencoded-request-data + + make-response response? response-major response-major-set! + response-minor response-minor-set! + response-code response-code-set! response-reason response-reason-set! + response-status response-status-set! response-headers response-headers-set! + response-port response-port-set! update-response set-response! + response-has-message-body-for-request? + + write-response response-parsers response-unparsers read-response + finish-response-body http-0.9-response-parser http-0.9-response-unparser + http-1.0-response-parser http-1.0-response-unparser + http-1.x-response-parser http-1.x-response-unparser + http-status-codes http-status->code&reason + + ;; http-header-parsers + header-contents header-values header-value header-params header-param + get-value get-params get-param + + split-multi-header parse-token parse-comment + parse-params parse-value+params unparse-params + multiple single make-key/value-subparser + + rfc1123-string->time rfc850-string->time asctime-string->time + http-date-string->time + rfc1123-subparser rfc850-subparser asctime-subparser http-date-subparser + product-subparser quality-subparser unknown-header-parser + filename-subparser symbol-subparser symbol-subparser-ci natnum-subparser + host/port-subparser base64-subparser range-subparser filename-subparser + etag-parser software-parser mailbox-subparser + if-range-parser retry-after-subparser via-parser warning-parser + key/value-subparser set-cookie-parser cache-control-parser pragma-parser + te-parser cookie-parser strict-transport-security-parser + + must-be-quoted-chars quote-string unparse-token + default-header-unparser etag-unparser host/port-unparser + product-unparser software-unparser rfc1123-unparser cookie-unparser + strict-transport-security-unparser + + ;; Subparsers/subunparsers + authorization-param-subparsers + basic-auth-param-subparser digest-auth-param-subparser + + authorization-param-subunparsers + basic-auth-param-subunparser digest-auth-param-subunparser + ) + +(import scheme (chicken base) (chicken foreign) (chicken irregex) + (chicken format) (chicken io) (chicken string) + (chicken time posix) (chicken pathname) (chicken fixnum) + (chicken condition) (chicken port) (chicken syntax) + srfi-1 srfi-13 srfi-14 base64 uri-common defstruct) + +;; The below can all be #f if you want no limit (not recommended!) +(define http-line-limit (make-parameter 4096)) +(define http-header-limit (make-parameter 64)) +(define http-urlencoded-request-data-limit (make-parameter (* 4 1024 1024))) + +(define (read-urlencoded-request-data + request #!optional (max-length (http-urlencoded-request-data-limit))) + (let* ((p (request-port request)) + (len (header-value 'content-length (request-headers request))) + ;; For simplicity's sake, we don't allow exactly the max request limit + (limit (if (and len max-length) + (min len max-length) + (or max-length len))) + (data (read-string limit (request-port request)))) + (if (and (not (eof-object? data)) max-length (= max-length (string-length data))) + (signal-http-condition + 'read-urlencoded-request-data + "Max allowed URLencoded request size exceeded" + (list request max-length) + 'urlencoded-request-data-limit-exceeded + 'contents data 'limit limit) + (form-urldecode data)))) + +(define (raise-line-limit-exceeded-error line limit port) + (let ((safe-line-prefix + (if (< limit 128) + (sprintf "~A[..and more (was limited to ~A)..]" line limit) + (sprintf "~A[..~A+ more chars (was limited to ~A)..]" + (substring line 0 128) (- limit 128) limit)))) + (signal-http-condition + 'safe-read-line + "Max allowed line length exceeded" + (list port safe-line-prefix) + 'line-limit-exceeded 'contents line 'limit limit))) + +(define (safe-read-line p) + (let* ((limit (http-line-limit)) + (line (read-line p (http-line-limit)))) + (if (and (not (eof-object? line)) limit (= limit (string-length line))) + (raise-line-limit-exceeded-error line limit p) + line))) + +;; Make headers a new type, to force the use of the HEADERS procedure +;; and ensure only proper header values are passed to all procedures +;; that deal with headers. +(define-record headers v) + +(define-record-printer (headers h out) + (fprintf out "#(headers: ~S)" (headers-v h))) + +(define headers->list headers-v) + +(define (remove-header! name headers) + (let loop ((h (headers-v headers))) + (cond + ((null? h) headers) + ((eq? name (caar h)) + (set-cdr! h (cdr h)) + headers) + (else (loop (cdr h)))))) + +(define (remove-header name headers) + (make-headers + (let loop ((h (headers-v headers))) + (cond + ((null? h) h) + ((eq? name (caar h)) (loop (cdr h))) + (else (cons (car h) (loop (cdr h)))))))) + +;; Check that the header values are valid vectors, and that if there +;; is a raw value, there is only one value at all. +(define (check-header-values loc name contents) + (let lp ((mode 'unknown) (todo contents)) + (let ((head (car todo))) + (if (not (and (vector? head) (= 2 (vector-length head)))) + (signal-http-condition + loc "header values must be vectors of length 2" + (list name contents) 'header-value) + (let ((type (if (eq? (get-params head) 'raw) 'raw 'cooked))) + (unless (or (eq? mode 'unknown) (eq? mode type)) + (signal-http-condition + loc "When using raw headers, all values must be raw" + (list name contents) 'header-value) + (lp type (cdr todo)))))))) + +;; XXX: Do we need these replace procedures in the exports list? It +;; looks like we can use update everywhere. +(define (replace-header-contents! name contents headers) + (check-header-values 'replace-header-contents! name contents) + (let loop ((h (headers-v headers))) + (cond + ((null? h) + (headers-v-set! + headers (cons (cons name contents) (headers-v headers))) + headers) + ((eq? name (caar h)) + (set-cdr! (car h) contents) + headers) + (else (loop (cdr h)))))) + +(define (replace-header-contents name contents headers) + (check-header-values 'replace-header-contents! name contents) + (make-headers + (let loop ((h (headers-v headers))) + (cond + ((null? h) (cons (cons name contents) h)) + ((eq? name (caar h)) + (cons (cons (caar h) contents) (cdr h))) + (else (cons (car h) (loop (cdr h)))))))) + +(define (make-updater replacer) + (lambda (name contents headers) + (let ((old (header-contents name headers '()))) + (replacer name + (if (member name (single-headers)) + (list (last contents)) + (append old contents)) + headers)))) + +(define update-header-contents (make-updater replace-header-contents)) +(define update-header-contents! (make-updater replace-header-contents!)) + +(define http-name->symbol (compose string->symbol string-downcase!)) +(define symbol->http-name (compose string-titlecase symbol->string)) + +;; Make a header set from a literal expression by folding in the headers +;; with any previous ones +(define (headers headers-to-be #!optional (old-headers (make-headers '()))) + (fold (lambda (h new-headers) + (update-header-contents + (car h) + (map (lambda (v) + (if (vector? v) v (vector v '()))) ; normalize to vector + (cdr h)) + new-headers)) + old-headers + headers-to-be)) + +(define (normalized-uri str) + (and-let* ((uri (uri-reference str))) + (uri-normalize-path-segments uri))) + +(include "header-parsers") ; Also includes header unparsers + +;; Any unknown headers are considered to be multi-headers, always +(define single-headers + (make-parameter '(accept-ranges age authorization content-disposition + content-length content-location content-md5 content-type + date etag expect expires host if-modified-since + if-unmodified-since last-modified location max-forwards + proxy-authorization range referer retry-after server + transfer-encoding user-agent www-authenticate))) + +(define string->http-method string->symbol) +(define http-method->string symbol->string) + +;; Make an output port automatically "chunked" +(define (chunked-output-port port) + (let ((chunked-port + (make-output-port (lambda (s) ; write + (let ((len (string-length s))) + (unless (zero? len) + (fprintf port "~X\r\n~A\r\n" len s)))) + (lambda () ; close + (close-output-port port)) + (lambda () ; flush + (flush-output port))))) + ;; first "reserved" slot + ;; Slot 7 should probably stay 'custom + (##sys#setslot chunked-port 10 'chunked-output-port) + ;; second "reserved" slot + (##sys#setslot chunked-port 11 port) + chunked-port)) + +;; Make an input port automatically "chunked" +(define (chunked-input-port port) + (let* ((chunk-length 0) + (position 0) + (check-position (lambda () + (when (and position (>= position chunk-length)) + (unless (eq? chunk-length 0) + (safe-read-line port)) ; Read \r\n data trailer + (let ((line (safe-read-line port))) + (if (eof-object? line) + (set! position #f) + (begin + (set! chunk-length (string->number line 16)) + (cond + ((not chunk-length) (set! position #f)) + ((zero? chunk-length) ; Read final data trailer + (safe-read-line port) + (set! position #f)) + (else (set! position 0)))))))))) + (make-input-port (lambda () ; read-char + (check-position) + (if position + (let ((char (read-char port))) + (unless (eof-object? char) + (set! position (add1 position))) + char) + #!eof)) + (lambda () ; ready? + (check-position) + (or (not position) (char-ready? port))) + (lambda () ; close + (close-input-port port)) + (lambda () ; peek-char + (check-position) + (if position + (peek-char port) + #!eof)) + (lambda (p bytes buf off) ; read-string! + (let lp ((todo bytes) + (total-bytes-read 0) + (off off)) + (check-position) + (if (or (not position) (= todo 0)) + total-bytes-read + (let* ((n (min todo (- chunk-length position))) + (bytes-read (read-string! n buf port off))) + (set! position (+ position bytes-read)) + (lp (- todo bytes-read) + (+ total-bytes-read bytes-read) + (+ off bytes-read))))))))) +;; TODO: Note that in the above, read-line is not currently +;; implemented. It is *extremely* tricky to correctly maintain the +;; port position when all \r *AND/OR* \n characters get chopped off +;; the line-string. It can be done by maintaining our own extra +;; buffer, but that complicates all the procedures here enormously, +;; including read-line itself. + +;; RFC2616, Section 4.3: "The presence of a message-body in a request +;; is signaled by the inclusion of a Content-Length or Transfer-Encoding +;; header field in the request's message-headers." +;; We don't check the method since "a server SHOULD read and forward the +;; a message-body on any request", even it shouldn't be sent for that method. +;; +;; Because HTTP/1.0 has no official definition of when a message body +;; is present, we'll assume it's always present, unless there is no +;; content-length and we have a keep-alive connection. +(define request-has-message-body? + (make-parameter + (lambda (req) + (let ((headers (request-headers req))) + (if (and (= 1 (request-major req)) (= 0 (request-minor req))) + (not (eq? 'keep-alive (header-contents 'connection headers))) + (or (header-contents 'content-length headers) + (header-contents 'transfer-encoding headers))))))) + +;; RFC2616, Section 4.3: "For response messages, whether or not a +;; message-body is included with a message is dependent on both the +;; request method and the response status code (section 6.1.1)." +(define response-has-message-body-for-request? + (make-parameter + (lambda (resp req) + (not (or (= (response-class resp) 100) + (memv (response-code resp) '(204 304)) + (eq? 'HEAD (request-method req))))))) + +;; OPTIONS and TRACE are not explicitly mentioned in section 9.1.1, +;; but section 9.1.2 says they SHOULD NOT have side-effects by +;; definition, which means they are safe, as well. +(define safe-methods + (make-parameter '(GET HEAD OPTIONS TRACE))) + +;; RFC2616, Section 9.1.1 +(define (safe? obj) + (let ((method (if (request? obj) (request-method obj) obj))) + (not (not (member method (safe-methods)))))) + +(define idempotent-methods + (make-parameter '(GET HEAD PUT DELETE OPTIONS TRACE))) + +;; RFC2616, Section 9.1.2 +(define (idempotent? obj) + (let ((method (if (request? obj) (request-method obj) obj))) + (not (not (member method (idempotent-methods)))))) + +(define (keep-alive? obj) + (let ((major (if (request? obj) (request-major obj) (response-major obj))) + (minor (if (request? obj) (request-minor obj) (response-minor obj))) + (con (header-value 'connection (if (request? obj) + (request-headers obj) + (response-headers obj))))) + (if (and (= major 1) (> minor 0)) + (not (eq? con 'close)) + ;; RFC 2068, section 19.7.1 (see also RFC 2616, section 19.6.2) + (eq? con 'keep-alive)))) + +(define (etag=? a b) + (and (not (eq? 'weak (car a))) + (eq? (car a) (car b)) + (string=? (cdr a) (cdr b)))) + +(define (etag=-weakly? a b) + (and (eq? (car a) (car b)) + (string=? (cdr a) (cdr b)))) + +(define (etag-matches? etag matchlist) + (any (lambda (m) (or (eq? m '*) (etag=? etag m))) matchlist)) + +(define (etag-matches-weakly? etag matchlist) + (any (lambda (m) (or (eq? m '*) (etag=-weakly? etag m))) matchlist)) + +;;;;;;;;;;;;;;;;;;;;;;;;; +;;;; Request parsing ;;;; +;;;;;;;;;;;;;;;;;;;;;;;;; + +;; This includes parsers for all RFC-defined headers +(define header-parsers + (make-parameter + `((accept . ,(multiple symbol-subparser-ci + `((q . ,quality-subparser)))) + (accept-charset . ,(multiple symbol-subparser-ci + `((q . ,quality-subparser)))) + (accept-encoding . ,(multiple symbol-subparser-ci + `((q . ,quality-subparser)))) + (accept-language . ,(multiple symbol-subparser-ci + `((q . ,quality-subparser)))) + (accept-ranges . ,(single symbol-subparser-ci)) + (age . ,(single natnum-subparser)) + (allow . ,(multiple symbol-subparser)) + (authorization . ,authorization-parser) + (cache-control . ,cache-control-parser) + (connection . ,(multiple symbol-subparser-ci)) + (content-encoding . ,(multiple symbol-subparser-ci)) + (content-language . ,(multiple symbol-subparser-ci)) + (content-length . ,(single natnum-subparser)) + (content-location . ,(single normalized-uri)) + (content-md5 . ,(single base64-subparser)) + (content-range . ,(single range-subparser)) + (content-type . ,(single symbol-subparser-ci + `((charset . ,symbol-subparser-ci)))) + (date . ,(single http-date-subparser)) + (etag . ,etag-parser) + (expect . ,(single (make-key/value-subparser '()))) + (expires . ,(single http-date-subparser)) + (from . ,(multiple mailbox-subparser)) + (host . ,(single host/port-subparser)) + (if-match . ,if-match-parser) + (if-modified-since . ,(single http-date-subparser)) + (if-none-match . ,if-match-parser) + (if-range . ,if-range-parser) + (if-unmodified-since . ,(single http-date-subparser)) + (last-modified . ,(single http-date-subparser)) + (location . ,(single normalized-uri)) + (max-forwards . ,(single natnum-subparser)) + (pragma . ,pragma-parser) + (proxy-authenticate . ,authenticate-parser) + (proxy-authorization . ,authorization-parser) + (range . ,(multiple range-subparser)) + (referer . ,(single normalized-uri)) + (retry-after . ,(single retry-after-subparser)) + (server . ,software-parser) + (te . ,te-parser) + (trailer . ,(multiple symbol-subparser-ci)) + (transfer-encoding . ,(single symbol-subparser-ci)) + (upgrade . ,(multiple product-subparser)) + (user-agent . ,software-parser) + (vary . ,(multiple symbol-subparser-ci)) + (via . ,via-parser) + (warning . ,warning-parser) + (www-authenticate . ,authenticate-parser) + ;; RFC 2183 + (content-disposition . ,(single symbol-subparser-ci + `((filename . ,filename-subparser) + (creation-date . ,rfc1123-subparser) + (modification-date . ,rfc1123-subparser) + (read-date . ,rfc1123-subparser) + (size . ,natnum-subparser)))) + ;; RFC 2109 + (set-cookie . ,set-cookie-parser) + (cookie . ,cookie-parser) + ;; + ;; TODO: RFC 2965? + ;; + ;; RFC 6797 + (strict-transport-security . ,strict-transport-security-parser) + ;; Nonstandard but common headers + (x-forwarded-for . ,(multiple identity)) + ))) + +(define header-parse-error-handler ;; ignore errors + (make-parameter (lambda (header-name contents headers exn) headers))) + +;; The parser is supposed to return a list of header values for its header +(define (parse-header name contents) + (let* ((default unknown-header-parser) + (parser (alist-ref name (header-parsers) eq? default))) + (parser contents))) + +(define (parse-header-line line headers) + (or + (and-let* ((colon-idx (string-index line #\:)) + (header-name (http-name->symbol (string-take line colon-idx))) + (contents (string-trim-both (string-drop line (add1 colon-idx))))) + (handle-exceptions + exn + ((header-parse-error-handler) header-name contents headers exn) + (update-header-contents! + header-name (parse-header header-name contents) headers))) + (signal-http-condition + 'parse-header-line "Bad header line" (list line) + 'header-error 'contents line))) + +;; XXXX: Bottleneck? +(define (read-headers port) + (if (eof-object? (peek-char port)) ; Yeah, so sue me + (make-headers '()) + (let ((header-limit (http-header-limit)) + (line-limit (http-line-limit))) + (let lp ((c (read-char port)) + (ln '()) + (headers (make-headers '())) + (hc 0) + (len 0)) + (cond ((eqv? len line-limit) + (raise-line-limit-exceeded-error + (reverse-list->string ln) line-limit port)) + ((eof-object? c) + (if (null? ln) + headers + (parse-header-line (reverse-list->string ln) headers))) + ;; Only accept CRLF (we're not this strict everywhere...) + ((and (eqv? c #\return) (eqv? (peek-char port) #\newline)) + (read-char port) ; Consume and discard NL + (if (null? ln) ; Nothing came before: end of headers + headers + (let ((pc (peek-char port))) + (if (and (not (eof-object? pc)) + (or (eqv? pc #\space) (eqv? pc #\tab))) + ;; If the next line starts with whitespace, + ;; it's a continuation line of the same + ;; header. See section 2.2 of RFC 2616. + (let skip ((pc (read-char port)) (len len) (ln ln)) + (if (and (not (eqv? len line-limit)) + (or (eqv? pc #\space) (eqv? pc #\tab))) + (skip (read-char port) (add1 len) (cons pc ln)) + (lp pc ln headers hc len))) + (let* ((ln (reverse-list->string ln)) + (headers (parse-header-line ln headers)) + (hc (add1 hc))) + (when (eqv? hc header-limit) + (signal-http-condition + 'read-headers + "Max allowed header count exceeded" + (list port) + 'header-limit-exceeded + 'contents ln + 'headers headers + 'limit header-limit)) + (lp (read-char port) '() headers hc 0)))))) + ((eqv? c #\") + (let lp2 ((c2 (read-char port)) + (ln (cons c ln)) + (len len)) + (cond ((or (eqv? 0 len) (eof-object? c2)) + (lp c2 ln headers hc len)) + ((eqv? c2 #\") + (lp (read-char port) (cons c2 ln) + headers hc (add1 len))) + ((eqv? c2 #\\) + (let ((c3 (read-char port)) + (len len)) + (if (or (eof-object? c3) (eqv? 0 len)) + (lp c3 (cons c2 ln) headers hc len) + (lp2 (read-char port) + (cons c3 (cons c2 ln)) + (add1 len))))) + (else + (lp2 (read-char port) (cons c2 ln) (add1 len)))))) + (else + (lp (read-char port) (cons c ln) headers hc (add1 len)))))))) + +(define (signal-http-condition loc msg args type . more-info) + (signal (make-composite-condition + (make-property-condition 'http) + (apply make-property-condition type more-info) + (make-property-condition + 'exn 'location loc 'message msg 'arguments args)))) + +(defstruct request + (method 'GET) uri (major 1) (minor 1) (headers (make-headers '())) port) + +;; Perhaps we should have header parsers indexed by version or +;; something like that, so you can define the maximum version. Useful +;; for when expecting a response. Then we group request/response parsers +;; together, as with request/response unparsers. +(define http-0.9-request-parser + (let ((req (irregex '(seq (w/nocase "GET") (+ space) (=> uri (* any)))))) + (lambda (line in) + (and-let* ((m (irregex-match req line)) + (uri (normalized-uri (irregex-match-substring m 'uri)))) + (make-request method: 'GET uri: uri + major: 0 minor: 9 port: in))))) + +;; Might want to reuse this elsewhere +(define token-sre '(+ (~ "()<>@,;:\\\"/[]?={}\t "))) + +;; XXX This actually parses anything >= HTTP/1.0 +(define http-1.x-request-parser + (let ((req (irregex `(seq (=> method ,token-sre) (+ space) + (=> uri (+ (~ blank))) ; uri-common handles details + (+ space) (w/nocase "HTTP/") + (=> major (+ digit)) "." (=> minor (+ digit)))))) + (lambda (line in) + (and-let* ((m (irregex-match req line)) + (uri-string (irregex-match-substring m 'uri)) + (major (string->number (irregex-match-substring m 'major))) + (minor (string->number (irregex-match-substring m 'minor))) + (method (string->http-method (irregex-match-substring m 'method))) + (headers (read-headers in))) + (let* ((wildcard (string=? uri-string "*")) + (uri (and (not wildcard) (normalized-uri uri-string))) + ;; HTTP/1.0 has no chunking + (port (if (and (or (> major 1) (>= minor 1)) + (memq 'chunked + (header-values + 'transfer-encoding headers))) + (chunked-input-port in) + in))) + ;; HTTP/1.1 allows several "things" as "URI" (RFC2616, 5.1.2): + ;; Request-URI = "*" | absoluteURI | abs_path | authority + ;; + ;; HTTP/1.0, URIs are more limited (RFC1945, 5.1.2): + ;; Request-URI = absoluteURI | abs_path + ;; + ;; Currently, a plain authority is not accepted. This would + ;; require deep changes in the representation of request + ;; objects. It is only used in CONNECT requests, so + ;; currently not much of a problem. If we want to support + ;; this, we'd need a separate object type and expose a + ;; parser from uri-generic/uri-common for just authority. + (and (or (and wildcard (or (> major 1) (>= minor 1))) + (and uri (or (absolute-uri? uri) + (and (uri-path-absolute? uri) + (not (uri-host uri)))))) + (make-request method: method uri: uri + major: major minor: minor + headers: headers + port: port))))))) + +(define request-parsers ; order matters here + (make-parameter (list http-1.x-request-parser))) + +(define (read-request inport) + (let ((line (safe-read-line inport))) + (and (not (eof-object? line)) + ;; Try each parser in turn to process the request-line. + ;; A parser returns either #f or a request object + (let loop ((parsers (request-parsers))) + (if (null? parsers) + (signal-http-condition + 'read-request "Unknown protocol line" (list line) + 'unknown-protocol-line 'line line) + (or ((car parsers) line inport) (loop (cdr parsers)))))))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;; Request unparsing ;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define header-unparsers + (make-parameter + `((content-disposition . ,content-disposition-unparser) + (date . ,rfc1123-unparser) + (etag . ,etag-unparser) + (expires . ,rfc1123-unparser) + (host . ,host/port-unparser) + (if-match . ,if-match-unparser) + (if-modified-since . ,rfc1123-unparser) + (if-none-match . ,if-match-unparser) + (if-unmodified-since . ,rfc1123-unparser) + (last-modified . ,rfc1123-unparser) + (user-agent . ,software-unparser) + (server . ,software-unparser) + (upgrade . ,product-unparser) + (cookie . ,cookie-unparser) + (set-cookie . ,set-cookie-unparser) + (authorization . ,authorization-unparser) + (www-authenticate . ,authenticate-unparser) + (proxy-authorization . ,authorization-unparser) + (proxy-authenticate . ,authenticate-unparser) + (via . ,via-unparser) + ;; RFC 6797 + (strict-transport-security . ,strict-transport-security-unparser)))) + +(define (unparse-header header-name header-value) + (cond ((and (not (null? header-value)) + (eq? 'raw (get-params (car header-value)))) + (map get-no-newline-value header-value)) + ((assq header-name (header-unparsers)) + => (lambda (unparser) ((cdr unparser) header-value))) + (else (default-header-unparser header-value)))) + +(define (unparse-headers headers out) + (let ((unparsers (header-unparsers))) ; Don't access parameter for each header + (for-each + (lambda (h) + (let* ((name (car h)) + (name-s (symbol->http-name name)) + (contents (cdr h)) + (unparse (cond ((assq name unparsers) => cdr) ; inlined for perf + (else default-header-unparser)))) + (handle-exceptions exn + (if ((condition-predicate 'http) exn) + (signal exn) ;; Do not tamper with our own custom errors + (let* ((none "(no error message provided in original exn)") + (msg ((condition-property-accessor + 'exn 'message none) exn)) + (loc ((condition-property-accessor + 'exn 'location #f) exn)) + (args ((condition-property-accessor + 'exn 'arguments '()) exn))) + (signal-http-condition + 'unparse-headers + (sprintf "could not unparse ~S header ~S: ~A~A" + name-s contents (if loc (sprintf "(~A) " loc) "") msg) + args + 'unparse-error + 'header-name name + 'header-value contents + 'unparser unparse + 'original-exn exn))) + (let ((lines (if (and (not (null? contents)) + (eq? 'raw (get-params (car contents)))) + (map get-no-newline-value contents) + (unparse contents)))) + (for-each (lambda (value) + ;; Verify there's no \r\n or \r or \n in value? + (display (string-append name-s ": " value "\r\n") out)) + lines))))) + (headers-v headers)))) + +;; Use string-append and display rather than fprintf so the line gets +;; written in one burst. This supposedly avoids a strange race +;; condition, see #800. We use string-append instead of sprintf for +;; performance reasons. This is not exported, and our callers compare +;; request-major and request-minor so we can assume they're numbers. +(define (write-request-line request) + (let ((uri (request-uri request))) + (display (string-append + (http-method->string (request-method request)) + " " (if uri (uri->string uri) "*") " HTTP/" + (number->string (request-major request)) "." + (number->string (request-minor request)) "\r\n") + (request-port request)))) + +(define (http-0.9-request-unparser request) + (display (string-append "GET " (uri->string (request-uri request)) "\r\n") + (request-port request)) + request) + +(define (http-1.0-request-unparser request) + (and-let* (((= (request-major request) 1)) + ((= (request-minor request) 0)) + (o (request-port request))) + (write-request-line request) + (unparse-headers (request-headers request) o) + (display "\r\n" o) + request)) + +;; XXX This actually unparses anything >= HTTP/1.1 +(define (http-1.x-request-unparser request) + (and-let* (((or (> (request-major request) 1) + (and (= (request-major request) 1) + (> (request-minor request) 0)))) + (o (request-port request))) + (write-request-line request) + (unparse-headers (request-headers request) o) + (display "\r\n" o) + (if (memq 'chunked (header-values 'transfer-encoding + (request-headers request))) + (update-request request + port: (chunked-output-port (request-port request))) + request))) + +(define request-unparsers ; order matters here + (make-parameter (list http-1.x-request-unparser http-1.0-request-unparser))) + +(define (write-request request) + ;; Try each unparser in turn to write the request-line. + ;; An unparser returns either #f or a new request object. + (let loop ((unparsers (request-unparsers))) + (if (null? unparsers) + (let ((major (request-major request)) + (minor (request-minor request))) + (signal-http-condition + 'write-request + "Unknown protocol" (list (conc major "." minor)) + 'unknown-protocol 'major major 'minor minor)) + (or ((car unparsers) request) (loop (cdr unparsers)))))) + +;; Required for chunked requests. This is a bit of a hack! +(define (finish-request-body request) + (when (and (memq 'chunked (header-values 'transfer-encoding + (request-headers request))) + (eq? (##sys#slot (request-port request) 10) 'chunked-output-port)) + (display "0\r\n\r\n" (##sys#slot (request-port request) 11))) + request) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;; Response unparsing ;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(defstruct response + (code 200) (reason "OK") (major 1) (minor 1) (headers (make-headers '())) port) + +(define make-response + (let ((old-make-response make-response)) + (lambda (#!rest args #!key status code reason) + (let ((resp (apply old-make-response args))) + (when (and status (not code) (not reason)) + (response-status-set! resp status)) + resp)))) + +(define update-response + (let ((old-update-response update-response)) + (lambda (resp #!rest args #!key status code reason) + (let ((resp (apply old-update-response resp args))) + (when (and status (not code) (not reason)) + (response-status-set! resp status)) + resp)))) + +(define (response-status-set! resp status) + (receive (code reason) (http-status->code&reason status) + (response-code-set! resp code) + (response-reason-set! resp reason) + resp)) + +(define (response-class obj) + (let ((code (if (response? obj) (response-code obj) obj))) + (- code (modulo code 100)))) + +(define (response-status obj) + (let* ((c (if (response? obj) (response-code obj) obj)) + (s (find (lambda (x) (= (cadr x) c)) (http-status-codes)))) + (if s + (car s) + (signal-http-condition + 'response-status "Unknown status code" (list c) + 'unknown-code 'code c)))) + +(define (http-status->code&reason status) + (let ((s (alist-ref status (http-status-codes)))) + (unless s + (signal-http-condition + 'http-status->code&reason + ;; haha, status symbol ;) + "Unknown response status symbol" + (list status) 'unknown-status 'status status)) + (values (car s) (cdr s)))) + +;; List of HTTP status codes based on: +;; http://www.iana.org/assignments/http-status-codes/http-status-codes.xml +(define http-status-codes + (make-parameter + `((continue . (100 . "Continue")) + (switching-protocols . (101 . "Switching Protocols")) + (processing . (102 . "Processing")) + (ok . (200 . "OK")) + (created . (201 . "Created")) + (accepted . (202 . "Accepted")) + (non-authoritative-information . (203 . "Non-Authoritative Information")) + (no-content . (204 . "No Content")) + (reset-content . (205 . "Reset Content")) + (partial-content . (206 . "Partial Content")) + (multi-status . (207 . "Multi-Status")) + (already-reported . (208 . "Already Reported")) + (im-used . (226 . "IM Used")) + (multiple-choices . (300 . "Multiple Choices")) + (moved-permanently . (301 . "Moved Permanently")) + (found . (302 . "Found")) + (see-other . (303 . "See Other")) + (not-modified . (304 . "Not Modified")) + (use-proxy . (305 . "Use Proxy")) + (temporary-redirect . (307 . "Temporary Redirect")) + (bad-request . (400 . "Bad Request")) + (unauthorized . (401 . "Unauthorized")) + (payment-required . (402 . "Payment Required")) + (forbidden . (403 . "Forbidden")) + (not-found . (404 . "Not Found")) + (method-not-allowed . (405 . "Method Not Allowed")) + (not-acceptable . (406 . "Not Acceptable")) + (proxy-authentication-required . (407 . "Proxy Authentication Required")) + (request-time-out . (408 . "Request Time-out")) + (conflict . (409 . "Conflict")) + (gone . (410 . "Gone")) + (length-required . (411 . "Length Required")) + (precondition-failed . (412 . "Precondition Failed")) + (request-entity-too-large . (413 . "Request Entity Too Large")) + (request-uri-too-large . (414 . "Request-URI Too Large")) + (unsupported-media-type . (415 . "Unsupported Media Type")) + (requested-range-not-satisfiable . (416 . "Requested Range Not Satisfiable")) + (expectation-failed . (417 . "Expectation Failed")) + (unprocessable-entity . (422 . "Unprocessable Entity")) + (locked . (423 . "Locked")) + (failed-dependency . (424 . "Failed Dependency")) + (upgrade-required . (426 . "Upgrade Required")) + (precondition-required . (428 . "Precondition Required")) + (too-many-requests . (429 . "Too Many Requests")) + (request-header-fields-too-large . (431 . "Request Header Fields Too Large")) + (internal-server-error . (500 . "Internal Server Error")) + (not-implemented . (501 . "Not Implemented")) + (bad-gateway . (502 . "Bad Gateway")) + (service-unavailable . (503 . "Service Unavailable")) + (gateway-time-out . (504 . "Gateway Time-out")) + (http-version-not-supported . (505 . "HTTP Version Not Supported")) + (insufficient-storage . (507 . "Insufficient Storage")) + (loop-detected . (508 . "Loop Detected")) + (not-extended . (510 . "Not Extended")) + (network-authentication-required . (511 . "Network Authentication Required"))))) + +(define (http-0.9-response-unparser response) + response) ;; The response-body will just follow + +;; See notes at write-request-line +(define (write-response-line response) + (display (string-append + "HTTP/" + (number->string (response-major response)) "." + (number->string (response-minor response)) " " + (->string (response-code response)) " " + (->string (response-reason response)) "\r\n") + (response-port response))) + +(define (http-1.0-response-unparser response) + (and-let* (((= (response-major response) 1)) + ((= (response-minor response) 0)) + (o (response-port response))) + (write-response-line response) + (unparse-headers (response-headers response) o) + (display "\r\n" o) + response)) + +;; XXX This actually unparses anything >= HTTP/1.1 +(define (http-1.x-response-unparser response) + (and-let* (((or (> (response-major response) 1) + (and (= (response-major response) 1) + (> (response-minor response) 0)))) + (o (response-port response))) + (write-response-line response) + (unparse-headers (response-headers response) o) + (display "\r\n" o) + (if (memq 'chunked (header-values 'transfer-encoding + (response-headers response))) + (update-response response + port: (chunked-output-port (response-port response))) + response))) + +(define response-unparsers + (make-parameter (list http-1.x-response-unparser http-1.0-response-unparser))) + +(define (write-response response) + ;; Try each unparser in turn to write the response-line. + ;; An unparser returns either #f or a new response object. + (let loop ((unparsers (response-unparsers))) + (if (null? unparsers) + (let ((major (response-major response)) + (minor (response-minor response))) + (signal-http-condition + 'write-response + "Unknown protocol" (list (conc major "." minor)) + 'unknown-protocol 'major major 'minor minor)) + (or ((car unparsers) response) (loop (cdr unparsers)))))) + +;; Required for chunked requests. This is a bit of a hack! +(define (finish-response-body response) + (when (and (memq 'chunked (header-values 'transfer-encoding + (response-headers response))) + (eq? (##sys#slot (response-port response) 10) 'chunked-output-port)) + (display "0\r\n\r\n" (##sys#slot (response-port response) 11))) + response) + +;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;; Response parsing ;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define http-1.x-response-parser + (let ((resp (irregex '(seq (w/nocase "HTTP/") + (=> major (+ digit)) "." (=> minor (+ digit)) + ;; Could use '(= 3 digit) for status-code, but + ;; that's currently not compilable + (+ space) (=> status-code digit digit digit) + (+ space) (=> reason-phrase (* nonl)))))) + (lambda (line in) + (and-let* ((m (irregex-match resp line)) + (code (string->number (irregex-match-substring m 'status-code))) + (major (string->number (irregex-match-substring m 'major))) + (minor (string->number (irregex-match-substring m 'minor))) + ((or (> major 1) (and (= major 1) (> minor 0)))) + (reason (irregex-match-substring m 'reason-phrase)) + (h (read-headers in)) + (port (if (memq 'chunked (header-values 'transfer-encoding h)) + (chunked-input-port in) + in))) + (make-response code: code reason: reason + major: major minor: minor + headers: h + port: port))))) + +(define http-1.0-response-parser + (let ((resp (irregex '(seq (w/nocase "HTTP/1.0") + ;; Could use '(= 3 digit) for status-code, but + ;; that's currently not compilable + (+ space) (=> status-code digit digit digit) + (+ space) (=> reason-phrase (* nonl)))))) + (lambda (line in) + (and-let* ((m (irregex-match resp line)) + (code (string->number (irregex-match-substring m 'status-code))) + (reason (irregex-match-substring m 'reason-phrase)) + (h (read-headers in))) + ;; HTTP/1.0 has no chunking + (make-response code: code reason: reason + major: 1 minor: 0 + headers: h + port: in))))) + +;; You can't "detect" a 0.9 response, because there is no response line. +;; It will simply output the body directly, so we will just assume that +;; if we can't recognise the output string, we just got a 0.9 response. +(define (http-0.9-response-parser line in) + (make-response code: 200 reason: "OK" + major: 0 minor: 9 + ;; XXX This is wrong, it re-inserts \r\n, while it may have + ;; been a \n only. To work around this, we'd have to write + ;; a custom (safe-)read-line procedure. + ;; However, it does not matter much because HTTP 0.9 is only + ;; defined to ever return text/html, no binary or any other + ;; content type. + port: (call-with-input-string (string-append line "\r\n") + (lambda (str) + (make-concatenated-port str in))))) + +(define response-parsers ;; order matters here + (make-parameter (list http-1.x-response-parser http-1.0-response-parser))) + +(define (read-response inport) + (let ((line (safe-read-line inport))) + (and (not (eof-object? line)) + (let loop ((parsers (response-parsers))) + (if (null? parsers) + (signal-http-condition + 'read-response "Unknown protocol line" (list line) + 'unknown-protocol-line 'line line) + (or ((car parsers) line inport) (loop (cdr parsers)))))))) + +) -- cgit v1.2.3