diff options
-rw-r--r-- | header-parsers.scm | 60 | ||||
-rw-r--r-- | intarweb.release-info | 3 | ||||
-rw-r--r-- | intarweb.scm | 27 | ||||
-rw-r--r-- | tests/run.scm | 68 |
4 files changed, 118 insertions, 40 deletions
diff --git a/header-parsers.scm b/header-parsers.scm index 89edeae..ac41a09 100644 --- a/header-parsers.scm +++ b/header-parsers.scm @@ -376,18 +376,42 @@ (cons (substring/shared contents 0 idx) version) (cons contents #f)))) -;; bytes <start>-<end>/<total> +;; Try getting a submatch from an irregex match and parse it, or return #f +(define (try-parse-submatch match group-name parser) + (and-let* ((value (irregex-match-substring match group-name))) + (parser value))) + +;; bytes=<start>-<end> +;; bytes=<start>- +;; bytes=-<end> (define range-subparser (let ((range-regex + (irregex '(seq "bytes=" + (=> start (* digit)) "-" (=> end (* digit)))))) + (lambda (s) + (and-let* ((m (irregex-match range-regex s))) + (let ((start (try-parse-submatch m 'start string->number)) + (end (try-parse-submatch m 'end string->number))) + (if (or start end) + (list start end) + #f)))))) + +;; bytes <start>-<end>/<total> +;; bytes <start>-<end>/* +;; bytes */<total> +(define content-range-subparser + (let ((content-range-regex (irregex '(seq "bytes" (+ space) - (=> start (+ digit)) "-" (=> end (+ digit)) - "/" (=> total (+ digit)))))) + (or (seq "*/" (=> total (+ digit))) + (seq (=> start (+ digit)) "-" (=> end (+ digit)) + "/" (or "*" + (=> total (+ digit))))))))) (lambda (s) - (and-let* ((m (irregex-match range-regex s)) - (start (string->number (irregex-match-substring m 'start))) - (end (string->number (irregex-match-substring m 'end))) - (total (string->number (irregex-match-substring m 'total)))) - (list start end total))))) + (and-let* ((m (irregex-match content-range-regex s))) + (let ((start (try-parse-submatch m 'start string->number)) + (end (try-parse-submatch m 'end string->number)) + (total (try-parse-submatch m 'total string->number))) + (list start end total)))))) ;; Accept *just* a filename, not a full path (simply strips directories) ;; This matches the content-disposition recommendation in RFC2616, 19.5.1: @@ -724,6 +748,23 @@ (if (eq? 'weak (car etag)) "W/" "") (quote-string (cdr etag)))) ;; Etags are _always_ quoted +(define (unparse-content-range content-range) + ; False values are used to represent wildcards "*" + (let ((content-range-value-unparser (lambda (value) + (if value + (number->string value) + "*")))) + (string-append "bytes " + ; If the start and end are wildcards, use only one + (if (not (or (car content-range) (cadr content-range))) + "*" + (string-append + (content-range-value-unparser (car content-range)) + "-" + (content-range-value-unparser (cadr content-range)))) + "/" + (content-range-value-unparser (caddr content-range))))) + ;; There's no need to make a specific header unparser for every header type. ;; Usually, the Scheme value representing a header can unambiguously be ;; unparsed into a header just by checking its type. @@ -772,6 +813,9 @@ (read-date . ,rfc1123-time->string)) value-unparser: unparser))))) +(define (content-range-unparser header-contents) + (list (unparse-content-range (get-value (car header-contents))))) + (define (etag-unparser header-contents) (list (unparse-etag (get-value (car header-contents))))) diff --git a/intarweb.release-info b/intarweb.release-info index e152e56..01c2fb0 100644 --- a/intarweb.release-info +++ b/intarweb.release-info @@ -1,4 +1,3 @@ (repo git "https://code.more-magic.net/{egg-name}") (uri targz "https://code.more-magic.net/{egg-name}/snapshot/{egg-name}-{egg-release}.tar.gz") -(release "2.0") -(release "2.0.1") +(release "3.0") diff --git a/intarweb.scm b/intarweb.scm index 013fafa..d2cca10 100644 --- a/intarweb.scm +++ b/intarweb.scm @@ -1,7 +1,7 @@ ;;; ;;; Intarweb is an improved HTTP library for Chicken ;;; -;; Copyright (c) 2008-2021, Peter Bex +;; Copyright (c) 2008-2024, Peter Bex ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without @@ -81,14 +81,14 @@ rfc1123-subparser rfc850-subparser asctime-subparser http-date-subparser product-subparser quality-subparser unknown-header-parser filename-subparser symbol-subparser symbol-subparser-ci natnum-subparser - host/port-subparser base64-subparser range-subparser filename-subparser - etag-parser software-parser mailbox-subparser + host/port-subparser base64-subparser range-subparser content-range-subparser + filename-subparser etag-parser software-parser mailbox-subparser if-range-parser retry-after-subparser via-parser warning-parser key/value-subparser set-cookie-parser cache-control-parser pragma-parser te-parser cookie-parser strict-transport-security-parser must-be-quoted-chars quote-string unparse-token - default-header-unparser etag-unparser host/port-unparser + content-range-unparser default-header-unparser etag-unparser host/port-unparser product-unparser software-unparser rfc1123-unparser cookie-unparser strict-transport-security-unparser @@ -100,7 +100,7 @@ basic-auth-param-subunparser digest-auth-param-subunparser ) -(import scheme (chicken base) (chicken foreign) (chicken irregex) +(import scheme (scheme base) (chicken base) (chicken foreign) (chicken irregex) (chicken format) (chicken io) (chicken string) (chicken time posix) (chicken pathname) (chicken fixnum) (chicken condition) (chicken port) (chicken syntax) @@ -153,8 +153,8 @@ ;; that deal with headers. (define-record headers v) -(define-record-printer (headers h out) - (fprintf out "#(headers: ~S)" (headers-v h))) +(set-record-printer! headers (lambda (h out) + (fprintf out "#(headers: ~S)" (headers-v h)))) (define headers->list headers-v) @@ -228,7 +228,7 @@ (define update-header-contents (make-updater replace-header-contents)) (define update-header-contents! (make-updater replace-header-contents!)) -(define http-name->symbol (compose string->symbol string-downcase!)) +(define http-name->symbol (compose string->symbol string-downcase)) (define symbol->http-name (compose string-titlecase symbol->string)) ;; Make a header set from a literal expression by folding in the headers @@ -271,7 +271,7 @@ (fprintf port "~X\r\n~A\r\n" len s)))) (lambda () ; close (close-output-port port)) - (lambda () ; flush + force-output: (lambda () (flush-output port))))) ;; first "reserved" slot ;; Slot 7 should probably stay 'custom @@ -312,12 +312,12 @@ (or (not position) (char-ready? port))) (lambda () ; close (close-input-port port)) - (lambda () ; peek-char + peek-char: (lambda () (check-position) (if position (peek-char port) #!eof)) - (lambda (p bytes buf off) ; read-string! + read-bytevector: (lambda (p bytes buf off) (let lp ((todo bytes) (total-bytes-read 0) (off off)) @@ -325,7 +325,7 @@ (if (or (not position) (= todo 0)) total-bytes-read (let* ((n (min todo (- chunk-length position))) - (bytes-read (read-string! n buf port off))) + (bytes-read (read-bytevector! buf port off (+ off n)))) (set! position (+ position bytes-read)) (lp (- todo bytes-read) (+ total-bytes-read bytes-read) @@ -436,7 +436,7 @@ (content-length . ,(single natnum-subparser)) (content-location . ,(single normalized-uri)) (content-md5 . ,(single base64-subparser)) - (content-range . ,(single range-subparser)) + (content-range . ,(single content-range-subparser)) (content-type . ,(single symbol-subparser-ci `((charset . ,symbol-subparser-ci)))) (date . ,(single http-date-subparser)) @@ -670,6 +670,7 @@ (define header-unparsers (make-parameter `((content-disposition . ,content-disposition-unparser) + (content-range . ,content-range-unparser) (date . ,rfc1123-unparser) (etag . ,etag-unparser) (expires . ,rfc1123-unparser) diff --git a/tests/run.scm b/tests/run.scm index d97f15d..f6f99ee 100644 --- a/tests/run.scm +++ b/tests/run.scm @@ -1,5 +1,5 @@ (import scheme chicken.base chicken.port - chicken.condition chicken.time.posix srfi-1 + chicken.condition chicken.time.posix srfi-1 test uri-common intarweb chicken.io chicken.format) ;; Below, there are specific tests for when these do have a value @@ -138,7 +138,7 @@ (let* ((headers (test-read-headers "Accept-Ranges: FoO"))) (test "Case-insensitive" '(foo) (header-values 'accept-ranges headers)))) - + (test-group "symbol-parser" (let* ((headers (test-read-headers "Allow: FoO, foo"))) (test "Case-sensitive" @@ -235,7 +235,7 @@ (test "Custom contents" 'security-through-obscurity (header-param 'contents 'authorization headers)))))) - + (test-group "authenticate parser" (test-group "basic auth" (let ((headers (test-read-headers "WWW-Authenticate: Basic realm=\"WallyWorld\""))) @@ -277,7 +277,7 @@ (test "non-true stale value" #f (header-param 'stale 'www-authenticate headers))))) - + (test-group "pragma-parser" (let ((headers (test-read-headers "Pragma: custom-value=10, no-cache"))) (test "value" @@ -300,10 +300,34 @@ (header-value 'content-md5 headers)))) (test-group "range-parser" - (let ((headers (test-read-headers "content-range: bytes 500-999/1234"))) - (test "Simple range" - '(500 999 1234) - (header-value 'content-range headers)))) + (let ((headers (test-read-headers "range: bytes=500-999"))) + (test "Simple range full" + '(500 999) + (header-value 'range headers))) + (let ((headers (test-read-headers "range: bytes=500-"))) + (test "Simple range start" + '(500 #f) + (header-value 'range headers))) + (let ((headers (test-read-headers "range: bytes=-999"))) + (test "Simple range end" + '(#f 999) + (header-value 'range headers))) + (let ((headers (test-read-headers "range: bytes 500-999/1234"))) + (test "Content-range failure" + #f + (header-value 'range headers))) + (let ((headers (test-read-headers "range: bytes=-"))) + (test "Numberless range failure" + #f + (header-value 'range headers))) + (let ((headers (test-read-headers "range: bytes 500-999"))) + (test "Space failure" + #f + (header-value 'range headers))) + (let ((headers (test-read-headers "range: bits=500-999"))) + (test "Wrong units failure" + #f + (header-value 'range headers)))) (test-group "content-disposition" (let ((headers (test-read-headers "Content-Disposition: attachment; filename=dir/foo.jpg"))) @@ -458,7 +482,7 @@ (header-value 'user-agent (test-read-headers "User-Agent: Mozilla/5.0\r\n"))) (test "Product with comment" '(("Mozilla" #f "foo")) - (header-value 'user-agent (test-read-headers "User-Agent: Mozilla (foo)\r\n"))) + (header-value 'user-agent (test-read-headers "User-Agent: Mozilla (foo)\r\n"))) (test "Realistic product (comments, semicolons)" '(("Mozilla" "5.0" "X11; U; NetBSD amd64; en-US; rv:1.9.0.3") ("Gecko" "2008110501" #f) ("Minefield" "3.0.3" #f)) (header-value 'user-agent (test-read-headers "User-Agent: Mozilla/5.0 (X11; U; NetBSD amd64; en-US; rv:1.9.0.3) Gecko/2008110501 Minefield/3.0.3\r\n"))) @@ -534,7 +558,7 @@ (utc-time->seconds (get-param 'expires (first (header-contents 'set-cookie headers))))))) - + (test-group "cookie-parser" (let* ((headers (test-read-headers "Cookie: Foo=bar; $Path=/; qux=mooh; $unknown=something"))) (test "Multiple cookies in the same header" @@ -611,8 +635,8 @@ "Foo: \"bar \\\" qux\", mooh\r\n" (test-unparse-headers `((foo "bar \" qux" "mooh")))) (test "Escaping control characters" - "Foo: \"bar\\\r\\\x01qux\"\r\n" - (test-unparse-headers `((foo "bar\r\x01qux")))) + "Foo: \"bar\\\r\\\x01;qux\"\r\n" + (test-unparse-headers `((foo "bar\r\x01;qux")))) ;; Unfortunately, there are no or very few HTTP implementations ;; which understand that newlines can be escaped with a backslash ;; in a quoted string. That's why we don't allow it. @@ -620,7 +644,7 @@ ;; of header (URLencoding, removing the newlines from cookies, etc) (test-error* "Embedded newlines throw an error" (exn http unencoded-header) - (test-unparse-headers `((foo "bar\n\x01qux")))) + (test-unparse-headers `((foo "bar\n\x01;qux")))) (test "Alist" "Foo: Bar=qux, Mooh=mumble\r\n" (test-unparse-headers `((foo (bar . qux) (mooh . mumble))))) @@ -644,7 +668,17 @@ (test-unparse-headers `((etag #("\"hi there" raw))))) (test-error* "Embedded newlines in raw headers also throw an error" (exn http unencoded-header) - (test-unparse-headers `((foo #("bar\n\x01qux" raw)))))) + (test-unparse-headers `((foo #("bar\n\x01;qux" raw)))))) + (test-group "content-range unparser" + (test "Full content-range" + "Content-Range: bytes 500-999/1234\r\n" + (test-unparse-headers `((content-range (500 999 1234))))) + (test "Wildcard size content-range" + "Content-Range: bytes 500-999/*\r\n" + (test-unparse-headers `((content-range (500 999 #f))))) + (test "Wildcard range content-range" + "Content-Range: bytes */1234\r\n" + (test-unparse-headers `((content-range (#f #f 1234)))))) (test-group "etag unparser" (test "Weak tag" "Etag: W/\"blah\"\r\n" @@ -729,7 +763,7 @@ (test "Old-style cookie expires value" "Set-Cookie: foo=; Expires=Sunday, 20-Jul-08 15:23:42 GMT\r\n" (test-unparse-headers `((set-cookie #(("foo" . "") - ((expires . #(42 23 15 20 6 108 0 309 #f 0)))))))) + ((expires . #(42 23 15 20 6 108 0 309 #f 0)))))))) (test "Secure (true)" "Set-Cookie: foo=bar; Secure\r\n" (test-unparse-headers `((set-cookie #(("foo" . "bar") @@ -743,7 +777,7 @@ "Set-Cookie: foo=bar; Path=/blah\r\n" (test-unparse-headers `((set-cookie #(("foo" . "bar") ((path . ,(uri-reference "/blah")) - (secure . #f)))))))) + (secure . #f)))))))) (test-group "authorization unparser" (test "Basic auth" "Authorization: Basic QWxpIEJhYmE6b3BlbiBzZXNhbWU=\r\n" @@ -1146,7 +1180,7 @@ (update-response res status: 'unknown)) (test "any status can be used when code and reason are given directly" "HTTP/1.1 999 No Way\r\n\r\ntest" - (test-write-response + (test-write-response (update-response res code: 999 reason: "No Way") "test")) (test "defaults can be parameterized" |