summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--header-parsers.scm60
-rw-r--r--intarweb.release-info3
-rw-r--r--intarweb.scm27
-rw-r--r--tests/run.scm68
4 files changed, 118 insertions, 40 deletions
diff --git a/header-parsers.scm b/header-parsers.scm
index 89edeae..ac41a09 100644
--- a/header-parsers.scm
+++ b/header-parsers.scm
@@ -376,18 +376,42 @@
(cons (substring/shared contents 0 idx) version)
(cons contents #f))))
-;; bytes <start>-<end>/<total>
+;; Try getting a submatch from an irregex match and parse it, or return #f
+(define (try-parse-submatch match group-name parser)
+ (and-let* ((value (irregex-match-substring match group-name)))
+ (parser value)))
+
+;; bytes=<start>-<end>
+;; bytes=<start>-
+;; bytes=-<end>
(define range-subparser
(let ((range-regex
+ (irregex '(seq "bytes="
+ (=> start (* digit)) "-" (=> end (* digit))))))
+ (lambda (s)
+ (and-let* ((m (irregex-match range-regex s)))
+ (let ((start (try-parse-submatch m 'start string->number))
+ (end (try-parse-submatch m 'end string->number)))
+ (if (or start end)
+ (list start end)
+ #f))))))
+
+;; bytes <start>-<end>/<total>
+;; bytes <start>-<end>/*
+;; bytes */<total>
+(define content-range-subparser
+ (let ((content-range-regex
(irregex '(seq "bytes" (+ space)
- (=> start (+ digit)) "-" (=> end (+ digit))
- "/" (=> total (+ digit))))))
+ (or (seq "*/" (=> total (+ digit)))
+ (seq (=> start (+ digit)) "-" (=> end (+ digit))
+ "/" (or "*"
+ (=> total (+ digit)))))))))
(lambda (s)
- (and-let* ((m (irregex-match range-regex s))
- (start (string->number (irregex-match-substring m 'start)))
- (end (string->number (irregex-match-substring m 'end)))
- (total (string->number (irregex-match-substring m 'total))))
- (list start end total)))))
+ (and-let* ((m (irregex-match content-range-regex s)))
+ (let ((start (try-parse-submatch m 'start string->number))
+ (end (try-parse-submatch m 'end string->number))
+ (total (try-parse-submatch m 'total string->number)))
+ (list start end total))))))
;; Accept *just* a filename, not a full path (simply strips directories)
;; This matches the content-disposition recommendation in RFC2616, 19.5.1:
@@ -724,6 +748,23 @@
(if (eq? 'weak (car etag)) "W/" "")
(quote-string (cdr etag)))) ;; Etags are _always_ quoted
+(define (unparse-content-range content-range)
+ ; False values are used to represent wildcards "*"
+ (let ((content-range-value-unparser (lambda (value)
+ (if value
+ (number->string value)
+ "*"))))
+ (string-append "bytes "
+ ; If the start and end are wildcards, use only one
+ (if (not (or (car content-range) (cadr content-range)))
+ "*"
+ (string-append
+ (content-range-value-unparser (car content-range))
+ "-"
+ (content-range-value-unparser (cadr content-range))))
+ "/"
+ (content-range-value-unparser (caddr content-range)))))
+
;; There's no need to make a specific header unparser for every header type.
;; Usually, the Scheme value representing a header can unambiguously be
;; unparsed into a header just by checking its type.
@@ -772,6 +813,9 @@
(read-date . ,rfc1123-time->string))
value-unparser: unparser)))))
+(define (content-range-unparser header-contents)
+ (list (unparse-content-range (get-value (car header-contents)))))
+
(define (etag-unparser header-contents)
(list (unparse-etag (get-value (car header-contents)))))
diff --git a/intarweb.release-info b/intarweb.release-info
index e152e56..01c2fb0 100644
--- a/intarweb.release-info
+++ b/intarweb.release-info
@@ -1,4 +1,3 @@
(repo git "https://code.more-magic.net/{egg-name}")
(uri targz "https://code.more-magic.net/{egg-name}/snapshot/{egg-name}-{egg-release}.tar.gz")
-(release "2.0")
-(release "2.0.1")
+(release "3.0")
diff --git a/intarweb.scm b/intarweb.scm
index 013fafa..d2cca10 100644
--- a/intarweb.scm
+++ b/intarweb.scm
@@ -1,7 +1,7 @@
;;;
;;; Intarweb is an improved HTTP library for Chicken
;;;
-;; Copyright (c) 2008-2021, Peter Bex
+;; Copyright (c) 2008-2024, Peter Bex
;; All rights reserved.
;;
;; Redistribution and use in source and binary forms, with or without
@@ -81,14 +81,14 @@
rfc1123-subparser rfc850-subparser asctime-subparser http-date-subparser
product-subparser quality-subparser unknown-header-parser
filename-subparser symbol-subparser symbol-subparser-ci natnum-subparser
- host/port-subparser base64-subparser range-subparser filename-subparser
- etag-parser software-parser mailbox-subparser
+ host/port-subparser base64-subparser range-subparser content-range-subparser
+ filename-subparser etag-parser software-parser mailbox-subparser
if-range-parser retry-after-subparser via-parser warning-parser
key/value-subparser set-cookie-parser cache-control-parser pragma-parser
te-parser cookie-parser strict-transport-security-parser
must-be-quoted-chars quote-string unparse-token
- default-header-unparser etag-unparser host/port-unparser
+ content-range-unparser default-header-unparser etag-unparser host/port-unparser
product-unparser software-unparser rfc1123-unparser cookie-unparser
strict-transport-security-unparser
@@ -100,7 +100,7 @@
basic-auth-param-subunparser digest-auth-param-subunparser
)
-(import scheme (chicken base) (chicken foreign) (chicken irregex)
+(import scheme (scheme base) (chicken base) (chicken foreign) (chicken irregex)
(chicken format) (chicken io) (chicken string)
(chicken time posix) (chicken pathname) (chicken fixnum)
(chicken condition) (chicken port) (chicken syntax)
@@ -153,8 +153,8 @@
;; that deal with headers.
(define-record headers v)
-(define-record-printer (headers h out)
- (fprintf out "#(headers: ~S)" (headers-v h)))
+(set-record-printer! headers (lambda (h out)
+ (fprintf out "#(headers: ~S)" (headers-v h))))
(define headers->list headers-v)
@@ -228,7 +228,7 @@
(define update-header-contents (make-updater replace-header-contents))
(define update-header-contents! (make-updater replace-header-contents!))
-(define http-name->symbol (compose string->symbol string-downcase!))
+(define http-name->symbol (compose string->symbol string-downcase))
(define symbol->http-name (compose string-titlecase symbol->string))
;; Make a header set from a literal expression by folding in the headers
@@ -271,7 +271,7 @@
(fprintf port "~X\r\n~A\r\n" len s))))
(lambda () ; close
(close-output-port port))
- (lambda () ; flush
+ force-output: (lambda ()
(flush-output port)))))
;; first "reserved" slot
;; Slot 7 should probably stay 'custom
@@ -312,12 +312,12 @@
(or (not position) (char-ready? port)))
(lambda () ; close
(close-input-port port))
- (lambda () ; peek-char
+ peek-char: (lambda ()
(check-position)
(if position
(peek-char port)
#!eof))
- (lambda (p bytes buf off) ; read-string!
+ read-bytevector: (lambda (p bytes buf off)
(let lp ((todo bytes)
(total-bytes-read 0)
(off off))
@@ -325,7 +325,7 @@
(if (or (not position) (= todo 0))
total-bytes-read
(let* ((n (min todo (- chunk-length position)))
- (bytes-read (read-string! n buf port off)))
+ (bytes-read (read-bytevector! buf port off (+ off n))))
(set! position (+ position bytes-read))
(lp (- todo bytes-read)
(+ total-bytes-read bytes-read)
@@ -436,7 +436,7 @@
(content-length . ,(single natnum-subparser))
(content-location . ,(single normalized-uri))
(content-md5 . ,(single base64-subparser))
- (content-range . ,(single range-subparser))
+ (content-range . ,(single content-range-subparser))
(content-type . ,(single symbol-subparser-ci
`((charset . ,symbol-subparser-ci))))
(date . ,(single http-date-subparser))
@@ -670,6 +670,7 @@
(define header-unparsers
(make-parameter
`((content-disposition . ,content-disposition-unparser)
+ (content-range . ,content-range-unparser)
(date . ,rfc1123-unparser)
(etag . ,etag-unparser)
(expires . ,rfc1123-unparser)
diff --git a/tests/run.scm b/tests/run.scm
index d97f15d..f6f99ee 100644
--- a/tests/run.scm
+++ b/tests/run.scm
@@ -1,5 +1,5 @@
(import scheme chicken.base chicken.port
- chicken.condition chicken.time.posix srfi-1
+ chicken.condition chicken.time.posix srfi-1
test uri-common intarweb chicken.io chicken.format)
;; Below, there are specific tests for when these do have a value
@@ -138,7 +138,7 @@
(let* ((headers (test-read-headers "Accept-Ranges: FoO")))
(test "Case-insensitive"
'(foo) (header-values 'accept-ranges headers))))
-
+
(test-group "symbol-parser"
(let* ((headers (test-read-headers "Allow: FoO, foo")))
(test "Case-sensitive"
@@ -235,7 +235,7 @@
(test "Custom contents"
'security-through-obscurity
(header-param 'contents 'authorization headers))))))
-
+
(test-group "authenticate parser"
(test-group "basic auth"
(let ((headers (test-read-headers "WWW-Authenticate: Basic realm=\"WallyWorld\"")))
@@ -277,7 +277,7 @@
(test "non-true stale value"
#f
(header-param 'stale 'www-authenticate headers)))))
-
+
(test-group "pragma-parser"
(let ((headers (test-read-headers "Pragma: custom-value=10, no-cache")))
(test "value"
@@ -300,10 +300,34 @@
(header-value 'content-md5 headers))))
(test-group "range-parser"
- (let ((headers (test-read-headers "content-range: bytes 500-999/1234")))
- (test "Simple range"
- '(500 999 1234)
- (header-value 'content-range headers))))
+ (let ((headers (test-read-headers "range: bytes=500-999")))
+ (test "Simple range full"
+ '(500 999)
+ (header-value 'range headers)))
+ (let ((headers (test-read-headers "range: bytes=500-")))
+ (test "Simple range start"
+ '(500 #f)
+ (header-value 'range headers)))
+ (let ((headers (test-read-headers "range: bytes=-999")))
+ (test "Simple range end"
+ '(#f 999)
+ (header-value 'range headers)))
+ (let ((headers (test-read-headers "range: bytes 500-999/1234")))
+ (test "Content-range failure"
+ #f
+ (header-value 'range headers)))
+ (let ((headers (test-read-headers "range: bytes=-")))
+ (test "Numberless range failure"
+ #f
+ (header-value 'range headers)))
+ (let ((headers (test-read-headers "range: bytes 500-999")))
+ (test "Space failure"
+ #f
+ (header-value 'range headers)))
+ (let ((headers (test-read-headers "range: bits=500-999")))
+ (test "Wrong units failure"
+ #f
+ (header-value 'range headers))))
(test-group "content-disposition"
(let ((headers (test-read-headers "Content-Disposition: attachment; filename=dir/foo.jpg")))
@@ -458,7 +482,7 @@
(header-value 'user-agent (test-read-headers "User-Agent: Mozilla/5.0\r\n")))
(test "Product with comment"
'(("Mozilla" #f "foo"))
- (header-value 'user-agent (test-read-headers "User-Agent: Mozilla (foo)\r\n")))
+ (header-value 'user-agent (test-read-headers "User-Agent: Mozilla (foo)\r\n")))
(test "Realistic product (comments, semicolons)"
'(("Mozilla" "5.0" "X11; U; NetBSD amd64; en-US; rv:1.9.0.3") ("Gecko" "2008110501" #f) ("Minefield" "3.0.3" #f))
(header-value 'user-agent (test-read-headers "User-Agent: Mozilla/5.0 (X11; U; NetBSD amd64; en-US; rv:1.9.0.3) Gecko/2008110501 Minefield/3.0.3\r\n")))
@@ -534,7 +558,7 @@
(utc-time->seconds
(get-param 'expires
(first (header-contents 'set-cookie headers)))))))
-
+
(test-group "cookie-parser"
(let* ((headers (test-read-headers "Cookie: Foo=bar; $Path=/; qux=mooh; $unknown=something")))
(test "Multiple cookies in the same header"
@@ -611,8 +635,8 @@
"Foo: \"bar \\\" qux\", mooh\r\n"
(test-unparse-headers `((foo "bar \" qux" "mooh"))))
(test "Escaping control characters"
- "Foo: \"bar\\\r\\\x01qux\"\r\n"
- (test-unparse-headers `((foo "bar\r\x01qux"))))
+ "Foo: \"bar\\\r\\\x01;qux\"\r\n"
+ (test-unparse-headers `((foo "bar\r\x01;qux"))))
;; Unfortunately, there are no or very few HTTP implementations
;; which understand that newlines can be escaped with a backslash
;; in a quoted string. That's why we don't allow it.
@@ -620,7 +644,7 @@
;; of header (URLencoding, removing the newlines from cookies, etc)
(test-error* "Embedded newlines throw an error"
(exn http unencoded-header)
- (test-unparse-headers `((foo "bar\n\x01qux"))))
+ (test-unparse-headers `((foo "bar\n\x01;qux"))))
(test "Alist"
"Foo: Bar=qux, Mooh=mumble\r\n"
(test-unparse-headers `((foo (bar . qux) (mooh . mumble)))))
@@ -644,7 +668,17 @@
(test-unparse-headers `((etag #("\"hi there" raw)))))
(test-error* "Embedded newlines in raw headers also throw an error"
(exn http unencoded-header)
- (test-unparse-headers `((foo #("bar\n\x01qux" raw))))))
+ (test-unparse-headers `((foo #("bar\n\x01;qux" raw))))))
+ (test-group "content-range unparser"
+ (test "Full content-range"
+ "Content-Range: bytes 500-999/1234\r\n"
+ (test-unparse-headers `((content-range (500 999 1234)))))
+ (test "Wildcard size content-range"
+ "Content-Range: bytes 500-999/*\r\n"
+ (test-unparse-headers `((content-range (500 999 #f)))))
+ (test "Wildcard range content-range"
+ "Content-Range: bytes */1234\r\n"
+ (test-unparse-headers `((content-range (#f #f 1234))))))
(test-group "etag unparser"
(test "Weak tag"
"Etag: W/\"blah\"\r\n"
@@ -729,7 +763,7 @@
(test "Old-style cookie expires value"
"Set-Cookie: foo=; Expires=Sunday, 20-Jul-08 15:23:42 GMT\r\n"
(test-unparse-headers `((set-cookie #(("foo" . "")
- ((expires . #(42 23 15 20 6 108 0 309 #f 0))))))))
+ ((expires . #(42 23 15 20 6 108 0 309 #f 0))))))))
(test "Secure (true)"
"Set-Cookie: foo=bar; Secure\r\n"
(test-unparse-headers `((set-cookie #(("foo" . "bar")
@@ -743,7 +777,7 @@
"Set-Cookie: foo=bar; Path=/blah\r\n"
(test-unparse-headers `((set-cookie #(("foo" . "bar")
((path . ,(uri-reference "/blah"))
- (secure . #f))))))))
+ (secure . #f))))))))
(test-group "authorization unparser"
(test "Basic auth"
"Authorization: Basic QWxpIEJhYmE6b3BlbiBzZXNhbWU=\r\n"
@@ -1146,7 +1180,7 @@
(update-response res status: 'unknown))
(test "any status can be used when code and reason are given directly"
"HTTP/1.1 999 No Way\r\n\r\ntest"
- (test-write-response
+ (test-write-response
(update-response res code: 999 reason: "No Way")
"test"))
(test "defaults can be parameterized"