summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBilly Brown <druidofluhn@gmail.com>2023-12-21 17:07:00 +0000
committerPeter Bex <peter@more-magic.net>2024-01-11 13:59:39 +0100
commitb9683af2da882d542208a15d67d8040bab8fa5c2 (patch)
tree947497195eb439a0a22e4839f9adcb952c4b579c
parentb2632661807a99fc7c3e610e8bd1b04c68b869c5 (diff)
downloadintarweb-2.1.0.tar.gz
Corrected Range and Content-Range header handling2.1.0
Corrected the handling of the Range request header and the Content- Range response header. They now have separate parsers, to match the standard, and the content-range unparser is now correctly named. The Range header can be in one of three forms: Range: bytes=<start>-<end> Range: bytes=<start>- Range: bytes=-<end> The Content-Range header can be in one of three forms: Content-Range: bytes <start>-<end>/<size> Content-Range: bytes <start>-<end>/* Content-Range: bytes */<size> The unit tests were updated to reflect the changes. Signed-off-by: Peter Bex <peter@more-magic.net>
-rw-r--r--header-parsers.scm60
-rw-r--r--intarweb.scm9
-rw-r--r--tests/run.scm42
3 files changed, 95 insertions, 16 deletions
diff --git a/header-parsers.scm b/header-parsers.scm
index 89edeae..ac41a09 100644
--- a/header-parsers.scm
+++ b/header-parsers.scm
@@ -376,18 +376,42 @@
(cons (substring/shared contents 0 idx) version)
(cons contents #f))))
-;; bytes <start>-<end>/<total>
+;; Try getting a submatch from an irregex match and parse it, or return #f
+(define (try-parse-submatch match group-name parser)
+ (and-let* ((value (irregex-match-substring match group-name)))
+ (parser value)))
+
+;; bytes=<start>-<end>
+;; bytes=<start>-
+;; bytes=-<end>
(define range-subparser
(let ((range-regex
+ (irregex '(seq "bytes="
+ (=> start (* digit)) "-" (=> end (* digit))))))
+ (lambda (s)
+ (and-let* ((m (irregex-match range-regex s)))
+ (let ((start (try-parse-submatch m 'start string->number))
+ (end (try-parse-submatch m 'end string->number)))
+ (if (or start end)
+ (list start end)
+ #f))))))
+
+;; bytes <start>-<end>/<total>
+;; bytes <start>-<end>/*
+;; bytes */<total>
+(define content-range-subparser
+ (let ((content-range-regex
(irregex '(seq "bytes" (+ space)
- (=> start (+ digit)) "-" (=> end (+ digit))
- "/" (=> total (+ digit))))))
+ (or (seq "*/" (=> total (+ digit)))
+ (seq (=> start (+ digit)) "-" (=> end (+ digit))
+ "/" (or "*"
+ (=> total (+ digit)))))))))
(lambda (s)
- (and-let* ((m (irregex-match range-regex s))
- (start (string->number (irregex-match-substring m 'start)))
- (end (string->number (irregex-match-substring m 'end)))
- (total (string->number (irregex-match-substring m 'total))))
- (list start end total)))))
+ (and-let* ((m (irregex-match content-range-regex s)))
+ (let ((start (try-parse-submatch m 'start string->number))
+ (end (try-parse-submatch m 'end string->number))
+ (total (try-parse-submatch m 'total string->number)))
+ (list start end total))))))
;; Accept *just* a filename, not a full path (simply strips directories)
;; This matches the content-disposition recommendation in RFC2616, 19.5.1:
@@ -724,6 +748,23 @@
(if (eq? 'weak (car etag)) "W/" "")
(quote-string (cdr etag)))) ;; Etags are _always_ quoted
+(define (unparse-content-range content-range)
+ ; False values are used to represent wildcards "*"
+ (let ((content-range-value-unparser (lambda (value)
+ (if value
+ (number->string value)
+ "*"))))
+ (string-append "bytes "
+ ; If the start and end are wildcards, use only one
+ (if (not (or (car content-range) (cadr content-range)))
+ "*"
+ (string-append
+ (content-range-value-unparser (car content-range))
+ "-"
+ (content-range-value-unparser (cadr content-range))))
+ "/"
+ (content-range-value-unparser (caddr content-range)))))
+
;; There's no need to make a specific header unparser for every header type.
;; Usually, the Scheme value representing a header can unambiguously be
;; unparsed into a header just by checking its type.
@@ -772,6 +813,9 @@
(read-date . ,rfc1123-time->string))
value-unparser: unparser)))))
+(define (content-range-unparser header-contents)
+ (list (unparse-content-range (get-value (car header-contents)))))
+
(define (etag-unparser header-contents)
(list (unparse-etag (get-value (car header-contents)))))
diff --git a/intarweb.scm b/intarweb.scm
index 78383d5..c40e1c4 100644
--- a/intarweb.scm
+++ b/intarweb.scm
@@ -81,14 +81,14 @@
rfc1123-subparser rfc850-subparser asctime-subparser http-date-subparser
product-subparser quality-subparser unknown-header-parser
filename-subparser symbol-subparser symbol-subparser-ci natnum-subparser
- host/port-subparser base64-subparser range-subparser filename-subparser
- etag-parser software-parser mailbox-subparser
+ host/port-subparser base64-subparser range-subparser content-range-subparser
+ filename-subparser etag-parser software-parser mailbox-subparser
if-range-parser retry-after-subparser via-parser warning-parser
key/value-subparser set-cookie-parser cache-control-parser pragma-parser
te-parser cookie-parser strict-transport-security-parser
must-be-quoted-chars quote-string unparse-token
- default-header-unparser etag-unparser host/port-unparser
+ content-range-unparser default-header-unparser etag-unparser host/port-unparser
product-unparser software-unparser rfc1123-unparser cookie-unparser
strict-transport-security-unparser
@@ -436,7 +436,7 @@
(content-length . ,(single natnum-subparser))
(content-location . ,(single normalized-uri))
(content-md5 . ,(single base64-subparser))
- (content-range . ,(single range-subparser))
+ (content-range . ,(single content-range-subparser))
(content-type . ,(single symbol-subparser-ci
`((charset . ,symbol-subparser-ci))))
(date . ,(single http-date-subparser))
@@ -670,6 +670,7 @@
(define header-unparsers
(make-parameter
`((content-disposition . ,content-disposition-unparser)
+ (content-range . ,content-range-unparser)
(date . ,rfc1123-unparser)
(etag . ,etag-unparser)
(expires . ,rfc1123-unparser)
diff --git a/tests/run.scm b/tests/run.scm
index d97f15d..95a3990 100644
--- a/tests/run.scm
+++ b/tests/run.scm
@@ -300,10 +300,34 @@
(header-value 'content-md5 headers))))
(test-group "range-parser"
- (let ((headers (test-read-headers "content-range: bytes 500-999/1234")))
- (test "Simple range"
- '(500 999 1234)
- (header-value 'content-range headers))))
+ (let ((headers (test-read-headers "range: bytes=500-999")))
+ (test "Simple range full"
+ '(500 999)
+ (header-value 'range headers)))
+ (let ((headers (test-read-headers "range: bytes=500-")))
+ (test "Simple range start"
+ '(500 #f)
+ (header-value 'range headers)))
+ (let ((headers (test-read-headers "range: bytes=-999")))
+ (test "Simple range end"
+ '(#f 999)
+ (header-value 'range headers)))
+ (let ((headers (test-read-headers "range: bytes 500-999/1234")))
+ (test "Content-range failure"
+ #f
+ (header-value 'range headers)))
+ (let ((headers (test-read-headers "range: bytes=-")))
+ (test "Numberless range failure"
+ #f
+ (header-value 'range headers)))
+ (let ((headers (test-read-headers "range: bytes 500-999")))
+ (test "Space failure"
+ #f
+ (header-value 'range headers)))
+ (let ((headers (test-read-headers "range: bits=500-999")))
+ (test "Wrong units failure"
+ #f
+ (header-value 'range headers))))
(test-group "content-disposition"
(let ((headers (test-read-headers "Content-Disposition: attachment; filename=dir/foo.jpg")))
@@ -645,6 +669,16 @@
(test-error* "Embedded newlines in raw headers also throw an error"
(exn http unencoded-header)
(test-unparse-headers `((foo #("bar\n\x01qux" raw))))))
+ (test-group "content-range unparser"
+ (test "Full content-range"
+ "Content-Range: bytes 500-999/1234\r\n"
+ (test-unparse-headers `((content-range (500 999 1234)))))
+ (test "Wildcard size content-range"
+ "Content-Range: bytes 500-999/*\r\n"
+ (test-unparse-headers `((content-range (500 999 #f)))))
+ (test "Wildcard range content-range"
+ "Content-Range: bytes */1234\r\n"
+ (test-unparse-headers `((content-range (#f #f 1234))))))
(test-group "etag unparser"
(test "Weak tag"
"Etag: W/\"blah\"\r\n"