From b9683af2da882d542208a15d67d8040bab8fa5c2 Mon Sep 17 00:00:00 2001 From: Billy Brown Date: Thu, 21 Dec 2023 17:07:00 +0000 Subject: Corrected Range and Content-Range header handling Corrected the handling of the Range request header and the Content- Range response header. They now have separate parsers, to match the standard, and the content-range unparser is now correctly named. The Range header can be in one of three forms: Range: bytes=- Range: bytes=- Range: bytes=- The Content-Range header can be in one of three forms: Content-Range: bytes -/ Content-Range: bytes -/* Content-Range: bytes */ The unit tests were updated to reflect the changes. Signed-off-by: Peter Bex --- header-parsers.scm | 60 ++++++++++++++++++++++++++++++++++++++++++++++-------- intarweb.scm | 9 ++++---- tests/run.scm | 42 ++++++++++++++++++++++++++++++++++---- 3 files changed, 95 insertions(+), 16 deletions(-) diff --git a/header-parsers.scm b/header-parsers.scm index 89edeae..ac41a09 100644 --- a/header-parsers.scm +++ b/header-parsers.scm @@ -376,18 +376,42 @@ (cons (substring/shared contents 0 idx) version) (cons contents #f)))) -;; bytes -/ +;; Try getting a submatch from an irregex match and parse it, or return #f +(define (try-parse-submatch match group-name parser) + (and-let* ((value (irregex-match-substring match group-name))) + (parser value))) + +;; bytes=- +;; bytes=- +;; bytes=- (define range-subparser (let ((range-regex + (irregex '(seq "bytes=" + (=> start (* digit)) "-" (=> end (* digit)))))) + (lambda (s) + (and-let* ((m (irregex-match range-regex s))) + (let ((start (try-parse-submatch m 'start string->number)) + (end (try-parse-submatch m 'end string->number))) + (if (or start end) + (list start end) + #f)))))) + +;; bytes -/ +;; bytes -/* +;; bytes */ +(define content-range-subparser + (let ((content-range-regex (irregex '(seq "bytes" (+ space) - (=> start (+ digit)) "-" (=> end (+ digit)) - "/" (=> total (+ digit)))))) + (or (seq "*/" (=> total (+ digit))) + (seq (=> start (+ digit)) "-" (=> end (+ digit)) + "/" (or "*" + (=> total (+ digit))))))))) (lambda (s) - (and-let* ((m (irregex-match range-regex s)) - (start (string->number (irregex-match-substring m 'start))) - (end (string->number (irregex-match-substring m 'end))) - (total (string->number (irregex-match-substring m 'total)))) - (list start end total))))) + (and-let* ((m (irregex-match content-range-regex s))) + (let ((start (try-parse-submatch m 'start string->number)) + (end (try-parse-submatch m 'end string->number)) + (total (try-parse-submatch m 'total string->number))) + (list start end total)))))) ;; Accept *just* a filename, not a full path (simply strips directories) ;; This matches the content-disposition recommendation in RFC2616, 19.5.1: @@ -724,6 +748,23 @@ (if (eq? 'weak (car etag)) "W/" "") (quote-string (cdr etag)))) ;; Etags are _always_ quoted +(define (unparse-content-range content-range) + ; False values are used to represent wildcards "*" + (let ((content-range-value-unparser (lambda (value) + (if value + (number->string value) + "*")))) + (string-append "bytes " + ; If the start and end are wildcards, use only one + (if (not (or (car content-range) (cadr content-range))) + "*" + (string-append + (content-range-value-unparser (car content-range)) + "-" + (content-range-value-unparser (cadr content-range)))) + "/" + (content-range-value-unparser (caddr content-range))))) + ;; There's no need to make a specific header unparser for every header type. ;; Usually, the Scheme value representing a header can unambiguously be ;; unparsed into a header just by checking its type. @@ -772,6 +813,9 @@ (read-date . ,rfc1123-time->string)) value-unparser: unparser))))) +(define (content-range-unparser header-contents) + (list (unparse-content-range (get-value (car header-contents))))) + (define (etag-unparser header-contents) (list (unparse-etag (get-value (car header-contents))))) diff --git a/intarweb.scm b/intarweb.scm index 78383d5..c40e1c4 100644 --- a/intarweb.scm +++ b/intarweb.scm @@ -81,14 +81,14 @@ rfc1123-subparser rfc850-subparser asctime-subparser http-date-subparser product-subparser quality-subparser unknown-header-parser filename-subparser symbol-subparser symbol-subparser-ci natnum-subparser - host/port-subparser base64-subparser range-subparser filename-subparser - etag-parser software-parser mailbox-subparser + host/port-subparser base64-subparser range-subparser content-range-subparser + filename-subparser etag-parser software-parser mailbox-subparser if-range-parser retry-after-subparser via-parser warning-parser key/value-subparser set-cookie-parser cache-control-parser pragma-parser te-parser cookie-parser strict-transport-security-parser must-be-quoted-chars quote-string unparse-token - default-header-unparser etag-unparser host/port-unparser + content-range-unparser default-header-unparser etag-unparser host/port-unparser product-unparser software-unparser rfc1123-unparser cookie-unparser strict-transport-security-unparser @@ -436,7 +436,7 @@ (content-length . ,(single natnum-subparser)) (content-location . ,(single normalized-uri)) (content-md5 . ,(single base64-subparser)) - (content-range . ,(single range-subparser)) + (content-range . ,(single content-range-subparser)) (content-type . ,(single symbol-subparser-ci `((charset . ,symbol-subparser-ci)))) (date . ,(single http-date-subparser)) @@ -670,6 +670,7 @@ (define header-unparsers (make-parameter `((content-disposition . ,content-disposition-unparser) + (content-range . ,content-range-unparser) (date . ,rfc1123-unparser) (etag . ,etag-unparser) (expires . ,rfc1123-unparser) diff --git a/tests/run.scm b/tests/run.scm index d97f15d..95a3990 100644 --- a/tests/run.scm +++ b/tests/run.scm @@ -300,10 +300,34 @@ (header-value 'content-md5 headers)))) (test-group "range-parser" - (let ((headers (test-read-headers "content-range: bytes 500-999/1234"))) - (test "Simple range" - '(500 999 1234) - (header-value 'content-range headers)))) + (let ((headers (test-read-headers "range: bytes=500-999"))) + (test "Simple range full" + '(500 999) + (header-value 'range headers))) + (let ((headers (test-read-headers "range: bytes=500-"))) + (test "Simple range start" + '(500 #f) + (header-value 'range headers))) + (let ((headers (test-read-headers "range: bytes=-999"))) + (test "Simple range end" + '(#f 999) + (header-value 'range headers))) + (let ((headers (test-read-headers "range: bytes 500-999/1234"))) + (test "Content-range failure" + #f + (header-value 'range headers))) + (let ((headers (test-read-headers "range: bytes=-"))) + (test "Numberless range failure" + #f + (header-value 'range headers))) + (let ((headers (test-read-headers "range: bytes 500-999"))) + (test "Space failure" + #f + (header-value 'range headers))) + (let ((headers (test-read-headers "range: bits=500-999"))) + (test "Wrong units failure" + #f + (header-value 'range headers)))) (test-group "content-disposition" (let ((headers (test-read-headers "Content-Disposition: attachment; filename=dir/foo.jpg"))) @@ -645,6 +669,16 @@ (test-error* "Embedded newlines in raw headers also throw an error" (exn http unencoded-header) (test-unparse-headers `((foo #("bar\n\x01qux" raw)))))) + (test-group "content-range unparser" + (test "Full content-range" + "Content-Range: bytes 500-999/1234\r\n" + (test-unparse-headers `((content-range (500 999 1234))))) + (test "Wildcard size content-range" + "Content-Range: bytes 500-999/*\r\n" + (test-unparse-headers `((content-range (500 999 #f))))) + (test "Wildcard range content-range" + "Content-Range: bytes */1234\r\n" + (test-unparse-headers `((content-range (#f #f 1234)))))) (test-group "etag unparser" (test "Weak tag" "Etag: W/\"blah\"\r\n" -- cgit v1.2.3