From b9683af2da882d542208a15d67d8040bab8fa5c2 Mon Sep 17 00:00:00 2001 From: Billy Brown Date: Thu, 21 Dec 2023 17:07:00 +0000 Subject: Corrected Range and Content-Range header handling Corrected the handling of the Range request header and the Content- Range response header. They now have separate parsers, to match the standard, and the content-range unparser is now correctly named. The Range header can be in one of three forms: Range: bytes=- Range: bytes=- Range: bytes=- The Content-Range header can be in one of three forms: Content-Range: bytes -/ Content-Range: bytes -/* Content-Range: bytes */ The unit tests were updated to reflect the changes. Signed-off-by: Peter Bex --- header-parsers.scm | 60 ++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 52 insertions(+), 8 deletions(-) (limited to 'header-parsers.scm') diff --git a/header-parsers.scm b/header-parsers.scm index 89edeae..ac41a09 100644 --- a/header-parsers.scm +++ b/header-parsers.scm @@ -376,18 +376,42 @@ (cons (substring/shared contents 0 idx) version) (cons contents #f)))) -;; bytes -/ +;; Try getting a submatch from an irregex match and parse it, or return #f +(define (try-parse-submatch match group-name parser) + (and-let* ((value (irregex-match-substring match group-name))) + (parser value))) + +;; bytes=- +;; bytes=- +;; bytes=- (define range-subparser (let ((range-regex + (irregex '(seq "bytes=" + (=> start (* digit)) "-" (=> end (* digit)))))) + (lambda (s) + (and-let* ((m (irregex-match range-regex s))) + (let ((start (try-parse-submatch m 'start string->number)) + (end (try-parse-submatch m 'end string->number))) + (if (or start end) + (list start end) + #f)))))) + +;; bytes -/ +;; bytes -/* +;; bytes */ +(define content-range-subparser + (let ((content-range-regex (irregex '(seq "bytes" (+ space) - (=> start (+ digit)) "-" (=> end (+ digit)) - "/" (=> total (+ digit)))))) + (or (seq "*/" (=> total (+ digit))) + (seq (=> start (+ digit)) "-" (=> end (+ digit)) + "/" (or "*" + (=> total (+ digit))))))))) (lambda (s) - (and-let* ((m (irregex-match range-regex s)) - (start (string->number (irregex-match-substring m 'start))) - (end (string->number (irregex-match-substring m 'end))) - (total (string->number (irregex-match-substring m 'total)))) - (list start end total))))) + (and-let* ((m (irregex-match content-range-regex s))) + (let ((start (try-parse-submatch m 'start string->number)) + (end (try-parse-submatch m 'end string->number)) + (total (try-parse-submatch m 'total string->number))) + (list start end total)))))) ;; Accept *just* a filename, not a full path (simply strips directories) ;; This matches the content-disposition recommendation in RFC2616, 19.5.1: @@ -724,6 +748,23 @@ (if (eq? 'weak (car etag)) "W/" "") (quote-string (cdr etag)))) ;; Etags are _always_ quoted +(define (unparse-content-range content-range) + ; False values are used to represent wildcards "*" + (let ((content-range-value-unparser (lambda (value) + (if value + (number->string value) + "*")))) + (string-append "bytes " + ; If the start and end are wildcards, use only one + (if (not (or (car content-range) (cadr content-range))) + "*" + (string-append + (content-range-value-unparser (car content-range)) + "-" + (content-range-value-unparser (cadr content-range)))) + "/" + (content-range-value-unparser (caddr content-range))))) + ;; There's no need to make a specific header unparser for every header type. ;; Usually, the Scheme value representing a header can unambiguously be ;; unparsed into a header just by checking its type. @@ -772,6 +813,9 @@ (read-date . ,rfc1123-time->string)) value-unparser: unparser))))) +(define (content-range-unparser header-contents) + (list (unparse-content-range (get-value (car header-contents))))) + (define (etag-unparser header-contents) (list (unparse-etag (get-value (car header-contents))))) -- cgit v1.2.3