;(module henrietta-cache () ;; NOTE: Module wrapper should not be activated due to hooks (import scheme) (cond-expand (chicken-4 (import chicken) (use utils files extras posix data-structures ports http-client matchable uri-common srfi-1 irregex setup-api) (define read-list read-file)) (chicken-5 (import (chicken base) (chicken condition) (chicken file) (chicken format) (chicken string) (chicken port) (chicken process) (chicken process-context) (chicken pathname) (chicken io) (chicken sort) (chicken irregex) (chicken file posix) http-client matchable uri-common srfi-1))) (define (usage code) (print #<#EOF usage: henrietta-cache [OPTION ...] -h -help show this message -c -cache-dir CACHEDIR put egg cache in this dir, defaults to "cache" -e -egg-list EGGLIST file containing the master list of available eggs, defaults to "egg-locations". Can be an URI -i -init-file INITFILE a file to load before starting the process -r -chicken-release REL the major CHICKEN release version for which to fetch (defaults to 5) -k -keep NUM only keep this many latest releases, delete the rest -n -no-download don't download any releases (useful for purging in combination with -k) Henrietta-cache will download cached copies of each egg listed in the file EGGLIST, to CACHEDIR. EOF ) (exit code));| (define *cache-directory* "cache") (define *egg-list* "egg-locations") (define *chicken-release* 5) (define *init-file* #f) (define *keep-number* #f) (define *no-download* #f) (define download-release-success-hook (make-parameter #f)) (define download-release-error-hook (make-parameter #f)) (define read-release-info-file-error-hook (make-parameter #f)) (define hooks `((download-release-success . ,download-release-success-hook) (download-release-error . ,download-release-error-hook) (read-release-info-file-error . ,read-release-info-file-error-hook))) (define (version<=? v1 v2) (define (version->list v) (map (lambda (x) (or (string->number x) x)) (irregex-split "[-\\._]" (->string v)))) (let loop ((p1 (version->list v1)) (p2 (version->list v2))) (cond ((null? p1) (null? p2)) ((null? p2)) ((number? (car p1)) (and (number? (car p2)) (or (< (car p1) (car p2)) (and (= (car p1) (car p2)) (loop (cdr p1) (cdr p2)))))) ((number? (car p2))) ((stringstring (cdr pattern))))) patterns))) (define (call-with-output-pipe* cmd proc) (let ([p (open-output-pipe cmd)]) (proc p) (unless (zero? (close-output-pipe p)) (error "Got an error while executing command " cmd)))) (define (pipe-from-http uri cmd) (condition-case (call-with-input-request uri #f (lambda (i) (call-with-output-pipe* cmd (lambda (o) (copy-port i o))))) (e (exn http) (error (sprintf "Could not download ~A -- ~A" uri (get-condition-property e 'exn 'message)))))) ;; this moves via copy delete as it produces more consistent results ;; (i.e. like new file) in particular bsd group ownership is more natural (define (move-to-cache tmp-dir cache-dir) (create-directory cache-dir) (for-each (lambda (file) (let ((tmp-file (make-pathname tmp-dir file)) (cache-file (make-pathname cache-dir file))) (if (directory-exists? tmp-file) (move-to-cache tmp-file cache-file) (begin (copy-file tmp-file cache-file) (delete-file tmp-file) )))) ;; Here we have to handle symlinks first. If we handle the ;; _target_ before the symlink, target will be deleted and ;; `copy-file' will break when trying to copy the symlink, as it ;; will be dangling. (sort (directory tmp-dir #t) (lambda (f _) (symbolic-link? (make-pathname tmp-dir f))))) (delete-directory tmp-dir #t) ) ;nb: don't like recursive but top level has hidden contents and don't want those (define (download-files-from-list base-uri files cache-dir) (let ((tmp-dir (create-temporary-directory)) (add-to-uri (lambda (f) (let* ((components (string-split f "/")) (rel (update-uri (uri-reference "") path: components))) (uri-relative-to rel base-uri))))) (handle-exceptions exn (begin (system (sprintf "rm -rf ~A" (qs tmp-dir))) (signal exn)) (for-each (lambda (file) (printf "\t\t~A...\n" file) (flush-output) (and-let* ((dirname (pathname-directory file)) (directory (make-pathname tmp-dir dirname))) (unless (file-exists? directory) (create-directory directory #t))) (condition-case (call-with-input-request (add-to-uri file) #f (lambda (i) (call-with-output-file (make-pathname tmp-dir file) (lambda (o) (copy-port i o))))) (e (exn http) (error (sprintf (conc "Could not download file \"~A\", " "listed in meta-file/files-list (full URI: ~A) -- ~A") file (uri->string (add-to-uri file)) (get-condition-property e 'exn 'message)))))) files) (create-directory cache-dir #t) (move-to-cache tmp-dir cache-dir) ))) ;; Make-cmd is a lambda which accepts the temporary file- and dirname ;; and returns a suitable command to execute using SYSTEM (define (download-and-extract type uri cache-dir make-cmd) (let ((tmp-dir (create-temporary-directory)) (tmp-file (create-temporary-file))) (handle-exceptions exn (begin (delete-file tmp-file) (system (sprintf "rm -rf ~A" (qs tmp-dir))) (signal exn)) (condition-case (call-with-input-request uri #f (lambda (i) (call-with-output-file tmp-file (lambda (o) (copy-port i o))))) (e (exn http) (error (sprintf "Could not fetch ~A-file ~A -- ~A" type uri (get-condition-property e 'exn 'message))))) (let* ((cmd (make-cmd tmp-file tmp-dir)) (status (system cmd))) (unless (zero? status) (error "Got an error executing command" cmd))) (create-directory cache-dir #t) ;; Some people extract to the current directory, some include the ;; directory (let* ((contents (directory tmp-dir)) (contents-dir (match contents ;; Silly pax archives contain this bogus entry. ;; Some pax-aware tar(1)s skip it, others don't. ((or (dir "pax_global_header") ("pax_global_header" dir) (dir)) (make-pathname tmp-dir dir)) (else tmp-dir)))) (move-to-cache contents-dir cache-dir) (unless (eq? contents-dir tmp-dir) (delete-directory tmp-dir #t)) ; remove parent if needed (delete-file tmp-file) )))) (define (download-release distribution-file-type uri cache-dir) (case distribution-file-type ((targz tarbz2 zip) (download-and-extract distribution-file-type uri cache-dir (lambda (archive dir) ;; Instead of messing about with tar, gunzip, bunzip2, unzip ;; etc, we should use libarchive. (case distribution-file-type ((targz) (sprintf "(cd ~A && gunzip -c ~A | tar xf -)" (qs dir) (qs archive))) ((tarbz2) (sprintf "(cd ~A && bunzip2 -c ~A | tar xf -)" (qs dir) (qs archive))) ((zip) (sprintf "unzip -d ~A -o -qq ~A" (qs dir) (qs archive))) (else (error (sprintf "Unknown archive type `~S' (shouldn't happen!)" distribution-file-type))))))) ;; CHICKEN 5 egg description format: .egg ((egg-file) (condition-case (let* ((meta (car (call-with-input-request uri #f read-list))) (files (alist-ref 'distribution-files meta))) (unless files (error "No \"distribution-files\" entry found in egg file" uri)) (download-files-from-list (uri-reference uri) files cache-dir)) (e (exn http) (error (sprintf "Could not download meta file \"~A\" -- ~A\n" uri (get-condition-property e 'exn 'message)))))) ;; CHICKEN 4 egg meta information format: .meta ((meta-file) (condition-case (let* ((meta (car (call-with-input-request uri #f read-list))) (files (alist-ref 'files meta))) (unless files (error "No \"files\" entry found in meta file" uri)) (download-files-from-list (uri-reference uri) files cache-dir)) (e (exn http) (error (sprintf "Could not download meta file \"~A\" -- ~A\n" uri (get-condition-property e 'exn 'message)))))) ((files-list) (condition-case (let ((lines (call-with-input-request uri #f read-lines))) (when (null? lines) (error "Empty files-list file" uri)) (let* ((original-uri (uri-reference (car lines))) (path (uri-path original-uri)) ;; Ensure base URI is seen as a directory so relative refs ;; are always appended (base-uri (update-uri original-uri path: (if (string=? "" (last path)) path `(,@path "")))) ;; This works around a strange thing (another one, sigh) ;; that apache-served stuff causes http-client to see an ;; extra \r after the end. Look into this! (files (delete "" (cdr lines)))) (download-files-from-list base-uri files cache-dir))) (e (exn http) (error (sprintf "Could not download files-list \"~A\" -- ~A\n" uri (get-condition-property e 'exn 'message)))))) (else (error "Unknown distribution file type" distribution-file-type)))) (define (download-all-release-files egg-name uris/releases uris) (let* ((egg-cache-dir (make-pathname *cache-directory* (->string egg-name))) (all-versions-alist (sort (append-map (lambda (uri/releases) (or (and-let* ((uri-alias (car uri/releases)) (uri-info (alist-ref uri-alias uris)) (type (car uri-info))) (map (lambda (egg-release) (list egg-release type uri-alias)) (cdr uri/releases))) '())) uris/releases) (lambda (a b) (version<=? (car a) (car b))))) (num-versions (length all-versions-alist))) (unless *no-download* (for-each (lambda (egg) (let* ((egg-release (car egg)) (type (cadr egg)) (uri-alias (caddr egg)) (uri-template (cadr (alist-ref uri-alias uris))) (cache-dir (make-pathname egg-cache-dir egg-release))) (unless (file-exists? cache-dir) (let* ((patterns `((egg-name . ,egg-name) (egg-release . ,egg-release) (chicken-release . ,*chicken-release*))) (uri (replace-uri-patterns uri-template patterns))) (printf "\tDownloading egg version ~A from ~A (CHICKEN release ~A)\n" egg-release uri *chicken-release*) (flush-output) (handle-exceptions exn (begin (system (sprintf "rm -rf ~A" (qs cache-dir))) (fprintf (current-error-port) "Error downloading or extracting egg '~A' release ~A (CHICKEN release ~A): " egg-name egg-release *chicken-release*) (print-error-message exn (current-error-port)) (flush-output (current-error-port)) (run-hook 'download-release-error *chicken-release* egg-name egg-release (get-condition-property exn 'exn 'message))) (download-release type uri cache-dir) (run-hook 'download-release-success *chicken-release* egg-name egg-release)))))) ;; TODO: Maybe avoid hitting the network at all when -n is ;; supplied? That would require us to sort the files on disk ;; too, which is a bit annoying. (if *keep-number* (take-right all-versions-alist (if (< num-versions *keep-number*) num-versions *keep-number*)) all-versions-alist))) (when (and *keep-number* (> num-versions *keep-number*)) (for-each (lambda (egg) (let* ((egg-release (car egg)) (cache-dir (make-pathname egg-cache-dir egg-release))) (when (file-exists? cache-dir) (printf "\tDeleting egg version ~A\n" egg-release) (delete-directory cache-dir #t)))) (drop-right all-versions-alist *keep-number*))))) (define (alist-add! key value alist) (alist-update! key (cons value (alist-ref key alist eq? '())) alist)) (define (read-release-info-file uri egg-name) (handle-exceptions exn (let ((message (get-condition-property exn 'exn 'message))) (fprintf (current-error-port) "Could not fetch release-info file for egg ~A (CHICKEN release ~A) from ~A -- ~A\n" egg-name *chicken-release* uri message) (run-hook 'read-release-info-file-error *chicken-release* egg-name uri message) (flush-output (current-error-port)) '()) (with-input-from-request uri #f read-list))) (define (update-egg-cache) (when *init-file* (handle-exceptions exn (begin (fprintf (current-error-port) "Error loading init file ~A:\n" *init-file*) (print-error-message exn (current-error-port))) (load *init-file*))) (for-each (lambda (egg) (let* ((egg-name (car egg)) (egg-uri-template (cadr egg)) (patterns `((egg-name . ,egg-name) (chicken-release . ,*chicken-release*))) (uri (replace-uri-patterns egg-uri-template patterns))) (printf "Caching egg '~A'\n" egg-name) (flush-output) (handle-exceptions exn (begin (fprintf (current-error-port) "----\n") (fprintf (current-error-port) "Error downloading egg ~A\n" egg-name) (print-error-message exn (current-error-port)) (fprintf (current-error-port) "----\n") (flush-output (current-error-port))) (let collect-releases ((info (read-release-info-file uri egg-name)) (uris/releases '()) (uris '())) (if (null? info) (download-all-release-files egg-name uris/releases uris) ;; There must be a simpler way to encode optional values (match (car info) (('uri type uri) ; The "default" URI (collect-releases (cdr info) uris/releases (alist-update! 'default (list type uri) uris))) (('uri type uri alias) (collect-releases (cdr info) uris/releases (alist-update! alias (list type uri) uris))) (('release version) ; For the "default" URI (collect-releases (cdr info) (alist-add! 'default version uris/releases) uris)) (('release version alias) (collect-releases (cdr info) (alist-add! alias version uris/releases) uris)) (else (collect-releases (cdr info) uris/releases uris)))))))) (let ((uri (uri-reference *egg-list*))) (if (absolute-uri? uri) ; Assume this is a http reference then (call-with-input-request uri #f read-list) (call-with-input-file *egg-list* read-list))))) (define *short-options* '(#\h #\c #\e #\n #\k)) (define (main args) (let loop ((args args)) (if (null? args) (update-egg-cache) (let ((arg (car args))) (cond ((or (string=? arg "-help") (string=? arg "-h") (string=? arg "--help")) (usage 0)) ((or (string=? arg "-c") (string=? arg "-cache-dir")) (unless (pair? (cdr args)) (usage 1)) (set! *cache-directory* (cadr args)) (loop (cddr args))) ((or (string=? arg "-e") (string=? arg "-egg-list")) (unless (pair? (cdr args)) (usage 1)) (set! *egg-list* (cadr args)) (loop (cddr args))) ((or (string=? arg "-i") (string=? arg "-init-file")) (unless (pair? (cdr args)) (usage 1)) (set! *init-file* (cadr args)) (loop (cddr args))) ((or (string=? arg "-r") (string=? arg "-chicken-release")) (unless (pair? (cdr args)) (usage 1)) (handle-exceptions exn (usage 1) (set! *chicken-release* (string->number (cadr args)))) (loop (cddr args))) ((or (string=? arg "-k") (string=? arg "-keep")) (unless (pair? (cdr args)) (usage 1)) (handle-exceptions exn (usage 1) (set! *keep-number* (string->number (cadr args)))) (loop (cddr args))) ((or (string=? arg "-n") (string=? arg "-no-download")) (set! *no-download* #t) (loop (cdr args))) ((and (positive? (string-length arg)) (char=? #\- (string-ref arg 0))) (if (> (string-length arg) 2) (let ((sos (string->list (substring arg 1)))) (if (null? (lset-difference eq? sos *short-options*)) (loop (append (map (cut string #\- <>) sos) (cdr args))) (usage 1))) (usage 1))) (else (loop (cdr args)))))))) (main (command-line-arguments)) ;)