(module henrietta-cache ()

(import scheme)

(cond-expand
  (chicken-4 (import chicken)
             (use utils files extras posix data-structures ports
                  http-client matchable uri-common srfi-1 irregex
                  setup-api)
             (define read-list read-file))
  (chicken-5 (import (chicken base) (chicken condition) (chicken file)
                     (chicken format) (chicken string) (chicken port)
                     (chicken process) (chicken process-context)
                     (chicken pathname) (chicken io) (chicken sort)
                     (chicken irregex)
                     http-client matchable uri-common srfi-1)))

(define (usage code)
  (print #<#EOF
usage: henrietta-cache [OPTION ...]

  -h   -help                    show this message
  -c   -cache-dir CACHEDIR      put egg cache in this dir, defaults to "cache"
  -e   -egg-list  EGGLIST       file containing the master list of available
                                eggs, defaults to "egg-locations". Can be an URI
  -i   -init-file INITFILE      a file to load before starting the process
  -r   -chicken-release REL     the major CHICKEN release version for which to fetch (defaults to 4)
  -k   -keep NUM                only keep this many latest releases, delete the rest
  -n   -no-download             don't download any releases (useful for purging in combination with -k)

Henrietta-cache will download cached copies of each egg listed in the file
EGGLIST, to CACHEDIR.

EOF
)
  (exit code));|

(define *cache-directory* "cache")
(define *egg-list* "egg-locations")
(define *chicken-release* 4)
(define *init-file* #f)
(define *keep-number* #f)
(define *no-download* #f)

(define download-release-success-hook
  (make-parameter #f))

(define download-release-error-hook
  (make-parameter #f))

(define read-release-info-file-error-hook
  (make-parameter #f))

(define hooks
  `((download-release-success . ,download-release-success-hook)
    (download-release-error   . ,download-release-error-hook)
    (read-release-info-file-error . ,read-release-info-file-error-hook)))

(define (version<=? v1 v2)
  (define (version->list v)
    (map (lambda (x) (or (string->number x) x))
         (irregex-split "[-\\._]" (->string v))))
  (let loop ((p1 (version->list v1))
             (p2 (version->list v2)))
    (cond ((null? p1) (null? p2))
          ((null? p2))
          ((number? (car p1))
           (and (number? (car p2))
                (or (< (car p1) (car p2))
                    (and (= (car p1) (car p2))
                         (loop (cdr p1) (cdr p2))))))
          ((number? (car p2)))
          ((string<? (car p1) (car p2)))
          (else
           (and (string=? (car p1) (car p2))
                (loop (cdr p1) (cdr p2)))))))

(define (run-hook hook-name . args)
  (and-let* ((hook (or (alist-ref hook-name hooks)
                       (error "invalid hook" hook-name)))
             (hook (hook)))
    (condition-case
        (apply hook args)
      (exn ()
           (print-error-message exn
                                (current-error-port)
                                (sprintf "Error running hook `~A'" hook-name))
           (flush-output (current-error-port))))))

;; This works on raw URI strings, not URI objects (for now?)
(define (replace-uri-patterns uri patterns)
  (string-translate* uri (map (lambda (pattern)
                                (cons (conc "{" (car pattern) "}")
                                      (uri-encode-string (->string (cdr pattern)))))
                              patterns)))

(define (call-with-output-pipe* cmd proc)
  (let ([p (open-output-pipe cmd)])
    (proc p)
    (unless (zero? (close-output-pipe p))
      (error "Got an error while executing command " cmd))))

(define (pipe-from-http uri cmd)
  (condition-case
      (call-with-input-request
       uri #f (lambda (i) (call-with-output-pipe*
                           cmd
                           (lambda (o) (copy-port i o)))))
    (e (exn http)
       (error (sprintf "Could not download ~A -- ~A"
                       uri (get-condition-property e 'exn 'message))))))

;; this moves via copy delete as it produces more consistent results
;; (i.e. like new file) in particular bsd group ownership is more natural
(define (move-to-cache tmp-dir cache-dir)
  (create-directory cache-dir)
  (for-each
    (lambda (file)
      (let
        ((tmp-file (make-pathname tmp-dir file))
          (cache-file (make-pathname cache-dir file)))
        (if (directory-exists? tmp-file)
          (move-to-cache tmp-file cache-file)
          (begin
            (copy-file tmp-file cache-file)
            (delete-file tmp-file) ))))
    (directory tmp-dir) )
  (delete-directory tmp-dir #t) )	;nb: don't like recursive but top level has hidden contents and don't want those

(define (download-files-from-list base-uri files cache-dir)
  (let ((tmp-dir (create-temporary-directory))
        (add-to-uri
         (lambda (f)
           (let* ((components (string-split f "/"))
                  (rel (update-uri (uri-reference "") path: components)))
             (uri-relative-to rel base-uri)))))
    (handle-exceptions exn
        (begin (system (sprintf "rm -rf ~A" (qs tmp-dir)))
               (signal exn))
      (for-each
       (lambda (file)
         (printf "\t\t~A...\n" file)
         (flush-output)
         (and-let* ((dirname (pathname-directory file))
                    (directory (make-pathname tmp-dir dirname)))
           (unless (file-exists? directory)
             (create-directory directory #t)))
         (condition-case
             (call-with-input-request
              (add-to-uri file) #f
              (lambda (i)
                (call-with-output-file
                    (make-pathname tmp-dir file)
                  (lambda (o) (copy-port i o)))))
           (e (exn http)
              (error (sprintf
                         (conc "Could not download file \"~A\", "
                               "listed in meta-file/files-list (full URI: ~A) -- ~A")
                       file (uri->string (add-to-uri file))
                       (get-condition-property e 'exn 'message))))))
        files)
      (create-directory cache-dir #t)
      (move-to-cache tmp-dir cache-dir) )))

;; Make-cmd is a lambda which accepts the temporary file- and dirname
;; and returns a suitable command to execute using SYSTEM
(define (download-and-extract type uri cache-dir make-cmd)
  (let ((tmp-dir (create-temporary-directory))
        (tmp-file (create-temporary-file)))
    (handle-exceptions exn
        (begin
          (delete-file tmp-file)
          (system (sprintf "rm -rf ~A" (qs tmp-dir)))
          (signal exn))
      (condition-case
          (call-with-input-request
           uri #f (lambda (i) (call-with-output-file tmp-file
                                (lambda (o) (copy-port i o)))))
        (e (exn http)
           (error (sprintf "Could not fetch ~A-file ~A -- ~A"
                    type uri (get-condition-property e 'exn 'message)))))
      (let* ((cmd (make-cmd tmp-file tmp-dir))
             (status (system cmd)))
        (unless (zero? status)
          (error "Got an error executing command" cmd)))
      (create-directory cache-dir #t)
      ;; Some people extract to the current directory, some include the
      ;; directory
      (let* ((contents (directory tmp-dir))
             (contents-dir (match contents
                             ;; Silly pax archives contain this bogus entry.
                             ;; Some pax-aware tar(1)s skip it, others don't.
                             ((or (dir "pax_global_header")
                                  ("pax_global_header" dir)
                                  (dir))
                              (make-pathname tmp-dir dir))
                             (else tmp-dir))))
        (move-to-cache contents-dir cache-dir)
        (unless (eq? contents-dir tmp-dir)
          (delete-directory tmp-dir #t)) ; remove parent if needed
        (delete-file tmp-file) ))))

(define (download-release distribution-file-type uri cache-dir)
  (case distribution-file-type
    ((targz tarbz2 zip)
     (download-and-extract
      distribution-file-type uri cache-dir
      (lambda (archive dir)
        ;; Instead of messing about with tar, gunzip, bunzip2, unzip
        ;; etc, we should use libarchive.
        (case distribution-file-type
          ((targz) (sprintf "(cd ~A && gunzip -c ~A | tar xf -)" (qs dir) (qs archive)))
          ((tarbz2) (sprintf "(cd ~A && bunzip2 -c ~A | tar xf -)" (qs dir) (qs archive)))
          ((zip) (sprintf "unzip -d ~A -o -qq ~A" (qs dir) (qs archive)))
          (else (error (sprintf "Unknown archive type `~S' (shouldn't happen!)"
                         distribution-file-type)))))))
    ;; CHICKEN 5 egg description format: .egg
    ((egg-file)
     (condition-case
         (let* ((meta (car (call-with-input-request uri #f read-list)))
                (files (alist-ref 'distribution-files meta)))
           (unless files
             (error "No \"distribution-files\" entry found in egg file" uri))
           (download-files-from-list (uri-reference uri) files cache-dir))
       (e (exn http)
          (error (sprintf "Could not download meta file \"~A\" -- ~A\n"
                          uri (get-condition-property e 'exn 'message))))))
    ;; CHICKEN 4 egg meta information format: .meta
    ((meta-file)
     (condition-case
         (let* ((meta (car (call-with-input-request uri #f read-list)))
                (files (alist-ref 'files meta)))
           (unless files
             (error "No \"files\" entry found in meta file" uri))
           (download-files-from-list (uri-reference uri) files cache-dir))
       (e (exn http)
          (error (sprintf "Could not download meta file \"~A\" -- ~A\n"
                          uri (get-condition-property e 'exn 'message))))))
    ((files-list)
     (condition-case
         (let ((lines (call-with-input-request uri #f read-lines)))
           (when (null? lines)
             (error "Empty files-list file" uri))
	   (let* ((original-uri (uri-reference (car lines)))
                  (path (uri-path original-uri))
                  ;; Ensure base URI is seen as a directory so relative refs
                  ;; are always appended
                  (base-uri (update-uri original-uri
                                        path: (if (string=? "" (last path))
                                                  path
                                                  `(,@path ""))))
                  ;; This works around a strange thing (another one, sigh)
                  ;; that apache-served stuff causes http-client to see an
                  ;; extra \r after the end.  Look into this!
		  (files (delete "" (cdr lines))))
             (download-files-from-list base-uri files cache-dir)))
       (e (exn http)
          (error (sprintf "Could not download files-list \"~A\" -- ~A\n"
                          uri (get-condition-property e 'exn 'message))))))
    (else (error "Unknown distribution file type" distribution-file-type))))

(define (download-all-release-files egg-name uris/releases uris)
  (let* ((egg-cache-dir (make-pathname *cache-directory* (->string egg-name)))
         (all-versions-alist
          (sort
           (append-map
            (lambda (uri/releases)
              (or (and-let* ((uri-alias (car uri/releases))
                             (uri-info (alist-ref uri-alias uris))
                             (type (car uri-info)))
                    (map
                     (lambda (egg-release)
                       (list egg-release type uri-alias))
                     (cdr uri/releases)))
                  '()))
            uris/releases)
           (lambda (a b) (version<=? (car a) (car b)))))
         (num-versions (length all-versions-alist)))
    (unless *no-download*
     (for-each
      (lambda (egg)
        (let* ((egg-release (car egg))
               (type (cadr egg))
               (uri-alias (caddr egg))
               (uri-template (cadr (alist-ref uri-alias uris)))
               (cache-dir (make-pathname egg-cache-dir egg-release)))
          (unless (file-exists? cache-dir)
            (let* ((patterns `((egg-name . ,egg-name)
                               (egg-release . ,egg-release)
                               (chicken-release . ,*chicken-release*)))
                   (uri (replace-uri-patterns uri-template patterns)))
              (printf "\tDownloading egg version ~A from ~A (CHICKEN release ~A)\n"
                egg-release uri *chicken-release*)
              (flush-output)
              (handle-exceptions exn
                  (begin
                    (system (sprintf "rm -rf ~A" (qs cache-dir)))
                    (fprintf (current-error-port)
                        "Error downloading or extracting egg '~A' release ~A (CHICKEN release ~A): "
                      egg-name egg-release *chicken-release*)
                    (print-error-message exn (current-error-port))
                    (flush-output (current-error-port))
                    (run-hook 'download-release-error
                              *chicken-release*
                              egg-name
                              egg-release
                              (get-condition-property exn 'exn 'message)))
                (download-release type uri cache-dir)
                (run-hook 'download-release-success
                          *chicken-release*
                          egg-name egg-release))))))
      ;; TODO: Maybe avoid hitting the network at all when -n is
      ;; supplied?  That would require us to sort the files on disk
      ;; too, which is a bit annoying.
      (if *keep-number*
          (take-right all-versions-alist
                      (if (< num-versions *keep-number*)
                          num-versions
                          *keep-number*))
          all-versions-alist)))
    (when (and *keep-number* (> num-versions *keep-number*))
      (for-each
       (lambda (egg)
         (let* ((egg-release (car egg))
                (cache-dir (make-pathname egg-cache-dir egg-release)))
           (when (file-exists? cache-dir)
             (printf "\tDeleting egg version ~A\n" egg-release)
             (delete-directory cache-dir #t))))
       (drop-right all-versions-alist *keep-number*)))))

(define (alist-add! key value alist)
  (alist-update! key (cons value (alist-ref key alist eq? '())) alist))

(define (read-release-info-file uri egg-name)
  (handle-exceptions exn
    (let ((message (get-condition-property exn 'exn 'message)))
      (fprintf (current-error-port)
               "Could not fetch release-info file for egg ~A (CHICKEN release ~A) from ~A -- ~A\n"
               egg-name *chicken-release* uri message)
      (run-hook 'read-release-info-file-error
                *chicken-release* egg-name uri message)
      (flush-output (current-error-port))
      '())
    (with-input-from-request uri #f read-list)))

(define (update-egg-cache)
  (when *init-file*
    (handle-exceptions exn
      (begin
        (fprintf (current-error-port) "Error loading init file ~A:\n" *init-file*)
        (print-error-message exn (current-error-port)))
      (load *init-file*)))

  (for-each
   (lambda (egg)
     (let* ((egg-name (car egg))
            (egg-uri-template  (cadr egg))
            (patterns `((egg-name . ,egg-name)
                        (chicken-release . ,*chicken-release*)))
            (uri (replace-uri-patterns egg-uri-template patterns)))
       (printf "Caching egg '~A'\n" egg-name)
       (flush-output)
       (handle-exceptions exn
         (begin (fprintf (current-error-port) "----\n")
                (fprintf (current-error-port) "Error downloading egg ~A\n" egg-name)
                (print-error-message exn (current-error-port))
                (fprintf (current-error-port) "----\n")
                (flush-output (current-error-port)))
         (let collect-releases ((info (read-release-info-file uri egg-name))
                                (uris/releases '())
                                (uris '()))
           (if (null? info)
               (download-all-release-files egg-name uris/releases uris)
               ;; There must be a simpler way to encode optional values
               (match (car info)
                 (('uri type uri)       ; The "default" URI
                  (collect-releases (cdr info) uris/releases
                                    (alist-update! 'default (list type uri) uris)))
                 (('uri type uri alias)
                  (collect-releases (cdr info) uris/releases
                                    (alist-update! alias (list type uri) uris)))
                 (('release version)    ; For the "default" URI
                  (collect-releases (cdr info)
                                    (alist-add! 'default version uris/releases)
                                    uris))
                 (('release version alias)
                  (collect-releases (cdr info)
                                    (alist-add! alias version uris/releases)
                                    uris))
                 (else (collect-releases (cdr info) uris/releases uris))))))))
   (let ((uri (uri-reference *egg-list*)))
     (if (absolute-uri? uri)            ; Assume this is a http reference then
         (call-with-input-request uri #f read-list)
         (call-with-input-file *egg-list* read-list)))))

(define *short-options* '(#\h #\c #\e #\n #\k))

(define (main args)
  (let loop ((args args))
    (if (null? args)
        (update-egg-cache)
        (let ((arg (car args)))
          (cond ((or (string=? arg "-help")
                     (string=? arg "-h")
                     (string=? arg "--help"))
                 (usage 0))
                ((or (string=? arg "-c") (string=? arg "-cache-dir"))
                 (unless (pair? (cdr args)) (usage 1))
                 (set! *cache-directory* (cadr args))
                 (loop (cddr args)))
                ((or (string=? arg "-e") (string=? arg "-egg-list"))
                 (unless (pair? (cdr args)) (usage 1))
                 (set! *egg-list* (cadr args))
                 (loop (cddr args)))
                ((or (string=? arg "-i") (string=? arg "-init-file"))
                 (unless (pair? (cdr args)) (usage 1))
                 (set! *init-file* (cadr args))
                 (loop (cddr args)))
                ((or (string=? arg "-r") (string=? arg "-chicken-release"))
                 (unless (pair? (cdr args)) (usage 1))
                 (handle-exceptions exn (usage 1)
                   (set! *chicken-release* (string->number (cadr args))))
                 (loop (cddr args)))
                ((or (string=? arg "-k") (string=? arg "-keep"))
                 (unless (pair? (cdr args)) (usage 1))
                 (handle-exceptions exn (usage 1)
                   (set! *keep-number* (string->number (cadr args))))
                 (loop (cddr args)))
                ((or (string=? arg "-n") (string=? arg "-no-download"))
                 (set! *no-download* #t)
                 (loop (cdr args)))
                ((and (positive? (string-length arg))
                      (char=? #\- (string-ref arg 0)))
                 (if (> (string-length arg) 2)
                     (let ((sos (string->list (substring arg 1))))
                       (if (null? (lset-difference eq? sos *short-options*))
                           (loop (append (map (cut string #\- <>) sos) (cdr args)))
                           (usage 1)))
                     (usage 1)))
                (else (loop (cdr args))))))))

(main (command-line-arguments))
)