;; ;; ugarit-backend-s3 - An Amazon S3 storage backend for Ugarit ;; ;; All code in this egg is hereby placed in the Public Domain ;; (module ugarit-backend-s3 () (import chicken scheme) (use ugarit-backend) (use (prefix amazon-s3 s3:)) (use intarweb uri-common) (use (only posix current-process-id)) (use extras) (use srfi-4) (use matchable) (use miscmacros) ;; This is supposed to be "unique" enough to distinguish a Ugarit ;; session running on a given computer at a point in time. This ;; should be enough in most cases. It might be better to use a uuid ;; but that's another dependency we don't really need, and we're not ;; really locking properly anyway. We could store the hostname in it, ;; but that's identifiable information we do not really want to store. (define (make-lock-id) (sprintf "~A@~A#~A" (current-milliseconds) (current-process-id) (random most-positive-fixnum))) ;; Because amazon-s3 uses global parameters and (in theory at least), ;; one can have multiple s3 backends with different configs, we ;; parameterize for every request. This macro helps to do that. (define-syntax define-s3-command (ir-macro-transformer (lambda (e i c) (let ((command-name (strip-syntax (cadr e)))) `(define ,(i command-name) (lambda args (parameterize ((s3:access-key ,(i '*access-key*)) (s3:secret-key ,(i '*secret-key*)) (s3:make-base-uri ,(i '*make-base-uri*))) (apply ,(symbol-append '|s3:| command-name) ,(i '*bucket*) args)))))))) ;; Ignore 404s, returning #f if the object doesn't exist (define-syntax maybe-missing (syntax-rules () ((_ ?expr) (condition-case ?expr (e (exn client-error) (let ((response ((condition-property-accessor 'client-error 'response) e))) (if (and response (= (response-code response) 404)) #f (signal e)))))))) (define (backend-s3 config-file) (let* ((*base-host* #f) (*bucket* #f) (*access-key* #f) (*secret-key* #f) (*s3-hostname* "s3.amazonaws.com") (*use-path* #f) (*make-base-uri* (lambda (bucket) (if bucket (if *use-path* (make-uri scheme: 'https host: *s3-hostname* path: `(/ ,bucket "")) (make-uri scheme: 'https host: (sprintf "~A.~A" bucket *s3-hostname*))) (make-uri scheme: 'https host: *s3-hostname*))))) (define (parse-config-file!) (for-each (lambda (confentry) (match confentry (('bucket name) (set! *bucket* name)) (('access-key key) (set! *access-key* key)) (('secret-key key) (set! *secret-key* key)) (('base-hostname host) (set! *s3-hostname* host)) ('use-bucket-path (set! *use-path* #t)) (_ (signal (make-property-condition 'exn 'location 'backend-s3 'message "Unknown configuration entry" 'arguments (list confentry)))))) (with-input-from-file config-file read-file))) (define-s3-command bucket-exists?) (define-s3-command create-bucket!) (define-s3-command object-exists?) (define-s3-command list-objects) (define-s3-command put-object!) (define-s3-command delete-object!) (define-s3-command get-object) (define-s3-command put-sexp!) (define-s3-command get-sexp) (define (maybe-delete-object! name) (maybe-missing (delete-object! name))) (define (put-u8vector! key vec) (put-object! key (lambda () (write-u8vector vec)) (u8vector-length vec) "application/octet-stream")) (define (get-u8vector key) (blob->u8vector/shared (string->blob (get-object key)))) (define (make-name key extension) (string-append "data/" key extension)) (define (make-tag-name tag) (string-append "tag/" tag)) (define (make-tag-lock-name tag) (string-append "lock/" tag)) (parse-config-file!) ;; Perform some sanity checks (unless *bucket* (error "Please choose a (bucket ...) in the config file")) (unless *access-key* (error "Please choose an (access-key ...) in the config file")) (unless *secret-key* (error "Please choose a (secret-key ...) in the config file")) (unless (bucket-exists?) (error "The archive bucket does not exist; try \"admin init\" first" *bucket*)) ;; This should be essentially infinite, but not all S3 API ;; implementations support range PUT requests, and even when ;; supported, it makes atomicity a real problem. Besides, ;; performance is better when this is not too small and not ;; too large (but it's also somewhat data set-dependent). (define block-size (* 1024 1024)) (make-storage block-size #t ; We are writable #t ; We support unlink! (lambda (key data type) ; put! (if (object-exists? (make-name key ".type")) (signal (make-property-condition 'exn 'message "Duplicate block: put! should not be called on an existing hash" 'arguments (list key type))) (begin ;; NOTE: PUT request are atomic according to ;; http://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectPUT.html ;; "Amazon S3 never adds partial objects; if you receive ;; a success response, Amazon S3 added the entire object ;; to the bucket." (put-u8vector! (make-name key ".data") data) (handle-exceptions exn (begin (maybe-delete-object! (make-name key ".data")) (signal exn)) (put-sexp! (make-name key ".type") type)) (handle-exceptions exn (begin (maybe-delete-object! (make-name key ".data")) (maybe-delete-object! (make-name key ".type")) (signal exn)) (put-sexp! (make-name key ".refcount") 1)) (void)))) (lambda () (void)) ; flush! - a no-op for us (lambda (key) ; exists? (and (object-exists? (make-name key ".data")) (object-exists? (make-name key ".type")) (get-sexp (make-name key ".type")))) (lambda (key) ; get (maybe-missing (get-u8vector (make-name key ".data")))) (lambda (key) ; link! (let ((current-refcount (get-sexp (make-name key ".refcount")))) (put-sexp! (make-name key ".refcount") (+ current-refcount 1)) (void))) (lambda (key) ; unlink! (let* ((current-refcount (get-sexp (make-name key ".refcount"))) (new-refcount (- current-refcount 1))) (if (zero? new-refcount) (let ((data (get-u8vector (make-name key ".data")))) (begin (maybe-delete-object! (make-name key ".data")) (maybe-delete-object! (make-name key ".type")) (maybe-delete-object! (make-name key ".refcount")) data)) ; returned in case of deletion (begin (put-sexp! (make-name key ".refcount") new-refcount) #f)))) (lambda (tag key) ; set-tag! (put-sexp! (make-tag-name tag) key)) (lambda (tag) ; tag (let ((key (maybe-missing (get-sexp (make-tag-name tag))))) (if (eof-object? key) #f ; Treat empty file as no tag key))) (lambda () ; all-tags (map (lambda (s) (substring s 4)) (list-objects prefix: "tag/"))) (lambda (tag) ; remove-tag! (maybe-delete-object! (make-tag-name tag)) (maybe-delete-object! (make-tag-lock-name tag)) (void)) (lambda (tag) ; lock-tag! ;; TODO: S3 does not support locking, so we approximate it as ;; best we can. Unfortunately, S3's "eventually consistency" ;; bullshit blabla model means this is even less reliable than ;; it should be. This scheme isn't fool-proof even if it did ;; offer better guarantuees. (and-let* ((lock (make-tag-lock-name tag)) ((not (object-exists? lock))) (in (make-lock-id)) (out (begin (put-sexp! (make-tag-lock-name tag) in) (maybe-missing (get-sexp lock))))) (equal? in out))) (lambda (tag) ; tag-locked? ;; This doesn't care *who* locked it, just *that* it's locked. (object-exists? (make-tag-lock-name tag))) (lambda (tag) ; unlock-tag! (maybe-delete-object! (make-tag-lock-name tag)) (void)) (lambda (command) ; admin! (match command (('info) (list (cons 'backend "s3") (cons 's3-config config-file) (cons 'block-size block-size) (cons 'writable? #t) (cons 'unlinkable? #t))) (('init) (unless (bucket-exists?) (create-bucket!))) (('help) (list (cons 'info "Return information about the archive") (cons 'help "List available admin commands") (cons 'init "Initialize bucket"))) (else (error "Unknown admin command")))) (lambda () ; close! (void))))) (define backend (match (command-line-arguments) ((s3-config-file) (lambda () (backend-s3 s3-config-file))) (else (export-storage-error! "Invalid arguments to backend-s3") (printf "USAGE:\nbackend-s3 \n") #f))) (if backend (export-storage! backend)) )