From 3d620f2fd2bfc1c5cd4967ec022a5b0a7195e52d Mon Sep 17 00:00:00 2001 From: Peter Bex Date: Mon, 1 Jan 2018 16:14:33 +0100 Subject: Add initial version of Amazon S3 backend for Ugarit backup system --- ugarit-backend-s3.meta | 10 ++ ugarit-backend-s3.scm | 249 ++++++++++++++++++++++++++++++++++++++++++++++++ ugarit-backend-s3.setup | 4 + 3 files changed, 263 insertions(+) create mode 100644 ugarit-backend-s3.meta create mode 100644 ugarit-backend-s3.scm create mode 100644 ugarit-backend-s3.setup diff --git a/ugarit-backend-s3.meta b/ugarit-backend-s3.meta new file mode 100644 index 0000000..88b88a4 --- /dev/null +++ b/ugarit-backend-s3.meta @@ -0,0 +1,10 @@ +;;; ugarit-backend-s3.meta -*- scheme -*- + +((egg "ugarit-backend-s3.egg") + (synopsis "An Amazon S3 storage backend for Ugarit") + (author "Peter Bex") + (maintainer "Peter Bex") + (category net) + (license "Public domain") + (doc-from-wiki) + (depends ugarit amazon-s3)) diff --git a/ugarit-backend-s3.scm b/ugarit-backend-s3.scm new file mode 100644 index 0000000..4708418 --- /dev/null +++ b/ugarit-backend-s3.scm @@ -0,0 +1,249 @@ +;; +;; ugarit-backend-s3 - An Amazon S3 storage backend for Ugarit +;; +;; All code in this egg is hereby placed in the Public Domain +;; +(module ugarit-backend-s3 () + +(import chicken scheme) +(use ugarit-backend) +(use (prefix amazon-s3 s3:)) +(use intarweb uri-common) +(use (only posix current-process-id)) +(use extras) +(use srfi-4) +(use matchable) +(use miscmacros) + +;; This is supposed to be "unique" enough to distinguish a Ugarit +;; session running on a given computer at a point in time. This +;; should be enough in most cases. It might be better to use a uuid +;; but that's another dependency we don't really need, and we're not +;; really locking properly anyway. We could store the hostname in it, +;; but that's identifiable information we do not really want to store. +(define (make-lock-id) + (sprintf "~A@~A#~A" + (current-milliseconds) (current-process-id) + (random most-positive-fixnum))) + +;; Because amazon-s3 uses global parameters and (in theory at least), +;; one can have multiple s3 backends with different configs, we +;; parameterize for every request. This macro helps to do that. +(define-syntax define-s3-command + (ir-macro-transformer + (lambda (e i c) + (let ((command-name (strip-syntax (cadr e)))) + `(define ,(i command-name) + (lambda args + (parameterize ((s3:access-key ,(i '*access-key*)) + (s3:secret-key ,(i '*secret-key*)) + (s3:make-base-uri ,(i '*make-base-uri*))) + (apply ,(symbol-append '|s3:| command-name) + ,(i '*bucket*) + args)))))))) + +;; Ignore 404s, returning #f if the object doesn't exist +(define-syntax maybe-missing + (syntax-rules () + ((_ ?expr) + (condition-case ?expr + (e (exn client-error) + (let ((response ((condition-property-accessor + 'client-error 'response) e))) + (if (and response (= (response-code response) 404)) + #f + (signal e)))))))) + +(define (backend-s3 config-file) + (let* ((*base-host* #f) + (*bucket* #f) + (*access-key* #f) + (*secret-key* #f) + (*s3-hostname* "s3.amazonaws.com") + (*use-path* #f) + (*make-base-uri* + (lambda (bucket) + (if bucket + (if *use-path* + (make-uri scheme: 'https + host: *s3-hostname* + path: `(/ ,bucket "")) + (make-uri scheme: 'https + host: (sprintf "~A.~A" + bucket *s3-hostname*))) + (make-uri scheme: 'https host: *s3-hostname*))))) + + (define (parse-config-file!) + (for-each (lambda (confentry) + (match confentry + (('bucket name) (set! *bucket* name)) + (('access-key key) (set! *access-key* key)) + (('secret-key key) (set! *secret-key* key)) + (('base-hostname host) (set! *s3-hostname* host)) + ('use-bucket-path (set! *use-path* #t)) + (_ (signal (make-property-condition + 'exn 'location 'backend-s3 + 'message "Unknown configuration entry" + 'arguments (list confentry)))))) + (with-input-from-file config-file read-file))) + + (define-s3-command bucket-exists?) + (define-s3-command create-bucket!) + (define-s3-command object-exists?) + (define-s3-command list-objects) + (define-s3-command put-object!) + (define-s3-command delete-object!) + (define-s3-command get-object) + (define-s3-command put-sexp!) + (define-s3-command get-sexp) + + (define (maybe-delete-object! name) + (maybe-missing (delete-object! name))) + + (define (put-u8vector! key vec) + (put-object! + key (lambda () (write-u8vector vec)) (u8vector-length vec) + "application/octet-stream")) + + (define (get-u8vector key) + (blob->u8vector/shared (string->blob (get-object key)))) + + (define (make-name key extension) + (string-append "data/" key extension)) + + (define (make-tag-name tag) + (string-append "tag/" tag)) + + (define (make-tag-lock-name tag) + (string-append "lock/" tag)) + + (parse-config-file!) + + ;; Perform some sanity checks + (unless *bucket* + (error "Please choose a (bucket ...) in the config file")) + (unless *access-key* + (error "Please choose an (access-key ...) in the config file")) + (unless *secret-key* + (error "Please choose a (secret-key ...) in the config file")) + + (unless (bucket-exists?) + (error "The archive bucket does not exist; try \"admin init\" first" *bucket*)) + + ;; This should be essentially infinite, but not all S3 API + ;; implementations support range PUT requests, and even when + ;; supported, it makes atomicity a real problem. Besides, + ;; performance is better when this is not too small and not + ;; too large (but it's also somewhat data set-dependent). + (define block-size (* 1024 1024)) + + (make-storage + block-size + #t ; We are writable + #t ; We support unlink! + (lambda (key data type) ; put! + (if (object-exists? (make-name key ".type")) + (signal (make-property-condition 'exn 'message "Duplicate block: put! should not be called on an existing hash" 'arguments (list key type))) + (begin + ;; NOTE: PUT request are atomic according to + ;; http://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectPUT.html + ;; "Amazon S3 never adds partial objects; if you receive + ;; a success response, Amazon S3 added the entire object + ;; to the bucket." + (put-u8vector! (make-name key ".data") data) + (handle-exceptions exn + (begin (maybe-delete-object! (make-name key ".data")) + (signal exn)) + (put-sexp! (make-name key ".type") type)) + (handle-exceptions exn + (begin (maybe-delete-object! (make-name key ".data")) + (maybe-delete-object! (make-name key ".type")) + (signal exn)) + (put-sexp! (make-name key ".refcount") 1)) + (void)))) + (lambda () (void)) ; flush! - a no-op for us + (lambda (key) ; exists? + (and (object-exists? (make-name key ".data")) + (get-sexp (make-name key ".type")))) + (lambda (key) ; get + (maybe-missing (get-u8vector (make-name key ".data")))) + (lambda (key) ; link! + (let ((current-refcount (get-sexp (make-name key ".refcount")))) + (put-sexp! (make-name key ".refcount") (+ current-refcount 1)) + (void))) + (lambda (key) ; unlink! + (let* ((current-refcount (get-sexp (make-name key ".refcount"))) + (new-refcount (- current-refcount 1))) + (if (zero? new-refcount) + (let ((data (get-u8vector (make-name key ".data")))) + (begin + (maybe-delete-object! (make-name key ".data")) + (maybe-delete-object! (make-name key ".type")) + (maybe-delete-object! (make-name key ".refcount")) + data)) ; returned in case of deletion + (begin + (put-sexp! (make-name key ".refcount") new-refcount) + #f)))) + (lambda (tag key) ; set-tag! + (put-sexp! (make-tag-name tag) key)) + (lambda (tag) ; tag + (let ((key (maybe-missing (get-sexp (make-tag-name tag))))) + (if (eof-object? key) + #f ; Treat empty file as no tag + key))) + (lambda () ; all-tags + (map (lambda (s) (substring s 4)) + (list-objects prefix: "tag/"))) + (lambda (tag) ; remove-tag! + (maybe-delete-object! (make-tag-name tag)) + (maybe-delete-object! (make-tag-lock-name tag)) + (void)) + (lambda (tag) ; lock-tag! + ;; TODO: S3 does not support locking, so we approximate it as + ;; best we can. Unfortunately, S3's "eventually consistency" + ;; bullshit blabla model means this is even less reliable than + ;; it should be. This scheme isn't fool-proof even if it did + ;; offer better guarantuees. + (and-let* ((lock (make-tag-lock-name tag)) + ((not (object-exists? lock))) + (in (make-lock-id)) + (out (begin (put-sexp! (make-tag-lock-name tag) in) + (maybe-missing (get-sexp lock))))) + (equal? in out))) + (lambda (tag) ; tag-locked? + ;; This doesn't care *who* locked it, just *that* it's locked. + (object-exists? (make-tag-lock-name tag))) + (lambda (tag) ; unlock-tag! + (maybe-delete-object! (make-tag-lock-name tag)) + (void)) + (lambda (command) ; admin! + (match command + (('info) + (list (cons 'backend "s3") + (cons 's3-config config-file) + (cons 'block-size block-size) + (cons 'writable? #t) + (cons 'unlinkable? #t))) + (('init) + (unless (bucket-exists?) + (create-bucket!))) + (('help) + (list (cons 'info "Return information about the archive") + (cons 'help "List available admin commands") + (cons 'init "Initialize bucket"))) + (else (error "Unknown admin command")))) + (lambda () ; close! + (void))))) + +(define backend + (match (command-line-arguments) + ((s3-config-file) (lambda () (backend-s3 s3-config-file))) + (else + (export-storage-error! "Invalid arguments to backend-s3") + (printf "USAGE:\nbackend-s3 \n") + #f))) + +(if backend + (export-storage! backend)) + +) diff --git a/ugarit-backend-s3.setup b/ugarit-backend-s3.setup new file mode 100644 index 0000000..de33a20 --- /dev/null +++ b/ugarit-backend-s3.setup @@ -0,0 +1,4 @@ +;; -*- scheme -*- + +(compile ugarit-backend-s3.scm) +(install-program 'ugarit-backend-s3 "ugarit-backend-s3") -- cgit v1.2.3