author    Peter Bex <peter@more-magic.net>    2018-01-01 16:14:33 +0100
committer Peter Bex <peter@more-magic.net>    2018-01-01 21:31:35 +0100
commit    3d620f2fd2bfc1c5cd4967ec022a5b0a7195e52d (patch)
tree      298a3875c06e8e00a5c30339fcdd683e56ebe31f
download  ugarit-backend-s3-3d620f2fd2bfc1c5cd4967ec022a5b0a7195e52d.tar.gz
Add initial version of Amazon S3 backend for Ugarit backup system
-rw-r--r--  ugarit-backend-s3.meta    10
-rw-r--r--  ugarit-backend-s3.scm    249
-rw-r--r--  ugarit-backend-s3.setup    4
3 files changed, 263 insertions(+), 0 deletions(-)
diff --git a/ugarit-backend-s3.meta b/ugarit-backend-s3.meta
new file mode 100644
index 0000000..88b88a4
--- /dev/null
+++ b/ugarit-backend-s3.meta
@@ -0,0 +1,10 @@
+;;; ugarit-backend-s3.meta -*- scheme -*-
+
+((egg "ugarit-backend-s3.egg")
+ (synopsis "An Amazon S3 storage backend for Ugarit")
+ (author "Peter Bex")
+ (maintainer "Peter Bex")
+ (category net)
+ (license "Public domain")
+ (doc-from-wiki)
+ (depends ugarit amazon-s3))
diff --git a/ugarit-backend-s3.scm b/ugarit-backend-s3.scm
new file mode 100644
index 0000000..4708418
--- /dev/null
+++ b/ugarit-backend-s3.scm
@@ -0,0 +1,249 @@
+;;
+;; ugarit-backend-s3 - An Amazon S3 storage backend for Ugarit
+;;
+;; All code in this egg is hereby placed in the Public Domain
+;;
+(module ugarit-backend-s3 ()
+
+(import chicken scheme)
+(use ugarit-backend)
+(use (prefix amazon-s3 s3:))
+(use intarweb uri-common)
+(use (only posix current-process-id))
+(use extras)
+(use srfi-4)
+(use matchable)
+(use miscmacros)
+
+;; This is supposed to be "unique" enough to distinguish a Ugarit
+;; session running on a given computer at a point in time. This
+;; should be enough in most cases. It might be better to use a uuid
+;; but that's another dependency we don't really need, and we're not
+;; really locking properly anyway. We could store the hostname in it,
+;; but that's identifiable information we do not really want to store.
+(define (make-lock-id)
+ (sprintf "~A@~A#~A"
+ (current-milliseconds) (current-process-id)
+ (random most-positive-fixnum)))
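+
+;; For illustration, a generated lock id looks something like
+;; "1514818473000@1234#56789" (all values hypothetical).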
+
+;; Because amazon-s3 uses global parameters and (in theory, at
+;; least) one can have multiple S3 backends with different configs,
+;; we parameterize on every request. This macro helps us do that.
+(define-syntax define-s3-command
+ (ir-macro-transformer
+ (lambda (e i c)
+ (let ((command-name (strip-syntax (cadr e))))
+ `(define ,(i command-name)
+ (lambda args
+ (parameterize ((s3:access-key ,(i '*access-key*))
+ (s3:secret-key ,(i '*secret-key*))
+ (s3:make-base-uri ,(i '*make-base-uri*)))
+ (apply ,(symbol-append '|s3:| command-name)
+ ,(i '*bucket*)
+ args))))))))
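+
+;; For illustration, (define-s3-command get-object) expands to
+;; (roughly):
+;;
+;;   (define get-object
+;;     (lambda args
+;;       (parameterize ((s3:access-key *access-key*)
+;;                      (s3:secret-key *secret-key*)
+;;                      (s3:make-base-uri *make-base-uri*))
+;;         (apply s3:get-object *bucket* args))))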
+
+;; Ignore 404s, returning #f if the object doesn't exist
+(define-syntax maybe-missing
+ (syntax-rules ()
+ ((_ ?expr)
+ (condition-case ?expr
+ (e (exn client-error)
+ (let ((response ((condition-property-accessor
+ 'client-error 'response) e)))
+ (if (and response (= (response-code response) 404))
+ #f
+ (signal e))))))))
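+
+;; For illustration: (maybe-missing (get-object "no-such-key"))
+;; evaluates to #f instead of raising the client-error condition,
+;; while any other client error is re-signalled.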
+
+(define (backend-s3 config-file)
+ (let* ((*bucket* #f)
+ (*access-key* #f)
+ (*secret-key* #f)
+ (*s3-hostname* "s3.amazonaws.com")
+ (*use-path* #f)
+ (*make-base-uri*
+ (lambda (bucket)
+ (if bucket
+ (if *use-path*
+ (make-uri scheme: 'https
+ host: *s3-hostname*
+ path: `(/ ,bucket ""))
+ (make-uri scheme: 'https
+ host: (sprintf "~A.~A"
+ bucket *s3-hostname*)))
+ (make-uri scheme: 'https host: *s3-hostname*)))))
+
+ (define (parse-config-file!)
+ (for-each (lambda (confentry)
+ (match confentry
+ (('bucket name) (set! *bucket* name))
+ (('access-key key) (set! *access-key* key))
+ (('secret-key key) (set! *secret-key* key))
+ (('base-hostname host) (set! *s3-hostname* host))
+ ('use-bucket-path (set! *use-path* #t))
+ (_ (signal (make-property-condition
+ 'exn 'location 'backend-s3
+ 'message "Unknown configuration entry"
+ 'arguments (list confentry))))))
+ (with-input-from-file config-file read-file)))
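+
+ ;; For illustration, a config file is a series of such entries
+ ;; (all values hypothetical):
+ ;;
+ ;;   (bucket "my-ugarit-archive")
+ ;;   (access-key "AKIAEXAMPLE")
+ ;;   (secret-key "not-a-real-secret")
+ ;;   (base-hostname "s3.example.com") ; optional, for non-AWS services
+ ;;   use-bucket-path ; optional: path-style instead of virtual-host URIs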
+
+ (define-s3-command bucket-exists?)
+ (define-s3-command create-bucket!)
+ (define-s3-command object-exists?)
+ (define-s3-command list-objects)
+ (define-s3-command put-object!)
+ (define-s3-command delete-object!)
+ (define-s3-command get-object)
+ (define-s3-command put-sexp!)
+ (define-s3-command get-sexp)
+
+ (define (maybe-delete-object! name)
+ (maybe-missing (delete-object! name)))
+
+ (define (put-u8vector! key vec)
+ (put-object!
+ key (lambda () (write-u8vector vec)) (u8vector-length vec)
+ "application/octet-stream"))
+
+ (define (get-u8vector key)
+ (blob->u8vector/shared (string->blob (get-object key))))
+
+ (define (make-name key extension)
+ (string-append "data/" key extension))
+
+ (define (make-tag-name tag)
+ (string-append "tag/" tag))
+
+ (define (make-tag-lock-name tag)
+ (string-append "lock/" tag))
+
+ (parse-config-file!)
+
+ ;; Perform some sanity checks
+ (unless *bucket*
+ (error "Please choose a (bucket ...) in the config file"))
+ (unless *access-key*
+ (error "Please choose an (access-key ...) in the config file"))
+ (unless *secret-key*
+ (error "Please choose a (secret-key ...) in the config file"))
+
+ (unless (bucket-exists?)
+ (error "The archive bucket does not exist; try \"admin init\" first" *bucket*))
+
+ ;; In principle the block size could be essentially unlimited, but
+ ;; not all S3 API implementations support ranged PUT requests, and
+ ;; even where they do, ranged PUTs make atomicity a real problem.
+ ;; Besides, performance is best when blocks are neither too small
+ ;; nor too large (though the sweet spot is somewhat data
+ ;; set-dependent).
+ (define block-size (* 1024 1024))
+
+ (make-storage
+ block-size
+ #t ; We are writable
+ #t ; We support unlink!
+ (lambda (key data type) ; put!
+ (if (object-exists? (make-name key ".type"))
+ (signal (make-property-condition
+          'exn 'message
+          "Duplicate block: put! should not be called on an existing hash"
+          'arguments (list key type)))
+ (begin
+ ;; NOTE: PUT requests are atomic according to
+ ;; http://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectPUT.html
+ ;; "Amazon S3 never adds partial objects; if you receive
+ ;; a success response, Amazon S3 added the entire object
+ ;; to the bucket."
+ (put-u8vector! (make-name key ".data") data)
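+ ;; If storing the type fails, roll back the data object so we
+ ;; don't leave a partial block behind.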
+ (handle-exceptions exn
+ (begin (maybe-delete-object! (make-name key ".data"))
+ (signal exn))
+ (put-sexp! (make-name key ".type") type))
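+ ;; Likewise, roll back both objects if storing the initial
+ ;; refcount fails.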
+ (handle-exceptions exn
+ (begin (maybe-delete-object! (make-name key ".data"))
+ (maybe-delete-object! (make-name key ".type"))
+ (signal exn))
+ (put-sexp! (make-name key ".refcount") 1))
+ (void))))
+ (lambda () (void)) ; flush! - a no-op for us
+ (lambda (key) ; exists?
+ (and (object-exists? (make-name key ".data"))
+ (get-sexp (make-name key ".type"))))
+ (lambda (key) ; get
+ (maybe-missing (get-u8vector (make-name key ".data"))))
+ (lambda (key) ; link!
+ (let ((current-refcount (get-sexp (make-name key ".refcount"))))
+ (put-sexp! (make-name key ".refcount") (+ current-refcount 1))
+ (void)))
+ (lambda (key) ; unlink!
+ (let* ((current-refcount (get-sexp (make-name key ".refcount")))
+ (new-refcount (- current-refcount 1)))
+ (if (zero? new-refcount)
+ (let ((data (get-u8vector (make-name key ".data"))))
+ (begin
+ (maybe-delete-object! (make-name key ".data"))
+ (maybe-delete-object! (make-name key ".type"))
+ (maybe-delete-object! (make-name key ".refcount"))
+ data)) ; returned in case of deletion
+ (begin
+ (put-sexp! (make-name key ".refcount") new-refcount)
+ #f))))
+ (lambda (tag key) ; set-tag!
+ (put-sexp! (make-tag-name tag) key))
+ (lambda (tag) ; tag
+ (let ((key (maybe-missing (get-sexp (make-tag-name tag)))))
+ (if (eof-object? key)
+ #f ; Treat empty file as no tag
+ key)))
+ (lambda () ; all-tags
+ (map (lambda (s) (substring s 4))
+ (list-objects prefix: "tag/")))
+ (lambda (tag) ; remove-tag!
+ (maybe-delete-object! (make-tag-name tag))
+ (maybe-delete-object! (make-tag-lock-name tag))
+ (void))
+ (lambda (tag) ; lock-tag!
+ ;; TODO: S3 does not support locking, so we approximate it as
+ ;; best we can. Unfortunately, S3's eventual consistency model
+ ;; means this is even less reliable than it should be. This
+ ;; scheme isn't fool-proof even if S3 did offer stronger
+ ;; guarantees.
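+ ;; The approach: if no lock object exists yet, write our unique
+ ;; lock id, read it back, and claim the lock only if we read
+ ;; back our own id, so that two concurrent lockers should
+ ;; (usually) detect each other's writes.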
+ (and-let* ((lock (make-tag-lock-name tag))
+ ((not (object-exists? lock)))
+ (in (make-lock-id))
+ (out (begin (put-sexp! (make-tag-lock-name tag) in)
+ (maybe-missing (get-sexp lock)))))
+ (equal? in out)))
+ (lambda (tag) ; tag-locked?
+ ;; This doesn't care *who* locked it, just *that* it's locked.
+ (object-exists? (make-tag-lock-name tag)))
+ (lambda (tag) ; unlock-tag!
+ (maybe-delete-object! (make-tag-lock-name tag))
+ (void))
+ (lambda (command) ; admin!
+ (match command
+ (('info)
+ (list (cons 'backend "s3")
+ (cons 's3-config config-file)
+ (cons 'block-size block-size)
+ (cons 'writable? #t)
+ (cons 'unlinkable? #t)))
+ (('init)
+ (unless (bucket-exists?)
+ (create-bucket!)))
+ (('help)
+ (list (cons 'info "Return information about the archive")
+ (cons 'help "List available admin commands")
+ (cons 'init "Initialize bucket")))
+ (else (error "Unknown admin command"))))
+ (lambda () ; close!
+ (void)))))
+
+(define backend
+ (match (command-line-arguments)
+ ((s3-config-file) (lambda () (backend-s3 s3-config-file)))
+ (else
+ (export-storage-error! "Invalid arguments to backend-s3")
+ (printf "USAGE:\nbackend-s3 <s3-config-file>\n")
+ #f)))
+
+(if backend
+ (export-storage! backend))
+
+)
diff --git a/ugarit-backend-s3.setup b/ugarit-backend-s3.setup
new file mode 100644
index 0000000..de33a20
--- /dev/null
+++ b/ugarit-backend-s3.setup
@@ -0,0 +1,4 @@
+;; -*- scheme -*-
+
+(compile ugarit-backend-s3.scm)
+(install-program 'ugarit-backend-s3 "ugarit-backend-s3")