summaryrefslogtreecommitdiff
path: root/ugarit-backend-s3.scm
blob: 4708418f31e307dc638a6648b71a7a27548442b0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
;;
;; ugarit-backend-s3 - An Amazon S3 storage backend for Ugarit
;;
;; All code in this egg is hereby placed in the Public Domain
;;
(module ugarit-backend-s3 ()

(import chicken scheme)
(use ugarit-backend)
(use (prefix amazon-s3 s3:))
(use intarweb uri-common)
(use (only posix current-process-id))
(use extras)
(use srfi-4)
(use matchable)
(use miscmacros)

;; Produce an identifier that is "unique" enough to distinguish a
;; Ugarit session running on a given computer at a point in time.
;; A UUID would be more rigorous, but that is an extra dependency we
;; don't really need, and the lock scheme built on top of this is only
;; best-effort anyway.  The hostname is deliberately left out so we do
;; not store identifiable information in the archive.
(define (make-lock-id)
  (let ((timestamp (current-milliseconds))
        (pid (current-process-id))
        (nonce (random most-positive-fixnum)))
    ;; e.g. "1234567890@4242#987654321"
    (sprintf "~A@~A#~A" timestamp pid nonce)))

;; Because amazon-s3 uses global parameters and (in theory at least),
;; one can have multiple s3 backends with different configs, we
;; parameterize for every request.  This macro helps to do that.
;;
;; (define-s3-command NAME) defines NAME as a procedure that forwards
;; all of its arguments to the amazon-s3 procedure s3:NAME, with the
;; configured bucket prepended as the first argument.  The identifiers
;; *access-key*, *secret-key*, *make-base-uri* and *bucket* are
;; injected unhygienically, so they resolve to the bindings visible at
;; the macro's use site (the let* inside backend-s3 below).
(define-syntax define-s3-command
  (ir-macro-transformer
   (lambda (e i c)
     ;; The command name is the (stripped) second form of the macro call.
     (let ((command-name (strip-syntax (cadr e))))
       `(define ,(i command-name)
          (lambda args
            ;; Re-install this backend's credentials and URI builder
            ;; around every single request, since the amazon-s3 egg
            ;; keeps its configuration in global parameters.
            (parameterize ((s3:access-key ,(i '*access-key*))
                           (s3:secret-key ,(i '*secret-key*))
                           (s3:make-base-uri ,(i '*make-base-uri*)))
              ;; Delegate to the s3:-prefixed procedure, passing the
              ;; configured bucket followed by the caller's arguments.
              (apply ,(symbol-append '|s3:| command-name)
                     ,(i '*bucket*)
                     args))))))))

;; Ignore 404s, returning #f if the object doesn't exist.
;;
;; (maybe-missing EXPR) evaluates EXPR; if it raises a condition of
;; kind (exn client-error), the attached HTTP response is inspected
;; and a 404 status is translated into a #f result.  Any other
;; client-error (or a client-error without a response object) is
;; re-signalled unchanged so real failures are not masked.
(define-syntax maybe-missing
  (syntax-rules ()
    ((_ ?expr)
     (condition-case ?expr
       (e (exn client-error)
          ;; Pull the HTTP response out of the condition raised by the
          ;; HTTP client layer (intarweb response object).
          (let ((response ((condition-property-accessor
                            'client-error 'response) e)))
            (if (and response (= (response-code response) 404))
                #f
                (signal e))))))))

;; Construct an Ugarit storage object backed by an Amazon S3 bucket
;; (or an S3-compatible service).
;;
;; config-file names a file containing configuration s-expressions;
;; recognized entries are (bucket NAME), (access-key KEY),
;; (secret-key KEY), (base-hostname HOST) and the bare symbol
;; use-bucket-path (selects path-style rather than virtual-host-style
;; request URIs).  Signals an error when a mandatory entry is missing
;; or when the bucket does not exist yet ("admin init" creates it).
;; Returns the storage object produced by make-storage.
(define (backend-s3 config-file)
  (let* ((*bucket* #f)                      ; mandatory, set from config
         (*access-key* #f)                  ; mandatory, set from config
         (*secret-key* #f)                  ; mandatory, set from config
         (*s3-hostname* "s3.amazonaws.com") ; overridable for S3 clones
         (*use-path* #f)                    ; path-style URIs if true
         (*make-base-uri*
          ;; Build the base request URI for the amazon-s3 egg: either
          ;; path-style (https://HOST/BUCKET/) or virtual-host-style
          ;; (https://BUCKET.HOST/), depending on configuration.
          (lambda (bucket)
            (if bucket
                (if *use-path*
                    (make-uri scheme: 'https
                              host: *s3-hostname*
                              path: `(/ ,bucket ""))
                    (make-uri scheme: 'https
                              host: (sprintf "~A.~A"
                                      bucket *s3-hostname*)))
                (make-uri scheme: 'https host: *s3-hostname*)))))

    ;; Read every s-expression in config-file and update the bindings
    ;; above.  Unknown entries signal an error so typos are caught
    ;; rather than silently ignored.
    (define (parse-config-file!)
      (for-each (lambda (confentry)
                  (match confentry
                    (('bucket name) (set! *bucket* name))
                    (('access-key key) (set! *access-key* key))
                    (('secret-key key) (set! *secret-key* key))
                    (('base-hostname host) (set! *s3-hostname* host))
                    ('use-bucket-path (set! *use-path* #t))
                    (_ (signal (make-property-condition
                                'exn 'location 'backend-s3
                                'message "Unknown configuration entry"
                                'arguments (list confentry))))))
                (with-input-from-file config-file read-file)))

    ;; S3 operations wrapped so that each call runs with this
    ;; backend's credentials, bucket and URI style (see the
    ;; define-s3-command macro above).
    (define-s3-command bucket-exists?)
    (define-s3-command create-bucket!)
    (define-s3-command object-exists?)
    (define-s3-command list-objects)
    (define-s3-command put-object!)
    (define-s3-command delete-object!)
    (define-s3-command get-object)
    (define-s3-command put-sexp!)
    (define-s3-command get-sexp)

    ;; Delete an object, treating "already gone" (404) as success.
    (define (maybe-delete-object! name)
      (maybe-missing (delete-object! name)))

    ;; Store a u8vector under key as a binary object.
    (define (put-u8vector! key vec)
      (put-object!
       key (lambda () (write-u8vector vec)) (u8vector-length vec)
       "application/octet-stream"))

    ;; Fetch an object and return its body as a u8vector.
    (define (get-u8vector key)
      (blob->u8vector/shared (string->blob (get-object key))))

    ;; Key-naming helpers: blocks live under data/, tags under tag/,
    ;; and tag locks under lock/.
    (define (make-name key extension)
      (string-append "data/" key extension))

    (define (make-tag-name tag)
      (string-append "tag/" tag))

    (define (make-tag-lock-name tag)
      (string-append "lock/" tag))

    (parse-config-file!)

    ;; Perform some sanity checks
    (unless *bucket*
      (error "Please choose a (bucket ...) in the config file"))
    (unless *access-key*
      (error "Please choose an (access-key ...) in the config file"))
    (unless *secret-key*
      (error "Please choose a (secret-key ...) in the config file"))

    (unless (bucket-exists?)
      (error "The archive bucket does not exist; try \"admin init\" first" *bucket*))

    ;; This should be essentially infinite, but not all S3 API
    ;; implementations support range PUT requests, and even when
    ;; supported, it makes atomicity a real problem.  Besides,
    ;; performance is better when this is not too small and not
    ;; too large (but it's also somewhat data set-dependent).
    (define block-size (* 1024 1024))

    (make-storage
     block-size
     #t ; We are writable
     #t ; We support unlink!
     (lambda (key data type) ; put!
       (if (object-exists? (make-name key ".type"))
           (signal (make-property-condition 'exn 'message "Duplicate block: put! should not be called on an existing hash" 'arguments (list key type)))
           (begin
             ;; NOTE: PUT requests are atomic according to
             ;; http://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectPUT.html
             ;; "Amazon S3 never adds partial objects; if you receive
             ;; a success response, Amazon S3 added the entire object
             ;; to the bucket."
             ;; A block is three objects: .data, .type and .refcount.
             ;; They are written in that order, and on failure any
             ;; objects written so far are rolled back so we never
             ;; leave a half-created block behind.
             (put-u8vector! (make-name key ".data") data)
             (handle-exceptions exn
                 (begin (maybe-delete-object! (make-name key ".data"))
                        (signal exn))
               (put-sexp! (make-name key ".type") type))
             (handle-exceptions exn
                 (begin (maybe-delete-object! (make-name key ".data"))
                        (maybe-delete-object! (make-name key ".type"))
                        (signal exn))
               (put-sexp! (make-name key ".refcount") 1))
             (void))))
     (lambda () (void)) ; flush! - a no-op for us
     (lambda (key) ; exists?
       ;; Returns the block's type (truthy) when present, #f otherwise.
       (and (object-exists? (make-name key ".data"))
            (get-sexp (make-name key ".type"))))
     (lambda (key) ; get
       ;; #f when the block is missing, its contents otherwise.
       (maybe-missing (get-u8vector (make-name key ".data"))))
     (lambda (key) ; link!
       ;; Bump the reference count.  Not atomic: concurrent links can
       ;; lose updates, in line with the best-effort locking below.
       (let ((current-refcount (get-sexp (make-name key ".refcount"))))
         (put-sexp! (make-name key ".refcount") (+ current-refcount 1))
         (void)))
     (lambda (key) ; unlink!
       (let* ((current-refcount (get-sexp (make-name key ".refcount")))
              (new-refcount (- current-refcount 1)))
         (if (zero? new-refcount)
             ;; Last reference: fetch the data before deleting the
             ;; three objects, so it can be returned to the caller.
             (let ((data (get-u8vector (make-name key ".data"))))
               (begin
                 (maybe-delete-object! (make-name key ".data"))
                 (maybe-delete-object! (make-name key ".type"))
                 (maybe-delete-object! (make-name key ".refcount"))
                 data))               ; returned in case of deletion
             (begin
               (put-sexp! (make-name key ".refcount") new-refcount)
               #f))))
     (lambda (tag key) ; set-tag!
       (put-sexp! (make-tag-name tag) key))
     (lambda (tag) ; tag
       ;; maybe-missing yields #f for an absent tag; get-sexp on an
       ;; empty object yields the EOF object.
       (let ((key (maybe-missing (get-sexp (make-tag-name tag)))))
         (if (eof-object? key)
             #f                   ; Treat empty file as no tag
             key)))
     (lambda () ; all-tags
       ;; Strip the "tag/" prefix (4 characters) from each key.
       (map (lambda (s) (substring s 4))
            (list-objects prefix: "tag/")))
     (lambda (tag) ; remove-tag!
       (maybe-delete-object! (make-tag-name tag))
       (maybe-delete-object! (make-tag-lock-name tag))
       (void))
     (lambda (tag) ; lock-tag!
       ;; TODO: S3 does not support locking, so we approximate it as
       ;; best we can.  Unfortunately, S3's eventual-consistency model
       ;; means this is even less reliable than it should be.  This
       ;; scheme isn't fool-proof even if it did offer better
       ;; guarantees: we check for an existing lock, write our own
       ;; lock id, then read it back to see whether we won the race.
       (and-let* ((lock (make-tag-lock-name tag))
                  ((not (object-exists? lock)))
                  (in (make-lock-id))
                  (out (begin (put-sexp! lock in)
                              (maybe-missing (get-sexp lock)))))
         (equal? in out)))
     (lambda (tag) ; tag-locked?
       ;; This doesn't care *who* locked it, just *that* it's locked.
       (object-exists? (make-tag-lock-name tag)))
     (lambda (tag) ; unlock-tag!
       (maybe-delete-object! (make-tag-lock-name tag))
       (void))
     (lambda (command)                 ; admin!
       (match command
         (('info)
          (list (cons 'backend "s3")
                (cons 's3-config config-file)
                (cons 'block-size block-size)
                (cons 'writable? #t)
                (cons 'unlinkable? #t)))
         (('init)
          (unless (bucket-exists?)
            (create-bucket!)))
         (('help)
          (list (cons 'info "Return information about the archive")
                (cons 'help "List available admin commands")
                (cons 'init "Initialize bucket")))
         (else (error "Unknown admin command"))))
     (lambda () ; close!
       (void)))))

;; backend is a thunk that constructs the storage when invoked, or #f
;; when the command line is unusable.  Exactly one argument (the path
;; to the S3 config file) is expected.
(define backend
  (let ((args (command-line-arguments)))
    (if (and (pair? args) (null? (cdr args)))
        (let ((s3-config-file (car args)))
          (lambda () (backend-s3 s3-config-file)))
        (begin
          ;; Report the problem over the backend protocol, print usage
          ;; for humans, and yield #f so nothing gets exported below.
          (export-storage-error! "Invalid arguments to backend-s3")
          (printf "USAGE:\nbackend-s3 <s3-config-file>\n")
          #f))))

;; Hand the storage constructor over to the Ugarit backend protocol,
;; unless argument parsing already failed above.  `when` is the
;; idiomatic form for a one-armed conditional (a one-armed `if` has an
;; unspecified result and reads as if an else-branch were forgotten).
(when backend
  (export-storage! backend))

)