From 1088b3244e9dd89404e6eea457727805b2fb2d89 Mon Sep 17 00:00:00 2001 From: Peter Bex Date: Thu, 12 Sep 2024 18:53:44 +0200 Subject: Port Postgres egg to CHICKEN 6 This has quite a few changes due to the low-level nature of this code: when the contents of a string get copied by C functions, we can't pass it to arguments of the scheme-pointer type directly anymore. Instead, strings are now wrapper objects which point to an internal (NUL-terminated!) bytevector. So not only do we have to extract this bytevector and pass it to these C functions, but we also need to calculate the string length excluding the NUL terminator. This creates a bit more branching in the code to get the thing to copy and length. The nice thing is that we no longer need to append NUL bytes to strings in a few places. Instead, we can just pass the raw bytevector to the C functions, which means this should be rather faster now. --- postgresql.scm | 136 +++++++++++++++++++++++++++++---------------------------- tests/run.scm | 32 +++++++------- 2 files changed, 85 insertions(+), 83 deletions(-) diff --git a/postgresql.scm b/postgresql.scm index d2c4a03..801df81 100644 --- a/postgresql.scm +++ b/postgresql.scm @@ -55,6 +55,7 @@ wait-for-notifications!) (import scheme + (scheme base) (chicken base) (chicken foreign) (chicken string) @@ -63,7 +64,6 @@ (chicken condition) (chicken format) (chicken gc) - (chicken blob) (chicken time) srfi-1 srfi-4 @@ -145,13 +145,7 @@ (define PQputCopyEnd (foreign-lambda int PQputCopyEnd pgconn* (const c-string))) (define PQgetCopyData (foreign-lambda int PQgetCopyData pgconn* (c-pointer (c-pointer char)) bool)) -(define memcpy (foreign-lambda c-pointer "C_memcpy" scheme-pointer c-pointer size_t)) - -(define (srfi-4-vector? x) ; Copied from ##sys#srfi-4-vector? from 4.8.3 - (and (##core#inline "C_blockp" x) - (##sys#generic-structure? x) - (memq (##sys#slot x 0) - '(u8vector u16vector s8vector s16vector u32vector s32vector f32vector f64vector)))) +(define memcpy (foreign-lambda c-pointer "C_memcpy" nonnull-scheme-pointer c-pointer size_t)) ;; TODO: Create a real callback system? (foreign-declare "static void nullNoticeReceiver(void *arg, const PGresult *res){ }") @@ -409,7 +403,7 @@ (define default-type-unparsers (make-parameter `((,string? . ,(lambda (conn s) s)) - (,u8vector? . ,(lambda (conn v) (u8vector->blob/shared v))) + (,bytevector? . ,(lambda (conn v) v)) (,char? . ,(lambda (conn c) (string c))) (,boolean? . ,bool-unparser) (,number? . ,(lambda (conn n) (number->string n))) @@ -462,7 +456,7 @@ (let ((conn-fd (pgsql-connection->fd conn)) (t ##sys#current-thread)) (when delay (##sys#thread-block-for-timeout! - t (+ (current-milliseconds) delay))) + t (+ (current-process-milliseconds) delay))) (##sys#thread-block-for-i/o! t conn-fd in/out) (##sys#thread-yield!))) @@ -504,23 +498,23 @@ (cond-expand (has-connectdb-params (let ((len (length spec))) - ((foreign-lambda* pgconn* ((scheme-object cons) (scheme-pointer keybuf) + ((foreign-lambda* pgconn* ((scheme-object lst) (scheme-pointer keybuf) (scheme-pointer valbuf) (int len)) "const char **key = (const char **)keybuf;" "const char **val = (const char **)valbuf;" "int i;" - "for (i=0; i < len; ++i,cons=C_u_i_cdr(cons)) {" - " C_word kvpair = C_u_i_car(cons);" - " key[i] = C_c_string(C_u_i_car(kvpair));" - " val[i] = C_c_string(C_u_i_cdr(kvpair));" + "for (i=0; i < len; ++i, lst=C_u_i_cdr(lst)) {" + " C_word kvpair = C_u_i_car(lst);" + " key[i] = C_c_string(C_block_item(C_u_i_car(kvpair), 0));" + " val[i] = C_c_string(C_block_item(C_u_i_cdr(kvpair), 0));" "}" "key[len] = NULL;" "val[len] = NULL;" "C_return(PQconnectStartParams(key, val, 0));") - (map (lambda (x) (cons (string-append (->string (car x)) "\x00") - (string-append (->string (cdr x)) "\x00"))) spec) - (make-blob (* (add1 len) (foreign-value "sizeof(char *)" int))) - (make-blob (* (add1 len) (foreign-value "sizeof(char *)" int))) + (map (lambda (x) (cons (->string (car x)) + (->string (cdr x)))) spec) + (make-bytevector (* (add1 len) (foreign-value "sizeof(char *)" int))) + (make-bytevector (* (add1 len) (foreign-value "sizeof(char *)" int))) len))) (else (PQconnectStart (alist->connection-spec spec)))))) @@ -670,13 +664,16 @@ (fmt (PQfformat ptr column)) (value (case fmt ((0) (make-string len)) - ((1) (make-blob len)) + ((1) (make-bytevector len)) (else (postgresql-error 'internal 'value-at (conc "Unknown column format type: " fmt) - result column row raw))))) - (memcpy value (PQgetvalue-ptr ptr row column) len) - (if (or raw (blob? value)) + result column row raw)))) + (raw-value (case fmt + ((0) (##sys#slot value 0)) + ((1) value)))) + (memcpy raw-value (PQgetvalue-ptr ptr row column) len) + (if (or raw (bytevector? value)) value ((vector-ref (pg-result-value-parsers result) column) value)))))) @@ -982,16 +979,15 @@ (let* ((params ;; Check all params and ensure they are proper pairs (map (lambda (p) (let ((obj (if raw p (scheme-value->db-value conn p)))) - (cond ((string? obj) ; Convert to ASCIIZ - (let* ((len (##sys#size obj)) - (res (string-append obj "\x00"))) - (vector 0 len res))) - ((blob? obj) - (vector 1 (##sys#size obj) obj)) + (cond ((string? obj) + ;; Extract the raw bytevector contents from the wrapped string object + (vector 0 (string-length obj) (##sys#slot obj 0))) + ((bytevector? obj) + (vector 1 (bytevector-length obj) obj)) ((sql-null? obj) #f) (else (postgresql-error 'type 'query* - (sprintf "Param value is not string, sql-null or blob: ~S" p) + (sprintf "Param value is not string, sql-null or bytevector/u8vector: ~S" p) conn query params format))))) params)) ;; It's a shame we need to traverse params twice (and again in C)... @@ -1034,9 +1030,9 @@ ((send-query (pg-connection-ptr conn) query-as-string (symbol? query) len params ;; We allocate here instead of in C to keep things simple - (make-blob (* len (foreign-value "sizeof(char *)" int))) - (make-blob (* len (foreign-value "sizeof(int)" int))) - (make-blob (* len (foreign-value "sizeof(int)" int))) + (make-bytevector (* len (foreign-value "sizeof(char *)" int))) + (make-bytevector (* len (foreign-value "sizeof(int)" int))) + (make-bytevector (* len (foreign-value "sizeof(int)" int))) (symbol->format format)) (get-last-result conn raw: raw)) (else (postgresql-error 'i/o 'query* @@ -1110,15 +1106,15 @@ ;;;; COPY support ;;;;;;;;;;;;;;;;;;;; -(define (put-copy-data conn data) - (let* ((data (cond - ((or (string? data) (blob? data)) data) - ((srfi-4-vector? data) (##sys#slot data 1)) - (else (postgresql-error - 'type 'put-copy-data - "Expected a blob, string or srfi-4 vector" conn data)))) - (len (##sys#size data)) - (conn-ptr (pg-connection-ptr conn))) +(define (put-copy-data conn in-data) + (let* ((conn-ptr (pg-connection-ptr conn)) + (data (cond ((string? in-data) (##sys#slot in-data 0)) + ((bytevector? in-data) in-data) + (else (postgresql-error + 'type 'put-copy-data + "Expected a bytevector/u8vector or string" conn in-data)))) + (len (cond ((string? in-data) (string-length in-data)) + ((bytevector? in-data) (bytevector-length in-data))))) (let loop ((res (PQputCopyData conn-ptr data len))) (cond ((= res 0) @@ -1158,14 +1154,17 @@ (let ((res (PQgetCopyData conn-ptr (location buf) #t))) (cond ((> res 0) - (let ((value (case format - ((text) (make-string res)) - ((binary) (make-blob res)) - (else (postgresql-error - 'internal 'get-copy-data - (conc "Unknown column format type: " format) - conn))))) - (memcpy value buf res) + (let* ((value (case format + ((text) (make-string res)) + ((binary) (make-bytevector res)) + (else (postgresql-error + 'internal 'get-copy-data + (conc "Unknown column format type: " format) + conn)))) + (raw-value (case format + ((text) (##sys#slot value 0)) + ((binary) value)))) + (memcpy raw-value buf res) (free buf) value)) ((= res 0) @@ -1195,7 +1194,7 @@ (buflen (add1 (* strlen 2))) (buffer (make-string buflen)) (conn-ptr (pg-connection-ptr conn)) - (size (%escape-string-conn conn-ptr buffer str strlen (location err)))) + (size (%escape-string-conn conn-ptr (##sys#slot buffer 0) (##sys#slot str 0) strlen (location err)))) (cond (err (postgresql-error 'internal 'escape-string (conc "String escaping failed. " (PQerrorMessage conn-ptr)) conn str)) @@ -1207,11 +1206,12 @@ (has-escape-identifier (define %escape-ident (foreign-lambda c-string* PQescapeIdentifier pgconn* scheme-pointer size_t)) - (or (%escape-ident (pg-connection-ptr conn) str (string-length str)) - (postgresql-error 'internal 'quote-identifier - (conc "Identifier escaping failed: " - (PQerrorMessage (pg-connection-ptr conn))) - conn str))) + (let ((len (string-length str))) + (or (%escape-ident (pg-connection-ptr conn) (##sys#slot str 0) len) + (postgresql-error 'internal 'quote-identifier + (conc "Identifier escaping failed: " + (PQerrorMessage (pg-connection-ptr conn))) + conn str)))) (else (postgresql-error 'unsupported-version 'quote-identifier (conc "Please upgrade your PostgreSQL to 9.0 or later " "in order to be able to use quote-identifier!") @@ -1222,18 +1222,20 @@ (foreign-lambda (c-pointer unsigned-char) PQescapeByteaConn pgconn* scheme-pointer size_t (c-pointer size_t))) (let-location ((allocated size_t)) - (let* ((data (cond ((or (string? obj) (blob? obj)) obj) - ((srfi-4-vector? obj) (##sys#slot obj 1)) + (let* ((conn-ptr (pg-connection-ptr conn)) + (data (cond ((string? obj) (##sys#slot obj 0)) + ((bytevector? obj) obj) (else (postgresql-error 'type 'escape-bytea - "Expected string, blob or srfi-4 vector" obj)))) - (conn-ptr (pg-connection-ptr conn)) + "Expected string or bytevector/u8vector" obj)))) + (len (cond ((string? obj) (string-length obj)) + ((bytevector? obj) (bytevector-length obj)))) (buf (%escape-bytea-conn - conn-ptr data (##sys#size data) (location allocated)))) + conn-ptr data len (location allocated)))) (if buf (let* ((string-length (sub1 allocated)) (result-string (make-string string-length))) - (memcpy result-string buf string-length) + (memcpy (##sys#slot result-string 0) buf string-length) (free buf) result-string) (postgresql-error @@ -1244,13 +1246,13 @@ (define (unescape-bytea str) (define %unescape-bytea (foreign-lambda (c-pointer unsigned-char) PQunescapeBytea c-string (c-pointer size_t))) - (let-location ((blob-length size_t)) - (let ((buf (%unescape-bytea str (location blob-length)))) + (let-location ((bv-length size_t)) + (let ((buf (%unescape-bytea str (location bv-length)))) (if buf - (let ((result-blob (make-blob blob-length))) - (memcpy result-blob buf blob-length) + (let ((result-bv (make-bytevector bv-length))) + (memcpy result-bv buf bv-length) (free buf) - (blob->u8vector/shared result-blob)) + result-bv) (postgresql-error 'internal 'unescape-bytea "Byte array unescaping failed (out of memory?)" str))))) diff --git a/tests/run.scm b/tests/run.scm index ff4c0d2..df84a03 100644 --- a/tests/run.scm +++ b/tests/run.scm @@ -1,4 +1,4 @@ -(import (chicken blob) +(import (scheme base) (chicken condition) (chicken string) test @@ -23,7 +23,7 @@ (test-error* (sprintf "~S" '?expr) ?error-type ?expr)))) ;; Perform a quick sanity check before running the actual tests -(condition-case (disconnect (connect '((dbname . test)))) +(condition-case (disconnect (connect '((dbname . test)) '() '())) (exn (exn postgresql connect) (print "Could not connect to server. Please ensure that the UNIX " "user running these tests is allowed to connect to a database " @@ -182,7 +182,7 @@ ;; We are using two levels of escaping here because the ::bytea cast ;; performs another string interpretation. Yes, this is kinda confusing... (test "Result value for null-terminated byte array" - (blob->u8vector (string->blob "h\x00ello")) + #u8("h" 0 "ello") (value-at (query conn "SELECT E'h\\\\000ello'::bytea"))) (test "Result value for raw null-terminated byte array" @@ -191,16 +191,16 @@ "h\\000ello") (value-at (query conn "SELECT E'h\\\\000ello'::bytea") 0 0 raw: #t)) - (test "Result value blob for binary string" - (string->blob "hello") + (test "Result value bytevector/u8vector for binary string" + #u8("hello") (value-at (query* conn "SELECT 'hello'" '() format: 'binary))) - (test "Result value blob for binary integer" - (u8vector->blob (u8vector 0 0 0 1)) + (test "Result value bytevector/u8vector for binary integer" + (u8vector 0 0 0 1) (value-at (query* conn "SELECT 1::int4" '() format: 'binary))) (test "Result value for binary string with NUL bytes" - (string->blob "h\x00ello") + #u8("h" 0 "ello") (value-at (query* conn "SELECT E'h\\\\000ello'::bytea" '() format: 'binary))) (test "Result value for array of integers" @@ -298,9 +298,9 @@ (test-assert "NULL parameters" (sql-null? (value-at (query conn "SELECT $1::text" (sql-null)) 0 0))) - (test "blob parameters" + (test "bytevector/u8vector parameters" "hi" - (value-at (query conn "SELECT $1::text" (string->blob "hi")) 0 0)) + (value-at (query conn "SELECT $1::text" (string->utf8 "hi")) 0 0)) (test "boolean parameters" '(#t #f) (row-values (query conn "SELECT $1::bool, $2::bool" #t #f))) @@ -385,9 +385,9 @@ (if (>= major-version 9) "\\\\x576800617427730a75703f" "Wh\\\\000at''s\\\\012up?") - (escape-bytea conn (blob->u8vector (string->blob "Wh\x00at's\nup?")))) - (test "Bytea is unescaped correctly (blob)" - (blob->u8vector (string->blob "What's\nup? ")) + (escape-bytea conn #u8("Wh" 0 "at's\nup?"))) + (test "Bytea is unescaped correctly (bytevector)" + (string->utf8 "What's\nup? ") ;; The extra quote is dropped here because it wouldn't be returned ;; by pgsql either. (unescape-bytea "What's\\012up?\\ ")) @@ -405,9 +405,9 @@ (if (>= major-version 9) "\\x576800617427730a75703f" "Wh\\000at''s\\012up?") - (escape-bytea conn "Wh\x00at's\nup?")) + (escape-bytea conn "Wh\x00;at's\nup?")) (test "Bytea is unescaped correctly" - (blob->u8vector (string->blob "What's\nup? ")) + (string->utf8 "What's\nup? ") ;; The extra quote is dropped here because it wouldn't be returned ;; by pgsql either. (unescape-bytea "What's\\012up?\\ ")) @@ -645,7 +645,7 @@ #f (bool-parser "f")) (test "Byte array parsed correctly" - (blob->u8vector/shared (string->blob "abc\x01\x02\xffdef")) + #u8("abc" #x01 #x02 #xff "def") (bytea-parser "abc\\001\\002\\377def")) (test "Char parser" #\x -- cgit v1.2.3