summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Bex <peter@more-magic.net>2013-05-04 15:52:35 +0200
committerPeter Bex <peter@more-magic.net>2013-05-04 15:52:35 +0200
commitc464a04a8c43e51dbf56e8c4f0be7eef93ed47e9 (patch)
treef699e5a6480b98e60317f8136b91695ce8442c87
downloadbpf-c464a04a8c43e51dbf56e8c4f0be7eef93ed47e9.tar.gz
Initial version of BPF assembler/disassembler with a handful of basic internal consistency tests
-rw-r--r--bpf-assembler.scm163
-rw-r--r--tests/run.scm92
2 files changed, 255 insertions, 0 deletions
diff --git a/bpf-assembler.scm b/bpf-assembler.scm
new file mode 100644
index 0000000..4cecd88
--- /dev/null
+++ b/bpf-assembler.scm
@@ -0,0 +1,163 @@
+(module bpf-assembler ; Maybe I should call this "bassie" ;)
+ (exprs->bpf-bytecode bpf-bytecode->exprs)
+
+(import chicken scheme)
+(use srfi-1 bitstring matchable)
+
+(define (uint? x) (and (integer? x) (positive? x)))
+;; Is a jump of zero allowed? It shouldn't be!
+(define (label? x) (and (integer? x) (< 0 x 256)))
+
+;; Basic idea stolen from sixtyfive-oh-two. Thanks, Felix! ;)
+(begin-for-syntax
+ (import chicken)
+ (use srfi-1 matchable)
+
+ (define addrmodes '())
+ (define opcodes '())
+
+ ;; This one is hairy and only semi-hygienic (or some such) due to the
+ ;; construction of "quoted code", which is then picked apart by the ir macros
+ (define-syntax defaddrmode
+ (syntax-rules (=>)
+ ((_ ?id ?code (?match-expr => ?k-expr) (?k-bit => ?disasm))
+ (defaddrmode ?id ?code
+ (?match-expr => 0 0 ?k-expr)
+ (0 0 ?k-bit => ?disasm)))
+ ((_ ?id ?code
+ (?match-expr => ?jt ?jf ?k-expr)
+ (?jt-bit ?jf-bit ?k-bit => ?disasm))
+ (set! addrmodes
+ (cons (cons '?id (list ?code
+ (list '?match-expr '?jt '?jf '?k-expr)
+ (list '((?jt-bit 8) (?jf-bit 8) (?k-bit 32 host))
+ '?disasm)))
+ addrmodes)))))
+
+ ;; Ordered by complexity
+ (defaddrmode no-operands #x80 (() => 0) (_ => ()))
+ (defaddrmode packet-length #x80 (('len) => 0) (_ => (len)))
+ (defaddrmode index-register #x08 (('x) => 0) (_ => (x)))
+ (defaddrmode accumulator-register #x10 (('a) => 0) (_ => (a)))
+ (defaddrmode immediate #x00 ; Also used for JA aka JMP w/ label
+ (((and k (? uint?))) => k)
+ (k => (,k)))
+ (defaddrmode memory-ref #x60
+ ((('mem (and k (? uint?)))) => k)
+ (k => ((mem ,k))))
+ (defaddrmode packet-ref #x20
+ ((('pkt (and k (? uint?)))) => k)
+ (k => ((pkt ,k))))
+ (defaddrmode packet-ref/index-register #x40
+ ((('pkt 'x (and k (? uint?)))) => k)
+ (k => ((pkt x ,k))))
+ (defaddrmode packet-ref/hack #xa0 ; BPF_MSH; an "efficiency hack", says bpf(4)
+ ((('pkt '4* (and k (? uint?)))) => k)
+ (k => ((pkt 4* ,k))))
+ (defaddrmode conditional-jump-immediate #x00
+ (((and k (? uint?)) (and jt (? label?)) (and jf (? label?))) => jt jf k)
+ (jt jf k => (,k ,jt ,jf)))
+ (defaddrmode conditional-jump-index-register #x08
+ (('x (and jt (? label?)) (and jf (? label?))) => jt jf 0) ; zero?
+ (jt jf _ => (x ,jt ,jf)))
+
+ (define-syntax defop
+ (syntax-rules ()
+ ((_ ?mnemonic ?opcode ?addrmode0 ...)
+ (set! opcodes (cons (cons '?mnemonic (list ?opcode '?addrmode0 ...))
+ opcodes)))))
+
+ ;; Define mnemonic, basic opcode and addressing modes for each instruction.
+ ;; The addressing mode is ORed into the basic opcode to get a full opcode.
+ (defop ld #x00
+ packet-length immediate memory-ref packet-ref packet-ref/index-register)
+ (defop ldh #x08 packet-ref packet-ref/index-register)
+ (defop ldb #x10 packet-ref packet-ref/index-register)
+ (defop ldx #x01 packet-length immediate memory-ref packet-ref/hack)
+ (defop st #x02 memory-ref)
+ (defop stx #x03 memory-ref)
+ (defop jmp #x05 immediate) ; aka JA
+ (defop jeq #x10 conditional-jump-immediate conditional-jump-index-register)
+ (defop jgt #x20 conditional-jump-immediate conditional-jump-index-register)
+ (defop jge #x30 conditional-jump-immediate conditional-jump-index-register)
+ (defop jset #x40 conditional-jump-immediate conditional-jump-index-register)
+ (defop add #x04 index-register immediate)
+ (defop sub #x14 index-register immediate)
+ (defop mul #x24 index-register immediate)
+ (defop div #x34 index-register immediate)
+ (defop or #x44 index-register immediate)
+ (defop and #x54 index-register immediate)
+ (defop lsh #x64 index-register immediate)
+ (defop rsh #x74 index-register immediate)
+ (defop neg #x84 no-operands)
+ ;; index-register is acceptable, according to comment in bpf.h
+ (defop ret #x06 accumulator-register immediate index-register)
+ (defop tax #x07 no-operands)
+ (defop txa #x87 no-operands))
+
+;;;;;;;;;;;;;;;;
+;;; Assembly ;;;
+;;;;;;;;;;;;;;;;
+
+(define-syntax assemble-instruction
+ (ir-macro-transformer
+ (lambda (e i c)
+ `(let ((insn ,(cadr e)))
+ (match insn
+ ((opcode args ...)
+ (case opcode
+ ,@(map (match-lambda
+ ((mnemonic opcode modes ...)
+ `((,mnemonic)
+ (match args
+ ,@(map (lambda (mode)
+ (match-let (((addrcode (match-expr jt jf k-expr) _)
+ (alist-ref mode addrmodes)))
+ `(,match-expr
+ (bitconstruct
+ (,(+ opcode addrcode) 16 host)
+ (,jt) (,jf) (,k-expr 32 host)))))
+ modes)
+ (else (error "Unknown addressing mode" insn))))))
+ (reverse opcodes))
+ (else (error "Unknown BPF opcode in expression" insn))))
+ (else (error "Invalid BPF instruction" insn)))))))
+
+(define (exprs->bpf-bytecode exprs)
+ (if (null? exprs)
+ (bitconstruct)
+ (bitstring-append
+ (assemble-instruction (car exprs))
+ (exprs->bpf-bytecode (cdr exprs)))))
+
+;;;;;;;;;;;;;;;;;;;
+;;; Disassembly ;;;
+;;;;;;;;;;;;;;;;;;;
+
+(define-syntax disassemble-instruction
+ (ir-macro-transformer
+ (lambda (e i c)
+ `(let ((insn ,(cadr e)))
+ (bitmatch
+ insn
+ ,@(append-map
+ (match-lambda
+ ((mnemonic opcode modes ...)
+ (map (lambda (mode)
+ (match-let (((addrcode _ (bitmatch-expr disasm-expr))
+ (alist-ref mode addrmodes)))
+ `(((,(+ opcode addrcode) 16 host) . ,bitmatch-expr)
+ ,(list 'quasiquote (cons mnemonic disasm-expr)))))
+ modes)))
+ (reverse opcodes))
+ (else "Unrecognised instruction" insn))))))
+
+(define (bpf-bytecode->exprs bytecode)
+ (let lp ((bytecode bytecode)
+ (exprs '()))
+ (bitmatch bytecode
+ (((insn 64 bitstring) (rest bitstring))
+ (lp rest (cons (disassemble-instruction insn) exprs)))
+ (() (reverse exprs)))))
+
+)
diff --git a/tests/run.scm b/tests/run.scm
new file mode 100644
index 0000000..fc7c3d6
--- /dev/null
+++ b/tests/run.scm
@@ -0,0 +1,92 @@
+(use test matchable)
+
+(include "../bpf-assembler.scm")
+
+(import bpf-assembler)
+
+(test-begin "BPF assembler")
+
+(test-group "single instruction assembly/disassembly invariance"
+ (for-each (lambda (expr)
+ (test (->string expr)
+ expr
+ (car (bpf-bytecode->exprs
+ (exprs->bpf-bytecode (list expr))))))
+ `(;; load word into accumulator register
+ (ld len) (ld 1) (ld (mem 1)) (ld (pkt 1)) (ld (pkt x 1))
+ ;; load byte/halfword into accumulator register
+ (ldb (pkt 1)) (ldb (pkt x 1)) (ldh (pkt 1)) (ldh (pkt x 1))
+ ;; load index register
+ (ldx len) (ldx 1) (ldx (mem 1)) (ldx (pkt 4* 1))
+ ;; store accumulator/index register
+ (st (mem 1)) (stx (mem 1))
+ ;; jump unconditionally (sometimes called JA instead of JMP)
+ (jmp 1)
+ ;; Jump conditionally on comparison of acc w/ immediate or idx
+ (jeq 1 2 3) (jeq x 2 3) (jgt 1 2 3) (jgt x 2 3)
+ (jge 1 2 3) (jge x 2 3)
+ ;; Jump if bit # from immediate/index register is set in acc
+ (jset 1 2 3) (jset x 2 3)
+ ;; ALU instructions: operates on acc w/ immediate or index
+ (add 1) (add x) (sub 1) (sub x) (mul 1) (mul x) (div 1) (div x)
+ (or 1) (or x) (and 1) (and x) (lsh 1) (lsh x) (rsh 1) (rsh x)
+ ;; Negate accumulator (no operands)
+ (neg)
+ ;; Return # of bytes in acc or idx registers or immediate
+ (ret 1) (ret a) (ret x)
+ ;; Transfer value between accumulator and index
+ (tax) (txa))))
+
+(test-group "Invalid opcode/addressing mode combinations"
+ (for-each (match-lambda
+ ((subgroup-name exprs ...)
+ (test-group subgroup-name
+ (for-each (lambda (expr)
+ (test-error
+ (->string expr)
+ (car (bpf-bytecode->exprs
+ (exprs->bpf-bytecode (list expr))))))
+ exprs))))
+ `(("load of word into accumulator from invalid types"
+ (ld x) (ld a) (ld (pkt 4* 1)))
+ ("load of halfword into accumulator from invalid types"
+ (ldh len) (ldh x) (ldh a) (ldh (pkt 4* 1)))
+ ("load of byte into accumulator from invalid types"
+ (ldb len) (ldb x) (ldb a) (ldb (pkt 4* 1)))
+ ("load of word into index from invalid types"
+ (ldx (pkt 1)) (ldx a) (ldx x) (ldx (pkt 1)) (ldx (pkt x 1)))
+
+ ("store of accumulator into memory must be immediate memory index"
+ (st 1) (st len) (st a) (st x) (st (mem x 1))
+ (st (pkt 1)) (st (pkt 4* 1)))
+ ("store of index into memory must be immediate memory index"
+ (stx 1) (stx len) (stx a) (stx x) (stx (mem x 1))
+ (stx (pkt 1)) (stx (pkt 4* 1)))
+
+ ("bogus jump types"
+ (jmp x) (jmp a) (jmp (mem 1)) (jmp len))
+
+ ("unconditional jumps can't work conditionally"
+ (jmp 1 2 3) (jmp x 2 3))
+
+ ("bogus unconditional jumps"
+ (jmp (mem 1) 2 3) (jmp len 2 3) (jmp a 2 3))
+
+ ("conditional jumps can't work unconditionally"
+ (jeq 1) (jeq x) (jgt 1) (jgt x) (jge 1) (jge x) (jset 1) (jset x))
+
+ ;; Only test add, the rest are defined identically
+ ("diadic ALU instructions with disallowed operands"
+ (add a) (add (mem 1)) (add (pkt 1)) (add (pkt x 1)) (add (pkt 4* 1)))
+
+ ("monadic instructions with operands"
+ (neg 1) (neg x) (neg a) (neg (mem 1))
+ ;; Only do txa, tax is defined identically
+ (txa 1) (txa x) (txa a) (txa (mem 1)))
+
+ ("returning non-immediate, non-register values"
+ (ret (mem 1)) (ret (pkt 1)) (ret (pkt x 1)) (ret (pkt 4* 1))))))
+
+(test-end "BPF assembler")
+
+(test-exit)