From c464a04a8c43e51dbf56e8c4f0be7eef93ed47e9 Mon Sep 17 00:00:00 2001 From: Peter Bex Date: Sat, 4 May 2013 15:52:35 +0200 Subject: Initial version of BPF assembler/disassembler with a handful of basic internal consistency tests --- bpf-assembler.scm | 163 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ tests/run.scm | 92 ++++++++++++++++++++++++++++++ 2 files changed, 255 insertions(+) create mode 100644 bpf-assembler.scm create mode 100644 tests/run.scm diff --git a/bpf-assembler.scm b/bpf-assembler.scm new file mode 100644 index 0000000..4cecd88 --- /dev/null +++ b/bpf-assembler.scm @@ -0,0 +1,163 @@ +(module bpf-assembler ; Maybe I should call this "bassie" ;) + (exprs->bpf-bytecode bpf-bytecode->exprs) + +(import chicken scheme) +(use srfi-1 bitstring matchable) + +(define (uint? x) (and (integer? x) (positive? x))) +;; Is a jump of zero allowed? It shouldn't be! +(define (label? x) (and (integer? x) (< 0 x 256))) + +;; Basic idea stolen from sixtyfive-oh-two. Thanks, Felix! ;) +(begin-for-syntax + (import chicken) + (use srfi-1 matchable) + + (define addrmodes '()) + (define opcodes '()) + + ;; This one is hairy and only semi-hygienic (or some such) due to the + ;; construction of "quoted code", which is then picked apart by the ir macros + (define-syntax defaddrmode + (syntax-rules (=>) + ((_ ?id ?code (?match-expr => ?k-expr) (?k-bit => ?disasm)) + (defaddrmode ?id ?code + (?match-expr => 0 0 ?k-expr) + (0 0 ?k-bit => ?disasm))) + ((_ ?id ?code + (?match-expr => ?jt ?jf ?k-expr) + (?jt-bit ?jf-bit ?k-bit => ?disasm)) + (set! addrmodes + (cons (cons '?id (list ?code + (list '?match-expr '?jt '?jf '?k-expr) + (list '((?jt-bit 8) (?jf-bit 8) (?k-bit 32 host)) + '?disasm))) + addrmodes))))) + + ;; Ordered by complexity + (defaddrmode no-operands #x80 (() => 0) (_ => ())) + (defaddrmode packet-length #x80 (('len) => 0) (_ => (len))) + (defaddrmode index-register #x08 (('x) => 0) (_ => (x))) + (defaddrmode accumulator-register #x10 (('a) => 0) (_ => (a))) + (defaddrmode immediate #x00 ; Also used for JA aka JMP w/ label + (((and k (? uint?))) => k) + (k => (,k))) + (defaddrmode memory-ref #x60 + ((('mem (and k (? uint?)))) => k) + (k => ((mem ,k)))) + (defaddrmode packet-ref #x20 + ((('pkt (and k (? uint?)))) => k) + (k => ((pkt ,k)))) + (defaddrmode packet-ref/index-register #x40 + ((('pkt 'x (and k (? uint?)))) => k) + (k => ((pkt x ,k)))) + (defaddrmode packet-ref/hack #xa0 ; BPF_MSH; an "efficiency hack", says bpf(4) + ((('pkt '4* (and k (? uint?)))) => k) + (k => ((pkt 4* ,k)))) + (defaddrmode conditional-jump-immediate #x00 + (((and k (? uint?)) (and jt (? label?)) (and jf (? label?))) => jt jf k) + (jt jf k => (,k ,jt ,jf))) + (defaddrmode conditional-jump-index-register #x08 + (('x (and jt (? label?)) (and jf (? label?))) => jt jf 0) ; zero? + (jt jf _ => (x ,jt ,jf))) + + (define-syntax defop + (syntax-rules () + ((_ ?mnemonic ?opcode ?addrmode0 ...) + (set! opcodes (cons (cons '?mnemonic (list ?opcode '?addrmode0 ...)) + opcodes))))) + + ;; Define mnemonic, basic opcode and addressing modes for each instruction. + ;; The addressing mode is ORed into the basic opcode to get a full opcode. + (defop ld #x00 + packet-length immediate memory-ref packet-ref packet-ref/index-register) + (defop ldh #x08 packet-ref packet-ref/index-register) + (defop ldb #x10 packet-ref packet-ref/index-register) + (defop ldx #x01 packet-length immediate memory-ref packet-ref/hack) + (defop st #x02 memory-ref) + (defop stx #x03 memory-ref) + (defop jmp #x05 immediate) ; aka JA + (defop jeq #x10 conditional-jump-immediate conditional-jump-index-register) + (defop jgt #x20 conditional-jump-immediate conditional-jump-index-register) + (defop jge #x30 conditional-jump-immediate conditional-jump-index-register) + (defop jset #x40 conditional-jump-immediate conditional-jump-index-register) + (defop add #x04 index-register immediate) + (defop sub #x14 index-register immediate) + (defop mul #x24 index-register immediate) + (defop div #x34 index-register immediate) + (defop or #x44 index-register immediate) + (defop and #x54 index-register immediate) + (defop lsh #x64 index-register immediate) + (defop rsh #x74 index-register immediate) + (defop neg #x84 no-operands) + ;; index-register is acceptable, according to comment in bpf.h + (defop ret #x06 accumulator-register immediate index-register) + (defop tax #x07 no-operands) + (defop txa #x87 no-operands)) + +;;;;;;;;;;;;;;;; +;;; Assembly ;;; +;;;;;;;;;;;;;;;; + +(define-syntax assemble-instruction + (ir-macro-transformer + (lambda (e i c) + `(let ((insn ,(cadr e))) + (match insn + ((opcode args ...) + (case opcode + ,@(map (match-lambda + ((mnemonic opcode modes ...) + `((,mnemonic) + (match args + ,@(map (lambda (mode) + (match-let (((addrcode (match-expr jt jf k-expr) _) + (alist-ref mode addrmodes))) + `(,match-expr + (bitconstruct + (,(+ opcode addrcode) 16 host) + (,jt) (,jf) (,k-expr 32 host))))) + modes) + (else (error "Unknown addressing mode" insn)))))) + (reverse opcodes)) + (else (error "Unknown BPF opcode in expression" insn)))) + (else (error "Invalid BPF instruction" insn))))))) + +(define (exprs->bpf-bytecode exprs) + (if (null? exprs) + (bitconstruct) + (bitstring-append + (assemble-instruction (car exprs)) + (exprs->bpf-bytecode (cdr exprs))))) + +;;;;;;;;;;;;;;;;;;; +;;; Disassembly ;;; +;;;;;;;;;;;;;;;;;;; + +(define-syntax disassemble-instruction + (ir-macro-transformer + (lambda (e i c) + `(let ((insn ,(cadr e))) + (bitmatch + insn + ,@(append-map + (match-lambda + ((mnemonic opcode modes ...) + (map (lambda (mode) + (match-let (((addrcode _ (bitmatch-expr disasm-expr)) + (alist-ref mode addrmodes))) + `(((,(+ opcode addrcode) 16 host) . ,bitmatch-expr) + ,(list 'quasiquote (cons mnemonic disasm-expr))))) + modes))) + (reverse opcodes)) + (else "Unrecognised instruction" insn)))))) + +(define (bpf-bytecode->exprs bytecode) + (let lp ((bytecode bytecode) + (exprs '())) + (bitmatch bytecode + (((insn 64 bitstring) (rest bitstring)) + (lp rest (cons (disassemble-instruction insn) exprs))) + (() (reverse exprs))))) + +) diff --git a/tests/run.scm b/tests/run.scm new file mode 100644 index 0000000..fc7c3d6 --- /dev/null +++ b/tests/run.scm @@ -0,0 +1,92 @@ +(use test matchable) + +(include "../bpf-assembler.scm") + +(import bpf-assembler) + +(test-begin "BPF assembler") + +(test-group "single instruction assembly/disassembly invariance" + (for-each (lambda (expr) + (test (->string expr) + expr + (car (bpf-bytecode->exprs + (exprs->bpf-bytecode (list expr)))))) + `(;; load word into accumulator register + (ld len) (ld 1) (ld (mem 1)) (ld (pkt 1)) (ld (pkt x 1)) + ;; load byte/halfword into accumulator register + (ldb (pkt 1)) (ldb (pkt x 1)) (ldh (pkt 1)) (ldh (pkt x 1)) + ;; load index register + (ldx len) (ldx 1) (ldx (mem 1)) (ldx (pkt 4* 1)) + ;; store accumulator/index register + (st (mem 1)) (stx (mem 1)) + ;; jump unconditionally (sometimes called JA instead of JMP) + (jmp 1) + ;; Jump conditionally on comparison of acc w/ immediate or idx + (jeq 1 2 3) (jeq x 2 3) (jgt 1 2 3) (jgt x 2 3) + (jge 1 2 3) (jge x 2 3) + ;; Jump if bit # from immediate/index register is set in acc + (jset 1 2 3) (jset x 2 3) + ;; ALU instructions: operates on acc w/ immediate or index + (add 1) (add x) (sub 1) (sub x) (mul 1) (mul x) (div 1) (div x) + (or 1) (or x) (and 1) (and x) (lsh 1) (lsh x) (rsh 1) (rsh x) + ;; Negate accumulator (no operands) + (neg) + ;; Return # of bytes in acc or idx registers or immediate + (ret 1) (ret a) (ret x) + ;; Transfer value between accumulator and index + (tax) (txa)))) + +(test-group "Invalid opcode/addressing mode combinations" + (for-each (match-lambda + ((subgroup-name exprs ...) + (test-group subgroup-name + (for-each (lambda (expr) + (test-error + (->string expr) + (car (bpf-bytecode->exprs + (exprs->bpf-bytecode (list expr)))))) + exprs)))) + `(("load of word into accumulator from invalid types" + (ld x) (ld a) (ld (pkt 4* 1))) + ("load of halfword into accumulator from invalid types" + (ldh len) (ldh x) (ldh a) (ldh (pkt 4* 1))) + ("load of byte into accumulator from invalid types" + (ldb len) (ldb x) (ldb a) (ldb (pkt 4* 1))) + ("load of word into index from invalid types" + (ldx (pkt 1)) (ldx a) (ldx x) (ldx (pkt 1)) (ldx (pkt x 1))) + + ("store of accumulator into memory must be immediate memory index" + (st 1) (st len) (st a) (st x) (st (mem x 1)) + (st (pkt 1)) (st (pkt 4* 1))) + ("store of index into memory must be immediate memory index" + (stx 1) (stx len) (stx a) (stx x) (stx (mem x 1)) + (stx (pkt 1)) (stx (pkt 4* 1))) + + ("bogus jump types" + (jmp x) (jmp a) (jmp (mem 1)) (jmp len)) + + ("unconditional jumps can't work conditionally" + (jmp 1 2 3) (jmp x 2 3)) + + ("bogus unconditional jumps" + (jmp (mem 1) 2 3) (jmp len 2 3) (jmp a 2 3)) + + ("conditional jumps can't work unconditionally" + (jeq 1) (jeq x) (jgt 1) (jgt x) (jge 1) (jge x) (jset 1) (jset x)) + + ;; Only test add, the rest are defined identically + ("diadic ALU instructions with disallowed operands" + (add a) (add (mem 1)) (add (pkt 1)) (add (pkt x 1)) (add (pkt 4* 1))) + + ("monadic instructions with operands" + (neg 1) (neg x) (neg a) (neg (mem 1)) + ;; Only do txa, tax is defined identically + (txa 1) (txa x) (txa a) (txa (mem 1))) + + ("returning non-immediate, non-register values" + (ret (mem 1)) (ret (pkt 1)) (ret (pkt x 1)) (ret (pkt 4* 1)))))) + +(test-end "BPF assembler") + +(test-exit) -- cgit v1.2.3