From c464a04a8c43e51dbf56e8c4f0be7eef93ed47e9 Mon Sep 17 00:00:00 2001 From: Peter Bex Date: Sat, 4 May 2013 15:52:35 +0200 Subject: Initial version of BPF assembler/disassembler with a handful of basic internal consistency tests --- bpf-assembler.scm | 163 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 163 insertions(+) create mode 100644 bpf-assembler.scm (limited to 'bpf-assembler.scm') diff --git a/bpf-assembler.scm b/bpf-assembler.scm new file mode 100644 index 0000000..4cecd88 --- /dev/null +++ b/bpf-assembler.scm @@ -0,0 +1,163 @@ +(module bpf-assembler ; Maybe I should call this "bassie" ;) + (exprs->bpf-bytecode bpf-bytecode->exprs) + +(import chicken scheme) +(use srfi-1 bitstring matchable) + +(define (uint? x) (and (integer? x) (positive? x))) +;; Is a jump of zero allowed? It shouldn't be! +(define (label? x) (and (integer? x) (< 0 x 256))) + +;; Basic idea stolen from sixtyfive-oh-two. Thanks, Felix! ;) +(begin-for-syntax + (import chicken) + (use srfi-1 matchable) + + (define addrmodes '()) + (define opcodes '()) + + ;; This one is hairy and only semi-hygienic (or some such) due to the + ;; construction of "quoted code", which is then picked apart by the ir macros + (define-syntax defaddrmode + (syntax-rules (=>) + ((_ ?id ?code (?match-expr => ?k-expr) (?k-bit => ?disasm)) + (defaddrmode ?id ?code + (?match-expr => 0 0 ?k-expr) + (0 0 ?k-bit => ?disasm))) + ((_ ?id ?code + (?match-expr => ?jt ?jf ?k-expr) + (?jt-bit ?jf-bit ?k-bit => ?disasm)) + (set! addrmodes + (cons (cons '?id (list ?code + (list '?match-expr '?jt '?jf '?k-expr) + (list '((?jt-bit 8) (?jf-bit 8) (?k-bit 32 host)) + '?disasm))) + addrmodes))))) + + ;; Ordered by complexity + (defaddrmode no-operands #x80 (() => 0) (_ => ())) + (defaddrmode packet-length #x80 (('len) => 0) (_ => (len))) + (defaddrmode index-register #x08 (('x) => 0) (_ => (x))) + (defaddrmode accumulator-register #x10 (('a) => 0) (_ => (a))) + (defaddrmode immediate #x00 ; Also used for JA aka JMP w/ label + (((and k (? uint?))) => k) + (k => (,k))) + (defaddrmode memory-ref #x60 + ((('mem (and k (? uint?)))) => k) + (k => ((mem ,k)))) + (defaddrmode packet-ref #x20 + ((('pkt (and k (? uint?)))) => k) + (k => ((pkt ,k)))) + (defaddrmode packet-ref/index-register #x40 + ((('pkt 'x (and k (? uint?)))) => k) + (k => ((pkt x ,k)))) + (defaddrmode packet-ref/hack #xa0 ; BPF_MSH; an "efficiency hack", says bpf(4) + ((('pkt '4* (and k (? uint?)))) => k) + (k => ((pkt 4* ,k)))) + (defaddrmode conditional-jump-immediate #x00 + (((and k (? uint?)) (and jt (? label?)) (and jf (? label?))) => jt jf k) + (jt jf k => (,k ,jt ,jf))) + (defaddrmode conditional-jump-index-register #x08 + (('x (and jt (? label?)) (and jf (? label?))) => jt jf 0) ; zero? + (jt jf _ => (x ,jt ,jf))) + + (define-syntax defop + (syntax-rules () + ((_ ?mnemonic ?opcode ?addrmode0 ...) + (set! opcodes (cons (cons '?mnemonic (list ?opcode '?addrmode0 ...)) + opcodes))))) + + ;; Define mnemonic, basic opcode and addressing modes for each instruction. + ;; The addressing mode is ORed into the basic opcode to get a full opcode. + (defop ld #x00 + packet-length immediate memory-ref packet-ref packet-ref/index-register) + (defop ldh #x08 packet-ref packet-ref/index-register) + (defop ldb #x10 packet-ref packet-ref/index-register) + (defop ldx #x01 packet-length immediate memory-ref packet-ref/hack) + (defop st #x02 memory-ref) + (defop stx #x03 memory-ref) + (defop jmp #x05 immediate) ; aka JA + (defop jeq #x10 conditional-jump-immediate conditional-jump-index-register) + (defop jgt #x20 conditional-jump-immediate conditional-jump-index-register) + (defop jge #x30 conditional-jump-immediate conditional-jump-index-register) + (defop jset #x40 conditional-jump-immediate conditional-jump-index-register) + (defop add #x04 index-register immediate) + (defop sub #x14 index-register immediate) + (defop mul #x24 index-register immediate) + (defop div #x34 index-register immediate) + (defop or #x44 index-register immediate) + (defop and #x54 index-register immediate) + (defop lsh #x64 index-register immediate) + (defop rsh #x74 index-register immediate) + (defop neg #x84 no-operands) + ;; index-register is acceptable, according to comment in bpf.h + (defop ret #x06 accumulator-register immediate index-register) + (defop tax #x07 no-operands) + (defop txa #x87 no-operands)) + +;;;;;;;;;;;;;;;; +;;; Assembly ;;; +;;;;;;;;;;;;;;;; + +(define-syntax assemble-instruction + (ir-macro-transformer + (lambda (e i c) + `(let ((insn ,(cadr e))) + (match insn + ((opcode args ...) + (case opcode + ,@(map (match-lambda + ((mnemonic opcode modes ...) + `((,mnemonic) + (match args + ,@(map (lambda (mode) + (match-let (((addrcode (match-expr jt jf k-expr) _) + (alist-ref mode addrmodes))) + `(,match-expr + (bitconstruct + (,(+ opcode addrcode) 16 host) + (,jt) (,jf) (,k-expr 32 host))))) + modes) + (else (error "Unknown addressing mode" insn)))))) + (reverse opcodes)) + (else (error "Unknown BPF opcode in expression" insn)))) + (else (error "Invalid BPF instruction" insn))))))) + +(define (exprs->bpf-bytecode exprs) + (if (null? exprs) + (bitconstruct) + (bitstring-append + (assemble-instruction (car exprs)) + (exprs->bpf-bytecode (cdr exprs))))) + +;;;;;;;;;;;;;;;;;;; +;;; Disassembly ;;; +;;;;;;;;;;;;;;;;;;; + +(define-syntax disassemble-instruction + (ir-macro-transformer + (lambda (e i c) + `(let ((insn ,(cadr e))) + (bitmatch + insn + ,@(append-map + (match-lambda + ((mnemonic opcode modes ...) + (map (lambda (mode) + (match-let (((addrcode _ (bitmatch-expr disasm-expr)) + (alist-ref mode addrmodes))) + `(((,(+ opcode addrcode) 16 host) . ,bitmatch-expr) + ,(list 'quasiquote (cons mnemonic disasm-expr))))) + modes))) + (reverse opcodes)) + (else "Unrecognised instruction" insn)))))) + +(define (bpf-bytecode->exprs bytecode) + (let lp ((bytecode bytecode) + (exprs '())) + (bitmatch bytecode + (((insn 64 bitstring) (rest bitstring)) + (lp rest (cons (disassemble-instruction insn) exprs))) + (() (reverse exprs))))) + +) -- cgit v1.2.3