;;; ;;; Assembler/disassembler between S-expressions and BPF bytecode ;;; ;;; Copyright (c) 2013 by Peter Bex, see file COPYING.BSD ;;; (module bpf-assembler (exprs->bpf-bytecode bpf-bytecode->exprs read-decimal-bpf-bytecode write-decimal-bpf-bytecode) (import chicken scheme) (use extras srfi-1 bitstring matchable) (define (uint? x) (and (integer? x) (or (zero? x) (positive? x)))) ;; Labels are jump offset from *next* instruction (a little surprising perhaps) (define (label? x) (and (integer? x) (<= 0 x 256))) ;; Basic idea stolen from sixtyfive-oh-two. Thanks, Felix! ;) (begin-for-syntax (import chicken) (use srfi-1 matchable) (define addrmodes '()) (define opcodes '()) ;; This one is hairy and only semi-hygienic (or some such) due to the ;; construction of "quoted code", which is then picked apart by the ir macros (define-syntax defaddrmode (syntax-rules (=>) ((_ ?id ?code (?match-expr => ?k-expr) (?k-bit => ?disasm)) (defaddrmode ?id ?code (?match-expr => 0 0 ?k-expr) (0 0 ?k-bit => ?disasm))) ((_ ?id ?code (?match-expr => ?jt ?jf ?k-expr) (?jt-bit ?jf-bit ?k-bit => ?disasm)) (set! addrmodes (cons (cons '?id (list ?code (list '?match-expr '?jt '?jf '?k-expr) (list '((?jt-bit 8) (?jf-bit 8) (?k-bit 32 host)) '?disasm))) addrmodes))))) ;; XXX TODO: Use the constants from net/bpf.h? ;; Ordered by complexity (defaddrmode no-operands #x00 (() => 0) (_ => ())) (defaddrmode packet-length #x80 (('len) => 0) (_ => (len))) (defaddrmode index-register #x08 (('x) => 0) (_ => (x))) (defaddrmode accumulator-register #x10 (('a) => 0) (_ => (a))) (defaddrmode immediate #x00 ; Also used for JA aka JMP w/ label (((and k (? uint?))) => k) (k => (,k))) (defaddrmode memory-ref #x60 ((('mem (and k (? uint?)))) => k) (k => ((mem ,k)))) (defaddrmode memory-set! #x00 ; No corresponding definition in bpf.h ((('mem (and k (? uint?)))) => k) (k => ((mem ,k)))) (defaddrmode packet-ref #x20 ((('pkt (and k (? uint?)))) => k) (k => ((pkt ,k)))) (defaddrmode packet-ref/index-register #x40 ((('pkt 'x (and k (? uint?)))) => k) (k => ((pkt x ,k)))) (defaddrmode packet-ref/hack #xa0 ; BPF_MSH; an "efficiency hack", says bpf(4) ((('pkt '4* (and k (? uint?)))) => k) (k => ((pkt 4* ,k)))) (defaddrmode conditional-jump-immediate #x00 (((and k (? uint?)) (and jt (? label?)) (and jf (? label?))) => jt jf k) (jt jf k => (,k ,jt ,jf))) (defaddrmode conditional-jump-index-register #x08 (('x (and jt (? label?)) (and jf (? label?))) => jt jf 0) ; zero? (jt jf _ => (x ,jt ,jf))) (define-syntax defop (syntax-rules () ((_ ?mnemonic ?opcode ?addrmode0 ...) (set! opcodes (cons (cons '?mnemonic (list ?opcode '?addrmode0 ...)) opcodes))))) ;; Define mnemonic, basic opcode and addressing modes for each instruction. ;; The addressing mode is ORed into the basic opcode to get a full opcode. (defop ld #x00 packet-length immediate memory-ref packet-ref packet-ref/index-register) (defop ldh #x08 packet-ref packet-ref/index-register) (defop ldb #x10 packet-ref packet-ref/index-register) (defop ldx #x01 packet-length immediate memory-ref) (defop ldxb #x11 packet-ref/hack) ; This is how tcpdump prints ldx/msh (defop st #x02 memory-set!) (defop stx #x03 memory-set!) (defop jmp #x05 immediate) ; aka JA (defop jeq #x15 conditional-jump-immediate conditional-jump-index-register) (defop jgt #x25 conditional-jump-immediate conditional-jump-index-register) (defop jge #x35 conditional-jump-immediate conditional-jump-index-register) (defop jset #x45 conditional-jump-immediate conditional-jump-index-register) (defop add #x04 index-register immediate) (defop sub #x14 index-register immediate) (defop mul #x24 index-register immediate) (defop div #x34 index-register immediate) (defop or #x44 index-register immediate) (defop and #x54 index-register immediate) (defop lsh #x64 index-register immediate) (defop rsh #x74 index-register immediate) (defop neg #x84 no-operands) ;; index-register is acceptable, according to comment in bpf.h (defop ret #x06 accumulator-register immediate index-register) (defop tax #x07 no-operands) (defop txa #x87 no-operands)) ;;;;;;;;;;;;;;;; ;;; Assembly ;;; ;;;;;;;;;;;;;;;; ;; XXX TODO: Use the structs from net/bpf.h? bitstring can go, then. (define-syntax assemble-instruction (ir-macro-transformer (lambda (e i c) `(let ((insn ,(cadr e))) (match insn ((opcode args ...) (case opcode ,@(map (match-lambda ((mnemonic opcode modes ...) `((,mnemonic) (match args ,@(map (lambda (mode) (match-let (((addrcode (match-expr jt jf k-expr) _) (alist-ref mode addrmodes))) `(,match-expr (bitconstruct (,(+ opcode addrcode) 16 host) (,jt) (,jf) (,k-expr 32 host))))) modes) (else (error "Unknown addressing mode" insn)))))) (reverse opcodes)) (else (error "Unknown BPF opcode in expression" insn)))) (else (error "Invalid BPF instruction" insn))))))) (define (exprs->bpf-bytecode exprs) (if (null? exprs) (bitconstruct) (bitstring-append (assemble-instruction (car exprs)) (exprs->bpf-bytecode (cdr exprs))))) ;;;;;;;;;;;;;;;;;;; ;;; Disassembly ;;; ;;;;;;;;;;;;;;;;;;; (define-syntax disassemble-instruction (ir-macro-transformer (lambda (e i c) `(let ((insn ,(cadr e))) (bitmatch insn ,@(append-map (match-lambda ((mnemonic opcode modes ...) (map (lambda (mode) (match-let (((addrcode _ (bitmatch-expr disasm-expr)) (alist-ref mode addrmodes))) `(((,(+ opcode addrcode) 16 host) . ,bitmatch-expr) ,(list 'quasiquote (cons mnemonic disasm-expr))))) modes))) (reverse opcodes)) (else (error "Unrecognised instruction" (bitstring->list insn 8)))))))) (define (bpf-bytecode->exprs bytecode) (let lp ((bytecode bytecode) (exprs '())) (bitmatch bytecode (((insn 64 bitstring) (rest bitstring)) (lp rest (cons (disassemble-instruction insn) exprs))) (() (reverse exprs))))) (define (read-number port) (read-token char-whitespace? port) ; First, skip any whitespace (let* ((token (read-token (lambda (x) (not (char-whitespace? x))) port)) (num (string->number token))) (unless (and num (integer? num) (or (zero? num) (positive? num))) (receive pos (port-position port) (error "Expected to read a positive integer" port pos token num))) num)) ;; For reading bytecode from tcpdump's -dd output (define (read-decimal-bpf-bytecode #!optional (port (current-input-port)) (count #f)) (let lp ((pos 0) (count? (or (eq? count #f) (and (integer? count) (positive? count)))) (count (or count (read-number port))) (bytecode (bitconstruct))) (cond ((and count? (>= pos count)) bytecode) ((eof-object? (peek-char port)) (if (not count?) bytecode (error "Expected more instructions due to count" count))) (else (let* ((op (read-number port)) (jt (read-number port)) (jf (read-number port)) (k (read-number port))) (lp (fx+ pos 1) count? count (bitstring-append bytecode (bitconstruct (op 16 host) (jt) (jf) (k 32 host))))))))) (define (write-decimal-bpf-bytecode code #!optional (port (current-output-port)) raw-code?) (let ((code (bitstring-of-any code))) (unless raw-code? (fprintf port "~A\n" (quotient (bitstring-length code) 64))) (let lp ((code code)) (bitmatch code (((op 16 host) (jt) (jf) (k 32 host) (rest bitstring)) (fprintf port "~A ~A ~A ~A\n" op jt jf k) (lp rest)) (() (void)))))) )