summaryrefslogtreecommitdiff
path: root/bpf-assembler.scm
diff options
context:
space:
mode:
authorPeter Bex <peter@more-magic.net>2013-05-04 15:52:35 +0200
committerPeter Bex <peter@more-magic.net>2013-05-04 15:52:35 +0200
commitc464a04a8c43e51dbf56e8c4f0be7eef93ed47e9 (patch)
treef699e5a6480b98e60317f8136b91695ce8442c87 /bpf-assembler.scm
downloadbpf-c464a04a8c43e51dbf56e8c4f0be7eef93ed47e9.tar.gz
Initial version of BPF assembler/disassembler with a handful of basic internal consistency tests
Diffstat (limited to 'bpf-assembler.scm')
-rw-r--r--bpf-assembler.scm163
1 files changed, 163 insertions, 0 deletions
diff --git a/bpf-assembler.scm b/bpf-assembler.scm
new file mode 100644
index 0000000..4cecd88
--- /dev/null
+++ b/bpf-assembler.scm
@@ -0,0 +1,163 @@
+(module bpf-assembler ; Maybe I should call this "bassie" ;)
+ (exprs->bpf-bytecode bpf-bytecode->exprs)
+
+(import chicken scheme)
+(use srfi-1 bitstring matchable)
+
+(define (uint? x) (and (integer? x) (positive? x)))
+;; Is a jump of zero allowed? It shouldn't be!
+(define (label? x) (and (integer? x) (< 0 x 256)))
+
+;; Basic idea stolen from sixtyfive-oh-two. Thanks, Felix! ;)
+(begin-for-syntax
+ (import chicken)
+ (use srfi-1 matchable)
+
+ (define addrmodes '())
+ (define opcodes '())
+
+ ;; This one is hairy and only semi-hygienic (or some such) due to the
+ ;; construction of "quoted code", which is then picked apart by the ir macros
+ (define-syntax defaddrmode
+ (syntax-rules (=>)
+ ((_ ?id ?code (?match-expr => ?k-expr) (?k-bit => ?disasm))
+ (defaddrmode ?id ?code
+ (?match-expr => 0 0 ?k-expr)
+ (0 0 ?k-bit => ?disasm)))
+ ((_ ?id ?code
+ (?match-expr => ?jt ?jf ?k-expr)
+ (?jt-bit ?jf-bit ?k-bit => ?disasm))
+ (set! addrmodes
+ (cons (cons '?id (list ?code
+ (list '?match-expr '?jt '?jf '?k-expr)
+ (list '((?jt-bit 8) (?jf-bit 8) (?k-bit 32 host))
+ '?disasm)))
+ addrmodes)))))
+
+ ;; Ordered by complexity
+ (defaddrmode no-operands #x80 (() => 0) (_ => ()))
+ (defaddrmode packet-length #x80 (('len) => 0) (_ => (len)))
+ (defaddrmode index-register #x08 (('x) => 0) (_ => (x)))
+ (defaddrmode accumulator-register #x10 (('a) => 0) (_ => (a)))
+ (defaddrmode immediate #x00 ; Also used for JA aka JMP w/ label
+ (((and k (? uint?))) => k)
+ (k => (,k)))
+ (defaddrmode memory-ref #x60
+ ((('mem (and k (? uint?)))) => k)
+ (k => ((mem ,k))))
+ (defaddrmode packet-ref #x20
+ ((('pkt (and k (? uint?)))) => k)
+ (k => ((pkt ,k))))
+ (defaddrmode packet-ref/index-register #x40
+ ((('pkt 'x (and k (? uint?)))) => k)
+ (k => ((pkt x ,k))))
+ (defaddrmode packet-ref/hack #xa0 ; BPF_MSH; an "efficiency hack", says bpf(4)
+ ((('pkt '4* (and k (? uint?)))) => k)
+ (k => ((pkt 4* ,k))))
+ (defaddrmode conditional-jump-immediate #x00
+ (((and k (? uint?)) (and jt (? label?)) (and jf (? label?))) => jt jf k)
+ (jt jf k => (,k ,jt ,jf)))
+ (defaddrmode conditional-jump-index-register #x08
+ (('x (and jt (? label?)) (and jf (? label?))) => jt jf 0) ; zero?
+ (jt jf _ => (x ,jt ,jf)))
+
+ (define-syntax defop
+ (syntax-rules ()
+ ((_ ?mnemonic ?opcode ?addrmode0 ...)
+ (set! opcodes (cons (cons '?mnemonic (list ?opcode '?addrmode0 ...))
+ opcodes)))))
+
+ ;; Define mnemonic, basic opcode and addressing modes for each instruction.
+ ;; The addressing mode is ORed into the basic opcode to get a full opcode.
+ (defop ld #x00
+ packet-length immediate memory-ref packet-ref packet-ref/index-register)
+ (defop ldh #x08 packet-ref packet-ref/index-register)
+ (defop ldb #x10 packet-ref packet-ref/index-register)
+ (defop ldx #x01 packet-length immediate memory-ref packet-ref/hack)
+ (defop st #x02 memory-ref)
+ (defop stx #x03 memory-ref)
+ (defop jmp #x05 immediate) ; aka JA
+ (defop jeq #x10 conditional-jump-immediate conditional-jump-index-register)
+ (defop jgt #x20 conditional-jump-immediate conditional-jump-index-register)
+ (defop jge #x30 conditional-jump-immediate conditional-jump-index-register)
+ (defop jset #x40 conditional-jump-immediate conditional-jump-index-register)
+ (defop add #x04 index-register immediate)
+ (defop sub #x14 index-register immediate)
+ (defop mul #x24 index-register immediate)
+ (defop div #x34 index-register immediate)
+ (defop or #x44 index-register immediate)
+ (defop and #x54 index-register immediate)
+ (defop lsh #x64 index-register immediate)
+ (defop rsh #x74 index-register immediate)
+ (defop neg #x84 no-operands)
+ ;; index-register is acceptable, according to comment in bpf.h
+ (defop ret #x06 accumulator-register immediate index-register)
+ (defop tax #x07 no-operands)
+ (defop txa #x87 no-operands))
+
+;;;;;;;;;;;;;;;;
+;;; Assembly ;;;
+;;;;;;;;;;;;;;;;
+
+(define-syntax assemble-instruction
+ (ir-macro-transformer
+ (lambda (e i c)
+ `(let ((insn ,(cadr e)))
+ (match insn
+ ((opcode args ...)
+ (case opcode
+ ,@(map (match-lambda
+ ((mnemonic opcode modes ...)
+ `((,mnemonic)
+ (match args
+ ,@(map (lambda (mode)
+ (match-let (((addrcode (match-expr jt jf k-expr) _)
+ (alist-ref mode addrmodes)))
+ `(,match-expr
+ (bitconstruct
+ (,(+ opcode addrcode) 16 host)
+ (,jt) (,jf) (,k-expr 32 host)))))
+ modes)
+ (else (error "Unknown addressing mode" insn))))))
+ (reverse opcodes))
+ (else (error "Unknown BPF opcode in expression" insn))))
+ (else (error "Invalid BPF instruction" insn)))))))
+
+(define (exprs->bpf-bytecode exprs)
+ (if (null? exprs)
+ (bitconstruct)
+ (bitstring-append
+ (assemble-instruction (car exprs))
+ (exprs->bpf-bytecode (cdr exprs)))))
+
+;;;;;;;;;;;;;;;;;;;
+;;; Disassembly ;;;
+;;;;;;;;;;;;;;;;;;;
+
+(define-syntax disassemble-instruction
+ (ir-macro-transformer
+ (lambda (e i c)
+ `(let ((insn ,(cadr e)))
+ (bitmatch
+ insn
+ ,@(append-map
+ (match-lambda
+ ((mnemonic opcode modes ...)
+ (map (lambda (mode)
+ (match-let (((addrcode _ (bitmatch-expr disasm-expr))
+ (alist-ref mode addrmodes)))
+ `(((,(+ opcode addrcode) 16 host) . ,bitmatch-expr)
+ ,(list 'quasiquote (cons mnemonic disasm-expr)))))
+ modes)))
+ (reverse opcodes))
+ (else "Unrecognised instruction" insn))))))
+
+(define (bpf-bytecode->exprs bytecode)
+ (let lp ((bytecode bytecode)
+ (exprs '()))
+ (bitmatch bytecode
+ (((insn 64 bitstring) (rest bitstring))
+ (lp rest (cons (disassemble-instruction insn) exprs)))
+ (() (reverse exprs)))))
+
+)