summaryrefslogtreecommitdiff
path: root/bpf-assembler.scm
blob: 207b271d8bcb1ed3b489f8f2583310ea194d59a7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
;;;
;;; Assembler/disassembler between S-expressions and BPF bytecode
;;;
;;; Copyright (c) 2013 by Peter Bex, see file COPYING.BSD
;;;
(module bpf-assembler
        (exprs->bpf-bytecode bpf-bytecode->exprs
         read-decimal-bpf-bytecode write-decimal-bpf-bytecode)

(import chicken scheme)
(use extras srfi-1 bitstring matchable)

(define (uint? x) (and (integer? x) (or (zero? x) (positive? x))))
;; Labels are jump offset from *next* instruction (a little surprising perhaps)
(define (label? x) (and (integer? x) (<= 0 x 256)))

(use-for-syntax srfi-1 matchable)

;; Basic idea stolen from sixtyfive-oh-two.  Thanks, Felix! ;)
(begin-for-syntax
 (import chicken)
 
 (define addrmodes '())
 (define opcodes   '())

 ;; This one is hairy and only semi-hygienic (or some such) due to the
 ;; construction of "quoted code", which is then picked apart by the ir macros
 (define-syntax defaddrmode
   (syntax-rules (=>)
     ((_ ?id ?code (?match-expr => ?k-expr) (?k-bit => ?disasm))
      (defaddrmode ?id ?code
        (?match-expr => 0 0 ?k-expr)
        (0 0 ?k-bit => ?disasm)))
     ((_ ?id ?code
         (?match-expr => ?jt ?jf ?k-expr)
         (?jt-bit ?jf-bit ?k-bit => ?disasm))
      (set! addrmodes
        (cons (cons '?id (list ?code
                               (list '?match-expr '?jt '?jf '?k-expr)
                               (list '((?jt-bit 8) (?jf-bit 8) (?k-bit 32 host))
                                     '?disasm)))
              addrmodes)))))

 ;; XXX TODO: Use the constants from net/bpf.h?
 
 ;; Ordered by complexity
 (defaddrmode no-operands #x00 (() => 0) (_ => ()))
 (defaddrmode packet-length #x80 (('len) => 0) (_ => (len)))
 (defaddrmode index-register #x08 (('x) => 0) (_ => (x)))
 (defaddrmode accumulator-register #x10 (('a) => 0) (_ => (a)))
 (defaddrmode immediate #x00       ; Also used for JA aka JMP w/ label
   (((and k (? uint?))) => k)
   (k => (,k)))
 (defaddrmode memory-ref #x60
   ((('mem (and k (? uint?)))) => k)
   (k => ((mem ,k))))
 (defaddrmode memory-set! #x00  ; No corresponding definition in bpf.h
   ((('mem (and k (? uint?)))) => k)
   (k => ((mem ,k))))
 (defaddrmode packet-ref #x20
   ((('pkt (and k (? uint?)))) => k)
   (k => ((pkt ,k))))
 (defaddrmode packet-ref/index-register #x40
   ((('pkt 'x (and k (? uint?)))) => k)
   (k => ((pkt x ,k))))
 (defaddrmode packet-ref/hack #xa0 ; BPF_MSH; an "efficiency hack", says bpf(4)
   ((('pkt '4* (and k (? uint?)))) => k)
   (k => ((pkt 4* ,k))))
 (defaddrmode conditional-jump-immediate #x00
   (((and k (? uint?)) (and jt (? label?)) (and jf (? label?))) => jt jf k)
   (jt jf k => (,k ,jt ,jf)))
 (defaddrmode conditional-jump-index-register #x08
   (('x (and jt (? label?)) (and jf (? label?))) => jt jf 0) ; zero?
   (jt jf _ => (x ,jt ,jf)))
 
 (define-syntax defop
   (syntax-rules ()
     ((_ ?mnemonic ?opcode ?addrmode0 ...)
      (set! opcodes (cons (cons '?mnemonic (list ?opcode '?addrmode0 ...))
                          opcodes)))))

 ;; Define mnemonic, basic opcode and addressing modes for each instruction.
 ;; The addressing mode is ORed into the basic opcode to get a full opcode.
 (defop ld   #x00
   packet-length immediate memory-ref packet-ref packet-ref/index-register)
 (defop ldh  #x08 packet-ref packet-ref/index-register)
 (defop ldb  #x10 packet-ref packet-ref/index-register)
 (defop ldx  #x01 packet-length immediate memory-ref)
 (defop ldxb #x11 packet-ref/hack)      ; This is how tcpdump prints ldx/msh
 (defop st   #x02 memory-set!)
 (defop stx  #x03 memory-set!)
 (defop jmp  #x05 immediate)            ; aka JA
 (defop jeq  #x15 conditional-jump-immediate conditional-jump-index-register)
 (defop jgt  #x25 conditional-jump-immediate conditional-jump-index-register)
 (defop jge  #x35 conditional-jump-immediate conditional-jump-index-register)
 (defop jset #x45 conditional-jump-immediate conditional-jump-index-register)
 (defop add  #x04 index-register immediate)
 (defop sub  #x14 index-register immediate)
 (defop mul  #x24 index-register immediate)
 (defop div  #x34 index-register immediate)
 (defop or   #x44 index-register immediate)
 (defop and  #x54 index-register immediate)
 (defop lsh  #x64 index-register immediate)
 (defop rsh  #x74 index-register immediate)
 (defop neg  #x84 no-operands)
 ;; index-register is acceptable, according to comment in bpf.h
 (defop ret  #x06 accumulator-register immediate index-register)
 (defop tax  #x07 no-operands)
 (defop txa  #x87 no-operands))

;;;;;;;;;;;;;;;;
;;; Assembly ;;;
;;;;;;;;;;;;;;;;

;; XXX TODO: Use the structs from net/bpf.h?  bitstring can go, then.

(define-syntax assemble-instruction
  (ir-macro-transformer
   (lambda (e i c)
     `(let ((insn ,(cadr e)))
        (match insn
          ((opcode args ...)
           (case opcode
             ,@(map (match-lambda
                      ((mnemonic opcode modes ...)
                       `((,mnemonic)
                         (match args
                           ,@(map (lambda (mode)
                                    (match-let (((addrcode (match-expr jt jf k-expr) _)
                                                 (alist-ref mode addrmodes)))
                                      `(,match-expr
                                        (bitconstruct
                                         (,(+ opcode addrcode) 16 host)
                                         (,jt) (,jf) (,k-expr 32 host)))))
                                  modes)
                           (else (error "Unknown addressing mode" insn))))))
                    (reverse opcodes))
             (else (error "Unknown BPF opcode in expression" insn))))
          (else (error "Invalid BPF instruction" insn)))))))

(define (exprs->bpf-bytecode exprs)
  (if (null? exprs)
      (bitconstruct)
      (bitstring-append
       (assemble-instruction (car exprs))
       (exprs->bpf-bytecode (cdr exprs)))))

;;;;;;;;;;;;;;;;;;;
;;; Disassembly ;;;
;;;;;;;;;;;;;;;;;;;

(define-syntax disassemble-instruction
  (ir-macro-transformer
   (lambda (e i c)
     `(let ((insn ,(cadr e)))
        (bitmatch
         insn
         ,@(append-map
            (match-lambda
              ((mnemonic opcode modes ...)
               (map (lambda (mode)
                      (match-let (((addrcode _ (bitmatch-expr disasm-expr))
                                   (alist-ref mode addrmodes)))
                        `(((,(+ opcode addrcode) 16 host) . ,bitmatch-expr)
                          ,(list 'quasiquote (cons mnemonic disasm-expr)))))
                    modes)))
                (reverse opcodes))
         (else (error "Unrecognised instruction" (bitstring->list insn 8))))))))

(define (bpf-bytecode->exprs bytecode)
  (let lp ((bytecode bytecode)
           (exprs '()))
    (bitmatch bytecode
              (((insn 64 bitstring) (rest bitstring))
               (lp rest (cons (disassemble-instruction insn) exprs)))
              (() (reverse exprs)))))

(define (read-number port)
  (read-token char-whitespace? port)  ; First, skip any whitespace
  (let* ((token (read-token (lambda (x) (not (char-whitespace? x))) port))
         (num (string->number token)))
    (unless (and num (integer? num) (or (zero? num) (positive? num)))
      (receive pos (port-position port)
        (error "Expected to read a positive integer" port pos token num)))
    num))

;; For reading bytecode from tcpdump's -dd output
(define (read-decimal-bpf-bytecode
         #!optional (port (current-input-port)) (count #f))
  (let lp ((pos 0)
           (count? (or (eq? count #f)
                       (and (integer? count) (positive? count))))
           (count (or count (read-number port)))
           (bytecode (bitconstruct)))
    (cond ((and count? (>= pos count)) bytecode)
          ((eof-object? (peek-char port))
           (if (not count?)
               bytecode
               (error "Expected more instructions due to count" count)))
          (else
           (let* ((op (read-number port))
                  (jt (read-number port))
                  (jf (read-number port))
                  (k (read-number port)))
             (lp (fx+ pos 1) count? count
                 (bitstring-append
                  bytecode (bitconstruct (op 16 host) (jt) (jf) (k 32 host)))))))))

(define (write-decimal-bpf-bytecode
         code #!optional (port (current-output-port)) raw-code?)
  (let ((code (->bitstring code)))
    (unless raw-code?
      (fprintf port "~A\n" (quotient (bitstring-length code) 64)))
    (let lp ((code code))
      (bitmatch code
                (((op 16 host) (jt) (jf) (k 32 host) (rest bitstring))
                 (fprintf port "~A ~A ~A ~A\n" op jt jf k)
                 (lp rest))
                (() (void))))))

)