Return Styles: Pseud0ch, Terminal, Valhalla, NES, Geocities, Blue Moon.

Pages: 1-

disassembler in Lisp

Name: Anonymous 2010-12-22 3:26


"8b 04 9b b8 01 00 00 00 90".split |> map asBase,16,? |> x86deasm
((mov eax [ebx+4*ebx]) (mov eax 1) (nop))

Name: Anonymous 2010-12-22 3:37

>>1
Is that Haskell?

Name: Anonymous 2010-12-22 3:38

>>2
Lisp

Name: Anonymous 2010-12-22 3:41

>>3
Why that faggy Haskell/Ruby-like syntax?
Where are the parentheses?

Name: Anonymous 2010-12-22 3:41


<: X86 disassembler :>

load "tests/x86opcodeTable.st"

// Memory operand of an instruction, in the form seg:[base+index*scale+disp].
// Instances are created by decMem/decSibByte and rendered by deasmSib.
class X86Sib
  seg   = 'DS // base segment
  index = no  // this register keeps index in data array
  base  = no  // base register keeps start of data array
  scale = 1   // scale for index (1, 2, 4, or 8: decSibByte sets 1<<(2-bit field))
  disp  = 0   // signed displacement

// One decoded instruction together with the prefix state gathered by decPrefix.
class X86Inst
  form       // formal representation of this opcode
  bytes      // byte representation
  args       // decoded operands (filled in by decArg)
  ip         // offset in memory of this instruction
  mode       // 32-bit or 16-bit
  MOD REG RM // decoded mod/rm
  segovr     // segment override
  oprovr?    // operand size override (read by oprSz)
  adrovr?    // address size override (read by adrSz)
  lock?      // makes instruction atomic
  repe?      // set when a 'repe prefix entry was seen
  repne?     // set when a 'repne prefix entry was seen

// Effective operand size of instruction i (presumably in bytes): 4 in 32-bit
// mode and 2 otherwise; the oprovr? flag swaps the two values.
oprSz i -> if i.mode==32 (if i.oprovr? 2 4) (if i.oprovr? 4 2)
// Effective address size of instruction i, computed the same way from adrovr?.
adrSz i -> if i.mode==32 (if i.adrovr? 2 4) (if i.adrovr? 4 2)

// True when a is one of the register names that can appear literally in an
// opcode form (used as a pattern predicate in decArg).
// NOTE(review): bfnd looks like a search over a sorted list -- keep the list
// in its current order unless that is confirmed not to matter.
regArg? a -> bfnd a '(AH  BH  CH  CS  DH DS ES FLG FS GS SS eAX eBP eBX eCX
                      eDI eDX eSI eSP)

// True when a is one of the size-dependent general purpose register names
// (the eXX names that gprTo16bit can narrow).
extendedReg? a -> bfnd a '(eAX eBP eBX eCX eDI eDX eSI eSP)

// Register tables indexed by a 3-bit ModR/M (or SIB) field value 0..7.
modrmMapGPR =: '(eAX eCX eDX eBX eSP eBP eSI eDI) // general purpose regs
// NOTE(review): entries 6 and 7 are eSI/eDI, which are not segment registers;
// they look like placeholders for unused encodings -- confirm.
modrmMapSeg =: '( ES  CS  SS  DS  FS  GS eSI eDI)
modrmMapCR  =: '(CR0 CR1 CR2 CR3 CR4 CR5 CR6 CR7) // control regs
modrmMapTR  =: '(TR0 TR1 TR2 TR3 TR4 TR5 TR6 TR7) // test regs
modrmMapDR  =: '(DR0 DR1 DR2 DR3 DR4 DR5 DR6 DR7) // debug regs
modrmMapMM  =: '(MM0 MM1 MM2 MM3 MM4 MM5 MM6 MM7) // mmx regs
modrmMapXM  =: '(XM0 XM1 XM2 XM3 XM4 XM5 XM6 XM7) // sse regs


// Map a size-dependent register name (eAX..eDI) to its 16-bit name (AX..DI).
// Only the eight names listed here are handled.
gprTo16bit r -> r.{eAX=>AX; eCX=>CX; eDX=>DX; eBX=>BX
                   eSP=>SP; eBP=>BP; eSI=>SI; eDI=>DI}

// Read a displacement from the front of bs into sib.disp (the !S4 pattern is
// presumably a signed 4-byte read), then continue decoding with sib appended
// to the operands decoded so far (rs).
decSib4disp sib rs as bs i -> [sib.disp:!S4 @bs]:bs decArg [@rs sib] as bs i

// Finish a memory operand that uses 32-bit addressing: read the displacement
// selected by i.MOD (and i.RM for MOD=0), append sib to the decoded operands
// rs and continue with the remaining operand specs (as) and bytes (bs).
//   MOD=0: no displacement, except RM=5 which carries a 4-byte displacement
//   MOD=1: 1-byte signed displacement
//   MOD=2: 4-byte signed displacement
//   MOD=3: no displacement
// Fix: the MOD=1 branch used to rewrite sib.base through gprTo16bit, turning
// e.g. [ebp+8] into [bp+8]; a 32-bit addressing form keeps its 32-bit base.
decSib4 sib rs as bs i ->
  i.MOD.{0 => i.RM.{_ => decArg [@rs sib] as bs i
                    5 => decSib4disp sib rs as bs i}
         1 => [sib.disp:!S1 @bs]:bs decArg [@rs sib] as bs i
         2 => decSib4disp sib rs as bs i
         3 => decArg [@rs sib] as bs i}

// Decode the SIB byte b at the front of the byte stream: bits 6-7 give the
// scale exponent, bits 3-5 the index register and bits 0-2 the base register.
// The resulting X86Sib is then completed by decSib4.
// NOTE(review): when MOD=0 and base=eBP the base is dropped here, but decSib4
// only reads a 4-byte displacement for MOD=0 when i.RM==5; on this path i.RM
// is 4, so the displacement that this encoding carries appears to be left
// unread -- confirm and handle.
decSibByte rs as [b@bs] i -> do
  sib:(X86Sib scale=1<<(ldb 6 2 b)
              index=modrmMapGPR|(ldb 3 3 b) base=modrmMapGPR|(ldb 0 3 b))

  // don't use base
  (if i.MOD == 0 && sib.base == 'eBP then do sib.base=:no)

  (if sib.index == 'eSP then do // dont use scaled index
     sib.index =: no
     sib.scale =: 1)
  (decSib4 sib rs as bs i)

// 16-bit addressing forms, indexed by the ModR/M RM field:
// (default-segment index-register base-register).
modrmMapSib =: '((DS eSI eBX) (DS eDI eBX) (DS eSI eBP) (DS eDI eBP)
                 (DS no  eSI) (DS no  eDI) (SS no  eBP) (DS no  eBX))

// Fill sib's segment, index and base from modrmMapSib for a 16-bit addressing
// form, append sib to the decoded operands rs and continue decoding.
// Fix: the table lookup used `i.RM`, but this function's instruction
// parameter is named `inst`, so `i` was unbound here.
decSib2 sib rs as bs inst -> [sib.seg sib.index sib.base]:modrmMapSib|inst.RM
                             decArg [@rs sib] as bs inst

// Decode a ModR/M memory operand into a fresh X86Sib and continue decoding.
//   p, s : operand kind and size codes from the opcode form (not used here)
//   rs   : operands decoded so far
//   as   : remaining operand specs
//   bs   : remaining bytes
//   i    : instruction being decoded (MOD/REG/RM already set)
// 16-bit addressing: RM selects a row of modrmMapSib via decSib2; MOD selects
// the displacement (MOD=0 with RM=6 is a bare 2-byte displacement).
// 32-bit addressing: RM=4 means a SIB byte follows; otherwise RM names the
// base register, except MOD=0 with RM=5 which has no base (decSib4 then reads
// a 4-byte displacement).
// Fixes:
//  * the addressing form is selected by the address size (adrSz), not the
//    operand size, so the operand-size prefix no longer changes how memory
//    operands are parsed and the address-size prefix does;
//  * the MOD=1 and MOD=2 16-bit branches called decSib2 without rs;
//  * the 32-bit base register comes from the RM field, not REG.
decMem p s rs as bs i ->
  sib:(X86Sib)
  i.adrSz.{_ => i.MOD.{0 => i.RM.{_ => decSib2 sib rs as bs i
                                  6 => [sib.disp:!S2 @bs]:bs \
                                       decArg [@rs sib] as bs i}
                       1 => [sib.disp:!S1 @bs]:bs decSib2 sib rs as bs i
                       2 => [sib.disp:!S2 @bs]:bs decSib2 sib rs as bs i
                       3 => decArg [@rs sib] as bs i}
           4 => i.RM.{_ => sib.base:(if i.MOD!=0||i.RM!=5 modrmMapGPR|i.RM) \
                           decSib4 sib rs as bs i
                      4 => decSibByte rs as bs i}}


// Decode an immediate operand whose size/type code is s, append it to the
// decoded operands rs and continue with the remaining specs (as) and bytes.
// Codes handled here:
//   'b 'w 'd : fixed 1-, 2- and 4-byte unsigned values
//   'v       : 2 or 4 bytes depending on the operand size
//   'c       : one raw byte, or a 2-byte value when the operand size is 4
//   'a       : 4 or 8 raw bytes depending on the address size
//   'q 'dq 'ps : 8, 16 and 16 raw bytes
//   'p       : far pointer, appended as the pair [seg*16 offset]
//   'si 'pi 'ss 's and anything else raise an error.
// Fix: a far pointer is encoded offset first, selector last; the 'p branches
// used to read the selector first, swapping the two fields.
// NOTE(review): seg*16 is a real-mode segment base; in protected mode the
// selector is not a paragraph number -- confirm the intended representation.
decImm s rs as bs i ->
  s.{ _   => error "decImm: invalid imm type"
     'si  => error "decImm: unexpected type `$s`"
     'c   => i.oprSz.{_ =>[v     @bs]:bs decArg [@rs v] as bs i
                      4 =>[v:!U2 @bs]:bs decArg [@rs v] as bs i}
     'pi  => error "decImm: unexpected type `$s`"
     'ps  => decArg [@rs take,16,bs] as drop,16,bs i
     'ss  => error "decImm: unexpected type `$s`"
     'q   => decArg [@rs take, 8,bs] as drop, 8,bs i
     'dq  => decArg [@rs take,16,bs] as drop,16,bs i
     'a   => i.adrSz.{_ =>decArg [@rs take,4,bs] as drop,4,bs i
                      4 =>decArg [@rs take,8,bs] as drop,8,bs i}
     'p   => i.oprSz.{_=>[v:!U2 seg:!U2 @bs]:bs decArg [@rs[seg*16 v]] as bs i
                      4=>[v:!U4 seg:!U2 @bs]:bs decArg [@rs[seg*16 v]] as bs i}
     's   => error "decImm: unexpected type `$s`"
     'b   => [v:!U1 @bs]:bs decArg [@rs v] as bs i
     'w   => [v:!U2 @bs]:bs decArg [@rs v] as bs i
     'd   => [v:!U4 @bs]:bs decArg [@rs v] as bs i
     'v   => i.oprSz.{_ =>[v:!U2 @bs]:bs decArg [@rs v] as bs i
                      4 =>[v:!U4 @bs]:bs decArg [@rs v] as bs i}}

// Decode a direct memory offset operand: an unsigned value of 2 bytes, or of
// 4 bytes when the address size is 4. The value is appended to rs as a plain
// number and decoding continues with the remaining specs and bytes.
decOff rs as bs i ->
  i.adrSz.{_ =>[v:!U2 @bs]:bs decArg [@rs v] as bs i
           4 =>[v:!U4 @bs]:bs decArg [@rs v] as bs i}

// Narrow one operand to 16-bit register names: an eXX register becomes its
// 16-bit name, an X86Sib gets its index and base narrowed in place and is
// returned, and anything else is returned unchanged.
// NOTE(review): for an X86Sib whose index or base is `no`, the recursive call
// receives `no`; this relies on extendedReg?/x86Sib? being false for it.
normalizeReg r -> cnd {r.extendedReg? => r.gprTo16bit
                       r.x86Sib?      => do r.index =: r.index.normalizeReg \
                                            r.base =: r.base.normalizeReg \
                                            r
                       ye             => r}

// Return i unchanged when its operand size is 4; otherwise replace i.args
// with the normalizeReg'd operands.
normalizeRegs i -> if i.oprSz == 4
                     then i
                     else set i.args (map normalizeReg i.args)


// Operand decoding loop.
//   rs : operands decoded so far
//   as : remaining operand specs from the opcode form, each a [p s] pair
//        (p = operand kind, s = size/type code)
//   bs : bytes not yet consumed
//   i  : instruction being decoded
// Base clause: stores rs in i.args, trims i.bytes to the bytes actually
// consumed (original length minus what is left in bs) and returns the
// instruction after normalizeRegs. Presumably this is the clause that runs
// once `as` no longer matches [[p s]@as] -- confirm dispatch order.
decArg rs as bs i -> do
  i.args =: rs
  i.bytes =: (take i.bytes.len-bs.len i.bytes)
  i.normalizeRegs

// Clause for a non-empty spec list: dispatch on the operand kind p.
//   D C T P V S G : register chosen by i.REG from the matching table
//   R             : general purpose register chosen by i.RM
//   Q W E         : memory operand via decMem, or (MOD=3) a register chosen
//                   by i.RM from the MMX / SSE / GPR table respectively
//   M             : memory operand via decMem
//   A J I         : immediate via decImm
//   O             : direct offset via decOff
//   regArg? names and anything else are appended to rs as-is
// NOTE(review): the meaning of the `A.` prefix on this clause is not visible
// in this listing.
A.decArg rs [[p s]@as] bs i ->
  p.{default  => decArg [@rs p] as bs i // take it from form
     'D       => decArg [@rs modrmMapDR |i.REG] as bs i
     'C       => decArg [@rs modrmMapCR |i.REG] as bs i
     'T       => decArg [@rs modrmMapTR |i.REG] as bs i
     'P       => decArg [@rs modrmMapMM |i.REG] as bs i
     'V       => decArg [@rs modrmMapXM |i.REG] as bs i
     'Q       => i.MOD.{_ => decMem p s rs as bs i
                        3 => decArg [@rs modrmMapMM |i.RM] as bs i}
     'W       => i.MOD.{_ => decMem p s rs as bs i
                        3 => decArg [@rs modrmMapXM |i.RM] as bs i}
     'E       => i.MOD.{_ => decMem p s rs as bs i
                        3 => decArg [@rs modrmMapGPR|i.RM] as bs i}
     'A       => decImm s rs as bs i
     'O       => decOff rs as bs i
     'S       => decArg [@rs modrmMapSeg|i.REG] as bs i
     'J       => decImm s rs as bs i
     'I       => decImm s rs as bs i
     'M       => decMem p s rs as bs i
     'R       => decArg [@rs modrmMapGPR|i.RM ] as bs i
     'G       => decArg [@rs modrmMapGPR|i.REG] as bs i
     !regArg? => decArg [@rs p] as bs i}

// Decode all operands of instruction i from the bytes bs, following the
// operand specs in i.form.args.
// Fix: the accumulator passed to decArg was the unbound name `rs`; decoding
// starts with an empty operand list.
decArgs bs i -> decArg [] i.form.args bs i

// Merge one operand spec: each field of the new spec [nP nS] wins when it is
// set, otherwise the old field is kept.
mergeArgs [oP oS] [nP nS] -> [nP||oP nS||oS]
// Overlay the first three operand specs of form `new` onto form `old`, drop
// nil results, and store the list as old.args.
// NOTE(review): mergeArgs destructures both arguments as pairs; behaviour when
// a form has fewer than three operand specs depends on how oa|k and the
// pattern treat missing entries -- confirm.
mergeForms old new -> oa:old.args na:new.args
  set old.args strip,nil,[(mergeArgs oa|0 na|0) (mergeArgs oa|1 na|1)
                          (mergeArgs oa|2 na|2)]

// Decode an opcode-group instruction whose real form is selected by the
// ModR/M REG field: look up x86opsExt1 at (n<<3 | REG), merge that entry into
// the current form and decode the operands.
decGrpL n bs i ->
  decArgs bs set,i.form,(mergeForms i.form x86opsExt1|(or n<<3 i.REG))


// Variant used by decExt for group numbers of 9 and above: for MOD=3 the form
// comes from x86opsExt2 at (n | REG).
// NOTE(review): for any other MOD value this calls decGrpH again with exactly
// the same arguments, which cannot terminate; the intended target is probably
// decGrpL (or an error) -- confirm against the opcode tables.
// NOTE(review): the index here is `or n i.REG` without the <<3 used in
// decGrpL -- confirm how x86opsExt2 is laid out.
decGrpH n bs i ->
  i.MOD.{_ =>decGrpH n bs i
         3 =>decArgs bs set,i.form,(mergeForms i.form x86opsExt2|(or n i.REG))}

// Resolve opcode extensions: when the form's cmd is ['grp n] the real form is
// chosen by decGrpL (n below 9) or decGrpH (otherwise); any other cmd goes
// straight to operand decoding.
decExt bs i -> c:i.form.cmd
  c.{default  => decArgs bs i
     ['grp n] => (if n<9 decGrpL decGrpH) n bs i}

// If the opcode form has a ModR/M byte, take it from the front of bs and split
// it into MOD (bits 6-7), REG (bits 3-5) and RM (bits 0-2) on the instruction,
// then resolve opcode extensions; otherwise decode the operands directly.
decMODRM bs i ->
  i.form.modrm?.{_  => [b@bs]:bs \
                       [i.MOD i.REG i.RM]:[ldb,6,2,b ldb,3,3,b ldb,0,3,b] \
                       decExt bs i
                 no => decArgs bs i}

// Consume one byte b and look it up in the primary opcode table x86ops.
// Prefix entries record their effect on the instruction and recurse on the
// remaining bytes; an 'x86ops entry switches to the x86ops0F table using the
// next byte; any other entry becomes the instruction's form and decoding
// continues with the ModR/M byte.
// Fix: the segment-override prefix was stored in `i.seg`, but X86Inst declares
// that field as `segovr`.
decPrefix [b@bs] i -> do o:x86ops|b
  o.cmd.{default => decMODRM  bs     (set i.form    o)
         'x86ops => decMODRM  bs.ltl (set i.form    x86ops0F|bs.lhd)
         'segovr => decPrefix bs     (set i.segovr  o.args.lhd)
         'oprovr => decPrefix bs     (set i.oprovr? ye)
         'adrovr => decPrefix bs     (set i.adrovr? ye)
         'lock   => decPrefix bs     (set i.lock?   ye)
         'repe   => decPrefix bs     (set i.repe?   ye)
         'repne  => decPrefix bs     (set i.repne?  ye)}

// Decode a single instruction from the front of the byte list bs.
// mode is 32 (default) or 16 and sets the default operand/address sizes.
// The returned X86Inst has bytes trimmed to the bytes it consumed (see decArg).
x86decode1 bs mode=32 -> decPrefix bs (X86Inst bytes=bs mode=mode)

// Decode the byte list bs into a list of X86Inst, one instruction at a time.
// The helper f carries the instructions decoded so far (rs) and the remaining
// bytes; after each instruction it drops the bytes that instruction consumed.
// Decoding stops when the remaining input no longer matches the byte-list
// pattern, or when a decoded instruction has no form.cmd.
// Fix: the stop branch returned the unbound name `r`; it now returns the
// instructions collected so far (rs).
x86decode bs mode=32 -> f [] bs
  where f:{rs _     => rs
           rs [@bs] => i:(x86decode1 bs mode=mode) \
                       if i.form.cmd (rec [@rs i] drop,i.bytes.len,bs) rs}

// Render an X86Sib as an expression tree. Built up in r step by step:
//   index, multiplied by scale when scale is not 1;
//   plus base when both are present (or just whichever one is present);
//   plus disp when it is non-zero (or disp alone when there is no register);
//   wrapped in #[...];
//   prefixed with seg: when the segment is not ds.
// Register and segment names are lowercased first.
// NOTE(review): index and base may be `no`; this relies on downcase passing
// `no` through as a false value -- confirm.
deasmSib x -> seg :x.seg.downcase   scale:x.scale  index:x.index.downcase
              base:x.base.downcase  disp :x.disp
              r:index
              r:(if r && x.scale != 1 then #(`*` $scale $r) else r)
              r:(if r then (if base then #(`+` $base $r) else r)
                      else base)
              r:(if r then (if disp !=0 then #(`+` $disp $r) else r)
                      else disp)
              r:#[$r]
              r:(if seg != 'ds then #(`:` $seg $r) else r)
              r

// Render one decoded operand: symbols (register names) are lowercased, memory
// operands go through deasmSib, anything else (e.g. numbers) is kept as is.
deasmArg x -> cnd {x.sym?    => x.downcase
                   x.x86Sib? => x.deasmSib
                   ye        => x}

// Render one instruction as a list: the form's cmd followed by its operands.
x86deasm1 i -> [i.form.cmd @(map deasmArg i.args)]

// Disassemble the byte list bs (mode 32 by default) into a list of rendered
// instructions, e.g. ((mov eax [ebx+4*ebx]) (mov eax 1) (nop)).
x86deasm bs mode=32 -> map x86deasm1 (x86decode bs mode=mode)

Name: Anonymous 2010-12-22 3:50

>>5
No parentheses
Syntax everywhere
class
//-style comments


IHBT or you're a really bad person who killed Lisp with some macros.

Name: Anonymous 2010-12-22 3:55

>>6
If list-processing language provides macros and access to AST, then this language is a LISP, even if without parens. Prove me wrong.

Name: Anonymous 2010-12-22 3:58

>>7
LISP is not about macros and AST, it's about ((parentheses)), absence of syntax and ``ABSTRACT BULLSHITE''.

Name: Anonymous 2010-12-22 4:03

>>8
disregard that LISP stands for LISt Processing language.

Name: Anonymous 2010-12-22 4:05

>absence of syntax
please, tell us about '@,`"#\

Name: Anonymous 2010-12-22 4:14

>>10
Real men use (quote), (quasiquote), (unquote) and (unquote-splice).
How "strings" and (#c #h #a #r #s) are syntax? They are data types.

IHSBT

Name: Anonymous 2010-12-22 4:15

>>11
*(#\c #\h #\a #\r #\s)

Name: Anonymous 2010-12-22 4:16

>>11
You still can use list of symbols for them: '(c h a r s)
And what about (a . b)?

Name: Anonymous 2010-12-22 4:22

>>13
(cons (quote a) (quote b))

Symbols are not strings.
Symbols are case insensitive, case insensitive strings and characters would be pretty stupid, don't you think?

Name: Anonymous 2010-12-22 4:25

>>14

(make-string-from-symbols '(c h a r s))

Name: Anonymous 2010-12-22 4:26

>>14
>Symbols are case insensitive
|whatAboutYourMom?|

Name: Anonymous 2010-12-22 4:28

>>15

1 ]=> (eq? 'a 'A)
;Value: #t
1 ]=> (eq? #\a #\A)
;Value: #f

Name: Anonymous 2010-12-22 4:28

>case insensitive strings and characters would be pretty stupid, don't you think?
Older computers were case insensitive. Nobody complained. Character case is overrated.

Name: Anonymous 2010-12-22 4:30

>>17
>(eq? 'a 'A)
Get yourself a modern Lisp, like Clojure. Don't be a caveman.

Name: Anonymous 2010-12-22 4:31

Name: Anonymous 2010-12-22 4:35

>>20
wow! he is mad

Name: Anonymous 2010-12-22 4:39

In CL, symbols aren't really case insensitive; it's just that by default (readtable-case *readtable*) is :UPCASE, which means that a symbol's name is upcased upon reading. Comparison of symbol names is case sensitive. If you were to change (readtable-case *readtable*) to :preserve, your symbols would be read as they are written (however, if you want this to apply to everything else, you'll have to recompile all the code whose symbols were read with the default readtable-case of :upcase). At minimum, the standard says that implementations must support the following possible values for a readtable's readtable-case: :upcase, :downcase, :preserve, or :invert.

Name: Anonymous 2010-12-22 4:41

>>22
Also, in R6RS, symbols are case sensitive.

Name: Anonymous 2010-12-22 5:12

Dun dun dunnnnnn x86 dreams.
Dun dun dunnnnnn x86 dreams
Dun dun dunnnnnn x86 dreams

Name: Anonymous 2010-12-22 8:31

>>1
I know you, bastard.

Name: Anonymous 2010-12-22 11:01

>>1
What is [ebx+4*ebx] in your output? Is it a symbol named like that or is it an object printed like that (for example, you could have a reader macro for [ which properly parses that)?

Name: Anonymous 2010-12-22 13:46

>>26
I don't think you know about ASM syntax.

Name: Anonymous 2010-12-22 15:01

>>27
You're wrong here. I am intimately familiar with both Common Lisp and x86 asm. I've used some other people's x86 disassemblers/assemblers written in CL, and I've been a bit disappointed about the handling of dereferenced operands: either they handled it as a symbol (like: '|[reg+x*reg+offs]|) or as a list of symbols or as a special operand instance (of some class). Alternate handlings could be a pprint handler which recognizes the list and so on. CL is quite flexible in that in OP's example, the [ebx+4*ebx] text could actually be caused by many things, such as a symbol named like that, a list or some other object which is handled/detected specially by a pretty printer dispatch handler, or a class or structure instance which has a special way of printing itself (can be defined in many ways, but common ones would be defining a method on the print-object generic function, or a print-function in a defstruct, or by defining a pprint dispatcher using set-pprint-dispatch or whatever, there are plenty of ways to play around with the printer). However, just modifying the printer is usually not enough as you might want a piece of text to be read back in as an object of some sort, in which case you modify the readtable to handle that.

My question was on the nature of that [ebx+4*ebx] text (its internal representation) as that entails very different ways of processing the operands programmatically, and some representations may mean a lot more chore work, while others may be more easy to destructure and process.

Name: Anonymous 2010-12-22 16:53

>>26
[ebx+4*ebx] is a usual prefix expression pretty-printed in its infix form. It isn't a symbol.

Name: Anonymous 2010-12-22 17:44

>>25
The only real wisdom is knowing you know nothing. -- Socrates

Name: Anonymous 2010-12-22 17:51

>>29
Ah, okay, makes sense... so something like '(deref (pp+ ebx (pp* 4 ebx))) and have the prettyprinter catch deref or pp+, pp- (or maybe just make that into +/* and only have the preprocessor catch the dereference symbol).

Name: Anonymous 2010-12-22 18:40

>>31
Not exactly. You input #[$base+$scale*$index] - in REPL and get your usual `(list (+ ,base (* ,scale ,index))), that is pretty-printed back as [ebx+4*ebx], using information about operators from reader's table. Of course, if you implement something like this yourself, you should provide a way to turn pretty-printing off, so that confused user could see original AST.

Name: Anonymous 2010-12-24 3:47

>>8
absence of syntax
((parentheses))
what?

Don't change these.
Name: Email:
Entire Thread Thread List