SourceForge.net Logo

What Boomerang can do

On this page, we show some of the things that Boomerang can do so far. An attempt has been made to line up equivalent original source, binary, and decompiled source code lines; this is not always possible. Comments in red are not generated by the decompiler; those in black are.

./boomerang -Td test/pentium/sumarray:
Original source code
Disassembled binary code
Decompiled source code
#include <stdio.h>

int a[10] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};  8049460 01000000 02000000 03000000 04000000
 8049470 05000000 06000000 07000000 08000000
 8049480 09000000 0a000000
int a[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
int main() {  8048328:   push   %ebp
 8048329:   mov    %esp,%ebp
 804832b:   sub     $0x8,%esp
 804832e:   and     $0xfffffff0,%esp
 8048331:   mov    $0x0,%eax
 8048336:   sub     %eax,%esp
int main(int argc, char** argv, char** envp)
{
int local1; // m[r28{0} - 8]   // sum
int local2; // m[r28{0} - 12]  // i
    int sum = 0;  8048338:   movl   $0x0,0xfffffffc(%ebp)     local1 = 0;
    int i;
   
for (i=0; i < 10; i++) {
 804833f:    movl   $0x0,0xfffffff8(%ebp)
 8048346:   cmpl   $0x9,0xfffffff8(%ebp)
 804834a:   jle      804834e <main+0x26>
 804834c:   jmp     8048364 <main+0x3c>
    local2 = 0;
    while (local2 <= 9) {
        sum += a[i];  804834e:   mov    0xfffffff8(%ebp),%eax
 8048351:   mov    0x8049460(,%eax,4),%edx
 8048358:   lea     0xfffffffc(%ebp),%eax
 804835b:   add    %edx,(%eax)
        local1 += a[local2];   // sum += a[i]


    }
 804835d:   lea     0xfffffff8(%ebp),%eax
 8048360:   incl    (%eax)
 8048362:   jmp    8048346 <main+0x1e>
        local2++;              // i++

    }
    printf("Sum is %d\n", sum);  8048364:   sub    $0x8,%esp
 8048367:   pushl  0xfffffffc(%ebp)
 804836a:   push   $0x804842c
 804836f:    call     8048268 <printf@plt>
 8048374:   add     $0x10,%esp
    printf("Sum is %d\n", local1);
    return 0;  8048377:   mov    $0x0,%eax     return 0;
}
 804837c:   leave
 804837d:   ret
}

 804842c 53756d20 69732025  Sum is %
 8048434 640a00                    d..


This example shows:
  • Source that that is fairly readable, compiles with no warnings and runs correctly.
  • Conversion of stack locations to local variables
  • Detection,  declaration, use, and initialisation of an array
  • Correct handling of a C string through the use of the string as a parameter to a library function
  • The output from sumarray-O4 (same program compiled with -O4 optimisation) looks much the same (as of September 2004), except that the pretested while loop is replaced by a posttested do while loop.
./boomerang test/pentium/fibo-O4:
Original source code
Disassembled binary code
Decompiled source code
#include <stdio.h>

int main (void)
{   int number, value;
800487cc: push   %ebp
80487cd: mov    %esp,%ebp
80487cf:  sub     $0x4,%esp
80487d2: push   %esi
80487d3: push   %ebx
int main(int argc, char** argv, char** envp)
{
int local0;   // m[r28{-} -8]   // number
int local8;   // r24{39}
int local9;   // r24            // value
    printf ("Input number: "); 80487d4: push   $0x80488b8
80487d9: call    80486bc <printf@plt>
    printf("Input number: ");
    scanf ("%d", &number); 80487de: lea     0xfffffffc(%ebp),%eax
80487e1: push  %eax
80487e2: push   $0x80488c7
80487e7: call     80486cc <scanf@plt>
    scanf("%d", &local0);
    value = fib(number); 80487ec: mov    0xfffffffc(%ebp),%ebx
80487ef: add     $0xc,%esp
80487f2: cmp    $0x1,%ebx
80487f5: jle      8048814 <main+0x48>
80487f7: lea     0xffffffff(%ebx),%eax
80487fa: push   %eax
80487fb: call     8048798 <fib>
8048800: mov   %eax,%esi
8048802: lea     0xfffffffe(%ebx),%eax
8048805: push  %eax
8048806: call    8048798 <fib>
804880b: add    %esi,%eax
804880d: add    $0x8,%esp
8048810: jmp    8048816 <main+0x4a>
8048812: lea     (%esi),%esi
    // The compiler inlined the call to fib
    if (local0 <= 1) {
        local9 = local0;
    } else {
        local8 = fib(local0 - 1);
        local9 = fib(local0 - 2);
        local9 += local8;
    }
    printf("fibonacci(%d) = %d\n", number, value);
8048814: mov    %ebx,%eax
8048816: push   %eax
8048817: pushl  0xfffffffc(%ebp)
804881a: push   $0x80488ca
804881f: call      80486bc <printf@plt>
    printf("fibonacci(%d) = %d\n", local0, local9);
    return (0);
}
8048824: xor     %eax,%eax
8048826: lea     0xfffffff4(%ebp),%esp
8048829: pop    %ebx
804882a: pop    %esi
804882b: leave
804882c: ret
    return 0;
}



int fib (int x)
{
8048798: push   %ebp
8048799: mov    %esp,%ebp
804879b: push   %esi
804879c: push   %ebx
int fib(int param5)
{
int local8; // r24{18}
int local9; // r24        // Return value
    if (x > 1) 804879d: mov    0x8(%ebp),%ebx
80487a0: cmp    $0x1,%ebx
80487a3: jle    80487c0 <fib+0x28>
    if (param1 <= 1) {  // Note test inverted
        return (fib(x - 1) + fib(x - 2)); 80487a5: lea    0xffffffff(%ebx),%eax
80487a8: push   %eax
80487a9: call   8048798 <fib>
80487ae: mov    %eax,%esi
80487b0: lea    0xfffffffe(%ebx),%eax
80487b3: push   %eax
80487b4: call   8048798 <fib>
80487b9: add    %esi,%eax
        local9 = param5;            // ret = x
    else return (x); 80487bb: jmp    80487c2 <fib+0x2a>
80487bd: lea    0x0(%esi),%esi
80487c0: mov    %ebx,%eax
    } else {
        local8 = fib(param5 - 1);  // temp1 = fib(x-1)
        local9 = fib(param5 - 2);  // ret = fib(x-2)
        local9 += local8;            // ret += temp1
    }
}

80487c2: lea    0xfffffff8(%ebp),%esp
80487c5: pop    %ebx
80487c6: pop    %esi
80487c7: leave
80487c8: ret
    return local9;
}

This example shows:
  • Code that compiles and runs correctly
  • Converts appropriate stack locations into arguments and parameters, including taking the address of a local
  • Converts appropriate registers into the return location, and emits the correct return instruction
  • Inverts the then and else clauses of fib() (but the result is still correct and readable)
  • Handles recursion correctly (no mean feat if you don't make assumptions about the calling convention)
  • As above, handles the strings correctly, etc.
./boomerang test/sparc/fromssa2
Original source code
Disassembled binary code
Decompiled source code
void main() {

  int a, x;
10684: save     %sp, -112, %sp int main(int argc, char **argv, char **envp)
{
    int local17;                // argc{37}
    int local18;                // argc{73}      // "old a"
    int local19;                // local18{73}  // a
  a = 0; 10688: clr       %o0
  do {
   
a = a+1;
    x = a;
    printf("%d ", a);
1068c: sethi    %hi(0x10400), %l0
10690: add     %o0, 1, %i0
10694: or        %l0, 872, %o0
10698: call      printf
1069c: mov     %i0, %o1

    argc = 0;                  // Compiler reuses argc for a
    local19 = argc;
    do {
        local18 = local19;
        printf("%d ", local18 + 1);


  } while (a < 10); 106a0: cmp      %i0, 9
106a4: ble        0x10690
106a8: mov      %i0, %o0
        local17 = local18 + 1;
        local19 = local17;
    } while (local18 + 1 <= 9);

  printf("a is %d, x is %d\n", a, x); 106ac: sethi     %hi(0x10400), %g1
106b0: mov      %i0, %o1
106b4: mov      %i0, %o2
106b8: call       printf
106bc: or         %g1, 880, %o0
    printf("a is %d, x is %d\n", local18 + 1, local18 + 1);
  return 0;
}

106c0: ret
106c4: restore  %g0, 0, %o0
    return 0;
}
This example shows:
  • Boomerang can decompile SPARC binary programs
  • Copes with SPARC "register windows"
  • Untangles the "delay slot" instructions (after every call and branch instruction)
  • local19 had to be generated as a result of transforming out of SSA form
  • too many local variables

What Boomerang has done

Over ther period from August to December 2003, the main Boomerang developers used Boomerang (with a lot of help from IDA Pro and a text editor) to recover source code from a real-world Windows executable. Not all of the program had to be decompiled, and there was source code for an earlier version of the program. Despite the fact that Boomerang was (and still is) not ready for real-world use, the main algorithm that the clients wanted source code for was recovered, and some of the GUI parts of the program were decompiled as well. The experience was published in a paper published in the 2004 Working Conference on Reverse Engineering (WCRE2004) in Delft, Netherlands. You can download an extended version of this paper here.

The example below, taken from the paper, shows unedited output from Boomerang.

Disassembled binary code
Boomerang output
40E0F0 sub    esp, 8 void PlotAxes(CDC* pDC, int ptOrigin_x, int ptOrigin_y, int sizePixelsPerTick_cx, int sizePixelsPerTick_cy, int horizTicks, int vertTicks, int nDrawTicks, int maxTickSizeX, int arg_24, int maxTickSizeY) {
    int local2;  /* m[r28{0} - 8] */   int local11; // r28{67}
    int local12; // vertTicks{312}
    int local26; // r25
40E0F3 lea     eax, [esp+8+dummy]
40E0F7 push  ebx
40E0F8 mov   ebx, [esp+0Ch+ptOrigin.y]
40E0FC push  ebp
40E0FD push  esi
40E0FE mov   esi, [esp+14h+pDC_or_xRight]
40E102 push   edi
40E103 mov    edi, [esp+18h+ptOrigin.x]
40E107 push   ebx
40E108 push   edi
40E109 push   eax
40E10A mov    ecx, esi
40E10C call    CDC::MoveTo(int,int)
    CDC_MoveTo(pDC, &local2, ptOrigin_x, ptOrigin_y);
40E111 mov    ecx, [esp+18h+nVertTicks_or_count]
40E115 mov    edx, ebx
40E117 imul    ecx, [esp+18h+sizePixelsPerTick.cy_]
40E11C sub    edx, ecx        ; ecx is nHeight
40E11E mov    ecx, esi
40E120 push   edx
40E121 push   edi
40E122 call     CDC::LineTo(int,int)
    CDC_LineTo(pDC, ptOrigin_x, ptOrigin_y - sizePixelsPerTick_cy * vertTicks);
40E127 push   ebx
40E128 lea     eax, [esp+1Ch+dummy]
40E12C push  edi
40E12D push  eax
40E12E mov   ecx, esi
40E130 call    CDC::MoveTo(int,int)
    CDC_MoveTo(pDC, &local2, ptOrigin_x, ptOrigin_y);
40E135 mov    ecx, [esp+18h+nHorizTicks]
40E139 mov    ebp, [esp+18h+sizePixelsPerTick.cx_]
40E13D imul    ecx, ebp        ; ecx is nWidth
40E140 add     ecx, edi
40E142 push   ebx
40E143 push   ecx
40E144 mov    ecx, esi
40E146 call     CDC::LineTo(int,int)
    local11 = local18 - 36;
    %pc += 6688008;        // Error
    CDC_LineTo(pDC, sizePixelsPerTick_cx * horizTicks + ptOrigin_x, ptOrigin_y);
40E14B test   byte ptr [esp+18h+nDrawTicks], TICKS_VERT
40E150 jz      short loc_40E1CE
// Did not convert local variable
    if ((*(char*)(local11 + 68) & 1) != 0) {
40E152 mov   eax, 88888889h  ; /1.875
40E157 imul   ebp
40E159 add   edx, ebp
40E15B sar    edx, 4          ; /30
40E15E mov  eax, edx
40E160 shr    eax, 31
40E163 add   edx, eax
40E165 mov   ecx, edx        ; ecx = nTickSize
        local26 = (/* opTruncs/u */ (int) (sizePixelsPerTick_cx * -2004318071 >> 32) + sizePixelsPerTick_cx >> 4) + (/* opTruncs/u */ (int) (sizePixelsPerTick_cx * -2004318071 >> 32) + sizePixelsPerTick_cx >> 4) / -2147483648;

40E167 cmp   ecx, 2
40E16A jge    short loc_40E171

        // Too much propagation
        if ((/* opTruncs/u */ (int) (sizePixelsPerTick_cx * -2004318071 >> 32) + sizePixelsPerTick_cx >> 4) + (/* opTruncs/u */ (int) (sizePixelsPerTick_cx * -2004318071 >> 32) + sizePixelsPerTick_cx >> 4) / -2147483648 < 2) {
40E16C mov   ecx, 2             local26 = 2;
        }
40E171 mov    eax, [esp+18h+maxTickSizeX]
40E175 cdq
40E176 sub    eax, edx
40E178 sar     eax, 1
40E17A cmp   ecx, eax
40E17C jl       short loc_40E181
40E17E lea     ecx, [eax-1]
        if (local26 >= maxTickSizeX - (maxTickSizeX < 0 ? -1 : 0) >> 1) {
            local26 = (maxTickSizeX - (maxTickSizeX < 0 ? -1 : 0) >> 1) - 1;
        }

40E181 mov     edx, [esp+18h+ptOrigin.x]
; edi is xLeft = ptOrigin.x - nTickSize
40E185 sub     edi, ecx
40E187 mov    ebp, ebx        ; ebp = nVpos
40E189 lea      eax, [ecx+edx+1] ; nTickSize + ptOrigin.x + 1
40E18D mov    [esp+18h+pDC_or_xRight], eax
40E191 mov    eax, [esp+18h+nVertTicks_or_count]
40E195 test     eax, eax
40E197 jl        short loc_40E1CA ; edi = nHpos
40E199 inc     eax
40E19A mov   [esp+18h+nVertTicks_or_count], eax
40E19E
40E19E for_i_le_nVertTicks:
        local27 = ptOrigin_y;
        if (vertTicks >= 0) {
            vertTicks++;
            do {
                local12 = vertTicks;
                ...
                vertTicks = local12 - 1;
            } while (local12 != 1);
        }

This example shows:
  • Decompiling C++ binary file (hand editing required)
  • structures and symbols (entered in symbol.h file, using -sf switch)
  • "thiscall" calling convention (implicit "this" parameter passed in register ecx)
  • idiomatic sequence not handled (integer divide using reciprocal)
  • too much propagation
The final code, after some hand editing, is shown below:

void PlotAxes(CDC* pDC, POINT ptOrigin, SIZE sizePixelsPerTick, int horizTicks, int vertTicks, int nDrawTicks, int maxTickSizeX, int arg_24, int maxTickSizeY) {
      int nHeight = sizePixelsPerTick.cy * nVertTicks;
      int nWidth =  sizePixelsPerTick.cx * nHorzTicks;
      pDC->MoveTo(ptOrigin);
      pDC->LineTo(ptOrigin.x, ptOrigin.y - nHeight);
      pDC->MoveTo(ptOrigin);
      pDC->LineTo(ptOrigin.x + nWidth, ptOrigin.y);
      if (nDrawTicks & TICKS_VERT) {
          // Draw Vertical Ticks
          int nTickSize = sizePixelsPerTick.cx / 30 + sizePixelsPerTick.cx / 16;
          if (nTickSize < 2)
              nTickSize = 2;
          if (nTickSize >= maxTickSizeX/2)
              nTickSize = maxTickSizeX/2-1;
          ...
          for (int i = 0; i <= nVertTicks; i++) {
              ...
          }


Last modified 20/Jul/05: very minor touches; 01/Dec/2004: Updated output; more in "what Boomerang has done" section