What Boomerang can do

On this page, we show some of the things that Boomerang can do so far. An attempt has been made to line up equivalent original source, binary, and decompiled source code lines; this is not always possible. Comments in red are not generated by the decompiler; those in black are.

./boomerang -Td test/pentium/sumarray:

Original source code	Disassembled binary code	Decompiled source code
#include <stdio.h>
int a[10] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};	8049460 01000000 02000000 03000000 04000000 8049470 05000000 06000000 07000000 08000000 8049480 09000000 0a000000	int a[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
int main() {	8048328: push %ebp 8048329: mov %esp,%ebp 804832b: sub $0x8,%esp 804832e: and $0xfffffff0,%esp 8048331: mov $0x0,%eax 8048336: sub %eax,%esp	int main(int argc, char** argv, char** envp) { int local1; // m[r28{0} - 8] // sum int local2; // m[r28{0} - 12] // i
int sum = 0;	8048338: movl $0x0,0xfffffffc(%ebp)	local1 = 0;
int i; for (i=0; i < 10; i++) {	804833f: movl $0x0,0xfffffff8(%ebp) 8048346: cmpl $0x9,0xfffffff8(%ebp) 804834a: jle 804834e <main+0x26> 804834c: jmp 8048364 <main+0x3c>	local2 = 0; while (local2 <= 9) {
sum += a[i];	804834e: mov 0xfffffff8(%ebp),%eax 8048351: mov 0x8049460(,%eax,4),%edx 8048358: lea 0xfffffffc(%ebp),%eax 804835b: add %edx,(%eax)	local1 += a[local2]; // sum += a[i]
}	804835d: lea 0xfffffff8(%ebp),%eax 8048360: incl (%eax) 8048362: jmp 8048346 <main+0x1e>	local2++; // i++ }
printf("Sum is %d\n", sum);	8048364: sub $0x8,%esp 8048367: pushl 0xfffffffc(%ebp) 804836a: push $0x804842c 804836f: call 8048268 <printf@plt> 8048374: add $0x10,%esp	printf("Sum is %d\n", local1);
return 0;	8048377: mov $0x0,%eax	return 0;
}	804837c: leave 804837d: ret	}
	804842c 53756d20 69732025 Sum is % 8048434 640a00 d..

This example shows:

Source that is fairly readable, compiles with no warnings and runs correctly.
Conversion of stack locations to local variables
Detection, declaration, use, and initialisation of an array
Correct handling of a C string through the use of the string as a parameter to a library function
The output from sumarray-O4 (same program compiled with -O4 optimisation) looks much the same (as of September 2004), except that the pretested while loop is replaced by a posttested do while loop.

./boomerang test/pentium/fibo-O4:

Original source code	Disassembled binary code	Decompiled source code
#include <stdio.h>
int main (void) { int number, value;	80487cc: push %ebp 80487cd: mov %esp,%ebp 80487cf: sub $0x4,%esp 80487d2: push %esi 80487d3: push %ebx	int main(int argc, char** argv, char** envp) { `int local0; // m[r28{-} -8]` // number int local8; // r24{39} int local9; // r24 // value
printf ("Input number: ");	80487d4: push $0x80488b8 80487d9: call 80486bc <printf@plt>	printf("Input number: ");
scanf ("%d", &number);	80487de: lea 0xfffffffc(%ebp),%eax 80487e1: push %eax 80487e2: push $0x80488c7 80487e7: call 80486cc <scanf@plt>	scanf("%d", &local0);
value = fib(number);	80487ec: mov 0xfffffffc(%ebp),%ebx 80487ef: add $0xc,%esp 80487f2: cmp $0x1,%ebx 80487f5: jle 8048814 <main+0x48> 80487f7: lea 0xffffffff(%ebx),%eax 80487fa: push %eax 80487fb: call 8048798 <fib> 8048800: mov %eax,%esi 8048802: lea 0xfffffffe(%ebx),%eax 8048805: push %eax 8048806: call 8048798 <fib> 804880b: add %esi,%eax 804880d: add $0x8,%esp 8048810: jmp 8048816 <main+0x4a> 8048812: lea (%esi),%esi	// The compiler inlined the call to fib if (local0 <= 1) { local9 = local0; } else { local8 = fib(local0 - 1); local9 = fib(local0 - 2); local9 += local8; }
printf("fibonacci(%d) = %d\n", number, value);	8048814: mov %ebx,%eax 8048816: push %eax 8048817: pushl 0xfffffffc(%ebp) 804881a: push $0x80488ca 804881f: call 80486bc <printf@plt>	printf("fibonacci(%d) = %d\n", local0, local9);
return (0); }	8048824: xor %eax,%eax 8048826: lea 0xfffffff4(%ebp),%esp 8048829: pop %ebx 804882a: pop %esi 804882b: leave 804882c: ret	return 0; }

int fib (int x) {	8048798: push %ebp 8048799: mov %esp,%ebp 804879b: push %esi 804879c: push %ebx	int fib(int param5) { int local8; // r24{18} int local9; // r24 // Return value
if (x > 1)	804879d: mov 0x8(%ebp),%ebx 80487a0: cmp $0x1,%ebx 80487a3: jle 80487c0 <fib+0x28>	if (param5 <= 1) { // Note test inverted
return (fib(x - 1) + fib(x - 2));	80487a5: lea 0xffffffff(%ebx),%eax 80487a8: push %eax 80487a9: call 8048798 <fib> 80487ae: mov %eax,%esi 80487b0: lea 0xfffffffe(%ebx),%eax 80487b3: push %eax 80487b4: call 8048798 <fib> 80487b9: add %esi,%eax	local9 = param5; // ret = x
else return (x);	80487bb: jmp 80487c2 <fib+0x2a> 80487bd: lea 0x0(%esi),%esi 80487c0: mov %ebx,%eax	} else { local8 = fib(param5 - 1); // temp1 = fib(x-1) local9 = fib(param5 - 2); // ret = fib(x-2) local9 += local8; // ret += temp1 }
}	80487c2: lea 0xfffffff8(%ebp),%esp 80487c5: pop %ebx 80487c6: pop %esi 80487c7: leave 80487c8: ret	return local9; }

This example shows:

Code that compiles and runs correctly
Converts appropriate stack locations into arguments and parameters, including taking the address of a local
Converts appropriate registers into the return location, and emits the correct return instruction
Inverts the then and else clauses of fib() (but the result is still correct and readable)
Handles recursion correctly (no mean feat if you don't make assumptions about the calling convention)
As above, handles the strings correctly, etc.

./boomerang test/sparc/fromssa2

Original source code	Disassembled binary code	Decompiled source code
void main() { int a, x;	10684: save %sp, -112, %sp	int main(int argc, char** argv, char** envp) { int local17; // argc{37} int local18; // argc{73} // "old a" int local19; // local18{73} // a
a = 0;	10688: clr %o0
do { a = a+1; x = a; printf("%d ", a);	1068c: sethi %hi(0x10400), %l0 10690: add %o0, 1, %i0 10694: or %l0, 872, %o0 10698: call printf 1069c: mov %i0, %o1	argc = 0; // Compiler reuses argc for a local19 = argc; do { local18 = local19; printf("%d ", local18 + 1);
} while (a < 10);	106a0: cmp %i0, 9 106a4: ble 0x10690 106a8: mov %i0, %o0	local17 = local18 + 1; local19 = local17; } while (local18 + 1 <= 9);
printf("a is %d, x is %d\n", a, x);	106ac: sethi %hi(0x10400), %g1 106b0: mov %i0, %o1 106b4: mov %i0, %o2 106b8: call printf 106bc: or %g1, 880, %o0	printf("a is %d, x is %d\n", local18 + 1, local18 + 1);
return 0; }	106c0: ret 106c4: restore %g0, 0, %o0	return 0; }

This example shows:

Boomerang can decompile SPARC binary programs
Copes with SPARC "register windows"
Untangles the "delay slot" instructions (after every call and branch instruction)
local19 had to be generated as a result of transforming out of SSA form
too many local variables

What Boomerang has done

Over the period from August to December 2003, the main Boomerang developers used Boomerang (with a lot of help from IDA Pro and a text editor) to recover source code from a real-world Windows executable. Not all of the program had to be decompiled, and there was source code for an earlier version of the program. Despite the fact that Boomerang was (and still is) not ready for real-world use, the main algorithm that the clients wanted source code for was recovered, and some of the GUI parts of the program were decompiled as well. The experience was described in a paper presented at the 2004 Working Conference on Reverse Engineering (WCRE2004) in Delft, Netherlands. You can download an extended version of this paper here.

The example below, taken from the paper, shows unedited output from Boomerang.

Disassembled binary code	Boomerang output
40E0F0 sub esp, 8	void PlotAxes(CDC* pDC, int ptOrigin_x, int ptOrigin_y, int sizePixelsPerTick_cx, int sizePixelsPerTick_cy, int horizTicks, int vertTicks, int nDrawTicks, int maxTickSizeX, int arg_24, int maxTickSizeY) { int local2; /* m[r28{0} - 8] */ int local11; // r28{67} int local12; // vertTicks{312} int local26; // r25
40E0F3 lea eax, [esp+8+dummy] 40E0F7 push ebx 40E0F8 mov ebx, [esp+0Ch+ptOrigin.y] 40E0FC push ebp 40E0FD push esi 40E0FE mov esi, [esp+14h+pDC_or_xRight] 40E102 push edi 40E103 mov edi, [esp+18h+ptOrigin.x] 40E107 push ebx 40E108 push edi 40E109 push eax 40E10A mov ecx, esi 40E10C call CDC::MoveTo(int,int)	CDC_MoveTo(pDC, &local2, ptOrigin_x, ptOrigin_y);
40E111 mov ecx, [esp+18h+nVertTicks_or_count] 40E115 mov edx, ebx 40E117 imul ecx, [esp+18h+sizePixelsPerTick.cy_] 40E11C sub edx, ecx ; ecx is nHeight 40E11E mov ecx, esi 40E120 push edx 40E121 push edi 40E122 call CDC::LineTo(int,int)	CDC_LineTo(pDC, ptOrigin_x, ptOrigin_y - sizePixelsPerTick_cy * vertTicks);
40E127 push ebx 40E128 lea eax, [esp+1Ch+dummy] 40E12C push edi 40E12D push eax 40E12E mov ecx, esi 40E130 call CDC::MoveTo(int,int)	CDC_MoveTo(pDC, &local2, ptOrigin_x, ptOrigin_y);
40E135 mov ecx, [esp+18h+nHorizTicks] 40E139 mov ebp, [esp+18h+sizePixelsPerTick.cx_] 40E13D imul ecx, ebp ; ecx is nWidth 40E140 add ecx, edi 40E142 push ebx 40E143 push ecx 40E144 mov ecx, esi 40E146 call CDC::LineTo(int,int)	local11 = local18 - 36; %pc += 6688008; // Error CDC_LineTo(pDC, sizePixelsPerTick_cx * horizTicks + ptOrigin_x, ptOrigin_y);
40E14B test byte ptr [esp+18h+nDrawTicks], TICKS_VERT 40E150 jz short loc_40E1CE	// Did not convert local variable if (((char)(local11 + 68) & 1) != 0) {
40E152 mov eax, 88888889h ; /1.875 40E157 imul ebp 40E159 add edx, ebp 40E15B sar edx, 4 ; /30 40E15E mov eax, edx 40E160 shr eax, 31 40E163 add edx, eax 40E165 mov ecx, edx ; ecx = nTickSize	local26 = (/* opTruncs/u / (int) (sizePixelsPerTick_cx -2004318071 >> 32) + sizePixelsPerTick_cx >> 4) + (/* opTruncs/u / (int) (sizePixelsPerTick_cx -2004318071 >> 32) + sizePixelsPerTick_cx >> 4) / -2147483648;
40E167 cmp ecx, 2 40E16A jge short loc_40E171	// Too much propagation if ((/* opTruncs/u / (int) (sizePixelsPerTick_cx -2004318071 >> 32) + sizePixelsPerTick_cx >> 4) + (/* opTruncs/u / (int) (sizePixelsPerTick_cx -2004318071 >> 32) + sizePixelsPerTick_cx >> 4) / -2147483648 < 2) {
40E16C mov ecx, 2	local26 = 2; }
40E171 mov eax, [esp+18h+maxTickSizeX] 40E175 cdq 40E176 sub eax, edx 40E178 sar eax, 1 40E17A cmp ecx, eax 40E17C jl short loc_40E181 40E17E lea ecx, [eax-1]	if (local26 >= maxTickSizeX - (maxTickSizeX < 0 ? -1 : 0) >> 1) { local26 = (maxTickSizeX - (maxTickSizeX < 0 ? -1 : 0) >> 1) - 1; }
40E181 mov edx, [esp+18h+ptOrigin.x] ; edi is xLeft = ptOrigin.x - nTickSize 40E185 sub edi, ecx 40E187 mov ebp, ebx ; ebp = nVpos 40E189 lea eax, [ecx+edx+1] ; nTickSize + ptOrigin.x + 1 40E18D mov [esp+18h+pDC_or_xRight], eax 40E191 mov eax, [esp+18h+nVertTicks_or_count] 40E195 test eax, eax 40E197 jl short loc_40E1CA ; edi = nHpos 40E199 inc eax 40E19A mov [esp+18h+nVertTicks_or_count], eax 40E19E 40E19E for_i_le_nVertTicks:	local27 = ptOrigin_y; if (vertTicks >= 0) { vertTicks++; do { local12 = vertTicks; ... vertTicks = local12 - 1; } while (local12 != 1); }

This example shows:

Decompiling a C++ binary file (hand editing required)
structures and symbols (entered in symbol.h file, using -sf switch)
"thiscall" calling convention (implicit "this" parameter passed in register ecx)
idiomatic sequence not handled (integer divide using reciprocal)
too much propagation

The final code, after some hand editing, is shown below:

void PlotAxes(CDC* pDC, POINT ptOrigin, SIZE sizePixelsPerTick, int horizTicks, int vertTicks, int nDrawTicks, int maxTickSizeX, int arg_24, int maxTickSizeY) {
      int nHeight = sizePixelsPerTick.cy * nVertTicks;
      int nWidth = sizePixelsPerTick.cx * nHorzTicks;
pDC->MoveTo(ptOrigin);
      pDC->LineTo(ptOrigin.x, ptOrigin.y - nHeight);
      pDC->MoveTo(ptOrigin);
pDC->LineTo(ptOrigin.x + nWidth, ptOrigin.y);
      if (nDrawTicks & TICKS_VERT) {
          // Draw Vertical Ticks
          int nTickSize = sizePixelsPerTick.cx / 30 + sizePixelsPerTick.cx / 16;
        if (nTickSize < 2)
          nTickSize = 2;
          if (nTickSize >= maxTickSizeX/2)
              nTickSize = maxTickSizeX/2-1;
          ...
          for (int i = 0; i <= nVertTicks; i++) {
              ...
          }

Last modified 20/Jul/05: very minor touches; 01/Dec/2004: Updated output; more in "what Boomerang has done" section

Boomerang

General

Status

Research

Documentation

Technical

Sourceforge

Other Resources

What Boomerang can do

What Boomerang has done