What Boomerang can do
On this page, we show some of the things that Boomerang can do so far. An attempt has been made to line up equivalent original source, binary, and decompiled source code lines; this is not always possible. Comments in red are not generated by the decompiler; those in black are../boomerang -Td test/pentium/sumarray:
Original source code |
Disassembled binary code |
Decompiled
source code |
#include <stdio.h> | ||
int a[10] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; | 8049460 01000000 02000000
03000000 04000000 8049470 05000000 06000000 07000000 08000000 8049480 09000000 0a000000 |
int a[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; |
int main() { | 8048328:
push %ebp 8048329: mov %esp,%ebp 804832b: sub $0x8,%esp 804832e: and $0xfffffff0,%esp 8048331: mov $0x0,%eax 8048336: sub %eax,%esp |
int main(int argc, char**
argv, char** envp) { int local1; // m[r28{0} - 8] // sum int local2; // m[r28{0} - 12] // i |
int sum = 0; | 8048338: movl $0x0,0xfffffffc(%ebp) | local1 = 0; |
int i; for (i=0; i < 10; i++) { |
804833f:
movl $0x0,0xfffffff8(%ebp) 8048346: cmpl $0x9,0xfffffff8(%ebp) 804834a: jle 804834e <main+0x26> 804834c: jmp 8048364 <main+0x3c> |
local2 =
0; while (local2 <= 9) { |
sum += a[i]; | 804834e:
mov 0xfffffff8(%ebp),%eax 8048351: mov 0x8049460(,%eax,4),%edx 8048358: lea 0xfffffffc(%ebp),%eax 804835b: add %edx,(%eax) |
local1 += a[local2]; // sum += a[i] |
} |
804835d:
lea 0xfffffff8(%ebp),%eax 8048360: incl (%eax) 8048362: jmp 8048346 <main+0x1e> |
local2++;
// i++ } |
printf("Sum is %d\n", sum); | 8048364:
sub $0x8,%esp 8048367: pushl 0xfffffffc(%ebp) 804836a: push $0x804842c 804836f: call 8048268 <printf@plt> 8048374: add $0x10,%esp |
printf("Sum is %d\n", local1); |
return 0; | 8048377: mov $0x0,%eax | return 0; |
} |
804837c:
leave 804837d: ret |
} |
804842c
53756d20 69732025 Sum is % 8048434 640a00 d.. |
This example shows:
- Source that that is fairly readable, compiles with no warnings and runs correctly.
- Conversion of stack locations to local variables
- Detection, declaration, use, and initialisation of an array
- Correct handling of a C string through the use of the string as a parameter to a library function
- The output from sumarray-O4 (same program compiled with -O4
optimisation) looks much the same (as of September 2004), except that
the pretested while loop
is replaced by a posttested do
while loop.
Original
source code |
Disassembled binary code |
Decompiled
source code |
#include <stdio.h> | ||
int main (void) { int number, value; |
800487cc:
push %ebp 80487cd: mov %esp,%ebp 80487cf: sub $0x4,%esp 80487d2: push %esi 80487d3: push %ebx |
int main(int argc, char**
argv, char** envp) { int local0; // m[r28{-} -8] // number int local8; // r24{39} int local9; // r24 // value |
printf ("Input number: "); | 80487d4:
push $0x80488b8 80487d9: call 80486bc <printf@plt> |
printf("Input number: "); |
scanf ("%d", &number); | 80487de:
lea 0xfffffffc(%ebp),%eax 80487e1: push %eax 80487e2: push $0x80488c7 80487e7: call 80486cc <scanf@plt> |
scanf("%d", &local0); |
value = fib(number); | 80487ec:
mov 0xfffffffc(%ebp),%ebx 80487ef: add $0xc,%esp 80487f2: cmp $0x1,%ebx 80487f5: jle 8048814 <main+0x48> 80487f7: lea 0xffffffff(%ebx),%eax 80487fa: push %eax 80487fb: call 8048798 <fib> 8048800: mov %eax,%esi 8048802: lea 0xfffffffe(%ebx),%eax 8048805: push %eax 8048806: call 8048798 <fib> 804880b: add %esi,%eax 804880d: add $0x8,%esp 8048810: jmp 8048816 <main+0x4a> 8048812: lea (%esi),%esi |
//
The compiler inlined the call to fib if (local0 <= 1) { local9 = local0; } else { local8 = fib(local0 - 1); local9 = fib(local0 - 2); local9 += local8; } |
printf("fibonacci(%d) = %d\n", number, value); |
8048814:
mov %ebx,%eax 8048816: push %eax 8048817: pushl 0xfffffffc(%ebp) 804881a: push $0x80488ca 804881f: call 80486bc <printf@plt> |
printf("fibonacci(%d) = %d\n", local0, local9); |
return
(0); } |
8048824:
xor %eax,%eax 8048826: lea 0xfffffff4(%ebp),%esp 8048829: pop %ebx 804882a: pop %esi 804882b: leave 804882c: ret |
return 0; } |
int fib (int x) { |
8048798:
push %ebp 8048799: mov %esp,%ebp 804879b: push %esi 804879c: push %ebx |
int fib(int param5) { int local8; // r24{18} int local9; // r24 // Return value |
if (x > 1) | 804879d:
mov 0x8(%ebp),%ebx 80487a0: cmp $0x1,%ebx 80487a3: jle 80487c0 <fib+0x28> |
if
(param1 <= 1) { // Note
test inverted |
return (fib(x - 1) + fib(x - 2)); | 80487a5:
lea 0xffffffff(%ebx),%eax 80487a8: push %eax 80487a9: call 8048798 <fib> 80487ae: mov %eax,%esi 80487b0: lea 0xfffffffe(%ebx),%eax 80487b3: push %eax 80487b4: call 8048798 <fib> 80487b9: add %esi,%eax |
local9 = param5; // ret = x |
else return (x); | 80487bb:
jmp 80487c2 <fib+0x2a> 80487bd: lea 0x0(%esi),%esi 80487c0: mov %ebx,%eax |
} else { local8 = fib(param5 - 1); // temp1 = fib(x-1) local9 = fib(param5 - 2); // ret = fib(x-2) local9 += local8; // ret += temp1 } |
} |
80487c2:
lea 0xfffffff8(%ebp),%esp 80487c5: pop %ebx 80487c6: pop %esi 80487c7: leave 80487c8: ret |
return
local9; } |
- Code that compiles and runs correctly
- Converts appropriate stack locations into arguments and
parameters, including taking the address of a local
- Converts appropriate registers into the return location, and emits the correct return instruction
- Inverts the then and else clauses of fib() (but the result is
still correct and readable)
- Handles recursion correctly (no mean feat if you don't make
assumptions about the calling convention)
- As above, handles the strings correctly, etc.
Original
source code |
Disassembled binary code |
Decompiled
source code |
void
main() { int a, x; |
10684: save %sp, -112, %sp | int
main(int argc, char **argv, char **envp) { int local17; // argc{37} int local18; // argc{73} // "old a" int local19; // local18{73} // a |
a = 0; | 10688: clr %o0 | |
do { a = a+1; x = a; printf("%d ", a); |
1068c:
sethi %hi(0x10400), %l0 10690: add %o0, 1, %i0 10694: or %l0, 872, %o0 10698: call printf 1069c: mov %i0, %o1 |
argc =
0;
// Compiler reuses argc for a local19 = argc; do { local18 = local19; printf("%d ", local18 + 1); |
} while (a < 10); | 106a0:
cmp %i0, 9 106a4: ble 0x10690 106a8: mov %i0, %o0 |
local17 = local18 + 1; local19 = local17; } while (local18 + 1 <= 9); |
printf("a is %d, x is %d\n", a, x); | 106ac:
sethi %hi(0x10400), %g1 106b0: mov %i0, %o1 106b4: mov %i0, %o2 106b8: call printf 106bc: or %g1, 880, %o0 |
printf("a is %d, x is %d\n", local18 + 1, local18 + 1); |
return 0; } |
106c0: ret 106c4: restore %g0, 0, %o0 |
return 0; } |
- Boomerang can decompile SPARC binary programs
- Copes with SPARC "register windows"
- Untangles the "delay slot" instructions (after every call and branch instruction)
- local19 had to be generated as a result of transforming out of SSA form
- too many local variables
What Boomerang has done
Over ther period from August to December 2003, the main Boomerang developers used Boomerang (with a lot of help from IDA Pro and a text editor) to recover source code from a real-world Windows executable. Not all of the program had to be decompiled, and there was source code for an earlier version of the program. Despite the fact that Boomerang was (and still is) not ready for real-world use, the main algorithm that the clients wanted source code for was recovered, and some of the GUI parts of the program were decompiled as well. The experience was published in a paper published in the 2004 Working Conference on Reverse Engineering (WCRE2004) in Delft, Netherlands. You can download an extended version of this paper here.The example below, taken from the paper, shows unedited output from Boomerang.
Disassembled binary code |
Boomerang output |
40E0F0 sub esp, 8 | void
PlotAxes(CDC* pDC, int ptOrigin_x, int ptOrigin_y, int
sizePixelsPerTick_cx, int sizePixelsPerTick_cy, int horizTicks, int
vertTicks, int nDrawTicks, int maxTickSizeX, int arg_24, int
maxTickSizeY) { int local2; /* m[r28{0} - 8] */ int local11; // r28{67} int local12; // vertTicks{312} int local26; // r25 |
40E0F3
lea eax, [esp+8+dummy] 40E0F7 push ebx 40E0F8 mov ebx, [esp+0Ch+ptOrigin.y] 40E0FC push ebp 40E0FD push esi 40E0FE mov esi, [esp+14h+pDC_or_xRight] 40E102 push edi 40E103 mov edi, [esp+18h+ptOrigin.x] 40E107 push ebx 40E108 push edi 40E109 push eax 40E10A mov ecx, esi 40E10C call CDC::MoveTo(int,int) |
CDC_MoveTo(pDC, &local2, ptOrigin_x, ptOrigin_y); |
40E111
mov ecx, [esp+18h+nVertTicks_or_count] 40E115 mov edx, ebx 40E117 imul ecx, [esp+18h+sizePixelsPerTick.cy_] 40E11C sub edx, ecx ; ecx is nHeight 40E11E mov ecx, esi 40E120 push edx 40E121 push edi 40E122 call CDC::LineTo(int,int) |
CDC_LineTo(pDC, ptOrigin_x, ptOrigin_y - sizePixelsPerTick_cy * vertTicks); |
40E127
push ebx 40E128 lea eax, [esp+1Ch+dummy] 40E12C push edi 40E12D push eax 40E12E mov ecx, esi 40E130 call CDC::MoveTo(int,int) |
CDC_MoveTo(pDC, &local2, ptOrigin_x, ptOrigin_y); |
40E135
mov ecx, [esp+18h+nHorizTicks] 40E139 mov ebp, [esp+18h+sizePixelsPerTick.cx_] 40E13D imul ecx, ebp ; ecx is nWidth 40E140 add ecx, edi 40E142 push ebx 40E143 push ecx 40E144 mov ecx, esi 40E146 call CDC::LineTo(int,int) |
local11 = local18 - 36; %pc += 6688008; // Error CDC_LineTo(pDC, sizePixelsPerTick_cx * horizTicks + ptOrigin_x, ptOrigin_y); |
40E14B
test byte ptr [esp+18h+nDrawTicks], TICKS_VERT 40E150 jz short loc_40E1CE |
// Did not convert local variable if ((*(char*)(local11 + 68) & 1) != 0) { |
40E152
mov eax, 88888889h ; /1.875 40E157 imul ebp 40E159 add edx, ebp 40E15B sar edx, 4 ; /30 40E15E mov eax, edx 40E160 shr eax, 31 40E163 add edx, eax 40E165 mov ecx, edx ; ecx = nTickSize |
local26 = (/* opTruncs/u */ (int) (sizePixelsPerTick_cx * -2004318071
>> 32) + sizePixelsPerTick_cx >> 4) + (/* opTruncs/u */
(int) (sizePixelsPerTick_cx * -2004318071 >> 32) +
sizePixelsPerTick_cx >> 4) / -2147483648; |
40E167
cmp ecx, 2 40E16A jge short loc_40E171 |
// Too much propagation if ((/* opTruncs/u */ (int) (sizePixelsPerTick_cx * -2004318071 >> 32) + sizePixelsPerTick_cx >> 4) + (/* opTruncs/u */ (int) (sizePixelsPerTick_cx * -2004318071 >> 32) + sizePixelsPerTick_cx >> 4) / -2147483648 < 2) { |
40E16C mov ecx, 2 |
local26 = 2; } |
40E171
mov eax, [esp+18h+maxTickSizeX] 40E175 cdq 40E176 sub eax, edx 40E178 sar eax, 1 40E17A cmp ecx, eax 40E17C jl short loc_40E181 40E17E lea ecx, [eax-1] |
if (local26 >= maxTickSizeX - (maxTickSizeX < 0 ? -1 : 0)
>> 1) { local26 = (maxTickSizeX - (maxTickSizeX < 0 ? -1 : 0) >> 1) - 1; } |
40E181
mov edx, [esp+18h+ptOrigin.x] ; edi is xLeft = ptOrigin.x - nTickSize 40E185 sub edi, ecx 40E187 mov ebp, ebx ; ebp = nVpos 40E189 lea eax, [ecx+edx+1] ; nTickSize + ptOrigin.x + 1 40E18D mov [esp+18h+pDC_or_xRight], eax 40E191 mov eax, [esp+18h+nVertTicks_or_count] 40E195 test eax, eax 40E197 jl short loc_40E1CA ; edi = nHpos 40E199 inc eax 40E19A mov [esp+18h+nVertTicks_or_count], eax 40E19E 40E19E for_i_le_nVertTicks: |
local27 = ptOrigin_y; if (vertTicks >= 0) { vertTicks++; do { local12 = vertTicks; ... vertTicks = local12 - 1; } while (local12 != 1); } |
This example shows:
- Decompiling C++ binary file (hand editing required)
- structures and symbols (entered in symbol.h file, using -sf switch)
- "thiscall" calling convention (implicit "this" parameter passed
in register ecx)
- idiomatic sequence not handled (integer divide using reciprocal)
- too much propagation
void PlotAxes(CDC* pDC, POINT ptOrigin, SIZE sizePixelsPerTick, int horizTicks, int vertTicks, int nDrawTicks, int maxTickSizeX, int arg_24, int maxTickSizeY) {
int nHeight = sizePixelsPerTick.cy * nVertTicks;
int nWidth = sizePixelsPerTick.cx * nHorzTicks;
pDC->MoveTo(ptOrigin);
pDC->LineTo(ptOrigin.x, ptOrigin.y - nHeight);
pDC->MoveTo(ptOrigin);
pDC->LineTo(ptOrigin.x + nWidth, ptOrigin.y);
if (nDrawTicks & TICKS_VERT) {
// Draw Vertical Ticks
int nTickSize = sizePixelsPerTick.cx / 30 + sizePixelsPerTick.cx / 16;
if (nTickSize < 2)
nTickSize = 2;
if (nTickSize >= maxTickSizeX/2)
nTickSize = maxTickSizeX/2-1;
...
for (int i = 0; i <= nVertTicks; i++) {
...
}
Last modified 20/Jul/05: very minor touches; 01/Dec/2004: Updated output; more in "what Boomerang has done" section