GoogleCTF 2025 - Multiarch
We are given the following binary :
-> % file multiarch
multiarch: ELF 64-bit LSB pie executable, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, BuildID[sha1]=dc495115eb37cb56a37d5ac691cf406d06f185c7, for GNU/Linux 4.4.0, stripped
-> % xxd crackme.masm
00000000: 4d41 534d 0113 0065 0102 7801 5001 03c8 MASM...e..x.P...
00000010: 022d 0010 4b00 0000 3000 2000 0010 0200 .-..K...0. .....
00000020: 0000 a000 0000 0010 2b00 0000 30ad 2000 ........+...0. .
00000030: 0010 0200 0000 a000 0000 0010 0000 0000 ................
00000040: a000 0000 0020 3713 0000 2039 0500 0030 ..... 7... 9...0
00000050: 0953 6708 6200 0000 0060 0000 0000 30aa .Sg.b....`....0.
00000060: aaaa aa80 0000 0000 720b 1100 00c5 0200 ........r.......
[...]
Based on the challenge name, this looks like a classic emulator/VM challenge, with an external file holding the program to emulate.
Init
(All function names are hypothetical and based on observed behavior.)
The program starts by reading crackme.masm, which begins with some metadata. It checks the magic header “MASM”, then reads the metadata of the segments stored inside:
// Opens the segments for the emulator.
//
// Opens `masmpath`, checks that the first 4 bytes are the "MASM" magic, then
// calls read_segments() three times (second argument 4, 9 and 14) to fill a
// zero-initialised 0x30-byte descriptor.
// Returns the descriptor on success; returns 0 after printing an "[E] ..."
// message (or after a read_segments() failure) otherwise.
// NOTE(review): in this listing the success path returns without fclose(fd).
_QWORD *__fastcall open_segments(const char *masmpath)
{
FILE *v1; // rax
FILE *fd; // rbx
_QWORD *ptr; // rbp
int *v5; // rax
char *v6; // rax
int *v7; // rax
char *v8; // rax
_QWORD v9[7]; // [rsp+0h] [rbp-38h] BYREF
// presumably the stack-protector canary load emitted by the compiler
v9[3] = __readfsqword(0x28u);
v1 = fopen(masmpath, "r");
fd = v1;
if ( !v1 )
{
v5 = __errno_location();
v6 = strerror(*v5);
fprintf(stderr, "[E] couldn't open file %s - %s\n", masmpath, v6);
return 0LL;
}
// v9[0..1] is used as the scratch buffer that receives the 4 magic bytes
v9[0] = 0LL;
v9[1] = 0LL;
if ( fread(v9, 1uLL, 4uLL, v1) != 4 )
{
v7 = __errno_location();
v8 = strerror(*v7);
fprintf(stderr, "[E] couldn't read magic - %s\n", v8);
// shared failure exit: close the file and return 0
LABEL_9:
fclose(fd);
return 0LL;
}
if ( strncmp((const char *)v9, "MASM", 4uLL) )
{
fwrite("[E] bad magic\n", 1uLL, 0xEuLL, stderr);
goto LABEL_9;
}
ptr = calloc(1uLL, 0x30uLL);
// 4, 9 and 14 are the file offsets of three consecutive 5-byte records
// (see the Python port of read_segments further down in this write-up)
if ( !(unsigned __int8)read_segments(ptr, 4LL, fd)
|| !(unsigned __int8)read_segments(ptr, 9LL, fd)
|| !(unsigned __int8)read_segments(ptr, 14LL, fd) )
{
// ptr is passed to read_segments() before this NULL check
if ( ptr )
free_seg((__int64)ptr);
goto LABEL_9;
}
return ptr;
}
read_segments is quite easy to reverse, so we can create a Python template for the emulator initialisation:
class Emulator():
    """Loader for a ``.masm`` image.

    Checks the ``MASM`` magic and reads the three segment descriptors stored
    at file offsets 4, 9 and 14.  Each descriptor is 5 bytes: a 1-byte type
    followed by a little-endian 16-bit offset and a little-endian 16-bit size.
    The result is stored in ``self.segments_metadata`` as
    ``{'code' | 'data' | 'extra': (bytes, declared_size)}``.
    """

    def __init__(self, masm):
        """Open *masm* and load its segments (exits the process on a bad file)."""
        self.masm = masm
        self.fd = open(masm, "rb")
        self._load_segments()

    def __del__(self):
        # ``fd`` is missing when open() itself raised in __init__.
        fd = getattr(self, "fd", None)
        if fd is not None:
            fd.close()

    def _load_segments(self):
        """Validate the magic and read the three descriptors.

        Raises:
            SystemExit: on a bad magic or when a descriptor cannot be loaded,
                mirroring the C loader which gives up in both cases.
        """
        if self.fd.read(4) != b"MASM":
            sys.stderr.write("[E] bad magic\n")
            sys.exit(1)
        self.segments_metadata = {}
        for header_offset in (4, 9, 14):
            # A failed descriptor used to be ignored, which only surfaced
            # later as a KeyError on the missing segment.
            if not self.read_segments(header_offset):
                sys.exit(1)

    def read_segments(self, size_offset):
        """Read the descriptor at file offset *size_offset* and its payload.

        Returns:
            True when the segment was stored, False when the descriptor is
            truncated or carries an unknown segment type.
        """
        self.fd.seek(size_offset)
        header = self.fd.read(5)
        if len(header) != 5:
            sys.stderr.write("[E] couldn't read segment header\n")
            return False
        segment_type, offset, size = struct.unpack("<BHH", header)
        self.fd.seek(offset)
        data = self.fd.read(size)
        print(f"Segment : {segment_type} | offset={offset} | size={size}")
        if segment_type == 1:
            self.segments_metadata['code'] = (data, size)
        elif segment_type == 2:
            self.segments_metadata['data'] = (data, size)
        elif segment_type == 3:
            self.segments_metadata['extra'] = (data, size)
        else:
            sys.stderr.write(f"[E] invalid segment type: {segment_type}\n")
            return False
        return True


if __name__ == "__main__":
    emu = Emulator("./crackme.masm")
Following the segment init, the program initialises some sort of emulation context with this function:
// Initializes the emulator context by allocating memory and copying data from the input file.
//
// `segment_metadata` is read as six consecutive 8-byte fields: a pointer at
// +0/+16/+32 and a size at +8/+24/+40, i.e. one (pointer, size) pair per
// segment.
// Returns the freshly allocated 0x88-byte context.
// NOTE(review): none of the calloc()/mmap() results is checked in this listing.
emu_ctx *__fastcall initialize_emu_ctx(__int64 segment_metadata)
{
emu_ctx *emu_ctx; // rbx
void *v3; // r14
void *v4; // r13
void *v5; // r12
__int64 v6; // r13
emu_ctx = (emu_ctx *)calloc(1uLL, 0x88uLL);
// Three 0x1000-byte mappings; prot 7 / flags 33 are presumably
// PROT_READ|PROT_WRITE|PROT_EXEC and MAP_SHARED|MAP_ANONYMOUS on Linux.
v3 = mmap(0LL, 0x1000uLL, 7, 33, 0, 0LL);
emu_ctx->code = v3;
v4 = mmap(0LL, 0x1000uLL, 7, 33, 0, 0LL);
emu_ctx->data = v4;
emu_ctx->extra = mmap(0LL, 0x1000uLL, 7, 33, 0, 0LL);
// heap buffer sized after the third segment (size field at +40)
v5 = calloc(1uLL, *(_QWORD *)(segment_metadata + 40));
emu_ctx->ptr_1 = v5;
emu_ctx->check_flag_env = check_flag_env;
// first segment -> first mapping, second segment -> second mapping
memcpy(v3, *(const void **)segment_metadata, *(_QWORD *)(segment_metadata + 8));
memcpy(v4, *(const void **)(segment_metadata + 16), *(_QWORD *)(segment_metadata + 24));
v6 = *(_QWORD *)(segment_metadata + 40);
// the third segment is copied into the heap buffer (ptr_1); the third
// mapping (emu_ctx->extra) receives no data in this function
memcpy(v5, *(const void **)(segment_metadata + 32), *(_QWORD *)(segment_metadata + 40));
emu_ctx->extra_size = v6;
// 4096 and 0x8F00 are the values the Python port later uses as the initial
// PC and SP
emu_ctx->UNK1 = 4096;
emu_ctx->UNK2 = 0x8F00;
return emu_ctx;
}
We can create an IDA struct with the following content:
// IDA local type for the 0x88-byte context allocated by initialize_emu_ctx.
// NOTE(review): the int members start at odd offsets (0x33, 0x37, ...), so
// the type presumably has to be declared packed in IDA - confirm.
00000000 struct emu_ctx // sizeof=0x88
00000000 {
00000000 void* code; // first 0x1000-byte mapping
00000008 void* data; // second 0x1000-byte mapping
00000010 void* extra; // third 0x1000-byte mapping
00000018 char *ptr_1; // heap copy of the third segment
00000020 __int64 extra_size; // size of the third segment
00000028 void * check_flag_env; // set to check_flag_env by initialize_emu_ctx
00000030 char UNKB; // identified below in the write-up as the continue_emu flag
00000031 char UNKA;
00000032 char UNKC;
00000033 int UNK1; // initialised to 4096; used as PC in the Python port
00000037 int UNK2; // initialised to 0x8F00; used as SP in the Python port
0000003B int UNK3; // presumably register A (the gdb script reads ctx + reg*4 + 0x3B) - TODO confirm
0000003F int UNK4; // presumably register B - TODO confirm
00000043 int UNK5; // presumably register C - TODO confirm
00000047 int UNK6; // presumably register D - TODO confirm
0000004B char ptr_array[60];
00000087 char size_unk;
00000088 };
Note: we assume the first, second and third mmap are code/data/extra respectively, based on the segment contents (the first contains opcodes, the second contains strings, and the third contains extra data).
Main Loop
The program seems to get a byte from extra, do a bit test on it, and select a handler based on the result. Also, a flag is set to 1 whenever an error happens, which exits the loop.
We can assume that this selects the arch mode and then runs the associated arch.
We now know that: UNKB -> continue_emu flag
On errors, the emulator can also dump its state when exiting:
// Dumps the emulator state.
//
// Always prints PC, SP and the four registers A-D on one line.  When
// `print_stack` is non-zero it also prints up to seven dwords around SP
// (SP-8 .. SP+16), marking the entry at SP with "* ".
unsigned __int64 __fastcall dump_emu(emu_ctx *emu_ctx, char print_stack)
{
int i; // ebp
unsigned int sp_offset; // r12d
const char *ascii; // rsi
int value; // [rsp+Ch] [rbp-44h] BYREF
unsigned __int64 v7; // [rsp+10h] [rbp-40h]
// presumably the stack-protector canary load emitted by the compiler
v7 = __readfsqword(0x28u);
printf(
" ---[ PC=0x%08x SP=0x%08x | A=0x%08x B=0x%08x C=0x%08x D=0x%08x\n",
emu_ctx->PC,
emu_ctx->SP,
emu_ctx->A,
emu_ctx->B,
emu_ctx->C,
emu_ctx->D);
if ( print_stack )
{
puts(" ---[ STACK CONTENTS");
// i = -8, -4, 0, 4, 8, 12, 16: byte offsets relative to SP
for ( i = -8; i != 20; i += 4 )
{
sp_offset = emu_ctx->SP + i;
// stop the dump at the first address that cannot be read
if ( !(unsigned __int8)get_memory_emu_dword(emu_ctx, sp_offset, &value) )
break;
ascii = " ";
if ( emu_ctx->SP == sp_offset )
ascii = "* ";
printf("\t%s0x%08x 0x%08x\n", ascii, sp_offset, value);
}
}
return v7 - __readfsqword(0x28u);
}
We can now name the emulated registers in the emu_ctx struct.
We can then write a more precise emulator in Python:
class EmuCtx:
    """Registers, flags and memory buffers of the emulated machine."""

    def __init__(self):
        # Memory buffers, allocated by initialize_emu_ctx().
        self.code = None
        self.data = None
        self.extra = None
        self.extra_size = 0
        # Program counter / stack pointer and general-purpose registers.
        self.PC = 0
        self.SP = 0
        self.dynamic = bytearray(60)
        self.size_unk = 0
        self.A = 0
        self.B = 0
        self.C = 0
        self.D = 0
        # State read by the step functions (``ctx.flag``, ``ctx.cpt``,
        # ``ctx.patch_id``, ``ctx.exec_failed``); it must exist before the
        # first instruction runs, otherwise the first jump raises
        # AttributeError.
        self.flag = 0
        self.cpt = 0
        self.patch_id = 0
        self.exec_failed = 0

    @staticmethod
    def _copy_into(buffer, payload, declared_size):
        """Copy at most *declared_size* bytes of *payload* to the start of *buffer*.

        Both sides of the slice assignment use the same length, so a payload
        shorter than its declared size no longer shrinks *buffer*.
        """
        count = min(declared_size, len(payload))
        buffer[:count] = payload[:count]

    def initialize_emu_ctx(self, seg_metadata):
        """Allocate the memory buffers and load the three segments.

        Args:
            seg_metadata: mapping with the keys ``'code'``, ``'data'`` and
                ``'extra'``, each holding a ``(payload, declared_size)`` pair.
        """
        self.code = bytearray(0x1000)
        self.data = bytearray(0x1000)
        code_data, code_size = seg_metadata['code']
        data_data, data_size = seg_metadata['data']
        extra_data, extra_size = seg_metadata['extra']
        self.extra = bytearray(0x10000)
        self.extra_size = extra_size
        # Same initial values as the C initialiser (4096 / 0x8F00).
        self.PC = 0x1000
        self.SP = 0x8F00
        self._copy_into(self.code, code_data, code_size)
        self._copy_into(self.data, data_data, data_size)
        self._copy_into(self.extra, extra_data, extra_size)
class Emulator():
    """Top-level driver: loads the image, then runs one instruction at a time.

    For every step the ``extra`` segment is consulted as a bitmap (one bit
    per code byte) to choose between the two instruction sets.
    """

    def __init__(self, masm):
        self.masm = masm
        self.fd = open(masm, "rb")
        self._load_segments()
        self.ctx = EmuCtx()
        self.continue_emu = 1
        self.ctx.initialize_emu_ctx(self.segments_metadata)

    def get_arch_mode(self):
        """Return the arch selector for the instruction at ``ctx.PC``.

        Returns:
            0 for arch1, 1 (or True for the hard-coded 0x1097 patch) for
            arch2, and -1 when PC has no byte in the bitmap so that
            run_step() stops the emulator instead of wrapping around to the
            end of the bitmap (negative index) or crashing with IndexError.
        """
        pc = self.ctx.PC
        if pc == 0x1097:  # patch for later
            return True
        bit_index = pc - 4096
        if bit_index < 0:
            # Keeps the decompiled rounding: (pc - 4096 + 7) >> 3 below 4096.
            bit_index = pc - 4089
        byte_index = bit_index >> 3
        bitmap, _ = self.segments_metadata['extra']
        if not 0 <= byte_index < len(bitmap):
            return -1
        return (bitmap[byte_index] >> (pc & 7)) & 1

    def run_step(self):
        """Execute one instruction with the handler selected by the bitmap."""
        arch = self.get_arch_mode()
        print(f"Arch={arch}")
        if not arch:
            self.run_step_arch1()
        elif arch == 1:
            self.run_step_arch2()
        else:
            # No valid arch bit for this PC: stop the main loop.
            self.continue_emu = 0

    def run(self):
        """Run until ``continue_emu`` is cleared, then dump the final state."""
        print("[+] Emulator is running...")
        while self.continue_emu:
            self.run_step()
        print("[+] Emu has ended")
        self.ctx.dump_emu(print_stack=1)
First Input
We have this input when starting program :
-> % ./multiarch crackme.masm
[I] initializing multiarch emulator
[I] executing program
Welcome to the multiarch of madness! Let's see how well you understand it.
Challenge 1 - What's your favorite number?
Some debugging shows that the first arch used is ARCH1, so let’s dig in.
Reversing the main arch1 emulator function is quite easy; the functions are straightforward. We assume the following functions are present:
- read_mem/write_mem
- get_memory_emu_dword
- push_dword_to_stack
- pop_dword_from_stack
- some syscall handling
- some misc functions
- handler for xor/add/mul/sub/…
- some flag setting for conditional jumps
We can create them in the Python emulator easily:
def run_step_arch1(self):
    """Decode and execute one 5-byte StackVM (arch1) instruction at ``ctx.PC``.

    Layout: byte 0 is the opcode, bytes 1-2 a little-endian 16-bit operand
    (``value2``) and bytes 1-4 a little-endian 32-bit operand (``value3``).
    Every instruction advances PC by 5 unless it jumps, halts or fails.

    Side effects: updates ``self.ctx`` (stack, PC, flag) and clears
    ``self.continue_emu`` on halt, on an invalid instruction or on a
    rejected/unsupported syscall.

    Raises:
        NotImplementedError: for opcode 0x41 and syscall 6, which are not
            ported yet (subclass of the ``Exception`` raised previously).
    """
    ctx = self.ctx
    insn = ctx.read_mem(ctx.PC, 5)
    value1 = insn[0]
    value2 = int.from_bytes(insn[1:3], byteorder="little")
    value3 = int.from_bytes(insn[1:], byteorder="little")
    print("--"*50)
    print(f"emu v1={hex(value1)}|v2={hex(value2)}|PC={hex(self.ctx.PC)}")
    #self.ctx.dump_emu(print_stack=1)

    # Binary stack operations; results are truncated to 32 bits because the
    # emulated stack holds dwords.
    binary_ops = {
        0x60: ("+", lambda a, b: a + b),
        0x61: ("-", lambda a, b: a - b),
        0x62: ("^", lambda a, b: a ^ b),
        0x63: ("&", lambda a, b: a & b),
    }

    if value1 == 0x10:
        ctx.push_byte_to_stack(value2)
    elif value1 == 0x20:
        ctx.push_short_to_stack(value2)
    elif value1 == 0x30:
        ctx.push_dword_to_stack(value3)
    elif value1 == 0x40:
        # A dword is 4 bytes; the previous 5-byte read produced a 40-bit value.
        loaded = int.from_bytes(ctx.read_mem(value2, 4), byteorder="little")
        ctx.push_dword_to_stack(loaded)
    elif value1 == 0x41:
        raise NotImplementedError("StackVM opcode 0x41 is not implemented")
    elif value1 == 0x50:
        ctx.pop_dword_from_stack()
    elif value1 in binary_ops:
        v1 = ctx.pop_dword_from_stack()
        v2 = ctx.pop_dword_from_stack()
        symbol, operation = binary_ops[value1]
        result = operation(v1, v2) & 0xFFFFFFFF
        print(f"Push Stack OPERATION : {hex(value1)} -> {hex(v1)}{symbol}{hex(v2)} = {hex(result)}")
        ctx.push_dword_to_stack(result)
    elif value1 in (0x70, 0x71, 0x72):
        print(f"check_cond {hex(value1)}")
        cond = (ctx.flag & 1) != 0
        # 0x70: always, 0x71: when the flag bit is set, 0x72: when it is clear.
        if value1 == 0x70 or (value1 == 0x71 and cond) or (value1 == 0x72 and not cond):
            print(f"NEW PC SET -> {hex(value2)}")
            ctx.PC = value2
            return
    elif value1 == 0x80:
        v1 = ctx.pop_dword_from_stack()
        v2 = ctx.pop_dword_from_stack()
        ctx.set_flag(v1, v2)
    # == Syscall handling ==
    elif value1 == 0xA0:
        if not ctx.check_syscall_specific():
            sys.stderr.write("[E] can't execute that syscall!\n")
            self.continue_emu = 0
            return
        syscall_no = ctx.pop_byte_from_stack()
        print(f"syscall no -> {syscall_no}")
        if syscall_no == 0:
            ctx.push_dword_to_stack(ctx.read_user_input_dword())
            print("read_user_input_dword")
        elif syscall_no == 1:
            # Previously PC was left untouched and the emulator kept running,
            # so the same instruction was executed again and again.
            sys.stderr.write("[E] unsupported syscall!\n")
            self.continue_emu = 0
            return
        elif syscall_no == 2:
            v1 = ctx.pop_dword_from_stack()
            v2 = ctx.pop_dword_from_stack()
            ctx.read_and_print(v1, v2)
        elif syscall_no == 3:
            v1 = ctx.pop_dword_from_stack()
            print(f"syscall srand({hex(v1)})")
            libc.srand(v1)
        elif syscall_no == 4:
            # Two rand() calls: the first gives the low half, the second the
            # high half of the 32-bit value.
            low = libc.rand() & 0xFFFF
            high = libc.rand() << 16
            gen_val = (high | low) & 0xFFFFFFFF
            print(f"syscall rand()={hex(gen_val)}")
            ctx.push_dword_to_stack(gen_val)
        elif syscall_no == 5:
            print(b"CTF{test_flag}")
        elif syscall_no == 6:
            ctx.pop_dword_from_stack()
            raise NotImplementedError("todo")
        else:
            sys.stderr.write("[E] bad syscall!\n")
            self.continue_emu = 0
            return
    # == Halt instruction ==
    elif value1 == 0xFF:
        self.continue_emu = 0
        return
    # == Unknown instruction ==
    else:
        sys.stderr.write(f"[E] invalid StackVM instruction, pc=0x{self.ctx.PC:X} leader=0x{value1:X}\n")
        self.continue_emu = 0
        return

    ctx.PC += 5
We can then start the emulator and look at the dumped trace:
Segment : 1 | offset=19 | size=357
Segment : 2 | offset=376 | size=336
Segment : 3 | offset=712 | size=45
[+] Emulator is running...
Arch=0
----------------------------------------------------------------------------------------------------
emu v1=0x10|v2=0x4b|PC=0x1000
push_byte_to_stack -> 0x4b //push size of "Welcome to..." string
Arch=0
----------------------------------------------------------------------------------------------------
emu v1=0x30|v2=0x2000|PC=0x1005
push_dword_to_stack -> 0x2000 // push string adress
Arch=0
----------------------------------------------------------------------------------------------------
emu v1=0x10|v2=0x2|PC=0x100a
push_byte_to_stack -> 0x2 // push syscall no 2 for read_and_print syscall
Arch=0
----------------------------------------------------------------------------------------------------
emu v1=0xa0|v2=0x0|PC=0x100f
check_syscall_specific
pop_byte_from_stack -> 0x2
syscall no -> 2
pop_dword_from_stack -> 0x2000
pop_dword_from_stack -> 0x4b
ReadAndPrint -> bytearray(b"Welcome to the multiarch of madness! Let\'s see how well you understand it.\n")
Arch=0
----------------------------------------------------------------------------------------------------
emu v1=0x10|v2=0x2b|PC=0x1014
push_byte_to_stack -> 0x2b // push "Challenge1..." string size
Arch=0
----------------------------------------------------------------------------------------------------
emu v1=0x30|v2=0x20ad|PC=0x1019
push_dword_to_stack -> 0x20ad // push string addr
Arch=0
----------------------------------------------------------------------------------------------------
emu v1=0x10|v2=0x2|PC=0x101e
push_byte_to_stack -> 0x2 // push syscall no 2 for read and print
Arch=0
----------------------------------------------------------------------------------------------------
emu v1=0xa0|v2=0x0|PC=0x1023
check_syscall_specific
pop_byte_from_stack -> 0x2
syscall no -> 2
pop_dword_from_stack -> 0x20ad
pop_dword_from_stack -> 0x2b
ReadAndPrint -> bytearray(b"Challenge 1 - What\'s your favorite number? ")
Arch=0
----------------------------------------------------------------------------------------------------
emu v1=0x10|v2=0x0|PC=0x1028
push_byte_to_stack -> 0x0 // push syscall no 0 : read_user_dword()
Arch=0
----------------------------------------------------------------------------------------------------
emu v1=0xa0|v2=0x0|PC=0x102d
check_syscall_specific
pop_byte_from_stack -> 0x0
syscall no -> 0
push_dword_to_stack -> 0x8f5a547a // that our input
read_user_input_dword
Arch=0
----------------------------------------------------------------------------------------------------
emu v1=0x20|v2=0x1337|PC=0x1032
push_short_to_stack -> 0x1337 // push short
Arch=0
----------------------------------------------------------------------------------------------------
emu v1=0x20|v2=0x539|PC=0x1037
push_short_to_stack -> 0x539 // push short
Arch=0
----------------------------------------------------------------------------------------------------
emu v1=0x30|v2=0x5309|PC=0x103c
push_dword_to_stack -> 0x8675309 // push dword
Arch=0
----------------------------------------------------------------------------------------------------
emu v1=0x62|v2=0x0|PC=0x1041
pop_dword_from_stack -> 0x8675309 // combine 2short -> dword
pop_dword_from_stack -> 0x13370539
Push Stack OPERATION : 0x62 -> 0x8675309^0x13370539 = 0x1b505630
push_dword_to_stack -> 0x1b505630 // created constant
Arch=0
----------------------------------------------------------------------------------------------------
emu v1=0x60|v2=0x0|PC=0x1046
pop_dword_from_stack -> 0x1b505630 // add last created constant with our input
pop_dword_from_stack -> 0x8f5a547a
Push Stack OPERATION : 0x60 -> 0x1b505630+0x8f5a547a = 0xaaaaaaaa
push_dword_to_stack -> 0xaaaaaaaa
Arch=0
----------------------------------------------------------------------------------------------------
emu v1=0x30|v2=0xaaaa|PC=0x104b
push_dword_to_stack -> 0xaaaaaaaa
Arch=0
----------------------------------------------------------------------------------------------------
emu v1=0x80|v2=0x0|PC=0x1050
pop_dword_from_stack -> 0xaaaaaaaa // compare previous op with 0xaaaaaaaa
pop_dword_from_stack -> 0xaaaaaaaa
set_flag -> 1 // set flag to 1 if A==B
Arch=0
emu v1=0x72|v2=0x110b|PC=0x1055
check_cond 0x72
I gave the correct input for this trace, but the first input is easy to recover from any saved trace:
the program takes a dword A from the user, then checks that 0x1b505630 + A == 0xaaaaaaaa
We can easily deduce that the first input is 0xaaaaaaaa - 0x1b505630 = 2405061754 (0x8f5a547a)
Second input
For the second input, the program switches to arch2.
Arch2 is a bit more complex to reverse, as its operations are more fine-grained. The emulator introduces the use of the registers A/B/C/D.
Arch2 keeps the same helper functions with some additions, but the core is the same (push/pop/read_mem/…).
Note that it uses bit fields of the opcode byte to select the registers to operate on:
set_flag(emu_ctx, *(&emu_ctx->A + ((val_1 >> 2) & 3)), *(&emu_ctx->A + (val_1 & 3)));
We can create the code in the python emulator:
# Decode and execute one variable-length RegVM (arch2) instruction at ctx.PC.
#
# An optional prefix byte 0xA0-0xAF carries an extension nibble (op_ext) that
# is only consulted by the mov-style opcodes (>= 0xC0).  Every handler advances
# ctx.PC itself; failures clear self.continue_emu.
#
# NOTE(review): this port contains three hard-coded workarounds (forced flag
# at PC 0x108d, forced SP on the first `ret`, forced branch on the 19th jump).
# They look tied to the specific recorded run - confirm before reusing it.
def run_step_arch2(self):
ctx = self.ctx
print("--"*50)
opcode1 = ctx.read_mem(ctx.PC, 1)[0]
print(f"opcode={hex(opcode1)} | PC={hex(ctx.PC)}")
ctx.PC += 1
op_ext = 0 # "v4" in the C code; extension nibble
opcode = opcode1
if (opcode1 >> 4) == 0xA: # 0xA0–0xAF
op_ext = opcode1 & 0xF
opcode = ctx.read_mem(ctx.PC, 1)[0] # actual opcode
ctx.PC += 1
if opcode == 0x00: # 0: halt immediately
self.continue_emu = 0
return
# 0x01: syscall, number taken from the low byte of A
if opcode == 0x01:
if not ctx.check_syscall_specific():
self.continue_emu = 0
return
sc_no = ctx.A & 0xFF
print(f"[SYSCALL] no={sc_no}")
if sc_no == 0: # read dword -> A
print("read_user_input_dword")
ctx.A = ctx.read_user_input_dword()
elif sc_no == 1: # read user input; called with B and C (presumably buffer address / length)
print("read_user_input_10char")
ctx.read_user_input_(ctx.B, ctx.C)
elif sc_no == 2: # read&print
print("read_and_print")
ctx.read_and_print(ctx.B, ctx.C)
elif sc_no == 3: # srand(B)
print(f"syscall srand({hex(ctx.B)})")
libc.srand(ctx.B)
elif sc_no == 4: # rand32 -> A
print("rand()")
low = libc.rand() & 0xFFFF
high = libc.rand() << 16
ctx.A = (high | low) & 0xFFFFFFFF
elif sc_no == 5: # flag !
print(b"CTF{test_flag}")
else:
sys.stderr.write("[E] bad syscall!\n")
self.continue_emu = 0
return
if opcode == 0x10: # push imm32
imm = ctx.get_memory_emu_dword(ctx.PC)
ctx.PC += 4
print(f"push imm32 : {hex(imm)}")
ctx.push_dword_to_stack(imm)
return
if 0x11 <= opcode <= 0x14: # push A/B/C/D
reg_idx = opcode - 0x11
print(f"push reg : {self.to_reg(reg_idx)} -> {hex(self._get_reg(reg_idx))}")
ctx.push_dword_to_stack(self._get_reg(reg_idx))
return
if 0x15 <= opcode <= 0x18: # pop A/B/C/D
reg_idx = opcode - 0x15
val = ctx.pop_dword_from_stack()
print(f"pop reg : {self.to_reg(reg_idx)} -> {hex(val)}")
self._set_reg(reg_idx, val)
return
# 0x7x: compare two registers selected by bits 3-2 and bits 1-0
if (opcode >> 4) == 0x7: # 0x70-0x7F
dst = (opcode >> 2) & 3
src = opcode & 3
print(f"set_flag : {hex(dst)}|{hex(src)}")
ctx.set_flag(self._get_reg(dst), self._get_reg(src))
return
# 0x8x: compare a register with an imm32
if (opcode >> 4) == 0x8: # 0x80-0x8F
imm = ctx.get_memory_emu_dword(ctx.PC)
ctx.PC += 4
dst = opcode & 3
print(f"set_flag_reg : {hex(self._get_reg(dst))}|{hex(imm)}")
# NOTE(review): hard-coded bypass - at this PC the comparison is replaced
# by an always-equal one (0x7331 vs 0x7331), whatever the register holds.
if ctx.PC == 0x108d:
ctx.set_flag(0x7331, 0x7331)
return
ctx.set_flag(self._get_reg(dst), imm)
return
# 0x20: dst += src (register indices are encoded as nibble - 1)
if opcode == 0x20:
byte = ctx.read_mem(ctx.PC, 1)[0]
ctx.PC += 1
dst = ((byte >> 4) - 1) & 3
src = ((byte & 0xF) - 1) & 3
res = (self._get_reg(dst) + self._get_reg(src)) & 0xFFFFFFFF
print(f"0x20 ->{self.to_reg(dst)}={hex(res)}")
self._set_reg(dst, res)
return
# 0x21: add imm32
# NOTE(review): `byte` is read but never used, so the immediate is always
# added to SP.  The 0x31 handler below decodes the same byte into A..D / SP,
# and in the recorded trace the loop at 0x1125 reloads the same dword after
# this opcode ran - this looks like a decoding bug; confirm against the binary.
if opcode == 0x21:
byte = ctx.read_mem(ctx.PC, 1)[0]
ctx.PC += 1
imm = ctx.get_memory_emu_dword(ctx.PC)
ctx.PC += 4
print(f"0X21 set SP={hex(imm+ctx.SP)}")
ctx.SP = (ctx.SP + imm) & 0xFFFFFFFF
return
# 0x30: dst -= src
if opcode == 0x30:
byte = ctx.read_mem(ctx.PC, 1)[0]
ctx.PC += 1
dst = ((byte >> 4) - 1) & 3
src = ((byte & 0xF) - 1) & 3
res = (self._get_reg(dst) - self._get_reg(src)) & 0xFFFFFFFF
self._set_reg(dst, res)
print(f"0X30 -> {self.to_reg(dst)}={hex(res)}")
return
# 0x31: subtract imm32 from a register (high nibble 1-4) or from SP (5)
if opcode == 0x31:
byte = ctx.read_mem(ctx.PC, 1)[0]
ctx.PC += 1
imm = ctx.get_memory_emu_dword(ctx.PC)
ctx.PC += 4
high = byte >> 4
if 1 <= high <= 4: # A…D
dst = (high - 1) & 3
self._set_reg(dst, (self._get_reg(dst) - imm) & 0xFFFFFFFF)
# NOTE(review): this debug print subtracts imm a second time, so the
# value shown is not the value just stored.
print(f"set_reg {self.to_reg(dst)}={hex((self._get_reg(dst) - imm) & 0xFFFFFFFF)}")
return
if high == 5: # SP -= imm
print(f"ctx.SP= {hex((ctx.SP - imm) & 0xFFFFFFFF)}")
ctx.SP = (ctx.SP - imm) & 0xFFFFFFFF
return
self.continue_emu = 0
return
# 0x40: dst ^= src
if opcode == 0x40:
byte = ctx.read_mem(ctx.PC, 1)[0]
ctx.PC += 1
dst = ((byte >> 4) - 1) & 3
src = ((byte & 0xF) - 1) & 3
v1 = self._get_reg(dst)
v2 = self._get_reg(src)
r = v1 ^ v2
print(f"XOR 0x40 -> {self.to_reg(dst)}= {hex(v1)} ^ {hex(v2)}={hex(r)}")
self._set_reg(dst, r)
return
# 0x50: 64-bit product of two registers; low half -> A, high half -> D
if opcode == 0x50:
byte = ctx.read_mem(ctx.PC, 1)[0]
ctx.PC += 1
r1 = ((byte >> 4) - 1) & 3
r2 = ((byte & 0xF) - 1) & 3
prod = (self._get_reg(r1) * self._get_reg(r2)) & 0xFFFFFFFFFFFFFFFF
ctx.A = prod & 0xFFFFFFFF
ctx.D = (prod >> 32) & 0xFFFFFFFF
print(f"MUL 0x50-> A={hex(ctx.A)}|D={hex(ctx.D)}")
return
# 0x51: 64-bit product of a register and an imm32; low half -> A, high half -> D
if opcode == 0x51:
byte = ctx.read_mem(ctx.PC, 1)[0]
ctx.PC += 1
imm = ctx.get_memory_emu_dword(ctx.PC)
ctx.PC += 4
r2 = (((byte >> 4) + 3) & 3) # formula taken from the binary
prod = (imm * self._get_reg(r2)) & 0xFFFFFFFFFFFFFFFF
print(f"MUL 0x51-> {hex(imm)}*{hex(self._get_reg(r2))} = {hex(prod)}")
ctx.A = prod & 0xFFFFFFFF
ctx.D = (prod >> 32) & 0xFFFFFFFF
print(f"MUL 0x51 A->{hex(ctx.A)} | D={hex(ctx.D)}")
return
# 0x60: call imm32
if opcode == 0x60:
target = ctx.get_memory_emu_dword(ctx.PC)
ctx.PC += 4
ctx.push_dword_to_stack(ctx.PC) # return address
ctx.PC = target & 0xFFFFFFFF
ctx.dump_emu(print_stack=True)
print(f"call {hex(ctx.PC)}")
return
# 0x61: ret, releasing 4 * byte bytes of stack first
if opcode == 0x61:
byte = ctx.read_mem(ctx.PC, 1)[0]
ctx.PC += 1
# NOTE(review): hard-coded workaround - the first `ret` executed forces SP
# to 0x8EE2 instead of applying the operand.
if ctx.patch_id == 0:
ctx.SP = 0x00008EE2
ctx.patch_id += 1
else:
ctx.SP = (ctx.SP + 4 * byte) & 0xFFFFFFFF
ctx.PC = ctx.pop_dword_from_stack()
print(f"ret {hex(ctx.PC)}")
return
if opcode in (0x62, 0x63, 0x64, 0x68):
print(f"opcode JZ/JNZ/JN/JMP {opcode}")
ctx.cpt += 1
#ctx.dump_emu(print_stack=True)
# NOTE(review): hard-coded workaround - the 19th jump instruction executed
# is always taken, regardless of its condition.
if ctx.cpt == 19:
print(f"TAKE BRANCH2")
target = ctx.get_memory_emu_dword(ctx.PC)
ctx.PC = target & 0xFFFFFFFF
return
taken = False
if opcode == 0x62: # JZ (flag bit0 == 1)
taken = (ctx.flag & 1) != 0
elif opcode == 0x63: # JNZ (flag bit0 == 0)
taken = (ctx.flag & 1) == 0
elif opcode == 0x64: # JN (flag bit1 == 1)
taken = (ctx.flag & 2) != 0
elif opcode == 0x68: # unconditional JMP
taken = True
if taken:
target = ctx.get_memory_emu_dword(ctx.PC)
ctx.PC = target & 0xFFFFFFFF
print(f"TAKEN BRANCH | new pc = {hex(ctx.PC)}")
else:
print(f"NO TAKE BRANCH")
ctx.PC += 4
return
# >= 0xC0: mov-style instructions; bits 5-3 select the destination and
# bits 2-0 the source
if opcode >= 0xC0:
dst_code = (opcode >> 3) & 7
src_code = opcode & 7
# despite its name this is True when the source is a plain register (0-3)
src_mode_indirect = (opcode & 4) == 0 # bit 2 == 0
ext_low2 = op_ext & 3 # v10 -> dereference src
ext_high = op_ext >> 2 # v4>>2 -> dst [Reg]
if src_mode_indirect:
# src = Reg
src_val = self._get_reg(src_code)
if ext_low2: # [Reg]
src_val = ctx.get_memory_emu_dword(src_val)
else:
# src = variants 4 / 5 / 6
if src_code == 4: # src = [imm32]
addr = ctx.get_memory_emu_dword(ctx.PC); ctx.PC += 4
src_val = ctx.get_memory_emu_dword(addr)
elif src_code == 5: # src = imm32
src_val = ctx.get_memory_emu_dword(ctx.PC); ctx.PC += 4
elif src_code == 6: # src = SP / [SP]
src_val = ctx.SP
if ext_low2:
src_val = ctx.get_memory_emu_dword(src_val)
else:
self.continue_emu = 0
return
if ext_low2: # dereference not allowed here
self.continue_emu = 0
return
if ext_high: # dst = [RegDst]
if dst_code >= 4: # RegDst must be A-D
self.continue_emu = 0
return
addr = self._get_reg(dst_code)
ctx.write_to_mem_dword(addr, src_val)
print(f"opcode EH {hex(addr)}={hex(src_val)}")
return
if (opcode & 0x20) == 0: # dst = RegDst
self._set_reg(dst_code, src_val)
print(f"_set_reg 0X20 {self.to_reg(dst_code)}={hex(src_val)}")
return
if dst_code != 4 or src_code == 6:
ctx.exec_failed = 1
self.continue_emu = 0
return
# remaining form: store src to the absolute address given as imm32
addr = ctx.get_memory_emu_dword(ctx.PC); ctx.PC += 4
ctx.write_to_mem_dword(addr, src_val)
print(f"opcode2 {hex(addr)}={hex(src_val)}")
return
sys.stderr.write(f"[E] invalid RegVM instruction, pc=0x{ctx.PC - 1:08X} leader=0x{opcode:02X}\n")
self.continue_emu = 0
def _get_reg(self, idx: int) -> int:
    """Return the value of register *idx* (0-3 map to A-D, 4 maps to SP).

    Raises:
        ValueError: if *idx* is not one of the five known indices.
    """
    attribute = {0: "A", 1: "B", 2: "C", 3: "D", 4: "SP"}.get(idx)
    if attribute is None:
        raise ValueError(f"Reg index out of range: {idx}")
    return getattr(self.ctx, attribute)
def _set_reg(self, idx: int, value: int) -> None:
    """Store *value*, truncated to 32 bits, into register *idx*.

    Indices 0-3 select A-D and 4 selects SP.

    Raises:
        ValueError: if *idx* is not one of the five known indices.
    """
    truncated = value & 0xFFFFFFFF
    attribute = {0: "A", 1: "B", 2: "C", 3: "D", 4: "SP"}.get(idx)
    if attribute is None:
        raise ValueError(f"Reg index out of range: {idx}")
    setattr(self.ctx, attribute, truncated)
def to_reg(self, idx):
    """Return the printable name of register *idx* ("A".."D" or "SP").

    Raises:
        ValueError: if *idx* is not one of the five known indices.
    """
    label = {0: "A", 1: "B", 2: "C", 3: "D", 4: "SP"}.get(idx)
    if label is None:
        raise ValueError(f"Reg index out of range: {idx}")
    return label
There are some opcodes I didn’t reverse in detail, as I could copy the decompiled output and port it to Python. My goal is mainly to analyze the dumped trace to understand the math operations behind the emulator.
I could also run the program under gdb, script some breakpoint handlers and generate a trace. I did that to dump some xor/mul operations for input 2:
import gdb

# Load the target and delete any breakpoints left over from a previous run.
gdb.execute("file multiarch")
gdb.execute("d")


def to_int(gdb_v: gdb.Value):
    """Return *gdb_v* as a Python int, going through a ``long long`` cast."""
    long_long = gdb.lookup_type('long long')
    return int(gdb_v.cast(long_long))


class Dumper(gdb.Breakpoint):
    """Breakpoint that logs the operands of one emulator handler and resumes.

    ``opcode`` selects what gets logged: 1 prints rax * rdx, 2 prints eax
    XORed with the dword stored at ``rbx + rdx * 4 + 0x3B``.
    """

    def __init__(self, bp, opcode):
        super().__init__(bp)
        self.opcode = opcode

    def _log_mul(self):
        # Multiplication operands are taken straight from rax / rdx.
        rax = to_int(gdb.parse_and_eval("(uint64_t *)$rax"))
        rdx = to_int(gdb.parse_and_eval("(uint64_t *)$rdx"))
        result = rax * rdx
        print(f"{hex(rax)}*{hex(rdx)}={hex(result)}")

    def _log_xor(self):
        # rbx presumably points at the emulator context and rdx holds a
        # register index (0x3B is the offset of the first of four ints).
        eax = to_int(gdb.parse_and_eval("(uint32_t *)$eax"))
        addr = to_int(gdb.parse_and_eval("$rbx + $rdx * 4 + 0x3B"))
        v1 = to_int(gdb.parse_and_eval(f"(uint32_t)*(uint32_t *)({addr})"))
        print(f"{hex(eax)} ^{hex(v1)}={hex(eax ^ v1 )}")

    def stop(self):
        """Log the operands; a falsy return value lets the inferior continue."""
        if self.opcode == 1:
            self._log_mul()
        elif self.opcode == 2:
            self._log_xor()
        else:
            return None
        return False


bp2 = Dumper("*0x0005555555566CE", 2)
bp3 = Dumper("*0x00005555555567F8", 1)
gdb.execute("r crackme.masm")
This script is used to cross-check some math operations during stage 2. We can now run the emulator to get the new trace:
... trace part1
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0xc5 | PC=0x105a
_set_reg 0X20 A=0x2 // syscall no 2 for read_and print
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0xcd | PC=0x105f
_set_reg 0X20 B=0x20d8 // "Challenge 2" addr
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0xd5 | PC=0x1064
_set_reg 0X20 C=0x1e // string size
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0x1 | PC=0x1069
check_syscall_specific
[SYSCALL] no=2
read_and_print
ReadAndPrint -> bytearray(b'Challenge 2 - Tell me a joke: ')
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0x31 | PC=0x106a
ctx.SP= 0x8ee6 // set new SP
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0xce | PC=0x1070
_set_reg 0X20 B=0x8ee6
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0x12 | PC=0x1071
push reg : B -> 0x8ee6
push_dword_to_stack -> 0x8ee6
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0xd5 | PC=0x1072
_set_reg 0X20 C=0x20 // push size 0X20 => user_input_len
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0xc5 | PC=0x1077
_set_reg 0X20 A=0x1 // syscall no 1 : read_user_input(size)
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0x1 | PC=0x107c
check_syscall_specific
[SYSCALL] no=1
read_user_input_
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0x15 | PC=0x107d
pop_dword_from_stack -> 0x8ee6
pop reg : A -> 0x8ee6
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0xcd | PC=0x107e
_set_reg 0X20 B=0x20
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0x60 | PC=0x1083
push_dword_to_stack -> 0x1088 // push current PC before call
---[ PC=0x0000111C SP=0x00008EE2 | A=0x00008EE6 B=0x00000020 C=0x00000020 D=0x00000000
---[ STACK CONTENTS
0x00008EDA 0x00000000
0x00008EDE 0x00000000
* 0x00008EE2 0x00001088 // return adress
0x00008EE6 0x2E203320 // our user input is on stack
0x00008EEA 0x2E202020
0x00008EEE 0x41414141
0x00008EF2 0x41414141
call 0x111c // call func
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0xd0 | PC=0x111c
_set_reg 0X20 C=0x8ee6
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0x20 | PC=0x111d
0x20 ->A=0x8f06
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0x11 | PC=0x111f
push reg : A -> 0x8f06
push_dword_to_stack -> 0x8f06
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0xcd | PC=0x1120
_set_reg 0X20 B=0x0
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0xa1 | PC=0x1125
_set_reg 0X20 D=0x2e203320 // first dword of our input
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0x51 | PC=0x1127
MUL 0x51-> 0xcafebabe*0x2e203320 = 0x24934def9acb31c0 // multiply our dword by 0xcafebabe
MUL 0x51 A->0x9acb31c0 | D=0x24934def // A = lower 32 bits, D = upper 32 bits of the product
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0x40 | PC=0x112d
XOR 0x40 -> B= 0x0 ^ 0x24934def=0x24934def // XOR the upper dword (D) into the accumulator B
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0x15 | PC=0x112f
pop_dword_from_stack -> 0x8f06
pop reg : A -> 0x8f06
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0x11 | PC=0x1130
push reg : A -> 0x8f06
push_dword_to_stack -> 0x8f06
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0x72 | PC=0x1131
set_flag : 0x0|0x2
set_flag -> 0
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0x62 | PC=0x1132
opcode JZ/JNZ/JN/JMP 98
NO TAKE BRANCH
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0x21 | PC=0x1137
0X21 set SP=0x8ee2
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0x68 | PC=0x113d
opcode JZ/JNZ/JN/JMP 104
TAKEN BRANCH | new pc = 0x1125 // while (dword != null)
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0xa1 | PC=0x1125
_set_reg 0X20 D=0x2e203320
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0x51 | PC=0x1127
MUL 0x51-> 0xcafebabe*0x2e203320 = 0x24934def9acb31c0
MUL 0x51 A->0x
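This XOR loop repeats until every dword of the input has been consumed.
The loop is a simple XOR accumulator: it takes one dword of the user input at a time (32 chars => 8 iterations), multiplies it by 0xcafebabe, keeps the upper 32 bits of the 64-bit product (register D in the trace), then XORs that value into the accumulator.
So we have:
$$Y = \bigoplus_{i=1}^{8} \left\lfloor \frac{X_i \times \mathtt{0xcafebabe}}{2^{32}} \right\rfloor$$
i.e. ((X1 × 0xcafebabe) >> 32) ^ ((X2 × 0xcafebabe) >> 32) ^ … = Y, starting from an accumulator of 0.
When returning from the call, there is a compare (set_flag) against the constant 0x7331.
So we must find a 32-character input that gives Y = 0x7331 after the 8 XOR rounds.
After hardcore personal reflection — uh, ChatGPT — an input that validates the second question is: “ 3 . .AAAAAAAABBBBBBBBCCCCCCCC” => this returns 0x7331 and passes the check. (Whitespace matters here: according to the stack dump above, the first eight input bytes are 20 33 20 2e 20 20 20 2e.)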
Third input
----------------------------------------------------------------------------------------------------
opcode=0x1 | PC=0x10ab
check_syscall_specific
[SYSCALL] no=0
read_user_input_dword
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0xc8 | PC=0x10ac
_set_reg 0X20 B=0x2b6043c
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0xc5 | PC=0x10ad
_set_reg 0X20 A=0x3
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0x1 | PC=0x10b2
check_syscall_specific
[SYSCALL] no=3
syscall srand(0x2b6043c)
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0xd5 | PC=0x10b3
_set_reg 0X20 C=0x0
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0x60 | PC=0x10b8
push_dword_to_stack -> 0x10bd
---[ PC=0x00001145 SP=0x00008EE2 | A=0x00000003 B=0x02B6043C C=0x00000000 D=0x24934DEF
---[ STACK CONTENTS
0x00008EDA 0x02000000
0x00008EDE 0x000020F6
* 0x00008EE2 0x000010BD
0x00008EE6 0x2E203320
0x00008EEA 0x2E202020
0x00008EEE 0x41414141
0x00008EF2 0x41414141
call 0x1145
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0xc5 | PC=0x1145
_set_reg 0X20 A=0x133700
Arch=0
----------------------------------------------------------------------------------------------------
emu v1=0x10|v2=0x4|PC=0x114a
push_byte_to_stack -> 0x4
Arch=0
----------------------------------------------------------------------------------------------------
emu v1=0xa0|v2=0x0|PC=0x114f
check_syscall_specific
pop_byte_from_stack -> 0x4
syscall no -> 4
syscall rand()=0x7a213a1c
push_dword_to_stack -> 0x7a213a1c
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0x16 | PC=0x1154
pop_dword_from_stack -> 0x7a213a1c
pop reg : B -> 0x7a213a1c
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0x40 | PC=0x1155
XOR 0x40 -> A= 0x133700 ^ 0x7a213a1c=0x7a320d1c
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0x11 | PC=0x1157
push reg : A -> 0x7a320d1c
push_dword_to_stack -> 0x7a320d1c
Arch=0
----------------------------------------------------------------------------------------------------
emu v1=0x30|v2=0xf2f2|PC=0x1158
push_dword_to_stack -> 0xf2f2f2f2
Arch=0
----------------------------------------------------------------------------------------------------
emu v1=0x62|v2=0x0|PC=0x115d
pop_dword_from_stack -> 0xf2f2f2f2
pop_dword_from_stack -> 0x7a320d1c
Push Stack OPERATION : 0x62 -> 0xf2f2f2f2^0x7a320d1c = 0x88c0ffee
push_dword_to_stack -> 0x88c0ffee
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0x15 | PC=0x1162
pop_dword_from_stack -> 0x88c0ffee
pop reg : A -> 0x88c0ffee
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0x61 | PC=0x1163
pop_dword_from_stack -> 0x10bd
ret 0x10bd
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0x10 | PC=0x10bd
push imm32 : 0xffffff
push_dword_to_stack -> 0xffffff
Arch=1
----------------------------------------------------------------------------------------------------
opcode=0x11 | PC=0x10c2
push reg : A -> 0x88c0ffee
push_dword_to_stack -> 0x88c0ffee
Arch=0
----------------------------------------------------------------------------------------------------
emu v1=0x63|v2=0x0|PC=0x10c3
pop_dword_from_stack -> 0x88c0ffee
pop_dword_from_stack -> 0xffffff
Push Stack OPERATION : 0x63 -> 0x88c0ffee&0xffffff = 0xc0ffee
push_dword_to_stack -> 0xc0ffee
Arch=0
----------------------------------------------------------------------------------------------------
emu v1=0x30|v2=0xffee|PC=0x10c8
push_dword_to_stack -> 0xc0ffee
Arch=0
----------------------------------------------------------------------------------------------------
emu v1=0x80|v2=0x0|PC=0x10cd
pop_dword_from_stack -> 0xc0ffee
pop_dword_from_stack -> 0xc0ffee
set_flag -> 1
Arch=0
The last question keeps switching between the two architectures (Arch=0, the stack VM, and Arch=1, the register VM). Reviewing the trace, it asks the user for a dword, calls srand(user_dword) and then calls rand().
It then XORs the generated random value with two constants, masks the result, and checks whether the computed value equals a hard-coded constant.
We can summarize this as:
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
/*
 * Brute-force the srand() seed expected by the third question.
 *
 * For every 32-bit seed, rebuild the 32-bit value the VM derives from two
 * consecutive rand() calls, XOR it with both constants, keep the low 24 bits
 * and compare against the target. Prints the first matching seed and returns
 * 0, or returns 1 once the whole 32-bit range has been exhausted.
 */
int main(void)
{
    const uint32_t XOR1 = 0x00133700;
    const uint32_t XOR2 = 0xF2F2F2F2;
    const uint32_t TARGET = 0x00C0FFEE;
    const uint32_t MASK = 0x00FFFFFF;
    uint32_t seed = 0;

    do {
        srand(seed);
        /* The first rand() call supplies the LOW 16 bits... */
        uint32_t low = (uint16_t)rand();
        /* ...and the second one supplies the HIGH 16 bits. */
        uint32_t high = ((uint32_t)(rand())) << 16;
        uint32_t val = low | high;
        uint32_t result = (val ^ XOR1 ^ XOR2) & MASK;

        if (result == TARGET) {
            printf("[+] Seed trouvé : 0x%08X (%u)\n", seed, seed);
            printf(" gen_val = 0x%08X\n", val);
            printf(" result = 0x%06X\n", result);
            return 0;
        }
        /* Compare before incrementing so the loop ends after 0xFFFFFFFF
         * instead of wrapping around to 0. */
    } while (seed++ != 0xFFFFFFFF);

    puts("[-] Aucun seed trouvé.");
    return 1;
}
This code returns 45483068
after about 3 minutes, which validates the last question!
We got the flag (stored in an environment variable on the remote server): CTF{st3ph3n_str4ng3_0nly_us3s_m1ps_wh4t_a_n00b}
A nice challenge, not that hard but long, about reversing emulators and analysing traces.
Full emulator :
import os
import struct
import sys
from io import BytesIO
import ctypes
import mmap
# Handle on the C library: the emulator forwards the VM's srand/rand syscalls
# to libc.srand() / libc.rand() so the generated values come from the host libc.
libc = ctypes.CDLL("libc.so.6")
class EmuCtx:
    """Register and memory state shared by both VM instruction sets.

    Guest address space, as implemented by read_mem()/write_mem():

    * 0x1000-0x1FFF -> ``self.code``
    * 0x2000-0x2FFF -> ``self.data``
    * 0x8000-0x8FFF -> ``self.extra`` (the stack lives here, SP starts at 0x8F00)

    A, B, C, D, SP and PC are kept as plain Python ints; values written to
    memory are truncated to the width of the access.
    """

    # (attribute name, guest base address) of the fixed 0x1000-byte segments.
    _SEGMENTS = (("code", 0x1000), ("data", 0x2000), ("extra", 0x8000))
    _SEGMENT_SIZE = 0x1000

    def __init__(self):
        self.code = None
        self.data = None
        self.mmap3 = None
        self.extra = None
        self.extra_size = 0
        self.PC = 0
        self.SP = 0
        # Extra mappings consulted by read_mem(): dicts with "base" and "ptr"
        # keys. A list (not a bytearray) so the entries can be such dicts.
        self.dynamic = []
        self.size_unk = 0  # number of valid entries in self.dynamic
        self.A = 0
        self.B = 0
        self.C = 0
        self.D = 0
        self.flag = 0
        self.execute_as_system = 0
        self.exec_failed = 0
        # Counters driving the hard-coded workarounds in Emulator.
        self.cpt = 0
        self.patch_id = 0
        self.first_time = 0

    def _find_segment(self, offset, end):
        """Return ``(buffer, base)`` of the fixed segment holding [offset, end), else ``(None, 0)``."""
        for attr, base in self._SEGMENTS:
            limit = base + self._SEGMENT_SIZE
            if base <= offset < limit and end <= limit:
                return getattr(self, attr), base
        return None, 0

    def read_mem(self, offset, size):
        """Return *size* bytes of guest memory starting at address *offset*.

        Raises:
            Exception: if the range is not fully inside one mapped region.
        """
        end = offset + size
        buf, base = self._find_segment(offset, end)
        if buf is not None:
            return buf[offset - base:end - base]
        for entry in self.dynamic[:self.size_unk]:
            base = entry["base"]
            if base <= offset < base + 512 and end <= base + 512:
                rel = offset - base
                # A zero size returns everything up to the end of the mapping.
                return entry["ptr"][rel:rel + size if size else None]
        raise Exception("Invalid readmem")

    def set_flag(self, a2, a3):
        """Compare *a2* with *a3* as 32-bit values and store the result flags.

        Bit 0 is set when they are equal, bit 1 when bit 31 of ``a2 - a3`` is
        set; bit 2 (0x4) is always set.
        """
        diff = (a2 - a3) & 0xFFFFFFFF
        if diff == 0:
            flag = 1
        elif diff >> 31:
            flag = 2
        else:
            flag = 0
        print(f"set_flag -> {flag}")
        self.flag = flag | 0x4

    def write_to_mem_dword(self, addr: int, value: int) -> None:
        """Store the low 32 bits of *value* at guest address *addr* (little-endian)."""
        self.write_mem(addr, value & 0xFFFFFFFF, 4)

    def write_mem(self, offset, data, size, format=int):
        """Write *data* to guest memory at address *offset*.

        With ``format=int`` (default) *data* is an integer stored little-endian
        on *size* bytes, truncated to that width. With ``format=bytes`` *data*
        is a byte string; at most *size* bytes are copied and bytes beyond the
        supplied data are left untouched.

        Raises:
            Exception: if [offset, offset + size) is not inside a fixed segment.
        """
        if format == bytes:
            payload = bytes(data[:size])
        else:
            # Mask first: int.to_bytes() raises on negative or oversized values.
            payload = (data & ((1 << (8 * size)) - 1)).to_bytes(size, byteorder="little")
        buf, base = self._find_segment(offset, offset + size)
        if buf is None:
            raise Exception("todo ptr_array")
        start = offset - base
        # Assign exactly len(payload) bytes: a slice assignment of a different
        # length would resize the bytearray and shift the rest of the segment.
        buf[start:start + len(payload)] = payload

    def read_user_input_dword(self):
        """Return the canned answer for a dword prompt (Q1 first, then Q3)."""
        if self.first_time == 0:
            self.first_time += 1
            return 0x8f5a547a
        return 45483068

    def read_user_input_(self, offset, size):
        """Copy the canned Q2 answer (at most *size* bytes) to address *offset*."""
        # 32 bytes. The stack dump in the write-up shows the first two dwords
        # as 0x2E203320 and 0x2E202020, i.e. " 3 ." followed by "   .".
        x = b" 3 .\x20\x20\x20." + b"A" * 8 + b"B" * 8 + b"C" * 8
        self.write_mem(offset, x, size, format=bytes)

    def read_and_print(self, v1, v2):
        """Print the *v2* bytes of guest memory located at address *v1*."""
        x = self.read_mem(v1, v2)
        print(f"ReadAndPrint -> {x}")

    def check_syscall_specific(self):
        """Return True when the syscall numbered by the low byte of A may run.

        Numbers 0-5 are always allowed, 6 only when ``execute_as_system`` is
        non-zero; anything else is reported on stderr and refused.
        """
        print("check_syscall_specific")
        syscall_no = self.A & 0xFF
        if syscall_no <= 5:
            return True
        if syscall_no == 6:
            return self.execute_as_system != 0
        sys.stderr.write(f"[E] invalid syscall! 0x{syscall_no:X}\n")
        return False

    def get_memory_emu_dword(self, offset):
        """Return the little-endian 32-bit value stored at guest address *offset*."""
        return int.from_bytes(self.read_mem(offset, 4), byteorder="little")

    def push_byte_to_stack(self, data):
        """Push the low 8 bits of *data* (SP -= 1)."""
        data &= 0xFF
        self.SP -= 1
        print(f"push_byte_to_stack -> {hex(data)}")
        self.write_mem(self.SP, data, 1)

    def push_short_to_stack(self, data):
        """Push the low 16 bits of *data* (SP -= 2)."""
        data &= 0xFFFF
        self.SP -= 2
        print(f"push_short_to_stack -> {hex(data)}")
        self.write_mem(self.SP, data, 2)

    def push_dword_to_stack(self, data):
        """Push the low 32 bits of *data* (SP -= 4)."""
        data &= 0xFFFFFFFF
        self.SP -= 4
        print(f"push_dword_to_stack -> {hex(data)}")
        self.write_mem(self.SP, data, 4)

    def pop_byte_from_stack(self):
        """Pop and return one byte (SP += 1)."""
        r = self.read_mem(self.SP, 1)
        self.SP += 1
        x = int.from_bytes(r, byteorder="little")
        print(f"pop_byte_from_stack -> {hex(x)}")
        return x

    def pop_short_from_stack(self):
        """Pop and return a little-endian 16-bit value (SP += 2)."""
        r = self.read_mem(self.SP, 2)
        self.SP += 2
        x = int.from_bytes(r, byteorder="little")
        print(f"pop_short_from_stack -> {hex(x)}")
        return x

    def pop_dword_from_stack(self):
        """Pop and return a little-endian 32-bit value (SP += 4)."""
        r = self.read_mem(self.SP, 4)
        self.SP += 4
        x = int.from_bytes(r, byteorder="little")
        print(f"pop_dword_from_stack -> {hex(x)}")
        return x

    def initialize_emu_ctx(self, seg_metadata):
        """Allocate guest memory, load the three segments and reset PC/SP.

        *seg_metadata* maps ``'code'``, ``'data'`` and ``'extra'`` to
        ``(bytes, size)`` tuples. PC starts at 0x1000 and SP at 0x8F00.
        """
        self.code = bytearray(0x1000)
        self.data = bytearray(0x1000)
        self.extra = bytearray(0x10000)
        self.extra_size = seg_metadata['extra'][1]
        self.PC = 0x1000
        self.SP = 0x8F00
        for attr in ("code", "data", "extra"):
            blob, size = seg_metadata[attr]
            chunk = blob[:size]
            # Use the real chunk length so a short read cannot shrink the buffer.
            getattr(self, attr)[:len(chunk)] = chunk

    def dump_emu(self, print_stack=False):
        """Print the registers and, if *print_stack* is true, the dwords around SP."""
        print(
            f" ---[ PC=0x{self.PC:08X} SP=0x{self.SP:08X} | "
            f"A=0x{self.A:08X} B=0x{self.B:08X} "
            f"C=0x{self.C:08X} D=0x{self.D:08X}"
        )
        if print_stack:
            print(" ---[ STACK CONTENTS")
            for i in range(-8, 20, 4):
                sp_offset = self.SP + i
                value = self.get_memory_emu_dword(sp_offset)
                prefix = "* " if sp_offset == self.SP else " "
                print(f"\t{prefix}0x{sp_offset:08X} 0x{value:08X}")
class Emulator():
    """Loader and interpreter for a ``MASM`` image mixing two instruction sets.

    Before every step, one bit per code address is looked up in the image's
    type-3 segment: 0 selects the stack VM (run_step_arch1), 1 selects the
    register VM (run_step_arch2).

    NOTE(review): several hard-coded workarounds from the original author are
    kept as-is (forced arch at PC 0x1097, forced flag at PC 0x108d, forced
    19th branch, forced SP on the first RET). They are tied to this specific
    crackme.masm run; confirm before reusing the emulator on another image.
    """

    def __init__(self, masm):
        """Load the image at path *masm* and prepare a fresh EmuCtx."""
        self.masm = masm
        # Read the whole image up front so no OS file handle stays open;
        # read_segments() keeps working on self.fd through the in-memory copy.
        with open(masm, "rb") as image:
            self.fd = BytesIO(image.read())
        self._load_segments()
        self.ctx = EmuCtx()
        self.continue_emu = 1
        self.ctx.initialize_emu_ctx(self.segments_metadata)

    def __del__(self):
        # fd is missing when __init__ failed before assigning it.
        fd = getattr(self, "fd", None)
        if fd is not None:
            fd.close()

    def _load_segments(self):
        """Check the magic and read the three segment headers at offsets 4, 9 and 14.

        Exits the process with status 1 on a bad magic or an unreadable or
        invalid segment header.
        """
        if self.fd.read(4) != b"MASM":
            sys.stderr.write("[E] bad magic\n")
            sys.exit(1)
        self.segments_metadata = {}
        for header_offset in (4, 9, 14):
            if not self.read_segments(header_offset):
                sys.exit(1)

    def read_segments(self, size_offset):
        """Read the 5-byte segment header at file offset *size_offset* and its payload.

        The header is ``type (u8), file offset (u16 LE), size (u16 LE)``.
        Types 1, 2 and 3 are stored in ``self.segments_metadata`` under
        ``'code'``, ``'data'`` and ``'extra'`` as ``(bytes, size)``.

        Returns:
            True on success, False on a truncated header or unknown type.
        """
        self.fd.seek(size_offset)
        header = self.fd.read(5)
        if len(header) != 5:
            sys.stderr.write("[E] couldn't read segment header\n")
            return False
        segment_type, offset, size = struct.unpack("<BHH", header)
        self.fd.seek(offset)
        data = self.fd.read(size)
        print(f"Segment : {segment_type} | offset={offset} | size={size}")
        names = {1: 'code', 2: 'data', 3: 'extra'}
        if segment_type not in names:
            sys.stderr.write(f"[E] invalid segment type: {segment_type}\n")
            return False
        self.segments_metadata[names[segment_type]] = (data, size)
        return True

    def get_arch_mode(self):
        """Return the arch selector for the current PC (falsy: stack VM, 1/True: register VM).

        The selector is bit ``pc & 7`` of byte ``(pc - 0x1000) >> 3`` of the
        type-3 segment as loaded from the file.
        """
        pc = self.ctx.PC
        index = pc - 4096
        if index < 0:
            # Bias negative values by 7 before the shift below.
            index = pc - 4089
        data, _ = self.segments_metadata['extra']
        x = data[index >> 3]
        # NOTE(review): hard-coded override kept from the original author.
        if pc == 0x1097:
            return True
        return ((x >> (pc & 7)) & 1)

    @staticmethod
    def _decode_reg_pair(byte):
        """Split an operand byte into (dst, src) register indexes; each nibble is 1-based."""
        return ((byte >> 4) - 1) & 3, ((byte & 0xF) - 1) & 3

    @staticmethod
    def _rand32():
        """Build a 32-bit value from two libc rand() calls (low half first)."""
        low = libc.rand() & 0xFFFF
        high = libc.rand() << 16
        return (high | low) & 0xFFFFFFFF

    def _stack_vm_syscall(self):
        """Run the stack-VM syscall whose number is on top of the stack.

        Returns True when execution should continue with the next instruction.
        """
        ctx = self.ctx
        if not ctx.check_syscall_specific():
            sys.stderr.write("[E] can't execute that syscall!\n")
            self.continue_emu = 0
            return False
        syscall_no = ctx.pop_byte_from_stack()
        print(f"syscall no -> {syscall_no}")
        if syscall_no == 0:
            ctx.push_dword_to_stack(ctx.read_user_input_dword())
            print("read_user_input_dword")
        elif syscall_no == 1:
            sys.stderr.write("[E] unsupported syscall!\n")
            # Stop: leaving PC unchanged without stopping would loop forever.
            self.continue_emu = 0
            return False
        elif syscall_no == 2:
            v1 = ctx.pop_dword_from_stack()
            v2 = ctx.pop_dword_from_stack()
            ctx.read_and_print(v1, v2)
        elif syscall_no == 3:
            v1 = ctx.pop_dword_from_stack()
            print(f"syscall srand({hex(v1)})")
            libc.srand(v1)
        elif syscall_no == 4:
            gen_val = self._rand32()
            print(f"syscall rand()={hex(gen_val)}")
            ctx.push_dword_to_stack(gen_val)
        elif syscall_no == 5:
            print(b"CTF{test_flag}")
        elif syscall_no == 6:
            ctx.pop_dword_from_stack()
            raise NotImplementedError("todo")
        else:
            sys.stderr.write("[E] bad syscall!\n")
            self.continue_emu = 0
            return False
        return True

    def run_step_arch1(self):
        """Execute one 5-byte instruction of the stack VM.

        Layout: opcode byte followed by a 32-bit little-endian operand; some
        opcodes only use its low 16 bits (``value2``).
        """
        ctx = self.ctx
        raw = ctx.read_mem(ctx.PC, 5)
        value1 = raw[0]
        value2 = int.from_bytes(raw[1:3], byteorder="little")
        value3 = int.from_bytes(raw[1:], byteorder="little")
        print("--" * 50)
        print(f"emu v1={hex(value1)}|v2={hex(value2)}|PC={hex(ctx.PC)}")

        if value1 == 0xFF:  # halt
            self.continue_emu = 0
            return
        if value1 == 0xA0:  # syscall
            if self._stack_vm_syscall():
                ctx.PC += 5
            return
        if value1 in (0x70, 0x71, 0x72):  # jmp / jump if equal / jump if not equal
            print(f"check_cond {hex(value1)}")
            cond = (ctx.flag & 1) != 0
            if value1 == 0x70 or (value1 == 0x71 and cond) or (value1 == 0x72 and not cond):
                print(f"NEW PC SET -> {hex(value2)}")
                ctx.PC = value2
                return
            ctx.PC += 5
            return

        if value1 == 0x10:
            ctx.push_byte_to_stack(value2 & 0xFF)
        elif value1 == 0x20:
            ctx.push_short_to_stack(value2)
        elif value1 == 0x30:
            ctx.push_dword_to_stack(value3)
        elif value1 == 0x40:
            # A dword is 4 bytes (the original read 5 and could overflow).
            ctx.push_dword_to_stack(ctx.get_memory_emu_dword(value2))
        elif value1 == 0x41:
            raise NotImplementedError("StackVM opcode 0x41 is not implemented")
        elif value1 == 0x50:
            ctx.pop_dword_from_stack()
        elif value1 in (0x60, 0x61, 0x62, 0x63):
            v1 = ctx.pop_dword_from_stack()
            v2 = ctx.pop_dword_from_stack()
            symbol, op = {
                0x60: ("+", lambda a, b: a + b),
                0x61: ("-", lambda a, b: a - b),
                0x62: ("^", lambda a, b: a ^ b),
                0x63: ("&", lambda a, b: a & b),
            }[value1]
            # Wrap to 32 bits so ADD/SUB cannot produce an unstorable value.
            res = op(v1, v2) & 0xFFFFFFFF
            print(f"Push Stack OPERATION : {hex(value1)} -> {hex(v1)}{symbol}{hex(v2)} = {hex(res)}")
            ctx.push_dword_to_stack(res)
        elif value1 == 0x80:
            v1 = ctx.pop_dword_from_stack()
            v2 = ctx.pop_dword_from_stack()
            ctx.set_flag(v1, v2)
        else:
            sys.stderr.write(f"[E] invalid StackVM instruction, pc=0x{self.ctx.PC:X} leader=0x{value1:X}\n")
            self.continue_emu = 0
            return
        ctx.PC += 5

    def _reg_vm_syscall(self):
        """Run the register-VM syscall selected by the low byte of A."""
        ctx = self.ctx
        if not ctx.check_syscall_specific():
            self.continue_emu = 0
            return
        sc_no = ctx.A & 0xFF
        print(f"[SYSCALL] no={sc_no}")
        if sc_no == 0:  # read dword -> A
            print("read_user_input_dword")
            ctx.A = ctx.read_user_input_dword()
        elif sc_no == 1:  # read C bytes into memory at B
            print("read_user_input_10char")
            ctx.read_user_input_(ctx.B, ctx.C)
        elif sc_no == 2:  # print C bytes located at B
            print("read_and_print")
            ctx.read_and_print(ctx.B, ctx.C)
        elif sc_no == 3:  # srand(B)
            print(f"syscall srand({hex(ctx.B)})")
            libc.srand(ctx.B)
        elif sc_no == 4:  # rand32 -> A
            print("rand()")
            ctx.A = self._rand32()
        elif sc_no == 5:  # flag
            print(b"CTF{test_flag}")
        else:
            sys.stderr.write("[E] bad syscall!\n")
            self.continue_emu = 0

    def _reg_vm_mov(self, opcode, op_ext):
        """Execute a register-VM move (opcode >= 0xC0).

        Bits 5-3 of the opcode select the destination and bits 2-0 the source;
        *op_ext* is the low nibble of an optional 0xAx prefix whose low two
        bits dereference the source and whose high two bits dereference the
        destination register.
        """
        ctx = self.ctx
        dst_code = (opcode >> 3) & 7
        src_code = opcode & 7
        deref_src = op_ext & 3
        deref_dst = op_ext >> 2

        # 1) Fetch the source value.
        if src_code < 4:  # register, or [register]
            src_val = self._get_reg(src_code)
            if deref_src:
                src_val = ctx.get_memory_emu_dword(src_val)
        elif src_code in (4, 5):  # [imm32] or imm32
            src_val = ctx.get_memory_emu_dword(ctx.PC)
            ctx.PC += 4
            if src_code == 4:
                src_val = ctx.get_memory_emu_dword(src_val)
            # NOTE(review): the pasted source lost its indentation here; the
            # "dereference forbidden" check is applied to the immediate modes
            # only, so the [SP] form below stays usable. Confirm against the binary.
            if deref_src:
                self.continue_emu = 0
                return
        elif src_code == 6:  # SP, or [SP]
            src_val = ctx.SP
            if deref_src:
                src_val = ctx.get_memory_emu_dword(src_val)
        else:
            self.continue_emu = 0
            return

        # 2) Store it in the destination.
        if deref_dst:  # [register]
            if dst_code >= 4:
                self.continue_emu = 0
                return
            addr = self._get_reg(dst_code)
            ctx.write_to_mem_dword(addr, src_val)
            print(f"opcode EH {hex(addr)}={hex(src_val)}")
            return
        if (opcode & 0x20) == 0:  # plain register
            self._set_reg(dst_code, src_val)
            print(f"_set_reg 0X20 {self.to_reg(dst_code)}={hex(src_val)}")
            return
        # [imm32]: only when the destination code is 4 and the source is not SP.
        if dst_code != 4 or src_code == 6:
            ctx.exec_failed = 1
            self.continue_emu = 0
            return
        addr = ctx.get_memory_emu_dword(ctx.PC)
        ctx.PC += 4
        ctx.write_to_mem_dword(addr, src_val)
        print(f"opcode2 {hex(addr)}={hex(src_val)}")

    def run_step_arch2(self):
        """Execute one variable-length instruction of the register VM."""
        ctx = self.ctx
        print("--" * 50)
        # ------------------------------------------------------ FETCH --- #
        opcode1 = ctx.read_mem(ctx.PC, 1)[0]
        print(f"opcode={hex(opcode1)} | PC={hex(ctx.PC)}")
        ctx.PC += 1
        op_ext = 0
        opcode = opcode1
        # 0xA0-0xAF is a prefix: its low nibble extends the next opcode byte.
        if (opcode1 >> 4) == 0xA:
            op_ext = opcode1 & 0xF
            opcode = ctx.read_mem(ctx.PC, 1)[0]
            ctx.PC += 1
        # ------------------------------------------------------ HALT ---- #
        if opcode == 0x00:
            self.continue_emu = 0
            return
        # --------------------------------------------------- SYSCALL ---- #
        if opcode == 0x01:
            self._reg_vm_syscall()
            return
        # ---------------------------------------------------- PUSH IMM -- #
        if opcode == 0x10:
            imm = ctx.get_memory_emu_dword(ctx.PC)
            ctx.PC += 4
            print(f"push imm32 : {hex(imm)}")
            ctx.push_dword_to_stack(imm)
            return
        # ---------------------------------------------- PUSH REG 0x11-14 #
        if 0x11 <= opcode <= 0x14:
            reg_idx = opcode - 0x11
            print(f"push reg : {self.to_reg(reg_idx)} -> {hex(self._get_reg(reg_idx))}")
            ctx.push_dword_to_stack(self._get_reg(reg_idx))
            return
        # ---------------------------------------------- POP REG 0x15-18 #
        if 0x15 <= opcode <= 0x18:
            reg_idx = opcode - 0x15
            val = ctx.pop_dword_from_stack()
            print(f"pop reg : {self.to_reg(reg_idx)} -> {hex(val)}")
            self._set_reg(reg_idx, val)
            return
        # ------------------------------------------- CMP REG, REG 0x7x -- #
        if (opcode >> 4) == 0x7:
            dst = (opcode >> 2) & 3
            src = opcode & 3
            print(f"set_flag : {hex(dst)}|{hex(src)}")
            ctx.set_flag(self._get_reg(dst), self._get_reg(src))
            return
        # ------------------------------------------- CMP REG, IMM 0x8x -- #
        if (opcode >> 4) == 0x8:
            imm = ctx.get_memory_emu_dword(ctx.PC)
            ctx.PC += 4
            dst = opcode & 3
            print(f"set_flag_reg : {hex(self._get_reg(dst))}|{hex(imm)}")
            # NOTE(review): hard-coded override kept from the original author;
            # it forces the comparison ending at PC 0x108d to succeed.
            if ctx.PC == 0x108d:
                ctx.set_flag(0x7331, 0x7331)
                return
            ctx.set_flag(self._get_reg(dst), imm)
            return
        # -------------------------------------------------- ADD 0x20 ---- #
        if opcode == 0x20:
            byte = ctx.read_mem(ctx.PC, 1)[0]
            ctx.PC += 1
            dst, src = self._decode_reg_pair(byte)
            res = (self._get_reg(dst) + self._get_reg(src)) & 0xFFFFFFFF
            print(f"0x20 ->{self.to_reg(dst)}={hex(res)}")
            self._set_reg(dst, res)
            return
        # ---------------------------------------------- ADD IMM -> SP -- #
        if opcode == 0x21:
            # The operand byte is consumed but not interpreted: the
            # immediate is always added to SP.
            ctx.PC += 1
            imm = ctx.get_memory_emu_dword(ctx.PC)
            ctx.PC += 4
            new_sp = (ctx.SP + imm) & 0xFFFFFFFF
            print(f"0X21 set SP={hex(new_sp)}")
            ctx.SP = new_sp
            return
        # -------------------------------------------------- SUB 0x30 ---- #
        if opcode == 0x30:
            byte = ctx.read_mem(ctx.PC, 1)[0]
            ctx.PC += 1
            dst, src = self._decode_reg_pair(byte)
            res = (self._get_reg(dst) - self._get_reg(src)) & 0xFFFFFFFF
            self._set_reg(dst, res)
            print(f"0X30 -> {self.to_reg(dst)}={hex(res)}")
            return
        # --------------------------------------------- SUB IMM 0x31 ---- #
        if opcode == 0x31:
            byte = ctx.read_mem(ctx.PC, 1)[0]
            ctx.PC += 1
            imm = ctx.get_memory_emu_dword(ctx.PC)
            ctx.PC += 4
            high = byte >> 4
            if 1 <= high <= 4:  # A..D
                dst = (high - 1) & 3
                res = (self._get_reg(dst) - imm) & 0xFFFFFFFF
                self._set_reg(dst, res)
                # Log the stored value (the original subtracted imm twice here).
                print(f"set_reg {self.to_reg(dst)}={hex(res)}")
                return
            if high == 5:  # SP -= imm
                new_sp = (ctx.SP - imm) & 0xFFFFFFFF
                print(f"ctx.SP= {hex(new_sp)}")
                ctx.SP = new_sp
                return
            self.continue_emu = 0
            return
        # -------------------------------------------------- XOR 0x40 ---- #
        if opcode == 0x40:
            byte = ctx.read_mem(ctx.PC, 1)[0]
            ctx.PC += 1
            dst, src = self._decode_reg_pair(byte)
            v1 = self._get_reg(dst)
            v2 = self._get_reg(src)
            r = v1 ^ v2
            print(f"XOR 0x40 -> {self.to_reg(dst)}= {hex(v1)} ^ {hex(v2)}={hex(r)}")
            self._set_reg(dst, r)
            return
        # -------------------------------------------------- MUL 0x50 ---- #
        if opcode == 0x50:
            byte = ctx.read_mem(ctx.PC, 1)[0]
            ctx.PC += 1
            r1, r2 = self._decode_reg_pair(byte)
            prod = (self._get_reg(r1) * self._get_reg(r2)) & 0xFFFFFFFFFFFFFFFF
            # 64-bit product: low dword -> A, high dword -> D.
            ctx.A = prod & 0xFFFFFFFF
            ctx.D = (prod >> 32) & 0xFFFFFFFF
            print(f"MUL 0x50-> A={hex(ctx.A)}|D={hex(ctx.D)}")
            return
        # -------------------------------------------------- MUL IMM 0x51 -#
        if opcode == 0x51:
            byte = ctx.read_mem(ctx.PC, 1)[0]
            ctx.PC += 1
            imm = ctx.get_memory_emu_dword(ctx.PC)
            ctx.PC += 4
            r2 = ((byte >> 4) + 3) & 3  # same as (nibble - 1) & 3
            prod = (imm * self._get_reg(r2)) & 0xFFFFFFFFFFFFFFFF
            print(f"MUL 0x51-> {hex(imm)}*{hex(self._get_reg(r2))} = {hex(prod)}")
            ctx.A = prod & 0xFFFFFFFF
            ctx.D = (prod >> 32) & 0xFFFFFFFF
            print(f"MUL 0x51 A->{hex(ctx.A)} | D={hex(ctx.D)}")
            return
        # -------------------------------------------------- CALL 0x60 --- #
        if opcode == 0x60:
            target = ctx.get_memory_emu_dword(ctx.PC)
            ctx.PC += 4
            ctx.push_dword_to_stack(ctx.PC)  # return address
            ctx.PC = target & 0xFFFFFFFF
            ctx.dump_emu(print_stack=True)
            print(f"call {hex(ctx.PC)}")
            return
        # --------------------------------------------------- RET 0x61 --- #
        if opcode == 0x61:
            byte = ctx.read_mem(ctx.PC, 1)[0]
            ctx.PC += 1
            if ctx.patch_id == 0:
                # NOTE(review): hard-coded workaround kept from the original
                # author: the first RET forces SP instead of adding 4 * byte.
                ctx.SP = 0x00008EE2
                ctx.patch_id += 1
            else:
                ctx.SP = (ctx.SP + 4 * byte) & 0xFFFFFFFF
            ctx.PC = ctx.pop_dword_from_stack()
            print(f"ret {hex(ctx.PC)}")
            return
        # ------------------------------------- JZ / JNZ / JN / JMP ------ #
        if opcode in (0x62, 0x63, 0x64, 0x68):
            print(f"opcode JZ/JNZ/JN/JMP {opcode}")
            ctx.cpt += 1
            if ctx.cpt == 19:
                # NOTE(review): hard-coded workaround kept from the original
                # author: the 19th branch instruction is always taken.
                print(f"TAKE BRANCH2")
                target = ctx.get_memory_emu_dword(ctx.PC)
                ctx.PC = target & 0xFFFFFFFF
                return
            if opcode == 0x62:  # JZ: flag bit 0 set
                taken = (ctx.flag & 1) != 0
            elif opcode == 0x63:  # JNZ: flag bit 0 clear
                taken = (ctx.flag & 1) == 0
            elif opcode == 0x64:  # JN: flag bit 1 set
                taken = (ctx.flag & 2) != 0
            else:  # 0x68: unconditional
                taken = True
            if taken:
                target = ctx.get_memory_emu_dword(ctx.PC)
                ctx.PC = target & 0xFFFFFFFF
                print(f"TAKEN BRANCH | new pc = {hex(ctx.PC)}")
            else:
                print(f"NO TAKE BRANCH")
                ctx.PC += 4
            return
        # ------------------------------------------------ MOV >= 0xC0 --- #
        if opcode >= 0xC0:
            self._reg_vm_mov(opcode, op_ext)
            return
        # -------------------------------------------------- Unknown ----- #
        sys.stderr.write(f"[E] invalid RegVM instruction, pc=0x{ctx.PC - 1:08X} leader=0x{opcode:02X}\n")
        self.continue_emu = 0

    def _get_reg(self, idx: int) -> int:
        """Return A(0), B(1), C(2), D(3) or SP(4)."""
        if idx == 0:
            return self.ctx.A
        if idx == 1:
            return self.ctx.B
        if idx == 2:
            return self.ctx.C
        if idx == 3:
            return self.ctx.D
        if idx == 4:
            return self.ctx.SP
        raise ValueError(f"Reg index out of range: {idx}")

    def _set_reg(self, idx: int, value: int) -> None:
        """Write A(0), B(1), C(2), D(3) or SP(4); the value is truncated to 32 bits."""
        value &= 0xFFFFFFFF
        if idx == 0:
            self.ctx.A = value
        elif idx == 1:
            self.ctx.B = value
        elif idx == 2:
            self.ctx.C = value
        elif idx == 3:
            self.ctx.D = value
        elif idx == 4:
            self.ctx.SP = value
        else:
            raise ValueError(f"Reg index out of range: {idx}")

    def to_reg(self, idx):
        """Return the display name of register index *idx* (0-4)."""
        names = ("A", "B", "C", "D", "SP")
        if 0 <= idx < len(names):
            return names[idx]
        raise ValueError(f"Reg index out of range: {idx}")

    def run_step(self):
        """Select the instruction set for the current PC and execute one instruction."""
        arch = self.get_arch_mode()
        print(f"Arch={arch}")
        if not arch:
            self.run_step_arch1()
        elif arch == 1:
            self.run_step_arch2()
        else:
            self.continue_emu = 0

    def run(self):
        """Step until an instruction clears ``continue_emu``, then dump the final state."""
        print("[+] Emulator is running...")
        while self.continue_emu:
            self.run_step()
        print("[+] Emu has ended")
        self.ctx.dump_emu(print_stack=1)
if __name__ == "__main__":
    # Replay the challenge image; the answers below are fed by EmuCtx's
    # read_user_input_* helpers.
    emu = Emulator("./crackme.masm")
    emu.run()
# Q1: 2405061754 (0x8F5A547A)
# Q2: " 3 .   .AAAAAAAABBBBBBBBCCCCCCCC" (32 bytes, three spaces between the dots)
# Q3: 45483068