Class 05: Introduction to Assembly History of Intel Microprocessors 4004: 1971, 2300 transistors, 108KHz First ever single-chip microprocessor. Designed for desktop calculator 8086/8088: 1978, 29K transistors, 5MHz Early 16-bit micro. Basis for IBM PC i386: 1985, 275K transistors, 16MHz Extend x86 from 16 to 32 bits. Basis for current Linux systems Pentium, ..., Pentium 4: Got serious about performance/capacity. Overtook competitors Limitations of 32 bits: Only 4GB of virtual memory. Need to switch to 64 bits Itanium: 2001 16M transistors 800MHz VLIW. Hasn't worked. AMD x86-64: 2002 Opteron, Athlon Extension of x86 to 64 bits. Fully backward compatible. Intel Pentium 4 Xeon EM64T (code named Nocona): 2004 100M transistors 3.2GHz Saltwater fish machines Assembly Code Model: State: CPU Program Counter: %rip Registers: %rdi, %rsi, %rdx, %rcx (+ more stuff later) Memory: Linear array of bytes, but we split things up Executable code Global Data Stack (Procedure state & data) Heap (Dynamically allocated data) Basic operation: Fetch instruction at %rip Read values from registers and/or memory Perform arithmetic operation Write values to registers and/or memory Update %rip to next instruction Main properties of assembly code: Textual representation of individual machine instructions Much information from C program missing Variable names Data types Higher level control structures Elementary example long fun(long x, long y, long z) { long t = x * y - z; return t; } This is the code you'll find in the book: # IA32 code # x at 8(%ebp), y at 12(%ebp), z at 16(%ebp) # Return value in %eax fun: pushl %ebp movl %esp, %ebp movl 12(%ebp), %eax imull 8(%ebp), %eax subl 16(%ebp), %eax leave ret This is the code we'll use in the class # x86-64 code # x in %rdi, y in %rsi, z in %rdx # return value in %rax fun: imulq %rsi, %rdi # x *= y subq %rdx, %rdi # x -= z movq %rdi, %rax # Return value = x ret # return This is what the real object code looks like: # Disassembled Object Code 0000000000400510 <fun>: 
400510: 48 0f af fe imul %rsi,%rdi 400514: 48 29 d7 sub %rdx,%rdi 400517: 48 89 f8 mov %rdi,%rax 40051a: c3 retq Getting it with gdb: gdb asm-intro.64x (gdb) disass fun Dump of assembler code for function fun: 0x0000000000400510 <fun+0>: imul %rsi,%rdi 0x0000000000400514 <fun+4>: sub %rdx,%rdi 0x0000000000400517 <fun+7>: mov %rdi,%rax 0x000000000040051a <fun+10>: retq 0x000000000040051b <fun+11>: data16 0x000000000040051c <fun+12>: data16 0x000000000040051d <fun+13>: nop 0x000000000040051e <fun+14>: data16 0x000000000040051f <fun+15>: nop End of assembler dump. (gdb) x/11b fun 0x400510 <fun>: 0x48 0x0f 0xaf 0xfe 0x48 0x29 0xd7 0x48 0x400518 <fun+8>: 0x89 0xf8 0xc3 Let's run it: unix> ./asm-intro.64x 575 373 2400 f(575, 373, 2400) --> 212075 gdb asm-intro.64x (gdb) break fun Breakpoint 1 at 0x400510 (gdb) run 575 373 2400 Breakpoint 1, 0x0000000000400510 in fun () (gdb) print $rdi $1 = 575 (gdb) print $rsi $2 = 373 (gdb) print $rdx $3 = 2400 (gdb) print $rip $4 = (void (*)()) 0x400510 <fun> (gdb) stepi 0x0000000000400514 in fun () (gdb) print $rdi $5 = 214475 (gdb) stepi 0x0000000000400517 in fun () (gdb) print $rdi $6 = 212075 (gdb) stepi 0x000000000040051a in fun () (gdb) print $rax $7 = 212075 (gdb) stepi 0x0000000000400670 in main () (gdb) disass 0x400510,0x40051a What do we see: Argument data in registers, result in registers 4 instructions do the trick Data movement: movq instruction can do lots of things: Register -> Register movq %rdx, %rax Memory -> Register movq (%rdx), %rax Register -> Memory movq %rdx, (%rax) (Can't do memory-memory) Memory Reads & Writes long read_store_ll(long new_val, long *dst) { long old_val = *dst; *dst = new_val; return old_val; } # new_val in %rdi, dst in %rsi # Return value in %rax read_store_ll: movq (%rsi), %rax # old_val = *dst (also return value) movq %rdi, (%rsi) # *dst = new_val ret 0000000000400520 <read_store_ll>: 400520: 48 8b 06 mov (%rsi),%rax 400523: 48 89 3e mov %rdi,(%rsi) 400526: c3 retq Let's give it a try: unix> ./asm-intro.64x 575 373 2400 y = 373; read_store_ll(575, &y) --> 373; y = 575 gdb asm-intro.64x (gdb) break 
read_store_ll Breakpoint 1 at 0x400520 (gdb) run 575 373 2400 Breakpoint 1, 0x0000000000400520 in read_store_ll () (gdb) print $rdi $1 = 575 (gdb) print /x $rsi $2 = 0x7ffffff240e8 (gdb) print *$rsi $4 = 373 (gdb) x/d 0x7ffffff240e8 0x7ffffff240e8: 373 (gdb) stepi 0x0000000000400523 in read_store_ll () (gdb) print $rax $5 = 373 (gdb) stepi 0x0000000000400526 in read_store_ll () (gdb) print *$rsi $6 = 575 (gdb) stepi 0x00000000004006b1 in main () (gdb) Mixing 32 & 64 bits So far, have only looked at pointers & longs (64 bits) What about ints? Lower 32 bits of registers named %edi, %esi, etc. movl instruction operates on 32 bit data int read_store_ii(int new_val, int *dst) { int old_val = *dst; *dst = new_val; return old_val; } # new_val in %edi (32 bits), dst in %rsi (64 bits) # Return value in %eax read_store_ii: movl (%rsi), %eax # read 4 bytes from memory at address %rsi into %eax (zero extends into %rax) movl %edi, (%rsi) # write 4 bytes from %edi to memory at address %rsi ret Conversions What if we need to convert 32 bits to 64? 32 bit instructions zero extend rest of register. 
E.g., movl %edi, %eax Use movslq to sign extend e.g., movslq %eax,%rax long read_store_il(int new_val, long *dst) { long old_val = *dst; long new_val_l = (long) new_val; *dst = new_val_l; return old_val; } Conversion requires explicit sign extension # new_val in %edi, dst in %rsi # Return value in %rax read_store_il: movq (%rsi), %rax # old_val = *dst (Return value) movslq %edi,%rdi # new_val_l = (long) new_val (sign extend) movq %rdi, (%rsi) # *dst = new_val_l ret int read_store_li(long new_val, int *dst) { int old_val = *dst; int new_val_i = (int) new_val; *dst = new_val_i; return old_val; } Just chop off the upper 32 bits to do type conversion # new_val in %rdi, dst in %rsi # Return value in %eax read_store_li: movl (%rsi), %eax # old_val = *dst (Return value) movl %edi, (%rsi) # *dst = (int) new_val ret Immediate Data movl instruction can also do constant (immediate) data void set_tmin_i(int *dst) { *dst = INT_MIN; } set_tmin_i: movl $-2147483648, (%rdi) ret void set_tmin_l(long *dst) { *dst = LONG_MIN; } movq can do this too, as long as the number can be represented as a sign-extended 32-bit immediate void set_minus72(long *dst) { *dst = -72; } set_minus72: movq $-72, (%rdi) ret For moving really big constants, need to use movabsq set_tmin_l: movabsq $-9223372036854775808, %rax movq %rax, (%rdi) ret Pointers. 
Can see now that pointers are simply addresses: void swap1(long *xp, long *yp) { long t0 = *xp; long t1 = *yp; *yp = t0; *xp = t1; } # xp in %rdi, yp in %rsi swap1: movq (%rdi), %rax # t0 = *xp movq (%rsi), %rdx # t1 = *yp movq %rax, (%rsi) # *yp = t0 movq %rdx, (%rdi) # *xp = t1 ret Other kinds of references: %rsp Rsp (%rsp) M[Rsp] -24(%rsp) M[Rsp - 24] -24(%rsp,%rdi,8) M[Rsp + 8*Rdi - 24] D(r1, r2, s) M[R1 + s*R2 + D] s = 1, 2, 4, 8 long choose(int c) { long t[2] = {-3, 5}; c &= 0x1; return t[c]; } What's going on: +-----------------------+ <== SP | | +-----------------------+ <-- SP-8 | t[1] | +-----------------------+ <-- SP-16 | t[0] | +-----------------------+ <-- SP-24 # c in %edi # Return value in %rax choose: andl $1, %edi # c &= 1 movq $-3, -24(%rsp) # t[0] = -3 movq $5, -16(%rsp) # t[1] = 5 movslq %edi,%rdi # Sign extend c movq -24(%rsp,%rdi,8), %rax # return t[c] (reads either SP-24 or SP-16) ret Arithmetic Operations addq %rdx, %rax # %rax += %rdx leaq ADDR, Reg # Reg = ADDR: does the address computation but does not access memory long arith(long x, long y, long z) { long t1 = x-y; long t2 = z+t1; long t3 = x+4; long t4 = y * 48; long t5 = t3 + t4; long rval = t2 * t5; return rval; } # x in %rdi, y in %rsi, z in %rdx # return value in %rax arith: movq %rdi, %rax # t1 = x (%rax=t1, %rdi=x, %rsi=y, %rdx=z) subq %rsi, %rax # t1 -= y leaq (%rsi,%rsi,2), %rsi # t4 = y*3 (%rax=t1, %rdi=x, %rsi=t4, %rdx=z) leaq (%rdx,%rax), %rax # t2 = z+t1 (%rax=t2, %rdi=x, %rsi=t4, %rdx=z) salq $4, %rsi # t4 *= 16 leaq 4(%rdi,%rsi), %rdi # t5 = 4+x+t4 (%rax=t2, %rdi=t5, %rsi=t4, %rdx=z) imulq %rdi, %rax # rval = t2*t5 (%rax=rval, %rdi=t5, %rsi=t4, %rdx=z) ret Important lessons: Use leaq to do (scaled) addition Replace multiplication by constant with shifts & adds Instructions don't execute in the order of the C source statements Some steps skipped, split up, combined, ... 
Logical Operations long logical1(long x, long y) { long t1 = x^y; long t2 = t1 >> 17; long mask = (-1L << 17) + 7; long rval = t2 & mask; return rval; } # x in %rdi, y in %rsi # return value in %rax logical1: xorq %rsi, %rdi # t1 = x^y (%rdi:t1, %rsi:y) sarq $17, %rdi # t2 = t1 >> 17 (%rdi:t2, %rsi:y) # mask: -131065 == 0xfffffffffffe0007 andq $-131065, %rdi # rval = t2 & mask (%rdi:rval, %rsi:y) movq %rdi, %rax # Return rval ret Important Lessons gcc finds many ways to simplify constants Weird example long logical2(long x, long y) { long t1 = x^y; long t2 = t1 >> 17; long mask = (1L << 13) - 7; long rval = t2 & mask; return rval; } # x in %rdi, y in %rsi # return value in %rax logical2: xorq %rsi, %rdi sarq $17, %rdi andl $8185, %edi movq %rdi, %rax ret Use of andl is very clever here. gcc determined that the upper 32 bits of mask = 0, so the AND will give 0 there. Make use of fact that andl does zero extension Reverse Engineering Exercise # x in %rdi, y in %rsi, z in %rdx # return value in %rax mystery: # (%rdi:x %rsi:y %rdx:z) imulq %rdx, %rdi # t1 = x*z (%rdi:t1 %rsi:y %rdx:z) leaq (%rsi,%rsi,4), %rsi # t2 = 5*y (%rdi:t1 %rsi:t2 %rdx:z) andq %rsi, %rdi # t3 = t1&t2 (%rdi:t3 %rsi:t2 %rdx:z) notq %rdi # rval = ~t3 (%rdi:rval %rsi:t2 %rdx:z) movq %rdi, %rax # Return rval ret long mystery(long x, long y, long z) { long t1 = __________; long t2 = __________; long t3 = __________; long rval = __________; return rval; } long mystery(long x, long y, long z) { long t1 = x * z; long t2 = 5 * y; long t3 = t1 & t2; long rval = ~t3; return rval; }