Class 06: Control Constructs at the machine level Reminder: System State: Registers: PC %rip GPR %rax, %rdi, %rsi, %rdx ** Condition Codes: CF: Carry Flag (carry generated) ZF: Zero Flag (result = 0) SF: Sign Flag (resut < 0) OF: Overflow flag (2's complement OVF) Memory Code (text) Global data Stack Heap data Setting & Examining Condition Codes Set by arithmetic + special operations testq & cmpq testq: based on & cmpq: based on - Possible outcomes: eq z equal/zero l Two's complement less g le ge b Below (One's complement less) a Above int has_nonzero_masked(long x, long mask) { return !!(x & mask); } # x in %rdi, y in %rsi, return value in %eax has_nonzero_masked: xorl %eax, %eax # return = 0 testq %rsi, %rdi # x & mask (implicit) setne %al # return val = (result != 0) ret int has_nonzero_omasked(long x, long mask) { return !!(x | mask); } # x in %rdi, y in %rsi, return value in %eax has_nonzero_omasked: orq %rsi, %rdi # x | mask (explicit) setne %al # LSB = (result != 0) movzbl %al, %eax # Zero extend ret int gt(long x, long y) { return x > y; } # x in %rdi, y in %rsi, return value in %eax gt: xorl %eax, %eax # Result = 0 cmpq %rsi, %rdi # Compare x:y (Note reverse order!) setg %al # result = (>) ret All of these involve translating the result of a comparison into a single bit. How about actually doing conditional operations Conditional moves (Introduced w/ PentiumPro. Only available now) cmovT S, D == (if T) D <-- S Where T is one of the standard tests long max(long x, long y) { return (x > y) ? x : y; } # x in %rdi, y in %rsi, return value in %rax max: cmpq %rdi, %rsi y:x cmovl %rdi, %rsi if (<) y = x movq %rsi, %rax return val = y ret Note that conditional moves only work when OK to evaluate both branches. Branching code: More traditional way void smax(long x, long *vp) { long v = *vp; if (v < x) *vp = x; } # x in %rdi, vp in %rsi smax: cmpq %rdi, (%rsi) *vp:x jge .L6 Skip next if >= movq %rdi, (%rsi) *vp = x .L6: rep ; ret Note "rep", since branch target Function with side effect. Can't evaluate true branch when test false. int winx = 0; long maxw(long x, long y) { if (x > y) { winx++; return x; } else return y; } # x in %rdi, y in %rsi, return value in %rax maxw: cmpq %rsi, %rdi x:y jle .L9 if (>=) goto finish) incl winx(%rip) winx++ (Note PC-relative addressing) movq %rdi, %rsi y = x .L9: finish: movq %rsi, %rax return val = y ret Do-while loop template Overall form: do body while (test) Translate to loop: Body t = Test if (t) goto loop done: long fib_dw(int n) { long fprev = 1L; long fcurr = 1L; do { long fnext = fprev+fcurr; fprev = fcurr; fcurr = fnext; n--; } while (n > 2); return fcurr; } # n in %rdi, return value in %rax fib_dw: movl $1, %ecx movl $1, %edx .L13: Loop leaq (%rcx,%rdx), %rax decl %edi movq %rdx, %rcx cmpl $2, %edi t = test movq %rax, %rdx jg .L13 if (t) goto loop rep ; ret Jump into middle loop template while (Test) Body goto middle loop: Body middle: t = Test if (t) goto loop long fib_while(int n) { long fprev = 1L; long fcurr = 1L; while (n > 2) { long fnext = fprev+fcurr; fprev = fcurr; fcurr = fnext; n--; } return fcurr; } # n in %rdi, return value in %rax fib_while: movl $1, %ecx movl $1, %edx jmp .L22 goto middle .L23: loop: leaq (%rcx,%rdx), %rax decl %edi movq %rdx, %rcx movq %rax, %rdx .L22: middle: cmpl $2, %edi t = test jg .L23 goto loop movq %rdx, %rax ret While compiled using do-while template while (Test) Body ==> if (Test) { do Body while (Test) } t = Test if (!t) goto done loop: Body t = Test if (t) goto loop done: long fib_while2(int n) { long fprev = 1L; long fcurr = 1L; int i = 2; while (i < n) { long fnext = fprev+fcurr; fprev = fcurr; fcurr = fnext; i++; } return fcurr; } Weird example. Transformed code into something similar to fib_while # n in %rdi, return value in %rax fib_while2: cmpl $2, %edi n:2 (t = Test) movl $1, %esi movl $1, %ecx jle .L29 if (<=) goto done (!t) leal -2(%rdi), %edx cnt = n-2 .L27: loop: leaq (%rsi,%rcx), %rax decl %edx cnt-- (set CC) movq %rcx, %rsi movq %rax, %rcx jne .L27 if !=0 goto loop .L29: done: movq %rcx, %rax ret For loop long fib_for(long n) { long fprev = 1L; long fcurr = 1L; int i; for (i = 2; i < n; i++) { long fnext = fprev + fcurr; fprev = fcurr; fcurr = fnext; } return fcurr; } for (Init; Test; Update) Body Init while (Test) { Body; Update: } # n in %rdi, return value in %rax fib_for: cmpq $2, %rdi t = Test movl $1, %esi movl $1, %ecx movl $2, %edx jle .L36 if (t) goto done .L34: loop: leaq (%rsi,%rcx), %rax Body incl %edx Update movq %rcx, %rsi Body movq %rax, %rcx Body movslq %edx,%rax Body cmpq %rdi, %rax t = Test jl .L34 if (t) goto loop .L36: done movq %rcx, %rax ret Switch statement example Very different style. Multiple compilation techniques. Most interesting is "jump table". O(1) complexity long switch_eg(long x, long y, long z) { long w = 1; switch(x) { case 1: No case 0 w = y*z; break; case 2: w = y/z; /* Fall Through */ case 3: w += z; break; No case 4 case 5: Merge cases 5 & 6 case 6: w -= z; break; default: Cases: <0, 0, 4, >6 w = 2; } return w; } Switch Code # x in %rdi, y in %rsi, z in %rdx # return value in %rax switch_eg: cmpq $6, %rdi movq %rdx, %rcx movl $1, %r8d ja .L44 if (unsigned x > 6) goto default jmp *.L45(,%rdi,8) goto tab[x] .L44: default code movl $2, %r8d movq %r8, %rax ret .L40: case 2 code movq %rsi, %rax cqto idivq %rcx movq %rax, %r8 Fall through .L41: case 3 code addq %rcx, %r8 movq %r8, %rax ret .L43: Case 5/6 code subq %rdx, %r8 movq %r8, %rax ret .L39: Case 1 code movq %rsi, %r8 imulq %rdx, %r8 movq %r8, %rax ret .section .rodata .L45: Jump Table .quad .L44 0: default .quad .L39 1: case 1 code .quad .L40 2: case 2 code .quad .L41 3: case 3 code .quad .L44 4: default .quad .L43 5: case 5/6 .quad .L43 6: case 5/6 .text Binary view of switch code (gdb) disassemble switch_eg Dump of assembler code for function switch_eg: 0x0400680 <+0>: cmp $0x6,%rdi 0x0400684 <+4>: mov %rdx,%rcx 0x0400687 <+7>: mov $0x1,%r8d 0x040068d <+13>: ja 0x400696 <+22> 0x040068f <+15>: jmpq *0x400910(,%rdi,8) (table at 0x400910) 0x0400696 <+22>: mov $0x2,%r8d default 0x040069c <+28>: mov %r8,%rax 0x040069f <+31>: retq 0x04006a0 <+32>: mov %rsi,%rax case 2 code 0x04006a3 <+35>: cqto 0x04006a5 <+37>: idiv %rcx 0x04006a8 <+40>: mov %rax,%r8 fall through 0x04006ab <+43>: add %rcx,%r8 case 3 code 0x04006ae <+46>: mov %r8,%rax 0x04006b1 <+49>: retq 0x04006b2 <+50>: sub %rdx,%r8 case 5/6 code 0x04006b5 <+53>: mov %r8,%rax 0x04006b8 <+56>: retq 0x04006b9 <+57>: mov %rsi,%r8 case 1 code 0x04006bc <+60>: imul %rdx,%r8 0x04006c0 <+64>: mov %r8,%rax 0x04006c3 <+67>: retq Implementation of jump table (gdb)x/7a 0x400910 0x400910: 0x400696 <+22> 0x4006b9 <+57> 0x400920: 0x4006a0 <+32> 0x4006ab <+43> 0x400930: 0x400696 <+22> 0x4006b2 <+50> 0x400940: 0x4006b2 <+50> Hard to read due to byte ordering issues unix>objdump asm-cntl.64x -s --section=.rodata Contents of section .rodata: 400908 01000200 00000000 96064000 00000000 ..........@..... 400918 b9064000 00000000 a0064000 00000000 ..@.......@..... 400928 ab064000 00000000 96064000 00000000 ..@.......@..... 400938 b2064000 00000000 b2064000 00000000 ..@.......@..... Alternate compilation of switch Jump table would require 501 entries. Not a good idea long sparse_switch(long x, long y, long z) { long w = 1; switch(x) { case 100: w = y*z; break; case 200: w = y/z; /* Fall Through */ case 300: w += z; break; case 500: case 600: w -= z; default: w = 2; } return w; } # x in %rdi, y in %rsi, z in %rdx # return value in %rax sparse_switch: cmpq $300, %rdi x:300 movq %rdx, %rcx movl $1, %r8d je .L50 = goto E300 jg .L54 > goto G300 cmpq $100, %rdi < x:100 je .L48 = goto E100 cmpq $200, %rdi != x:200 je .L49 = goto E200 .L53: default movl $2, %r8d movq %r8, %rax ret .L54: G300: cmpq $500, %rdi x:500 je .L52 = goto E500 cmpq $600, %rdi x:600 jne .L53 != goto default .L52: E500: (=600) case 500/600 code subq %rcx, %r8 movq %r8, %rax ret .L49: E200 movq %rsi, %rax cqto idivq %rcx movq %rax, %r8 Fall Through .L50: E300 addq %rcx, %r8 movq %r8, %rax ret .L48: E100 movq %rsi, %r8 imulq %rdx, %r8 movq %r8, %rax ret In-class puzzle long puzzle_loop(long x, long y, long z) { long t1 = ____; x+y long t2 = ____; x*y long i; for (i = ____; 456 i > ____; z i += _____) { t1 long temp = t1; t1 = t2; t2 = temp; } return t1; } # x in %rdi, y in %rsi, z in %rdx # return value in %rax # t1 in rcx # t2 in rdi # i in rsi (don't need for y) puzzle_loop: leaq (%rdi,%rsi), %rcx t1 = x+y imulq %rsi, %rdi t2 = x*y movl $456, %esi i = 456 (zero extended) jmp .L62 goto middle .L63: loop: movq %rcx, %rax movq %rdi, %rcx addq %rcx, %rsi i += t1 movq %rax, %rdi .L62: middle cmpq %rdx, %rsi i:z jg .L63 if > goto loop movq %rcx, %rax return value = t1 ret