Class 07 Procedures What's in a procedure: * Control: Call and return * Data: Pass arguments Create local storage Return result This must be supported at machine level, but it is done in pieces. First, let's look at control. We've already seen examples of simple procedures. Pass arguments in registers, return result in registers. "Leaf" procedure: Has no arguments. /* leaf procedure: no stack storage required */ long prod(long x, long y) { return x*y; } # x in %rdi, y in %rsi, return value in %rax prod: imulq %rsi, %rdi # x *= y movq %rdi, %rax # return value = x ret /* Nonleaf procedure with no local storage */ long mul_2p1(long x, long y) { long u = prod(y, x); return u + 1; } # x in %rdi, y in %rsi # return value in %rax mul_2p1: movq %rsi, %rax # Swap %rdi & %rsi ... movq %rdi, %rsi movq %rax, %rdi call prod # Make call incq %rax # Increment result ret Let's look at call & ret instructions call dest: pushq $rip (of following instruction) jmp dst ret: popq $rip (Then continues one instruction beyond original call. Let's see this in action: 0000000000400560 : 400560: 48 0f af fe imul %rsi,%rdi 400564: 48 89 f8 mov %rdi,%rax 400567: c3 retq 0000000000400570 : 400570: 48 89 f0 mov %rsi,%rax 400573: 48 89 fe mov %rdi,%rsi 400576: 48 89 c7 mov %rax,%rdi 400579: e8 e2 ff ff ff callq 400560 40057e: 48 ff c0 inc %rax 400581: c3 retq unix> gdb asm-proc.64x (gdb) break mul_2p1 Breakpoint 1 at 0x400570 (gdb) run 7 9 Breakpoint 1, 0x0000000000400570 in mul_2p1 () (gdb) print $rdi $1 = 7 (gdb) print $rsi $2 = 9 (gdb) stepi (gdb) stepi (gdb) stepi 0x0000000000400579 in mul_2p1 () ## Here's the call (gdb) print $rsp $3 = (void *) 0x7fffffffe818 ## Note stack pointer (will vary) (gdb) stepi 0x0000000000400560 in prod () ## Now we're in the code for prod (gdb) print $rsp $4 = (void *) 0x7fffffffe810 ## Stack pointer has decreased by 8 (gdb) x/a $rsp 0x7fffffffe810: 0x40057e ## TOS contains return address (gdb) stepi (gdb) stepi 0x0000000000400567 in prod () ## Here's the ret (gdb) stepi 0x000000000040057e in mul_2p1 () ## Now we're back in caller (gdb) print $rsp $5 = (void *) 0x7fffffffe818 ## With incremented stack pointer Important points: * call/ret only provide minimal part of procedure call implementation Transfer of control. * Other code required to set up arguments, do something with result /* Leaf procedure: Stack storage required for array */ long val_loc(long x, long y, int i) { long vals[2] = { x*y, x+y }; i &= 0x1; return vals[i]; } # x in %rdi, y in %rsi, i in %edx # return value in %rax val_loc: movq %rdi, %rax andl $1, %edx addq %rsi, %rdi imulq %rsi, %rax movslq %edx,%rdx movq %rdi, -16(%rsp) # Initialize vals[1] movq %rax, -24(%rsp) # Initialize vals[0] movq -24(%rsp,%rdx,8), %rax # Does array access ret Uses 24 bytes beyond stack pointer (lower addresses) - 8: Unused -16: vals[1] -24: vals[0] Stack pointer unchanged while executing proc. /* Nonleaf with local storage for intermediate results */ long mul_3p1(long x, long y, long z) { long u = prod(x, y); long v = prod(u, z); return v + 1; } # x in %rdi, y in %rsi, z in %rdx # return value in %rax mul_3p1: pushq %rbx movq %rdx, %rbx call prod movq %rbx, %rsi movq %rax, %rdi call prod popq %rbx incq %rax ret During 1st call to prod, need safe place to store z. Possibilites: Keep in register %rdx Would work here, since prod doesn't use this register But that might not work in general. Push onto stack That would work, but we don't do it. Use callee saved register: About 8 registers available for this. Must save state before using them. If everyone does this, then can be sure state valid across function call Here we use %rbx Note that %rbx not touched by prod, so no problem here. /* Same idea, but generates interesting completion */ long mul_3(long x, long y, long z) { long u = prod(x, y); long v = prod(u, z); return v; } # x in %rdi, y in %rsi, z in %rdx # return value in %rax mul_3: pushq %rbx movq %rdx, %rbx call prod movq %rbx, %rsi movq %rax, %rdi popq %rbx # Deallocate all storage jmp prod # Check this out! Look at what this does: prod: imulq %rsi, %rdi # x *= y movq %rdi, %rax # return value = x ret Note that this trick would work even for more exotic versions of prod. State as enter prod exactly what arbitrary call to prod expects: Arguments in registers Return pointer at top of stack /* Leaf with pointer argument */ void store_prod(long x, long y, long *dst) { long u = x * y; *dst = u; } # x in %rdi, y in %rsi, dst in %rdx store_prod: imulq %rsi, %rdi movq %rdi, (%rdx) ret /* Nonleaf with local storage for address generation */ long mul_2s(long x, long y) { long u; store_prod(x, y, &u); return u; } # x in %rdi, y in %rsi # result in %rax mul_2s: subq $8, %rsp # Allocate 8 bytes for u movq %rsp, %rdx # &u as second arg. call store_prod movq (%rsp), %rax # Get u addq $8, %rsp ret Simple example of 8-byte stack frame. Used to hold local u. Must do this since use & operation on u. (Cannot create address for register) /* Local storage for intermediates & address gen. */ long mul_3s(long x, long y, long z) { long u; store_prod(x, y, &u); store_prod(u, z, &u); return u; } # x in %rdi, y in %rsi, z in %rdx # return value in %rax mul_3s: pushq %rbx # Save callee save movq %rdx, %rbx subq $8, %rsp # Allocate 8 bytes for u movq %rsp, %rdx # &u as 2nd arg call store_prod movq (%rsp), %rdi # Read u movq %rsp, %rdx movq %rbx, %rsi call store_prod movq (%rsp), %rax addq $8, %rsp # popq %rbx ret Uses 16 bytes total. Allocates / deallocates in two junks /* Simple Recursion */ long mul_list_r0(long a[], int cnt) { if (cnt <= 0) return 1L; else { long last = a[cnt-1]; long p1 = mul_list_r0(a, cnt-1); return p1*last; } } # a in %rdi, cnt in %esi # return value in %rax mul_list_r0: testl %esi, %esi pushq %rbx movl $1, %eax # Weird way to get return value = 1L jle .L19 movslq %esi,%rax decl %esi # cnt-- movq -8(%rdi,%rax,8), %rbx # Save last (a[cnt-1]) in %rbx call mul_list_r0 # Recursive call imulq %rbx, %rax # Final multiply .L19: popq %rbx ret Let's see this in action 0000000000400690 : 400690: 85 f6 test %esi,%esi 400692: 53 push %rbx 400693: b8 01 00 00 00 mov $0x1,%eax 400698: 7e 13 jle 4006ad 40069a: 48 63 c6 movslq %esi,%rax 40069d: ff ce dec %esi 40069f: 48 8b 5c c7 f8 mov 0xfffffffffffffff8(%rdi,%rax,8),%rbx 4006a4: e8 e7 ff ff ff callq 400690 4006a9: 48 0f af c3 imul %rbx,%rax 4006ad: 5b pop %rbx 4006ae: c3 retq unix> gdb mul_list_r0 (gdb) break mul_list_r0 (gdb) break *0x4006ae (gdb) run 10 20 30 Breakpoint 1, 0x0000000000400690 in mul_list_r0 () (gdb) print $rsi $1 = 3 (gdb) x/3gd $rdi 0x502010: 10 20 0x502020: 30 (gdb) print /x $rsp $2 = 0x7fffffffe818 # This will vary (gdb) x /a $rsp 0x7fffffffe818: 0x400928 # Returns back to main (gdb) print $rbx $3 = 2 # Old value (gdb) cont Breakpoint 1, 0x0000000000400690 in mul_list_r0 () (gdb) print $rsi $4 = 2 (gdb) print /x $rsp $5 = 0x7fffffffe808 # 16 bytes: 8 for saved %rbx, 8 for return pointer (gdb) x /a $rsp 0x7fffffffe808: 0x4006a9 # Returns back to mul_list_r0 (gdb) print $rbx $6 = 30 # a[2] (gdb) cont Breakpoint 1, 0x0000000000400690 in mul_list_r0 () (gdb) print $rsi $7 = 1 (gdb) print /x $rsp $8 = 0x7fffffffe7f8 # 16 bytes: 8 for saved %rbx, 8 for return pointer (gdb) x /a $rsp 0x7fffffffe7f8: 0x4006a9 # Back to mul_list_r0 (gdb) print $rbx $9 = 20 # a[1] (gdb) cont Breakpoint 1, 0x0000000000400690 in mul_list_r0 () (gdb) print $rsi $10 = 0 # Will complete recursion here (gdb) print /x $rsp $11 = 0x7fffffffe7e8 # 16 bytes again (gdb) x /a $rsp 0x7fffffffe7e8: 0x4006a9 (gdb) print $rbx $12 = 10 # a[0] (gdb) cont Breakpoint 2, 0x00000000004006ae in mul_list_r0 () # Return (gdb) print $rax $13 = 1 # First return value (gdb) print $rbx $14 = 10 # %rbx still holds value from before (gdb) stepi 2 0x00000000004006ad in mul_list_r0 () (gdb) print $rax $15 = 10 # Second return value (gdb) stepi Breakpoint 2, 0x00000000004006ae in mul_list_r0 () (gdb) print $rbx $17 = 20 # Restored value (gdb) stepi 3 Breakpoint 2, 0x00000000004006ae in mul_list_r0 () (gdb) print $rax $20 = 200 # Third return value (gdb) print $rbx $21 = 30 # Restored value (gdb) stepi 3 Breakpoint 2, 0x00000000004006ae in mul_list_r0 () (gdb) print $rax $24 = 6000 # Final return value (gdb) print $rbx $25 = 2 (gdb) stepi 0x0000000000400928 in main () # Back to main (gdb) /* More Recursion */ long mul_list_r1(long a[], int cnt) { if (cnt <= 0) return 1L; else if (cnt == 1) /* Same idea, but make third case */ return a[0]; else { long last = a[cnt-1]; long p1 = mul_list_r1(a, cnt-1); return p1*last; } } # a in %rdi, cnt in %esi # return value in %rax mul_list_r1: testl %esi, %esi pushq %rbx movl $1, %eax jle .L23 cmpl $1, %esi je .L29 movslq %esi,%rax decl %esi movq -8(%rdi,%rax,8), %rbx call mul_list_r1 imulq %rbx, %rax .L23: # cnt <= 0 popq %rbx ret .L29: # cnt == 1 popq %rbx movq (%rdi), %rax ret This is a different function (and not very good code) /* Tail Recursion (destructive) */ long mul_list_r2(long a[], int cnt) { if (cnt <= 0) return 1L; else if (cnt == 1) return a[0]; else { a[cnt-2] *= a[cnt-1]; /* Tail recursion: Recursive calls gives function result */ return mul_list_r2(a, cnt-1); } } Compiles without recursion. Creates loop instead # a in %rdi, cnt in %esi # return value in %rax mul_list_r2: .L35: loop: testl %esi, %esi jle .L36 cmpl $1, %esi je .L37 movslq %esi,%rdx ocnt = cnt decl %esi cnt-- movq -8(%rdi,%rdx,8), %rax t = a[ocnt-1] imulq -16(%rdi,%rdx,8), %rax t *= a[ocnt-2] movq %rax, -16(%rdi,%rdx,8) a[ocnt-2] = t jmp .L35 goto loop .L36: movl $1, %eax ret .L37: movq (%rdi), %rax ret General rule: long f(long x) { if (Test) return Expr1 else return f(Expr2) } loop: if (Test) return Expr1 x = Expr2; goto loop; /* Divide & conquer recursion */ long mul_list_r3(long a[], int cnt) { if (cnt <= 0) return 1L; else if (cnt == 1) return a[0]; else { int cd2 = cnt >> 1; long p1 = mul_list_r3(a, cd2); long p2 = mul_list_r3(a+cd2, cnt-cd2); return p1*p2; } } This is a real mess. But the general ideas are the same. Callee save registers used: %rbp, %r13, %rbp, and %r12 (32 bytes) Note that it saves these registers using movq rather than pushq And then it decrements the stack pointer by 32. Weird, but it works. # a in %rdi, cnt in %esi # return value in %rax mul_list_r3: movq %rbp, -24(%rsp) movq %r13, -8(%rsp) movl %esi, %ebp movq %rbx, -32(%rsp) movq %r12, -16(%rsp) subq $32, %rsp testl %esi, %esi movq %rdi, %r13 movl $1, %eax Return value = 1 jle .L38 goto done cmpl $1, %esi je .L44 goto cntone movl %esi, %r12d # Recursive case sarl %r12d movl %r12d, %esi subl %r12d, %ebp call mul_list_r3 movq %rax, %rbx movslq %r12d,%rax movl %ebp, %esi leaq (%r13,%rax,8), %rdi call mul_list_r3 imulq %rbx, %rax .L38: done: movq (%rsp), %rbx movq 8(%rsp), %rbp movq 16(%rsp), %r12 movq 24(%rsp), %r13 addq $32, %rsp ret .L44: cntone: movq (%rdi), %rax read *a movq (%rsp), %rbx movq 8(%rsp), %rbp movq 16(%rsp), %r12 movq 24(%rsp), %r13 addq $32, %rsp ret In class quiz problem In class quiz problem long flip(long x) { if (x ______) >= 0 return ____; x else return flip(_____) * _____; -x, x } # x in %rdi. Return value in %rax flip: testq %rdi, %rdi x? pushq %rbx Save %rbx movq %rdi, %rax return value = x movq %rdi, %rbx Save x js .L60 if (< 0) goto recurse popq %rbx Restore %rbx ret .L60: negq %rdi Arg = -x call flip flip(-x) imulq %rbx, %rax result * x popq %rbx ret