Data Structures A compiler's perspective Every data type T has a length len(T) and an alignment align(T). Some principles: Alignment is a power of 2 on all machines. Ranges between 1 and 16 Also have the property that len(T) must be a multiple of align(T). (This is important for array allocation) Primitive data types len(T) = align(T): char 1 short 2 int/unsigned/float 4 long/pointer/double 8 long double 16 C provides three type constructors of concern here: Arrays Structures Unions Each can be viewed as a way to construct a new type from some other types. Arrays T': T a[N] (N is a nonnegative constant) len(T') = N*length(T) align(T') = align(T) Organize as contiguous allocation a[0] ... a[N] Each object of type T. Access element i at Addr(a) + i*len(T). Example: long a[15] T is long T' is array of 15 long's len: 120 align: 8 access a[i] at Addr(a) + 8*i. Example code: typedef int vec5[5]; int read_vec(vec5 v, int i) { return v[i]; } # v in %rdi, i in %esi, return value in %eax read_vec: movslq %esi,%rsi # Sign extend i movl (%rdi,%rsi,4), %eax # Read int from v+4i ret long a[5][3]; Build up as: typedef long T1[3]; T1 a[5]; T1: length = 3*8 = 24, align = 8 a: length = 5*24 = 120, align = 8 a[0]: (a[0][0] ... a[0][2]) # Each row 24 bytes long a[1]: (a[1][0] ... a[1][2]) .... a[3]: (a[3][0] ... a[3][2]) a[4]: (a[4][0] ... a[4][2]) Element a[r][c] at address A + 8(3r + c) = A + 24r + 8c typedef long array53[5][3]; long *fetch_row(array53 a, int r) { return a[r]; } # a in %rdi, r in %esi, return value in %rax fetch_row: movslq %esi,%rsi # Sign extend r leaq (%rsi,%rsi,2), %rsi # 3r leaq (%rdi,%rsi,8), %rax # Return A+24r ret long fetch_ele_1(array53 a, int r, int c) { return a[r][c]; } # a in %rdi, r in %esi, c in %edx # return value in %rax fetch_ele_1: movslq %esi,%rsi # Sign extend r movslq %edx,%rdx # Sign extend c leaq (%rsi,%rsi,2), %rsi # 3r addq %rdx, %rsi # 3r+c movq (%rdi,%rsi,8), %rax # M[A+8*(24r+c)] ret long fetch_ele_2(array53 a, int r, int c) { long *row = a[r]; return row[c]; } # a in %rdi, r in %esi, c in %edx # return value in %rax fetch_ele_2: movslq %esi,%rsi # Sign extend r movslq %edx,%rdx # Sign extend c leaq (%rsi,%rsi,2), %rsi # 3r leaq (%rdi,%rsi,8), %rsi # A+24r movq (%rsi,%rdx,8), %rax # M[a+24r+8c] ret Array code. Many optimizations are possible long sum_col(array53 a, int c) { int r; long result = 0; for (r = 0; r < 5; r++) result += a[r][c]; return result; } # a in %rdi, c in %esi, return value in %rax sum_col: movslq %esi,%rsi # sign extend c xorl %eax, %eax # result = 0 movl $4, %ecx # i = 4 leaq (%rdi,%rsi,8), %rdx # p = &a[0][c]; (A+8c) .L9: # loop: addq (%rdx), %rax # result += *p addq $24, %rdx # p += 3 (scaled by 8) decl %ecx # i-- jns .L9 # if i >= 0 goto loop rep ; ret # return result Compiler has generated pointer code. Could write this in C as: long sum_col_p(array53 a, int c) { long *p = &a[0][c]; int i; long result = 0; for (i = 4; i >= 0; i--) { result += *p; *p += 3; /* Skip to next row */ } return result; } Structures: typedef struct { T1: f1; ... Tn: fn; } T; T s; Want to lay out component fields so that: 1. Each has enough storage 2. Each satisfies its alignment requirement How do we do this: First, make sure align(T) = max align(Ti). Make sure s at address S that is multiple of align(T). So, starting address of structure satisfies max. alignment requirment Second, within structure, assign each field an offset offset(fi): Offset(f_i) = Roundup(offset(f_i-1) + length(T_i-1), align(T_i)). Property: Offset(f_i) is a multiple of align(T_i). So S+Offset(f_i) is multiple of align(T_i). Finally, we may pad out the overall structure: length(T) = Roundup(Offset(f_n) + length(T_n), align(T)) So, if have array of structures, will be at S, S+length(T), S+2*length(T), ... Each structure will satisfy initial alignment requirement. Examples: struct { int x; char c; int *y; short w; } Field Offset x 0 c 4 y Roundup(4+1, 8) = 8 w 16 length Roundup(16+2, 8) = 24 Contrast with struct { int x; char c; int y[2]; short w; } Field Offset x 0 c 4 y Roundup(4+1, 4) = 8 w 16 length Roundup(16+2, 4) = 20 struct { int *y; int x; short w; char c; } Field Offset y 0 x 8 w 12 c 14 length Roundup(14+1, 8) = 16 (Note that declaration order affects structure length) Structure Code typedef struct ELE { int val; struct ELE *next; } list_ele, *list_ptr; Offsets val = 0 next = 8 Length = 16 int get_val(list_ptr ls) { return ls->val; } # ls in %rdi, return val in %eax get_val: movl (%rdi), %eax # M[S+0] ret list_ptr get_next(list_ptr ls) { return ls->next; } # ls in %rdi, return val in %rax get_next: movq 8(%rdi), %rax # M[S+8] ret list_ptr new_ele(int val, list_ptr next) { list_ptr result = malloc(sizeof(list_ele)); result->val = val; result->next = next; return result; } # val in %edi, next in %rsi, return value in %rax new_ele: movq %rbx, -16(%rsp) # Save registers movq %r12, -8(%rsp) movl %edi, %ebx # Save val subq $24, %rsp movq %rsi, %r12 # Save next movl $16, %edi # sizeof(list_ele) call malloc # malloc movl %ebx, (%rax) # M[R+0] = val movq %r12, 8(%rax) # M[R+8] = next movq 8(%rsp), %rbx # Restore registers movq 16(%rsp), %r12 addq $24, %rsp ret Unions Declaration looks like structure, but rules are very different. typedef union { T1: f1; ... Tn: fn; } T; T u; Share storage between all fields. Require 1. Each has enough storage 2. Each satisfies its alignment requirement align(T) = max align(Ti) length(T) = Roundup (max length(Ti), align(T)) Examples union { int a[3]; double d; } align = 8 length = Roundup(12, 8) = 16 Unions are a way to 1. overload storage with multiple different values 2. Circumvent type system unsigned long double_bits(double d) { union { double d; unsigned long b; } u; u.d = d; return u.b; } Here's our first floating point code. With x86-64, have simple FP architecture based on SSE3. Use registers %xmm0--%xmm15. Each is 16 bytes, but we only use lower 4 (float) or 8 (double.) Pass arguments in %xmm0--%xmm5. Return value in %xmm0. # d in %xmm0, return value in %rax double_bits: movsd %xmm0, -8(%rsp) Copy 8 bytes to stack movq -8(%rsp), %rdx Copy them back to rdx movq %rdx, %rax Copy that to rax (wasteful) ret Main point: you get the same bits in & out. By contrast, if you cast, it uses conversion instruction long double_val(double d) { return (long) d; } double_val: cvttsd2siq %xmm0, %rax Convert with truncation scalar double to integer quad ret In class puzzle typedef struct { int a[3]; union { double d; int *p; } u; int i; } s_ele, *s_ptr; Offsets: a[0] = 0 a[1] = 4 a[2] = 8 u.d = 16 u.p = 16 i = 24 Length = 32 # s in %rdi set_i1: movl 8(%rdi), %eax # s->a[2] movl %eax, 24(%rdi) ret void set_i1(s_ptr s) { s->i = ______; s->a[2] } # s in %rdi set_i2: movlpd 16(%rdi), %xmm0 # s->u.d cvttsd2si %xmm0, %eax # Integer conversion movl %eax, 24(%rdi) ret void set_i2(s_ptr s) { s->i = _______; s->u.d } # s in %rdi set_i3: movq 16(%rdi), %rax # s->u.p movl (%rax), %eax # *s->u.p movl %eax, 24(%rdi) ret void set_i3(s_ptr s) { s->i = _______; *s->u.p } # s in %rdi set_p1: leaq 24(%rdi), %rax &s->i movq %rax, 16(%rdi) ret void set_p1(s_ptr s) { s->u.p = _______; &s->i } # s in %rdi set_p2: movslq 24(%rdi),%rax s->i leaq (%rdi,%rax,4), %rax &s->a[s->i] movq %rax, 16(%rdi) ret void set_p2(s_ptr s) { s->u.p = ______; &s->a[s->i] }