register_combiners: movl $combine1, %esi subq $8, %rsp movl $combine1_descr, %edx movq %rsi, %rdi call add_combiner movl $combine2_descr, %edx movl $combine1, %esi movl $combine2, %edi call add_combiner movl $combine3_descr, %edx movl $combine1, %esi movl $combine3, %edi call add_combiner movl $combine4_descr, %edx movl $combine1, %esi movl $combine4, %edi call add_combiner movl $combine4p_descr, %edx movl $combine1, %esi movl $combine4p, %edi call add_combiner movl $unroll2a_descr, %edx movl $combine1, %esi movl $unroll2a_combine, %edi call add_combiner movl $combine5p_descr, %edx movl $combine1, %esi movl $combine5p, %edi call add_combiner movl $unroll3aw_descr, %edx movl $combine1, %esi movl $unroll3aw_combine, %edi call add_combiner movl $unroll4a_descr, %edx movl $combine1, %esi movl $unroll4a_combine, %edi call add_combiner movl $unroll8a_descr, %edx movl $combine1, %esi movl $unroll8a_combine, %edi call add_combiner movl $unroll16a_descr, %edx movl $combine1, %esi movl $unroll16a_combine, %edi call add_combiner movl $unroll2_descr, %edx movl $combine1, %esi movl $unroll2_combine, %edi call add_combiner movl $unroll3_descr, %edx movl $combine1, %esi movl $unroll3_combine, %edi call add_combiner movl $unroll4_descr, %edx movl $combine1, %esi movl $unroll4_combine, %edi call add_combiner movl $unroll8_descr, %edx movl $combine1, %esi movl $unroll8_combine, %edi call add_combiner movl $unroll16_descr, %edx movl $combine1, %esi movl $unroll16_combine, %edi call add_combiner movl $combine6_descr, %edx movl $combine1, %esi movl $combine6, %edi call add_combiner movl $unroll4x2a_descr, %edx movl $combine1, %esi movl $unroll4x2a_combine, %edi call add_combiner movl $unroll8x2a_descr, %edx movl $combine1, %esi movl $unroll8x2a_combine, %edi call add_combiner movl $unroll3x3a_descr, %edx movl $combine1, %esi movl $unroll3x3a_combine, %edi call add_combiner movl $unroll4x4a_descr, %edx movl $combine1, %esi movl $unroll4x4a_combine, %edi call add_combiner movl 
$unroll8x4a_descr, %edx movl $combine1, %esi movl $unroll8x4a_combine, %edi call add_combiner movl $unroll6x6a_descr, %edx movl $combine1, %esi movl $unroll6x6a_combine, %edi call add_combiner movl $unroll8x8a_descr, %edx movl $combine1, %esi movl $unroll8x8a_combine, %edi call add_combiner movl $unroll10x10a_descr, %edx movl $combine1, %esi movl $unroll10x10a_combine, %edi call add_combiner movl $unroll12x6a_descr, %edx movl $combine1, %esi movl $unroll12x6a_combine, %edi call add_combiner movl $unroll12x12a_descr, %edx movl $combine1, %esi movl $unroll12x12a_combine, %edi call add_combiner movl $unroll8x2_descr, %edx movl $combine1, %esi movl $unroll8x2_combine, %edi call add_combiner movl $unroll8x4_descr, %edx movl $combine1, %esi movl $unroll8x4_combine, %edi call add_combiner movl $unroll8x8_descr, %edx movl $combine1, %esi movl $unroll8x8_combine, %edi call add_combiner movl $unroll9x3_descr, %edx movl $combine1, %esi movl $unroll9x3_combine, %edi call add_combiner movl $unrollx2as_descr, %edx movl $combine1, %esi movl $unrollx2as_combine, %edi call add_combiner movl $unroll2aa_descr, %edx movl $combine1, %esi movl $unroll2aa_combine, %edi call add_combiner movl $unroll3aa_descr, %edx movl $combine1, %esi movl $unroll3aa_combine, %edi call add_combiner movl $unroll4aa_descr, %edx movl $combine1, %esi movl $unroll4aa_combine, %edi call add_combiner movl $unroll6aa_descr, %edx movl $combine1, %esi movl $unroll6aa_combine, %edi call add_combiner movl $unroll8aa_descr, %edx movl $combine1, %esi movl $unroll8aa_combine, %edi call add_combiner movl $unrollv1_descr, %edx movl $combine1, %esi movl $unrollv1_combine, %edi call add_combiner movl $unrollv2_descr, %edx movl $combine1, %esi movl $unrollv2_combine, %edi call add_combiner movl $unrollv4_descr, %edx movl $combine1, %esi movl $unrollv4_combine, %edi call add_combiner movl $unrollv8_descr, %edx movl $combine1, %esi movl $unrollv8_combine, %edi call add_combiner movl $unrollv12_descr, %edx movl $combine1, %esi 
movl $unrollv12_combine, %edi call add_combiner movl $unrollv2a_descr, %edx movl $combine1, %esi movl $unrollv2a_combine, %edi call add_combiner movl $unrollv4a_descr, %edx movl $combine1, %esi movl $unrollv4a_combine, %edi call add_combiner movl $unrollv8a_descr, %edx movl $combine1, %esi movl $unrollv8a_combine, %edi addq $8, %rsp jmp add_combiner unrollv8a_combine: pushq %r12 movq %rsi, %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $16, %rsp call get_vec_start movq %rbx, %rdi movq %rax, %rbp call vec_length movl $0, (%rsp) movl $0, 4(%rsp) cmpl $31, %eax movl $0, 8(%rsp) movl $0, 12(%rsp) movdqa (%rsp), %xmm3 jle .L4 .L11: movdqa 16(%rbp), %xmm2 subl $32, %eax movdqa 48(%rbp), %xmm0 paddd (%rbp), %xmm2 paddd 32(%rbp), %xmm0 movdqa 80(%rbp), %xmm1 paddd %xmm0, %xmm2 movdqa 112(%rbp), %xmm0 paddd 64(%rbp), %xmm1 paddd 96(%rbp), %xmm0 subq $-128, %rbp cmpl $31, %eax paddd %xmm0, %xmm1 paddd %xmm1, %xmm2 paddd %xmm2, %xmm3 jg .L11 .L4: xorl %edx, %edx testl %eax, %eax je .L8 .L9: addl (%rbp), %edx addq $4, %rbp subl $1, %eax jne .L9 .L8: movdqa %xmm3, (%rsp) movl (%rsp), %eax addl 4(%rsp), %eax addl 8(%rsp), %eax addl %edx, %eax addl 12(%rsp), %eax movl %eax, (%r12) addq $16, %rsp popq %rbx popq %rbp popq %r12 ret unrollv4a_combine: pushq %r12 movq %rsi, %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $16, %rsp call get_vec_start movq %rbx, %rdi movq %rax, %rbp call vec_length movl $0, (%rsp) movl $0, 4(%rsp) cmpl $15, %eax movl $0, 8(%rsp) movl $0, 12(%rsp) movdqa (%rsp), %xmm2 jle .L17 .L24: movdqa 16(%rbp), %xmm1 subl $16, %eax movdqa 48(%rbp), %xmm0 paddd (%rbp), %xmm1 paddd 32(%rbp), %xmm0 addq $64, %rbp cmpl $15, %eax paddd %xmm0, %xmm1 paddd %xmm1, %xmm2 jg .L24 .L17: xorl %edx, %edx testl %eax, %eax je .L21 .L22: addl (%rbp), %edx addq $4, %rbp subl $1, %eax jne .L22 .L21: movdqa %xmm2, (%rsp) movl (%rsp), %eax addl 4(%rsp), %eax addl 8(%rsp), %eax addl %edx, %eax addl 12(%rsp), %eax movl %eax, (%r12) addq $16, %rsp popq %rbx popq %rbp popq %r12 ret 
unrollv2a_combine: pushq %r12 movq %rsi, %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $16, %rsp call get_vec_start movq %rbx, %rdi movq %rax, %rbp call vec_length movl $0, (%rsp) movl $0, 4(%rsp) cmpl $7, %eax movl $0, 8(%rsp) movl $0, 12(%rsp) movdqa (%rsp), %xmm1 jle .L29 .L36: movdqa 16(%rbp), %xmm0 subl $8, %eax paddd (%rbp), %xmm0 addq $32, %rbp cmpl $7, %eax paddd %xmm0, %xmm1 jg .L36 .L29: xorl %edx, %edx testl %eax, %eax je .L33 .L34: addl (%rbp), %edx addq $4, %rbp subl $1, %eax jne .L34 .L33: movdqa %xmm1, (%rsp) movl (%rsp), %eax addl 4(%rsp), %eax addl 8(%rsp), %eax addl %edx, %eax addl 12(%rsp), %eax movl %eax, (%r12) addq $16, %rsp popq %rbx popq %rbp popq %r12 ret unrollv12_combine: pushq %r12 movq %rsi, %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $16, %rsp call get_vec_start movq %rbx, %rdi movq %rax, %rbp call vec_length movl $0, (%rsp) movl $0, 4(%rsp) cmpl $47, %eax movl $0, 8(%rsp) movl $0, 12(%rsp) movdqa (%rsp), %xmm0 jle .L53 movdqa %xmm0, %xmm11 movdqa %xmm0, %xmm5 movdqa %xmm0, %xmm10 movdqa %xmm0, %xmm4 movdqa %xmm0, %xmm9 movdqa %xmm0, %xmm3 movdqa %xmm0, %xmm8 movdqa %xmm0, %xmm2 movdqa %xmm0, %xmm7 movdqa %xmm0, %xmm1 movdqa %xmm0, %xmm6 .L44: subl $48, %eax paddd (%rbp), %xmm0 paddd 16(%rbp), %xmm11 paddd 32(%rbp), %xmm5 paddd 48(%rbp), %xmm10 paddd 64(%rbp), %xmm4 paddd 80(%rbp), %xmm9 paddd 96(%rbp), %xmm3 paddd 112(%rbp), %xmm8 paddd 128(%rbp), %xmm2 paddd 144(%rbp), %xmm7 paddd 160(%rbp), %xmm1 paddd 176(%rbp), %xmm6 addq $192, %rbp cmpl $47, %eax jg .L44 .L43: xorl %edx, %edx testl %eax, %eax je .L47 .L48: addl (%rbp), %edx addq $4, %rbp subl $1, %eax jne .L48 .L47: paddd %xmm11, %xmm0 paddd %xmm10, %xmm5 paddd %xmm9, %xmm4 paddd %xmm5, %xmm0 paddd %xmm8, %xmm3 paddd %xmm4, %xmm0 paddd %xmm7, %xmm2 paddd %xmm3, %xmm0 paddd %xmm6, %xmm1 paddd %xmm2, %xmm0 paddd %xmm1, %xmm0 movdqa %xmm0, (%rsp) movl (%rsp), %eax addl 4(%rsp), %eax addl 8(%rsp), %eax addl %edx, %eax addl 12(%rsp), %eax movl %eax, (%r12) addq $16, %rsp popq 
%rbx popq %rbp popq %r12 ret .L53: movdqa %xmm0, %xmm11 movdqa %xmm0, %xmm5 movdqa %xmm0, %xmm10 movdqa %xmm0, %xmm4 movdqa %xmm0, %xmm9 movdqa %xmm0, %xmm3 movdqa %xmm0, %xmm8 movdqa %xmm0, %xmm2 movdqa %xmm0, %xmm7 movdqa %xmm0, %xmm1 movdqa %xmm0, %xmm6 jmp .L43 unrollv8_combine: pushq %r12 movq %rsi, %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $16, %rsp call get_vec_start movq %rbx, %rdi movq %rax, %rbp call vec_length movl $0, (%rsp) movl $0, 4(%rsp) cmpl $31, %eax movl $0, 8(%rsp) movl $0, 12(%rsp) movdqa (%rsp), %xmm0 jle .L67 movdqa %xmm0, %xmm7 movdqa %xmm0, %xmm3 movdqa %xmm0, %xmm6 movdqa %xmm0, %xmm2 movdqa %xmm0, %xmm5 movdqa %xmm0, %xmm1 movdqa %xmm0, %xmm4 .L58: subl $32, %eax paddd (%rbp), %xmm0 paddd 16(%rbp), %xmm7 paddd 32(%rbp), %xmm3 paddd 48(%rbp), %xmm6 paddd 64(%rbp), %xmm2 paddd 80(%rbp), %xmm5 paddd 96(%rbp), %xmm1 paddd 112(%rbp), %xmm4 subq $-128, %rbp cmpl $31, %eax jg .L58 .L57: xorl %edx, %edx testl %eax, %eax je .L61 .L62: addl (%rbp), %edx addq $4, %rbp subl $1, %eax jne .L62 .L61: paddd %xmm7, %xmm0 paddd %xmm6, %xmm3 paddd %xmm5, %xmm2 paddd %xmm3, %xmm0 paddd %xmm4, %xmm1 paddd %xmm2, %xmm0 paddd %xmm1, %xmm0 movdqa %xmm0, (%rsp) movl (%rsp), %eax addl 4(%rsp), %eax addl 8(%rsp), %eax addl %edx, %eax addl 12(%rsp), %eax movl %eax, (%r12) addq $16, %rsp popq %rbx popq %rbp popq %r12 ret .L67: movdqa %xmm0, %xmm7 movdqa %xmm0, %xmm3 movdqa %xmm0, %xmm6 movdqa %xmm0, %xmm2 movdqa %xmm0, %xmm5 movdqa %xmm0, %xmm1 movdqa %xmm0, %xmm4 jmp .L57 unrollv4_combine: pushq %r12 movq %rsi, %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $16, %rsp call get_vec_start movq %rbx, %rdi movq %rax, %rbp call vec_length movl $0, (%rsp) movl $0, 4(%rsp) cmpl $15, %eax movl $0, 8(%rsp) movl $0, 12(%rsp) movdqa (%rsp), %xmm1 jle .L81 movdqa %xmm1, %xmm3 movdqa %xmm1, %xmm0 movdqa %xmm1, %xmm2 .L72: subl $16, %eax paddd (%rbp), %xmm1 paddd 16(%rbp), %xmm3 paddd 32(%rbp), %xmm0 paddd 48(%rbp), %xmm2 addq $64, %rbp cmpl $15, %eax jg .L72 .L71: xorl 
%edx, %edx testl %eax, %eax je .L75 .L76: addl (%rbp), %edx addq $4, %rbp subl $1, %eax jne .L76 .L75: paddd %xmm3, %xmm1 paddd %xmm2, %xmm0 paddd %xmm0, %xmm1 movdqa %xmm1, (%rsp) movl (%rsp), %eax addl 4(%rsp), %eax addl 8(%rsp), %eax addl %edx, %eax addl 12(%rsp), %eax movl %eax, (%r12) addq $16, %rsp popq %rbx popq %rbp popq %r12 ret .L81: movdqa %xmm1, %xmm3 movdqa %xmm1, %xmm0 movdqa %xmm1, %xmm2 jmp .L71 unrollv2_combine: pushq %r12 movq %rsi, %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $16, %rsp call get_vec_start movq %rbx, %rdi movq %rax, %rbp call vec_length movl $0, (%rsp) movl $0, 4(%rsp) cmpl $7, %eax movl $0, 8(%rsp) movl $0, 12(%rsp) movdqa (%rsp), %xmm0 movdqa %xmm0, %xmm1 jle .L85 .L86: subl $8, %eax paddd (%rbp), %xmm0 paddd 16(%rbp), %xmm1 addq $32, %rbp cmpl $7, %eax jg .L86 .L85: xorl %edx, %edx testl %eax, %eax je .L89 .L90: addl (%rbp), %edx addq $4, %rbp subl $1, %eax jne .L90 .L89: paddd %xmm1, %xmm0 movdqa %xmm0, (%rsp) movl (%rsp), %eax addl 4(%rsp), %eax addl 8(%rsp), %eax addl %edx, %eax addl 12(%rsp), %eax movl %eax, (%r12) addq $16, %rsp popq %rbx popq %rbp popq %r12 ret unrollv1_combine: pushq %r12 movq %rsi, %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $16, %rsp call get_vec_start movq %rbx, %rdi movq %rax, %rbp call vec_length movl $0, (%rsp) movl $0, 4(%rsp) cmpl $3, %eax movl $0, 8(%rsp) movl $0, 12(%rsp) movdqa (%rsp), %xmm0 jle .L96 .L103: subl $4, %eax paddd (%rbp), %xmm0 addq $16, %rbp cmpl $3, %eax jg .L103 .L96: xorl %edx, %edx testl %eax, %eax je .L100 .L101: addl (%rbp), %edx addq $4, %rbp subl $1, %eax jne .L101 .L100: movdqa %xmm0, (%rsp) movl (%rsp), %eax addl 4(%rsp), %eax addl 8(%rsp), %eax addl %edx, %eax addl 12(%rsp), %eax movl %eax, (%r12) addq $16, %rsp popq %rbx popq %rbp popq %r12 ret unroll8aa_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -7(%r12), %ebp call get_vec_start xorl %esi, %esi xorl 
%ecx, %ecx testl %ebp, %ebp movq %rax, %rdi jle .L110 movq %rax, %rdx .L111: movl (%rdx), %eax addl 4(%rdx), %eax addl $8, %ecx addl 8(%rdx), %eax addl 12(%rdx), %eax addl 16(%rdx), %eax addl 20(%rdx), %eax addl 24(%rdx), %eax addl 28(%rdx), %eax addq $32, %rdx addl %eax, %esi cmpl %ecx, %ebp jg .L111 .L110: cmpl %ecx, %r12d jle .L112 movslq %ecx,%rax subl %ecx, %r12d xorl %edx, %edx leaq (%rdi,%rax,4), %rax movl %r12d, %ecx .L114: addl $1, %edx addl (%rax), %esi addq $4, %rax cmpl %ecx, %edx jne .L114 .L112: movl %esi, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unroll6aa_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -5(%r12), %ebp call get_vec_start xorl %esi, %esi xorl %ecx, %ecx testl %ebp, %ebp movq %rax, %rdi jle .L121 movq %rax, %rdx .L122: movl (%rdx), %eax addl 4(%rdx), %eax addl $6, %ecx addl 8(%rdx), %eax addl 12(%rdx), %eax addl 16(%rdx), %eax addl 20(%rdx), %eax addq $24, %rdx addl %eax, %esi cmpl %ecx, %ebp jg .L122 .L121: cmpl %ecx, %r12d jle .L123 movslq %ecx,%rax subl %ecx, %r12d xorl %edx, %edx leaq (%rdi,%rax,4), %rax movl %r12d, %ecx .L125: addl $1, %edx addl (%rax), %esi addq $4, %rax cmpl %ecx, %edx jne .L125 .L123: movl %esi, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unroll4aa_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -3(%r12), %ebp call get_vec_start xorl %esi, %esi xorl %ecx, %ecx testl %ebp, %ebp movq %rax, %rdi jle .L132 movq %rax, %rdx .L133: movl (%rdx), %eax addl 4(%rdx), %eax addl $4, %ecx addl 8(%rdx), %eax addl 12(%rdx), %eax addq $16, %rdx addl %eax, %esi cmpl %ecx, %ebp jg .L133 .L132: cmpl %ecx, %r12d jle .L134 movslq %ecx,%rax subl %ecx, %r12d xorl %edx, %edx leaq (%rdi,%rax,4), %rax movl %r12d, %ecx .L136: addl $1, %edx addl (%rax), %esi addq $4, %rax cmpl %ecx, %edx jne 
.L136 .L134: movl %esi, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unroll3aa_combine: pushq %r12 movq %rsi, %r12 pushq %rbp pushq %rbx movq %rdi, %rbx call vec_length movq %rbx, %rdi movl %eax, %ebp call get_vec_start movq %rax, %r8 leal -2(%rbp), %eax xorl %edi, %edi xorl %esi, %esi testl %eax, %eax jle .L143 leal -3(%rbp), %edx movl $-1431655765, %eax movq %r8, %rcx mull %edx shrl %edx leal 3(%rdx,%rdx,2), %edx .L144: movl (%rcx), %eax addl 4(%rcx), %eax addl $3, %esi addl 8(%rcx), %eax addq $12, %rcx addl %eax, %edi cmpl %edx, %esi jne .L144 .L143: cmpl %esi, %ebp jle .L145 movslq %esi,%rax movl %ebp, %edx xorl %ecx, %ecx leaq (%r8,%rax,4), %rax subl %esi, %edx .L147: addl $1, %ecx addl (%rax), %edi addq $4, %rax cmpl %edx, %ecx jne .L147 .L145: popq %rbx popq %rbp movl %edi, (%r12) popq %r12 ret unroll2aa_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -1(%r12), %ebp call get_vec_start xorl %esi, %esi xorl %ecx, %ecx testl %ebp, %ebp movq %rax, %rdi jle .L154 movq %rax, %rdx .L155: movl (%rdx), %eax addl 4(%rdx), %eax addl $2, %ecx addq $8, %rdx addl %eax, %esi cmpl %ecx, %ebp jg .L155 .L154: cmpl %ecx, %r12d jle .L156 movslq %ecx,%rax subl %ecx, %r12d xorl %edx, %edx leaq (%rdi,%rax,4), %rax movl %r12d, %ecx .L158: addl $1, %edx addl (%rax), %esi addq $4, %rax cmpl %ecx, %edx jne .L158 .L156: movl %esi, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unroll8x8_combine: pushq %r12 movq %rdi, %r12 pushq %rbp movq %rsi, %rbp pushq %rbx call vec_length movl %eax, %ebx movq %r12, %rdi movslq %ebx,%rbx call get_vec_start leaq -28(%rax,%rbx,4), %rdx cmpq %rdx, %rax jae .L174 xorl %ecx, %ecx xorl %ebx, %ebx xorl %r11d, %r11d xorl %r10d, %r10d xorl %r9d, %r9d xorl %r8d, %r8d xorl %edi, %edi xorl %esi, %esi .L166: addl (%rax), %ecx addl 4(%rax), %ebx addl 8(%rax), %r11d addl 12(%rax), %r10d addl 16(%rax), %r9d addl 20(%rax), 
%r8d addl 24(%rax), %edi addl 28(%rax), %esi addq $32, %rax cmpq %rax, %rdx ja .L166 .L165: addq $28, %rdx cmpq %rax, %rdx jbe .L167 .L170: addl (%rax), %ecx addq $4, %rax cmpq %rax, %rdx ja .L170 .L167: leal (%r11,%rbx), %eax addl %r10d, %eax addl %r9d, %eax addl %r8d, %eax addl %edi, %eax addl %esi, %eax addl %ecx, %eax movl %eax, (%rbp) popq %rbx popq %rbp popq %r12 ret .L174: xorl %ecx, %ecx xorl %ebx, %ebx xorl %r11d, %r11d xorl %r10d, %r10d xorl %r9d, %r9d xorl %r8d, %r8d xorl %edi, %edi xorl %esi, %esi jmp .L165 unroll8x4_combine: pushq %r12 movq %rdi, %r12 pushq %rbp movq %rsi, %rbp pushq %rbx call vec_length movl %eax, %ebx movq %r12, %rdi movslq %ebx,%rbx call get_vec_start leaq -28(%rax,%rbx,4), %rcx movq %rax, %rdx cmpq %rcx, %rax jae .L187 xorl %esi, %esi xorl %r9d, %r9d xorl %r8d, %r8d xorl %edi, %edi .L179: movl (%rdx), %eax addl 16(%rdx), %eax addl %eax, %esi movl 4(%rdx), %eax addl 20(%rdx), %eax addl %eax, %r9d movl 8(%rdx), %eax addl 24(%rdx), %eax addl %eax, %r8d movl 12(%rdx), %eax addl 28(%rdx), %eax addq $32, %rdx addl %eax, %edi cmpq %rdx, %rcx ja .L179 .L178: leaq 28(%rcx), %rax cmpq %rdx, %rax jbe .L180 .L183: addl (%rdx), %esi addq $4, %rdx cmpq %rdx, %rax ja .L183 .L180: leal (%r8,%r9), %eax addl %edi, %eax addl %esi, %eax movl %eax, (%rbp) popq %rbx popq %rbp popq %r12 ret .L187: xorl %esi, %esi xorl %r9d, %r9d xorl %r8d, %r8d xorl %edi, %edi jmp .L178 unroll9x3_combine: pushq %r12 movq %rdi, %r12 pushq %rbp movq %rsi, %rbp pushq %rbx call vec_length movl %eax, %ebx movq %r12, %rdi movslq %ebx,%rbx call get_vec_start leaq -32(%rax,%rbx,4), %rcx movq %rax, %rdx cmpq %rcx, %rax jae .L200 xorl %esi, %esi xorl %r8d, %r8d xorl %edi, %edi .L192: movl (%rdx), %eax addl 12(%rdx), %eax addl 24(%rdx), %eax addl %eax, %esi movl 4(%rdx), %eax addl 16(%rdx), %eax addl 28(%rdx), %eax addl %eax, %r8d movl 8(%rdx), %eax addl 20(%rdx), %eax addl 32(%rdx), %eax addq $36, %rdx addl %eax, %edi cmpq %rdx, %rcx ja .L192 .L191: leaq 32(%rcx), %rax cmpq %rdx, 
%rax jbe .L193 .L196: addl (%rdx), %esi addq $4, %rdx cmpq %rdx, %rax ja .L196 .L193: leal (%rdi,%r8), %eax addl %esi, %eax movl %eax, (%rbp) popq %rbx popq %rbp popq %r12 ret .L200: xorl %esi, %esi xorl %r8d, %r8d xorl %edi, %edi jmp .L191 unroll8x2_combine: pushq %r12 movq %rdi, %r12 pushq %rbp movq %rsi, %rbp pushq %rbx call vec_length movl %eax, %ebx movq %r12, %rdi movslq %ebx,%rbx call get_vec_start leaq -28(%rax,%rbx,4), %rcx xorl %esi, %esi xorl %edi, %edi movq %rax, %rdx cmpq %rcx, %rax jae .L204 .L205: movl (%rdx), %eax addl 8(%rdx), %eax addl 16(%rdx), %eax addl 24(%rdx), %eax addl %eax, %esi movl 4(%rdx), %eax addl 12(%rdx), %eax addl 20(%rdx), %eax addl 28(%rdx), %eax addq $32, %rdx addl %eax, %edi cmpq %rdx, %rcx ja .L205 .L204: leaq 28(%rcx), %rax cmpq %rdx, %rax jbe .L206 .L209: addl (%rdx), %esi addq $4, %rdx cmpq %rdx, %rax ja .L209 .L206: leal (%rsi,%rdi), %eax movl %eax, (%rbp) popq %rbx popq %rbp popq %r12 ret unroll4x2as_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d shrl $31, %eax movq %rbx, %rdi leal (%rax,%r12), %ebp call get_vec_start sarl %ebp movq %rax, %rsi xorl %r8d, %r8d movslq %ebp,%rax xorl %edi, %edi testl %ebp, %ebp leaq (%rsi,%rax,4), %rcx jle .L216 xorl %edx, %edx xorl %eax, %eax .L217: addl $1, %edx addl (%rsi,%rax,4), %r8d addl (%rcx,%rax,4), %edi addq $1, %rax cmpl %ebp, %edx jne .L217 .L216: leal (%rbp,%rbp), %ecx cmpl %ecx, %r12d jle .L218 movslq %ecx,%rax subl %ecx, %r12d xorl %edx, %edx leaq (%rsi,%rax,4), %rax movl %r12d, %ecx .L220: addl $1, %edx addl (%rax), %edi addq $4, %rax cmpl %ecx, %edx jne .L220 .L218: leal (%rdi,%r8), %eax movl %eax, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unrollx2as_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d shrl $31, %eax movq %rbx, %rdi leal (%rax,%r12), %ebp call get_vec_start sarl %ebp movq 
%rax, %rsi xorl %r8d, %r8d movslq %ebp,%rax xorl %edi, %edi testl %ebp, %ebp leaq (%rsi,%rax,4), %rcx jle .L227 xorl %edx, %edx xorl %eax, %eax .L228: addl $1, %edx addl (%rsi,%rax,4), %r8d addl (%rcx,%rax,4), %edi addq $1, %rax cmpl %ebp, %edx jne .L228 .L227: leal (%rbp,%rbp), %ecx cmpl %ecx, %r12d jle .L229 movslq %ecx,%rax subl %ecx, %r12d xorl %edx, %edx leaq (%rsi,%rax,4), %rax movl %r12d, %ecx .L231: addl $1, %edx addl (%rax), %edi addq $4, %rax cmpl %ecx, %edx jne .L231 .L229: leal (%rdi,%r8), %eax movl %eax, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unroll10x10a_combine: pushq %r15 pushq %r14 pushq %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp movq %rsi, (%rsp) call vec_length movl %eax, %r15d movq %rbx, %rdi leal -9(%r15), %ebp call get_vec_start testl %ebp, %ebp movq %rax, %rdx jle .L246 xorl %edi, %edi xorl %r14d, %r14d xorl %r13d, %r13d xorl %r12d, %r12d xorl %ebx, %ebx xorl %r11d, %r11d xorl %r10d, %r10d xorl %r9d, %r9d xorl %r8d, %r8d xorl %esi, %esi xorl %ecx, %ecx .L239: addl $10, %ecx addl (%rax), %edi addl 4(%rax), %r14d addl 8(%rax), %r13d addl 12(%rax), %r12d addl 16(%rax), %ebx addl 20(%rax), %r11d addl 24(%rax), %r10d addl 28(%rax), %r9d addl 32(%rax), %r8d addl 36(%rax), %esi addq $40, %rax cmpl %ecx, %ebp jg .L239 .L238: cmpl %ecx, %r15d jle .L240 movslq %ecx,%rax subl %ecx, %r15d leaq (%rdx,%rax,4), %rax movl %r15d, %ecx xorl %edx, %edx .L242: addl $1, %edx addl (%rax), %edi addq $4, %rax cmpl %ecx, %edx jne .L242 .L240: leal (%r8,%rsi), %eax movq (%rsp), %rdx addl %r9d, %eax addl %r10d, %eax addl %r11d, %eax addl %ebx, %eax addl %r12d, %eax addl %r13d, %eax addl %r14d, %eax addl %edi, %eax movl %eax, (%rdx) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 popq %r14 popq %r15 ret .L246: xorl %edi, %edi xorl %ecx, %ecx xorl %r14d, %r14d xorl %r13d, %r13d xorl %r12d, %r12d xorl %ebx, %ebx xorl %r11d, %r11d xorl %r10d, %r10d xorl %r9d, %r9d xorl %r8d, %r8d xorl %esi, %esi jmp .L238 
unroll8x8a_combine: pushq %r14 movq %rsi, %r14 pushq %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx call vec_length movl %eax, %r13d movq %rbx, %rdi leal -7(%r13), %ebp call get_vec_start testl %ebp, %ebp movq %rax, %rdx jle .L258 xorl %esi, %esi xorl %r12d, %r12d xorl %ebx, %ebx xorl %r11d, %r11d xorl %r10d, %r10d xorl %r8d, %r8d xorl %edi, %edi xorl %r9d, %r9d xorl %ecx, %ecx .L251: addl $8, %ecx addl (%rax), %esi addl 4(%rax), %r12d addl 8(%rax), %ebx addl 12(%rax), %r11d addl 16(%rax), %r10d addl 20(%rax), %r8d addl 24(%rax), %edi addl 28(%rax), %r9d addq $32, %rax cmpl %ecx, %ebp jg .L251 .L250: cmpl %ecx, %r13d jle .L252 movslq %ecx,%rax subl %ecx, %r13d leaq (%rdx,%rax,4), %rax movl %r13d, %ecx xorl %edx, %edx .L254: addl $1, %edx addl (%rax), %esi addq $4, %rax cmpl %ecx, %edx jne .L254 .L252: leal (%r8,%rdi), %eax addl %r10d, %eax addl %r11d, %eax addl %ebx, %eax popq %rbx addl %r12d, %eax popq %rbp addl %r9d, %eax popq %r12 addl %esi, %eax popq %r13 movl %eax, (%r14) popq %r14 ret .L258: xorl %esi, %esi xorl %ecx, %ecx xorl %r12d, %r12d xorl %ebx, %ebx xorl %r11d, %r11d xorl %r10d, %r10d xorl %r8d, %r8d xorl %edi, %edi xorl %r9d, %r9d jmp .L250 unroll6x6a_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -5(%r12), %ebp call get_vec_start testl %ebp, %ebp movq %rax, %rdx jle .L270 xorl %esi, %esi xorl %r11d, %r11d xorl %r10d, %r10d xorl %r9d, %r9d xorl %r8d, %r8d xorl %edi, %edi xorl %ecx, %ecx .L263: addl $6, %ecx addl (%rax), %esi addl 4(%rax), %r11d addl 8(%rax), %r10d addl 12(%rax), %r9d addl 16(%rax), %r8d addl 20(%rax), %edi addq $24, %rax cmpl %ecx, %ebp jg .L263 .L262: cmpl %ecx, %r12d jle .L264 movslq %ecx,%rax subl %ecx, %r12d leaq (%rdx,%rax,4), %rax movl %r12d, %ecx xorl %edx, %edx .L266: addl $1, %edx addl (%rax), %esi addq $4, %rax cmpl %ecx, %edx jne .L266 .L264: leal (%r10,%r9), %eax addl %r11d, %eax addl %r8d, %eax addl %edi, %eax 
addl %esi, %eax movl %eax, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret .L270: xorl %esi, %esi xorl %ecx, %ecx xorl %r11d, %r11d xorl %r10d, %r10d xorl %r9d, %r9d xorl %r8d, %r8d xorl %edi, %edi jmp .L262 unroll12x12a_combine: pushq %r15 pushq %r14 pushq %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $24, %rsp movq %rsi, 8(%rsp) call vec_length movl %eax, %ebp movq %rbx, %rdi movl %eax, 16(%rsp) subl $11, %ebp call get_vec_start testl %ebp, %ebp movq %rax, (%rsp) jle .L282 movq (%rsp), %rax xorl %r8d, %r8d movl $0, 20(%rsp) xorl %r15d, %r15d xorl %r14d, %r14d xorl %r13d, %r13d xorl %r12d, %r12d xorl %ebx, %ebx xorl %r11d, %r11d xorl %r10d, %r10d xorl %r9d, %r9d xorl %edi, %edi xorl %esi, %esi xorl %ecx, %ecx .L275: addl $12, %ecx addl (%rax), %r8d addl 24(%rax), %ebx movl 4(%rax), %edx addl 28(%rax), %r11d addl 8(%rax), %r15d addl 32(%rax), %r10d addl 12(%rax), %r14d addl 36(%rax), %r9d addl 16(%rax), %r13d addl 40(%rax), %edi addl 20(%rax), %r12d addl 44(%rax), %esi addq $48, %rax addl %edx, 20(%rsp) cmpl %ecx, %ebp jg .L275 .L274: cmpl %ecx, 16(%rsp) jle .L276 movq (%rsp), %rbp movslq %ecx,%rax xorl %edx, %edx leaq (%rbp,%rax,4), %rax movl 16(%rsp), %ebp subl %ecx, %ebp movl %ebp, %ecx .L278: addl $1, %edx addl (%rax), %r8d addq $4, %rax cmpl %ecx, %edx jne .L278 .L276: leal (%rdi,%rsi), %eax movq 8(%rsp), %rdx addl %r9d, %eax addl %r10d, %eax addl %r11d, %eax addl %ebx, %eax addl %r12d, %eax addl %r13d, %eax addl %r14d, %eax addl %r15d, %eax addl 20(%rsp), %eax addl %r8d, %eax movl %eax, (%rdx) addq $24, %rsp popq %rbx popq %rbp popq %r12 popq %r13 popq %r14 popq %r15 ret .L282: xorl %r8d, %r8d xorl %ecx, %ecx xorl %r15d, %r15d xorl %r14d, %r14d xorl %r13d, %r13d xorl %r12d, %r12d xorl %ebx, %ebx xorl %r11d, %r11d xorl %r10d, %r10d xorl %r9d, %r9d xorl %edi, %edi xorl %esi, %esi movl $0, 20(%rsp) jmp .L274 unroll12x6a_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length 
movl %eax, %r12d movq %rbx, %rdi leal -11(%r12), %ebp call get_vec_start testl %ebp, %ebp movq %rax, %rbx jle .L294 movq %rax, %rdx xorl %esi, %esi xorl %r11d, %r11d xorl %r10d, %r10d xorl %r9d, %r9d xorl %r8d, %r8d xorl %edi, %edi xorl %ecx, %ecx .L287: movl (%rdx), %eax addl 24(%rdx), %eax addl $12, %ecx addl %eax, %esi movl 4(%rdx), %eax addl 28(%rdx), %eax addl %eax, %r11d movl 8(%rdx), %eax addl 32(%rdx), %eax addl %eax, %r10d movl 12(%rdx), %eax addl 36(%rdx), %eax addl %eax, %r9d movl 16(%rdx), %eax addl 40(%rdx), %eax addl %eax, %r8d movl 20(%rdx), %eax addl 44(%rdx), %eax addq $48, %rdx addl %eax, %edi cmpl %ecx, %ebp jg .L287 .L286: cmpl %ecx, %r12d jle .L288 movslq %ecx,%rax subl %ecx, %r12d xorl %edx, %edx leaq (%rbx,%rax,4), %rax movl %r12d, %ecx .L290: addl $1, %edx addl (%rax), %esi addq $4, %rax cmpl %ecx, %edx jne .L290 .L288: leal (%r8,%rdi), %eax addl %r9d, %eax addl %r10d, %eax addl %r11d, %eax addl %esi, %eax movl %eax, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret .L294: xorl %esi, %esi xorl %ecx, %ecx xorl %r11d, %r11d xorl %r10d, %r10d xorl %r9d, %r9d xorl %r8d, %r8d xorl %edi, %edi jmp .L286 unroll8x4a_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -7(%r12), %ebp call get_vec_start testl %ebp, %ebp movq %rax, %r10 jle .L306 movq %rax, %rdx xorl %esi, %esi xorl %r9d, %r9d xorl %r8d, %r8d xorl %edi, %edi xorl %ecx, %ecx .L299: movl (%rdx), %eax addl 16(%rdx), %eax addl $8, %ecx addl %eax, %esi movl 4(%rdx), %eax addl 20(%rdx), %eax addl %eax, %r9d movl 8(%rdx), %eax addl 24(%rdx), %eax addl %eax, %r8d movl 12(%rdx), %eax addl 28(%rdx), %eax addq $32, %rdx addl %eax, %edi cmpl %ecx, %ebp jg .L299 .L298: cmpl %ecx, %r12d jle .L300 movslq %ecx,%rax subl %ecx, %r12d xorl %edx, %edx leaq (%r10,%rax,4), %rax movl %r12d, %ecx .L302: addl $1, %edx addl (%rax), %esi addq $4, %rax cmpl %ecx, %edx jne .L302 .L300: leal (%r8,%rdi), 
%eax addl %r9d, %eax addl %esi, %eax movl %eax, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret .L306: xorl %esi, %esi xorl %ecx, %ecx xorl %r9d, %r9d xorl %r8d, %r8d xorl %edi, %edi jmp .L298 unroll4x4a_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -3(%r12), %ebp call get_vec_start testl %ebp, %ebp movq %rax, %rdx jle .L318 xorl %esi, %esi xorl %r9d, %r9d xorl %r8d, %r8d xorl %edi, %edi xorl %ecx, %ecx .L311: addl $4, %ecx addl (%rax), %esi addl 4(%rax), %r9d addl 8(%rax), %r8d addl 12(%rax), %edi addq $16, %rax cmpl %ecx, %ebp jg .L311 .L310: cmpl %ecx, %r12d jle .L312 movslq %ecx,%rax subl %ecx, %r12d leaq (%rdx,%rax,4), %rax movl %r12d, %ecx xorl %edx, %edx .L314: addl $1, %edx addl (%rax), %esi addq $4, %rax cmpl %ecx, %edx jne .L314 .L312: leal (%r8,%r9), %eax addl %edi, %eax addl %esi, %eax movl %eax, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret .L318: xorl %esi, %esi xorl %ecx, %ecx xorl %r9d, %r9d xorl %r8d, %r8d xorl %edi, %edi jmp .L310 unroll3x3a_combine: pushq %r12 movq %rsi, %r12 pushq %rbp pushq %rbx movq %rdi, %rbx call vec_length movq %rbx, %rdi movl %eax, %ebp call get_vec_start movq %rax, %r10 leal -2(%rbp), %eax testl %eax, %eax jle .L330 leal -3(%rbp), %edx movl $-1431655765, %eax movq %r10, %rcx xorl %edi, %edi xorl %r9d, %r9d xorl %r8d, %r8d mull %edx xorl %esi, %esi shrl %edx leal 3(%rdx,%rdx,2), %eax .L323: addl $3, %esi addl (%rcx), %edi addl 4(%rcx), %r9d addl 8(%rcx), %r8d addq $12, %rcx cmpl %eax, %esi jne .L323 .L322: cmpl %esi, %ebp jle .L324 movslq %esi,%rax movl %ebp, %edx xorl %ecx, %ecx leaq (%r10,%rax,4), %rax subl %esi, %edx .L326: addl $1, %ecx addl (%rax), %edi addq $4, %rax cmpl %edx, %ecx jne .L326 .L324: leal (%r8,%r9), %eax popq %rbx addl %edi, %eax popq %rbp movl %eax, (%r12) popq %r12 ret .L330: xorl %edi, %edi xorl %esi, %esi xorl %r9d, %r9d xorl %r8d, %r8d jmp .L322 
unroll8x2a_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -7(%r12), %ebp call get_vec_start testl %ebp, %ebp movq %rax, %r8 jle .L342 movq %rax, %rdx xorl %esi, %esi xorl %edi, %edi xorl %ecx, %ecx .L335: movl (%rdx), %eax addl 8(%rdx), %eax addl $8, %ecx addl 16(%rdx), %eax addl 24(%rdx), %eax addl %eax, %esi movl 4(%rdx), %eax addl 12(%rdx), %eax addl 20(%rdx), %eax addl 28(%rdx), %eax addq $32, %rdx addl %eax, %edi cmpl %ecx, %ebp jg .L335 .L334: cmpl %ecx, %r12d jle .L336 movslq %ecx,%rax subl %ecx, %r12d xorl %edx, %edx leaq (%r8,%rax,4), %rax movl %r12d, %ecx .L338: addl $1, %edx addl (%rax), %esi addq $4, %rax cmpl %ecx, %edx jne .L338 .L336: leal (%rsi,%rdi), %eax movl %eax, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret .L342: xorl %esi, %esi xorl %ecx, %ecx xorl %edi, %edi jmp .L334 unroll4x2a_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -3(%r12), %ebp call get_vec_start testl %ebp, %ebp movq %rax, %r8 jle .L354 movq %rax, %rdx xorl %esi, %esi xorl %edi, %edi xorl %ecx, %ecx .L347: movl (%rdx), %eax addl 8(%rdx), %eax addl $4, %ecx addl %eax, %esi movl 4(%rdx), %eax addl 12(%rdx), %eax addq $16, %rdx addl %eax, %edi cmpl %ecx, %ebp jg .L347 .L346: cmpl %ecx, %r12d jle .L348 movslq %ecx,%rax subl %ecx, %r12d xorl %edx, %edx leaq (%r8,%rax,4), %rax movl %r12d, %ecx .L350: addl $1, %edx addl (%rax), %esi addq $4, %rax cmpl %ecx, %edx jne .L350 .L348: leal (%rsi,%rdi), %eax movl %eax, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret .L354: xorl %esi, %esi xorl %ecx, %ecx xorl %edi, %edi jmp .L346 combine6: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -1(%r12), %ebp call get_vec_start testl %ebp, %ebp movq %rax, %rdx 
jle .L366 xorl %esi, %esi xorl %edi, %edi xorl %ecx, %ecx .L359: addl $2, %ecx addl (%rax), %esi addl 4(%rax), %edi addq $8, %rax cmpl %ecx, %ebp jg .L359 .L358: cmpl %ecx, %r12d jle .L360 movslq %ecx,%rax subl %ecx, %r12d leaq (%rdx,%rax,4), %rax movl %r12d, %ecx xorl %edx, %edx .L362: addl $1, %edx addl (%rax), %esi addq $4, %rax cmpl %ecx, %edx jne .L362 .L360: leal (%rsi,%rdi), %eax movl %eax, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret .L366: xorl %esi, %esi xorl %ecx, %ecx xorl %edi, %edi jmp .L358 unroll16_combine: pushq %r12 movq %rdi, %r12 pushq %rbp movq %rsi, %rbp pushq %rbx call vec_length movq %r12, %rdi movl %eax, %ebx call get_vec_start movl %ebx, %edx movq %rax, %rcx xorl %esi, %esi sarl $31, %edx shrl $28, %edx leal (%rbx,%rdx), %eax movslq %ebx,%rbx andl $15, %eax subl %edx, %eax leaq (%rcx,%rbx,4), %rdx cltq leaq 0(,%rax,4), %rdi subq %rdi, %rdx cmpq %rdx, %rcx jae .L370 .L371: movl (%rcx), %eax addl 4(%rcx), %eax addl 8(%rcx), %eax addl 12(%rcx), %eax addl 16(%rcx), %eax addl 20(%rcx), %eax addl 24(%rcx), %eax addl 28(%rcx), %eax addl 32(%rcx), %eax addl 36(%rcx), %eax addl 40(%rcx), %eax addl 44(%rcx), %eax addl 48(%rcx), %eax addl 52(%rcx), %eax addl 56(%rcx), %eax addl 60(%rcx), %eax addq $64, %rcx addl %eax, %esi cmpq %rcx, %rdx ja .L371 .L370: leaq (%rdi,%rdx), %rax cmpq %rcx, %rax jbe .L372 .L375: addl (%rcx), %esi addq $4, %rcx cmpq %rcx, %rax ja .L375 .L372: movl %esi, (%rbp) popq %rbx popq %rbp popq %r12 ret unroll8_combine: pushq %r12 movq %rdi, %r12 pushq %rbp movq %rsi, %rbp pushq %rbx call vec_length movq %r12, %rdi movl %eax, %ebx call get_vec_start movl %ebx, %edx movq %rax, %rcx xorl %esi, %esi sarl $31, %edx shrl $29, %edx leal (%rbx,%rdx), %eax movslq %ebx,%rbx andl $7, %eax subl %edx, %eax leaq (%rcx,%rbx,4), %rdx cltq leaq 0(,%rax,4), %rdi subq %rdi, %rdx cmpq %rdx, %rcx jae .L382 .L383: movl (%rcx), %eax addl 4(%rcx), %eax addl 8(%rcx), %eax addl 12(%rcx), %eax addl 16(%rcx), %eax addl 20(%rcx), %eax 
# --- continuation of unroll8_combine (setup precedes this chunk
#     boundary: rcx=cursor, rdx=end of unrolled region, rdi=tail bytes,
#     esi=sum, rbp=dest) ---
        addl    24(%rcx), %eax
        addl    28(%rcx), %eax
        addq    $32, %rcx
        addl    %eax, %esi              # sum += 8-element partial
        cmpq    %rcx, %rdx
        ja      .L383
.L382:                                  # scalar tail
        leaq    (%rdi,%rdx), %rax       # rax = true end pointer
        cmpq    %rcx, %rax
        jbe     .L384
.L387:
        addl    (%rcx), %esi
        addq    $4, %rcx
        cmpq    %rcx, %rax
        ja      .L387
.L384:
        movl    %esi, (%rbp)            # *dest = sum
        popq    %rbx
        popq    %rbp
        popq    %r12
        ret

#-----------------------------------------------------------------------
# unroll4_combine(vec_ptr v, int *dest)
# 4-way unrolled sum; unrolled region ends 12 bytes (3 ints) before the
# last element so a full 4-element group always fits.
# Regs: r12=v, rbp=dest, rdx=cursor, rcx=unrolled-region limit, esi=sum.
#-----------------------------------------------------------------------
unroll4_combine:
        pushq   %r12
        movq    %rdi, %r12              # r12 = v
        pushq   %rbp
        movq    %rsi, %rbp              # rbp = dest
        pushq   %rbx
        call    vec_length
        movl    %eax, %ebx              # ebx = length
        movq    %r12, %rdi
        movslq  %ebx, %rbx
        call    get_vec_start
        leaq    -12(%rax,%rbx,4), %rcx  # rcx = end - 3 elements
        xorl    %esi, %esi              # sum = 0
        movq    %rax, %rdx              # rdx = cursor
        cmpq    %rcx, %rax
        jae     .L394
.L395:                                  # 4 elements/iteration
        movl    (%rdx), %eax
        addl    4(%rdx), %eax
        addl    8(%rdx), %eax
        addl    12(%rdx), %eax
        addq    $16, %rdx
        addl    %eax, %esi
        cmpq    %rdx, %rcx
        ja      .L395
.L394:                                  # scalar tail up to true end
        leaq    12(%rcx), %rax          # rax = true end pointer
        cmpq    %rdx, %rax
        jbe     .L396
.L399:
        addl    (%rdx), %esi
        addq    $4, %rdx
        cmpq    %rdx, %rax
        ja      .L399
.L396:
        movl    %esi, (%rbp)            # *dest = sum
        popq    %rbx
        popq    %rbp
        popq    %r12
        ret

#-----------------------------------------------------------------------
# unroll3_combine(vec_ptr v, int *dest)
# 3-way unrolled sum; same layout as unroll4_combine with an
# 8-byte (2-int) end margin.
#-----------------------------------------------------------------------
unroll3_combine:
        pushq   %r12
        movq    %rdi, %r12              # r12 = v
        pushq   %rbp
        movq    %rsi, %rbp              # rbp = dest
        pushq   %rbx
        call    vec_length
        movl    %eax, %ebx              # ebx = length
        movq    %r12, %rdi
        movslq  %ebx, %rbx
        call    get_vec_start
        leaq    -8(%rax,%rbx,4), %rcx   # rcx = end - 2 elements
        xorl    %esi, %esi              # sum = 0
        movq    %rax, %rdx              # rdx = cursor
        cmpq    %rcx, %rax
        jae     .L406
.L407:                                  # 3 elements/iteration
        movl    (%rdx), %eax
        addl    4(%rdx), %eax
        addl    8(%rdx), %eax
        addq    $12, %rdx
        addl    %eax, %esi
        cmpq    %rdx, %rcx
        ja      .L407
.L406:                                  # scalar tail
        leaq    8(%rcx), %rax           # rax = true end pointer
        cmpq    %rdx, %rax
        jbe     .L408
.L411:
        addl    (%rdx), %esi
        addq    $4, %rdx
        cmpq    %rdx, %rax
        ja      .L411
.L408:
        movl    %esi, (%rbp)            # *dest = sum
        popq    %rbx
        popq    %rbp
        popq    %r12
        ret

#-----------------------------------------------------------------------
# unroll2_combine(vec_ptr v, int *dest)
# 2-way unrolled sum using the signed length%2 computation
# (shrl $31 bias) like unroll8/unroll16_combine.
#-----------------------------------------------------------------------
unroll2_combine:
        pushq   %r12
        movq    %rdi, %r12              # r12 = v
        pushq   %rbp
        movq    %rsi, %rbp              # rbp = dest
        pushq   %rbx
        call    vec_length
        movq    %r12, %rdi
        movl    %eax, %ebx              # ebx = length
        call    get_vec_start
        movl    %ebx, %edx
        movq    %rax, %rcx              # rcx = data cursor
        xorl    %esi, %esi              # sum = 0
        shrl    $31, %edx               # bias: 1 if length<0, else 0
        leal    (%rbx,%rdx), %eax
        movslq  %ebx, %rbx
        andl    $1, %eax
        subl    %edx, %eax              # eax = length % 2
        leaq    (%rcx,%rbx,4), %rdx     # end of data
        cltq
        leaq    0(,%rax,4), %rdi        # tail bytes
        subq    %rdi, %rdx              # end of unrolled region
        cmpq    %rdx, %rcx
        jae     .L418
.L419:                                  # 2 elements/iteration
        movl    (%rcx), %eax
        addl    4(%rcx), %eax
        addq    $8, %rcx
        addl    %eax, %esi
        cmpq    %rcx, %rdx
        ja      .L419
.L418:                                  # scalar tail
        leaq    (%rdi,%rdx), %rax       # rax = true end pointer
        cmpq    %rcx, %rax
        jbe     .L420
.L423:
        addl    (%rcx), %esi
        addq    $4, %rcx
        cmpq    %rcx, %rax
        ja      .L423
.L420:
        movl    %esi, (%rbp)            # *dest = sum
        popq    %rbx
        popq    %rbp
        popq    %r12
        ret

#-----------------------------------------------------------------------
# unroll16a_combine(vec_ptr v, int *dest)
# 16-way unrolled sum, single accumulator, index-based bounds
# (ebp = length-15 limit), scalar cleanup loop for the tail.
#-----------------------------------------------------------------------
unroll16a_combine:
        pushq   %r13
        movq    %rsi, %r13              # r13 = dest
        pushq   %r12
        pushq   %rbp
        pushq   %rbx
        movq    %rdi, %rbx              # rbx = v
        subq    $8, %rsp                # keep rsp 16-aligned across calls
        call    vec_length
        movl    %eax, %r12d             # r12d = length
        movq    %rbx, %rdi
        leal    -15(%r12), %ebp         # limit = length - 15
        call    get_vec_start
        xorl    %esi, %esi              # sum = 0
        xorl    %ecx, %ecx              # i = 0
        testl   %ebp, %ebp
        movq    %rax, %rdi              # rdi = data base
        jle     .L430
        movq    %rax, %rdx              # rdx = cursor
.L431:                                  # 16 elements/iteration
        movl    (%rdx), %eax
        addl    4(%rdx), %eax
        addl    $16, %ecx
        addl    8(%rdx), %eax
        addl    12(%rdx), %eax
        addl    16(%rdx), %eax
        addl    20(%rdx), %eax
        addl    24(%rdx), %eax
        addl    28(%rdx), %eax
        addl    32(%rdx), %eax
        addl    36(%rdx), %eax
        addl    40(%rdx), %eax
        addl    44(%rdx), %eax
        addl    48(%rdx), %eax
        addl    52(%rdx), %eax
        addl    56(%rdx), %eax
        addl    60(%rdx), %eax
        addq    $64, %rdx
        addl    %eax, %esi
        cmpl    %ecx, %ebp
        jg      .L431
.L430:                                  # scalar cleanup
        cmpl    %ecx, %r12d
        jle     .L432
        movslq  %ecx, %rax
        subl    %ecx, %r12d             # remaining count
        xorl    %edx, %edx
        leaq    (%rdi,%rax,4), %rax     # &data[i]
        movl    %r12d, %ecx
.L434:
        addl    $1, %edx
        addl    (%rax), %esi
        addq    $4, %rax
        cmpl    %ecx, %edx
        jne     .L434
.L432:
        movl    %esi, (%r13)            # *dest = sum
        addq    $8, %rsp
        popq    %rbx
        popq    %rbp
        popq    %r12
        popq    %r13
        ret

#-----------------------------------------------------------------------
# unroll8a_combine(vec_ptr v, int *dest)
# As unroll16a_combine but 8-way unrolled (limit = length-7).
#-----------------------------------------------------------------------
unroll8a_combine:
        pushq   %r13
        movq    %rsi, %r13              # r13 = dest
        pushq   %r12
        pushq   %rbp
        pushq   %rbx
        movq    %rdi, %rbx              # rbx = v
        subq    $8, %rsp
        call    vec_length
        movl    %eax, %r12d             # r12d = length
        movq    %rbx, %rdi
        leal    -7(%r12), %ebp          # limit = length - 7
        call    get_vec_start
        xorl    %esi, %esi              # sum = 0
        xorl    %ecx, %ecx              # i = 0
        testl   %ebp, %ebp
        movq    %rax, %rdi              # rdi = data base
        jle     .L441
        movq    %rax, %rdx
.L442:                                  # 8 elements/iteration
        movl    (%rdx), %eax
        addl    4(%rdx), %eax
        addl    $8, %ecx
        addl    8(%rdx), %eax
        addl    12(%rdx), %eax
        addl    16(%rdx), %eax
        addl    20(%rdx), %eax
        addl    24(%rdx), %eax
        addl    28(%rdx), %eax
        addq    $32, %rdx
        addl    %eax, %esi
        cmpl    %ecx, %ebp
        jg      .L442
.L441:                                  # scalar cleanup
        cmpl    %ecx, %r12d
        jle     .L443
        movslq  %ecx, %rax
        subl    %ecx, %r12d
        xorl    %edx, %edx
        leaq    (%rdi,%rax,4), %rax
        movl    %r12d, %ecx
.L445:
        addl    $1, %edx
        addl    (%rax), %esi
        addq    $4, %rax
        cmpl    %ecx, %edx
        jne     .L445
.L443:
        movl    %esi, (%r13)            # *dest = sum
        addq    $8, %rsp
        popq    %rbx
        popq    %rbp
        popq    %r12
        popq    %r13
        ret

#-----------------------------------------------------------------------
# unroll4a_combine(vec_ptr v, int *dest)
# As unroll8a_combine but 4-way unrolled (limit = length-3).
# Body continues past this chunk boundary.
#-----------------------------------------------------------------------
unroll4a_combine:
        pushq   %r13
        movq    %rsi, %r13              # r13 = dest
        pushq   %r12
        pushq   %rbp
        pushq   %rbx
        movq    %rdi, %rbx              # rbx = v
        subq    $8, %rsp
        call    vec_length
        movl    %eax, %r12d             # r12d = length
        movq    %rbx, %rdi
        leal    -3(%r12), %ebp          # limit = length - 3
# --- continuation of unroll4a_combine (entry precedes this chunk
#     boundary: rbx=v, r13=dest, r12d=length, ebp=length-3) ---
        call    get_vec_start
        xorl    %esi, %esi              # sum = 0
        xorl    %ecx, %ecx              # i = 0
        testl   %ebp, %ebp
        movq    %rax, %rdi              # rdi = data base
        jle     .L452
        movq    %rax, %rdx              # rdx = cursor
.L453:                                  # 4 elements/iteration
        movl    (%rdx), %eax
        addl    4(%rdx), %eax
        addl    $4, %ecx
        addl    8(%rdx), %eax
        addl    12(%rdx), %eax
        addq    $16, %rdx
        addl    %eax, %esi
        cmpl    %ecx, %ebp
        jg      .L453
.L452:                                  # scalar cleanup
        cmpl    %ecx, %r12d
        jle     .L454
        movslq  %ecx, %rax
        subl    %ecx, %r12d             # remaining count
        xorl    %edx, %edx
        leaq    (%rdi,%rax,4), %rax     # &data[i]
        movl    %r12d, %ecx
.L456:
        addl    $1, %edx
        addl    (%rax), %esi
        addq    $4, %rax
        cmpl    %ecx, %edx
        jne     .L456
.L454:
        movl    %esi, (%r13)            # *dest = sum
        addq    $8, %rsp
        popq    %rbx
        popq    %rbp
        popq    %r12
        popq    %r13
        ret

#-----------------------------------------------------------------------
# unroll3aw_combine(vec_ptr v, int *dest)
# 3-way unrolled sum. The unrolled trip count is rounded to a multiple
# of 3 via reciprocal multiplication (0xAAAAAAAB = unsigned divide-by-3
# magic constant), then a scalar loop finishes the tail.
# Regs: rbx=v, r12=dest, ebp=length, r8=data base, esi=index,
#       edi=accumulator.
#-----------------------------------------------------------------------
unroll3aw_combine:
        pushq   %r12
        movq    %rsi, %r12              # r12 = dest
        pushq   %rbp
        pushq   %rbx
        movq    %rdi, %rbx              # rbx = v
        call    vec_length
        movq    %rbx, %rdi
        movl    %eax, %ebp              # ebp = length
        call    get_vec_start
        movq    %rax, %r8               # r8 = data base
        leal    -1(%rbp), %eax
        xorl    %edi, %edi              # sum = 0
        xorl    %esi, %esi              # i = 0
        testl   %eax, %eax
        jle     .L463                   # length <= 1: straight to cleanup
        leal    -2(%rbp), %edx
        movl    $-1431655765, %eax      # 0xAAAAAAAB: reciprocal for /3
        movq    %r8, %rcx               # rcx = cursor
        mull    %edx                    # edx = high32((len-2)*magic)
        shrl    %edx                    # edx = (len-2) / 3
        leal    3(%rdx,%rdx,2), %edx    # limit = 3*((len-2)/3) + 3
.L464:                                  # 3 elements/iteration
        movl    (%rcx), %eax
        addl    4(%rcx), %eax
        addl    $3, %esi
        addl    8(%rcx), %eax
        addq    $12, %rcx
        addl    %eax, %edi
        cmpl    %edx, %esi
        jne     .L464
.L463:                                  # scalar cleanup
        cmpl    %esi, %ebp
        jle     .L465
        movslq  %esi, %rax
        movl    %ebp, %edx
        xorl    %ecx, %ecx
        leaq    (%r8,%rax,4), %rax      # &data[i]
        subl    %esi, %edx              # remaining count
.L467:
        addl    $1, %ecx
        addl    (%rax), %edi
        addq    $4, %rax
        cmpl    %edx, %ecx
        jne     .L467
.L465:
        popq    %rbx
        popq    %rbp
        movl    %edi, (%r12)            # *dest = sum
        popq    %r12
        ret

#-----------------------------------------------------------------------
# combine5p(vec_ptr v, int *dest)
# 3-way unrolled sum with pure pointer arithmetic: unrolled loop runs
# while cursor < end-2 elements, then single-step loop to true end.
# Regs: rbx=v, r12=dest, rbp=cursor, rdx=end, rsi=end-8, ecx=sum.
#-----------------------------------------------------------------------
combine5p:
        pushq   %r12
        movq    %rsi, %r12              # r12 = dest
        pushq   %rbp
        pushq   %rbx
        movq    %rdi, %rbx              # rbx = v
        call    get_vec_start
        movq    %rbx, %rdi
        movq    %rax, %rbp              # rbp = cursor (data base)
        call    vec_length
        cltq
        xorl    %ecx, %ecx              # sum = 0
        leaq    (%rbp,%rax,4), %rdx     # rdx = end of data
        leaq    -8(%rdx), %rsi          # rsi = end - 2 elements
        cmpq    %rsi, %rbp
        jae     .L483
.L475:                                  # 3 elements/iteration
        movl    (%rbp), %eax
        addl    4(%rbp), %eax
        addl    8(%rbp), %eax
        addq    $12, %rbp
        addl    %eax, %ecx
        cmpq    %rbp, %rsi
        ja      .L475
        cmpq    %rbp, %rdx
        jbe     .L484
.L479:                                  # scalar tail
        addl    (%rbp), %ecx
        addq    $4, %rbp
.L483:
        cmpq    %rbp, %rdx
        ja      .L479
.L484:
        popq    %rbx
        popq    %rbp
        movl    %ecx, (%r12)            # *dest = sum
        popq    %r12
        ret

#-----------------------------------------------------------------------
# combine5(vec_ptr v, int *dest)
# 3-way unrolled sum, index-based, reciprocal-multiply (/3) trip-count
# rounding like unroll3aw_combine but with a (length-2) guard.
#-----------------------------------------------------------------------
combine5:
        pushq   %r12
        movq    %rsi, %r12              # r12 = dest
        pushq   %rbp
        pushq   %rbx
        movq    %rdi, %rbx              # rbx = v
        call    vec_length
        movq    %rbx, %rdi
        movl    %eax, %ebp              # ebp = length
        call    get_vec_start
        movq    %rax, %r8               # r8 = data base
        leal    -2(%rbp), %eax
        xorl    %edi, %edi              # sum = 0
        xorl    %esi, %esi              # i = 0
        testl   %eax, %eax
        jle     .L488                   # length <= 2: straight to cleanup
        leal    -3(%rbp), %edx
        movl    $-1431655765, %eax      # 0xAAAAAAAB: reciprocal for /3
        movq    %r8, %rcx               # rcx = cursor
        mull    %edx                    # edx = high32((len-3)*magic)
        shrl    %edx                    # edx = (len-3) / 3
        leal    3(%rdx,%rdx,2), %edx    # limit = 3*((len-3)/3) + 3
.L489:                                  # 3 elements/iteration
        movl    (%rcx), %eax
        addl    4(%rcx), %eax
        addl    $3, %esi
        addl    8(%rcx), %eax
        addq    $12, %rcx
        addl    %eax, %edi
        cmpl    %edx, %esi
        jne     .L489
.L488:                                  # scalar cleanup
        cmpl    %esi, %ebp
        jle     .L490
        movslq  %esi, %rax
        movl    %ebp, %edx
        xorl    %ecx, %ecx
        leaq    (%r8,%rax,4), %rax      # &data[i]
        subl    %esi, %edx              # remaining count
.L492:
        addl    $1, %ecx
        addl    (%rax), %edi
        addq    $4, %rax
        cmpl    %edx, %ecx
        jne     .L492
.L490:
        popq    %rbx
        popq    %rbp
        movl    %edi, (%r12)            # *dest = sum
        popq    %r12
        ret

#-----------------------------------------------------------------------
# unroll2a_combine(vec_ptr v, int *dest)
# 2-way unrolled sum, single accumulator, index-based bounds
# (limit = length-1), scalar cleanup for the odd element.
#-----------------------------------------------------------------------
unroll2a_combine:
        pushq   %r13
        movq    %rsi, %r13              # r13 = dest
        pushq   %r12
        pushq   %rbp
        pushq   %rbx
        movq    %rdi, %rbx              # rbx = v
        subq    $8, %rsp                # keep rsp 16-aligned across calls
        call    vec_length
        movl    %eax, %r12d             # r12d = length
        movq    %rbx, %rdi
        leal    -1(%r12), %ebp          # limit = length - 1
        call    get_vec_start
        xorl    %esi, %esi              # sum = 0
        xorl    %ecx, %ecx              # i = 0
        testl   %ebp, %ebp
        movq    %rax, %rdi              # rdi = data base
        jle     .L499
        movq    %rax, %rdx              # rdx = cursor
.L500:                                  # 2 elements/iteration
        movl    (%rdx), %eax
        addl    4(%rdx), %eax
        addl    $2, %ecx
        addq    $8, %rdx
        addl    %eax, %esi
        cmpl    %ecx, %ebp
        jg      .L500
.L499:                                  # scalar cleanup
        cmpl    %ecx, %r12d
        jle     .L501
        movslq  %ecx, %rax
        subl    %ecx, %r12d
        xorl    %edx, %edx
        leaq    (%rdi,%rax,4), %rax
        movl    %r12d, %ecx
.L503:
        addl    $1, %edx
        addl    (%rax), %esi
        addq    $4, %rax
        cmpl    %ecx, %edx
        jne     .L503
.L501:
        movl    %esi, (%r13)            # *dest = sum
        addq    $8, %rsp
        popq    %rbx
        popq    %rbp
        popq    %r12
        popq    %r13
        ret

#-----------------------------------------------------------------------
# combine4p(vec_ptr v, int *dest)
# Straight pointer-walk sum, no unrolling: one load+add per element.
#-----------------------------------------------------------------------
combine4p:
        pushq   %r12
        movq    %rdi, %r12              # r12 = v
        pushq   %rbp
        movq    %rsi, %rbp              # rbp = dest
        pushq   %rbx
        call    vec_length
        movl    %eax, %ebx              # ebx = length
        movq    %r12, %rdi
        movslq  %ebx, %rbx
        call    get_vec_start
        leaq    (%rax,%rbx,4), %rdx     # rdx = end of data
        xorl    %ecx, %ecx              # sum = 0
        cmpq    %rdx, %rax
        jae     .L510                   # empty vector
.L511:
        addl    (%rax), %ecx
        addq    $4, %rax
        cmpq    %rax, %rdx
        ja      .L511
.L510:
        movl    %ecx, (%rbp)            # *dest = sum
        popq    %rbx
        popq    %rbp
        popq    %r12
        ret

#-----------------------------------------------------------------------
# combine4(vec_ptr v, int *dest)
# Index-counted sum over the raw data array; result accumulated in a
# register and stored once at the end.
#-----------------------------------------------------------------------
combine4:
        pushq   %r12
        movq    %rsi, %r12              # r12 = dest
        pushq   %rbp
        pushq   %rbx
        movq    %rdi, %rbx              # rbx = v
        call    vec_length
        movq    %rbx, %rdi
        movl    %eax, %ebp              # ebp = length
        call    get_vec_start
        xorl    %ecx, %ecx              # sum = 0
        testl   %ebp, %ebp
        jle     .L518                   # empty vector
        xorl    %edx, %edx              # i = 0
.L519:
        addl    $1, %edx
        addl    (%rax), %ecx
        addq    $4, %rax
        cmpl    %ebp, %edx
        jne     .L519
.L518:
        popq    %rbx
        popq    %rbp
        movl    %ecx, (%r12)            # *dest = sum
        popq    %r12
        ret

#-----------------------------------------------------------------------
# combine3(vec_ptr v, int *dest)
# Direct data access but accumulates into *dest on every iteration
# (memory-bound variant: note the addl %eax, (%rbp) in the loop).
#-----------------------------------------------------------------------
combine3:
        pushq   %r12
        pushq   %rbp
        movq    %rsi, %rbp              # rbp = dest
        pushq   %rbx
        movq    %rdi, %rbx              # rbx = v
        call    vec_length
        movq    %rbx, %rdi
        movl    %eax, %r12d             # r12d = length
        call    get_vec_start
        testl   %r12d, %r12d
        movl    $0, (%rbp)              # *dest = 0
        jle     .L526
        movq    %rax, %rdx              # rdx = cursor
        xorl    %ecx, %ecx              # i = 0
.L525:
        addl    $1, %ecx
        movl    (%rdx), %eax
        addq    $4, %rdx
        addl    %eax, (%rbp)            # *dest += d[i] (memory read-mod-write)
        cmpl    %r12d, %ecx
        jne     .L525
.L526:
        popq    %rbx
        popq    %rbp
        popq    %r12
        ret

#-----------------------------------------------------------------------
# combine2(vec_ptr v, int *dest)
# Calls get_vec_element once per index (bounds-checked access) with a
# stack temp at 12(%rsp); vec_length hoisted out of the loop.
#-----------------------------------------------------------------------
combine2:
        pushq   %r14
        pushq   %r13
        movq    %rdi, %r13              # r13 = v
        pushq   %r12
        pushq   %rbp
        movq    %rsi, %rbp              # rbp = dest
        pushq   %rbx
        subq    $16, %rsp               # stack temp + alignment
        call    vec_length
        testl   %eax, %eax
        movl    %eax, %r12d             # r12d = length
        movl    $0, (%rbp)              # *dest = 0
        jle     .L532
        leaq    12(%rsp), %r14          # r14 = &val (element out-param)
        xorl    %ebx, %ebx              # i = 0
.L531:
        movl    %ebx, %esi
        movq    %r14, %rdx
        movq    %r13, %rdi
        addl    $1, %ebx
        call    get_vec_element         # get_vec_element(v, i, &val)
        movl    12(%rsp), %eax
        addl    %eax, (%rbp)            # *dest += val
        cmpl    %r12d, %ebx
        jne     .L531
.L532:
        addq    $16, %rsp
        popq    %rbx
        popq    %rbp
        popq    %r12
        popq    %r13
        popq    %r14
        ret

#-----------------------------------------------------------------------
# combine1(vec_ptr v, int *dest)
# Baseline: re-calls vec_length every iteration (loop condition at
# .L535) and uses get_vec_element for each access.
#-----------------------------------------------------------------------
combine1:
        pushq   %r13
        pushq   %r12
        movq    %rdi, %r12              # r12 = v
        pushq   %rbp
        movq    %rsi, %rbp              # rbp = dest
        pushq   %rbx
        xorl    %ebx, %ebx              # i = 0
        subq    $24, %rsp               # stack temp + alignment
        movl    $0, (%rsi)              # *dest = 0
        leaq    20(%rsp), %r13          # r13 = &val
        jmp     .L535                   # test condition first
.L536:
        movl    %ebx, %esi
        movq    %r13, %rdx
        movq    %r12, %rdi
        call    get_vec_element         # get_vec_element(v, i, &val)
        movl    20(%rsp), %eax
        addl    %eax, (%rbp)            # *dest += val
        addl    $1, %ebx
.L535:
        movq    %r12, %rdi
        call    vec_length              # length re-fetched every iteration
        cmpl    %eax, %ebx
        jl      .L536
        addq    $24, %rsp
        popq    %rbx
        popq    %rbp
        popq    %r12
        popq    %r13
        ret

# Descriptor labels referenced by register_combiners (see file head).
# NOTE(review): in this chunk the labels appear back-to-back with no
# data directives between them — the .string payloads are presumably
# elsewhere or were stripped by the paste; verify against the original
# object/source before relying on their contents.
combine1_descr:
combine2_descr:
combine3_descr:
combine4_descr:
combine4p_descr:
unroll2a_descr:
combine5_descr:
combine5p_descr:
unroll3aw_descr:
unroll4a_descr:
unroll8a_descr:
unroll16a_descr:
unroll2_descr:
unroll3_descr:
unroll4_descr:
unroll8_descr:
unroll16_descr:
combine6_descr:
unroll4x2a_descr:
unroll8x2a_descr:
unroll3x3a_descr:
unroll4x4a_descr:
unroll8x4a_descr:
unroll12x6a_descr:
unroll12x12a_descr:
unroll6x6a_descr:
unroll8x8a_descr:
unroll10x10a_descr:
unrollx2as_descr:
unroll4x2as_descr:
unroll8x2_descr:
unroll9x3_descr:
unroll8x4_descr:
unroll8x8_descr:
unroll2aa_descr:
unroll3aa_descr:
unroll4aa_descr:
unroll6aa_descr:
unroll8aa_descr:
unrollv1_descr:
unrollv2_descr:
unrollv4_descr:
unrollv8_descr:
unrollv12_descr:
unrollv2a_descr:
unrollv4a_descr:
unrollv8a_descr:
.Lframe1: