# combine1: multiply the elements of the vector whose handle arrives in %rdi
# into the float pointed to by %rsi, one element per iteration.
# Every pass re-queries vec_length, fetches one element through
# get_vec_element into a stack temporary, and updates *dest in memory.
# Register roles: %r12 = vector handle, %rbp = dest, %rbx = index i.
combine1:
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	subq	$16, %rsp		# room for the element temporary at 12(%rsp)
	movq	%rdi, %r12
	movq	%rsi, %rbp
	movl	$0x3f800000, (%rsi)	# *dest = 1.0f (IEEE-754 single bit pattern)
	movl	$0, %ebx		# i = 0
	jmp	.L2			# enter the loop at its test
.L3:
	leaq	12(%rsp), %rdx		# third argument: address of the temporary
	movq	%rbx, %rsi		# second argument: i
	movq	%r12, %rdi		# first argument: vector handle
	call	get_vec_element		# presumably writes element i to the temporary
	movss	0(%rbp), %xmm0		# reload *dest from memory
	mulss	12(%rsp), %xmm0		# multiply by the temporary
	movss	%xmm0, 0(%rbp)		# write the running product back
	addq	$1, %rbx
.L2:
	movq	%r12, %rdi
	call	vec_length		# length is recomputed on every pass
	cmpq	%rax, %rbx
	jl	.L3			# signed compare: continue while i < length
	addq	$16, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	ret

# combine2: same product as combine1, but vec_length is called once before
# the loop and its result is kept in %r12.
# Register roles: %r13 = vector handle, %rbp = dest, %r12 = length,
# %rbx = index i. The element temporary lives at 12(%rsp).
combine2:
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	subq	$24, %rsp		# temporary at 12(%rsp) plus alignment padding
	movq	%rdi, %r13
	movq	%rsi, %rbp
	call	vec_length
	movq	%rax, %r12		# length, hoisted out of the loop
	movl	$0x3f800000, 0(%rbp)	# *dest = 1.0f (IEEE-754 single bit pattern)
	testq	%rax, %rax
	jle	.L5			# length <= 0: nothing to multiply
	movl	$0, %ebx		# i = 0
.L7:
	leaq	12(%rsp), %rdx		# third argument: address of the temporary
	movq	%rbx, %rsi		# second argument: i
	movq	%r13, %rdi		# first argument: vector handle
	call	get_vec_element		# presumably writes element i to the temporary
	movss	0(%rbp), %xmm0		# reload *dest from memory
	mulss	12(%rsp), %xmm0
	movss	%xmm0, 0(%rbp)		# write the running product back
	addq	$1, %rbx
	cmpq	%r12, %rbx
	jne	.L7			# loop until i == length
.L5:
	addq	$24, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	ret

# combine4b: product of the vector elements accumulated in %xmm0 and stored
# to *dest (%rsi) once at the end. Element access is done inline with a
# bounds check against the first quadword of the vector object.
# Register roles: %rbx = vector pointer, %rbp = dest, %rax = length returned
# by vec_length, %rdx = index i.
# The code reads (%rbx) as a signed 64-bit bound and 8(%rbx) as the base
# address of 4-byte elements; NOTE(review): confirm against the vector
# struct layout, which is not part of this listing.
# .LC0 is defined outside this listing (presumably 1.0f -- TODO confirm).
combine4b:
	pushq	%rbp
	pushq	%rbx
	subq	$8, %rsp		# padding: keeps %rsp 16-byte aligned at the call
	movq	%rdi, %rbx
	movq	%rsi, %rbp
	call	vec_length
	testq	%rax, %rax
	jle	.L13			# length <= 0: result is the initial value
	movss	.LC0(%rip), %xmm0	# acc = initial value
	movl	$0, %edx		# i = 0
.L12:
	testq	%rdx, %rdx
	js	.L11			# bounds check: skip when i < 0
	cmpq	%rdx, (%rbx)
	jle	.L11			# bounds check: skip when bound <= i
	movq	8(%rbx), %rcx		# element base address, reloaded each pass
	mulss	(%rcx,%rdx,4), %xmm0	# acc *= element i
.L11:
	addq	$1, %rdx
	cmpq	%rax, %rdx
	jne	.L12			# loop until i == length
	jmp	.L10
.L13:
	movss	.LC0(%rip), %xmm0	# empty-vector path: acc = initial value
.L10:
	movss	%xmm0, 0(%rbp)		# *dest = acc
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	ret

# combine3: multiply the elements of the vector (handle in %rdi) into the
# float at *dest (%rsi), walking the element array directly with a pointer.
# The running product is kept in memory: *dest is loaded, multiplied and
# stored again on every iteration.
# Register roles: %rbx = vector handle, %rbp = dest, %r12 = length,
# %rax = element cursor, %rcx = one-past-the-end address.
combine3:
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	movq	%rsi, %rbp		# keep dest across the calls
	movq	%rdi, %rbx		# keep the vector handle across the calls
	call	vec_length
	movq	%rbx, %rdi
	movq	%rax, %r12		# length
	call	get_vec_start		# %rax = address of the first element
	movl	$0x3f800000, 0(%rbp)	# *dest = 1.0f (IEEE-754 single bit pattern)
	testq	%r12, %r12
	jle	.Lcombine3_done		# length <= 0: leave *dest at 1.0f
	leaq	(%rax,%r12,4), %rcx	# end = start + 4 * length
.Lcombine3_loop:
	movss	0(%rbp), %xmm0		# reload the running product
	mulss	(%rax), %xmm0		# multiply by the current element
	addq	$4, %rax		# advance the cursor by one float
	movss	%xmm0, 0(%rbp)		# store the product (flags untouched)
	cmpq	%rcx, %rax
	jne	.Lcombine3_loop		# until the cursor reaches end
.Lcombine3_done:
	popq	%rbx
	popq	%rbp
	popq	%r12
	ret

# combine3w: like combine3, but the running product stays in %xmm0 and is
# only written (never re-read) to *dest on each iteration.
# Register roles: %rbx = vector handle, %rbp = dest, %r12 = length,
# %rax = element cursor, %rcx = one-past-the-end address.
# .LC0 is defined outside this listing (presumably 1.0f -- TODO confirm).
combine3w:
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	movq	%rsi, %rbp		# keep dest across the calls
	movq	%rdi, %rbx		# keep the vector handle across the calls
	call	vec_length
	movq	%rbx, %rdi
	movq	%rax, %r12		# length
	call	get_vec_start		# %rax = address of the first element
	movl	$0x3f800000, 0(%rbp)	# *dest = 1.0f (IEEE-754 single bit pattern)
	testq	%r12, %r12
	jle	.Lcombine3w_done	# length <= 0: leave *dest at 1.0f
	movss	.LC0(%rip), %xmm0	# acc = initial value
	leaq	(%rax,%r12,4), %rcx	# end = start + 4 * length
.Lcombine3w_loop:
	mulss	(%rax), %xmm0		# acc *= current element
	addq	$4, %rax		# advance the cursor by one float
	movss	%xmm0, 0(%rbp)		# write-through of acc (flags untouched)
	cmpq	%rcx, %rax
	jne	.Lcombine3w_loop	# until the cursor reaches end
.Lcombine3w_done:
	popq	%rbx
	popq	%rbp
	popq	%r12
	ret

# combine4: product of the vector elements accumulated in %xmm0 and stored
# to *dest (%rsi) exactly once, after the loop.
# Register roles: %r12 = vector handle, %rbx = dest, %rbp = length,
# %rax = element cursor, %rcx = one-past-the-end address.
# .LC0 is defined outside this listing (presumably 1.0f -- TODO confirm).
combine4:
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	movq	%rsi, %rbx		# keep dest across the calls
	movq	%rdi, %r12		# keep the vector handle across the calls
	call	vec_length
	movq	%r12, %rdi
	movq	%rax, %rbp		# length
	call	get_vec_start		# %rax = address of the first element
	movss	.LC0(%rip), %xmm0	# acc = initial value, needed on both paths
	testq	%rbp, %rbp
	jle	.Lcombine4_store	# length <= 0: store the initial value
	leaq	(%rax,%rbp,4), %rcx	# end = start + 4 * length
.Lcombine4_loop:
	mulss	(%rax), %xmm0		# acc *= current element
	addq	$4, %rax		# advance the cursor by one float
	cmpq	%rcx, %rax
	jne	.Lcombine4_loop		# until the cursor reaches end
.Lcombine4_store:
	movss	%xmm0, (%rbx)		# *dest = acc
	popq	%rbx
	popq	%rbp
	popq	%r12
	ret

# combine4p: pointer-comparison variant of combine4. The loop runs while the
# element cursor is below the end address, using unsigned comparisons, and
# the product is stored to *dest (%rsi) once at the end.
# Register roles: %rbx = vector handle, %rbp = dest, %r12 = length,
# %rax = element cursor, %rcx = one-past-the-end address.
# .LC0 is defined outside this listing (presumably 1.0f -- TODO confirm).
combine4p:
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	movq	%rsi, %rbp		# keep dest across the calls
	movq	%rdi, %rbx		# keep the vector handle across the calls
	call	vec_length
	movq	%rbx, %rdi
	movq	%rax, %r12		# length
	call	get_vec_start		# %rax = address of the first element
	movss	.LC0(%rip), %xmm0	# acc = initial value, needed on both paths
	leaq	(%rax,%r12,4), %rcx	# end = start + 4 * length
	cmpq	%rcx, %rax
	jae	.Lcombine4p_store	# start >= end (unsigned): nothing to do
.Lcombine4p_loop:
	mulss	(%rax), %xmm0		# acc *= current element
	addq	$4, %rax		# advance the cursor by one float
	cmpq	%rcx, %rax
	jb	.Lcombine4p_loop	# continue while cursor < end (unsigned)
.Lcombine4p_store:
	movss	%xmm0, 0(%rbp)		# *dest = acc
	popq	%rbx
	popq	%rbp
	popq	%r12
	ret

# combine5: product of the vector elements, main loop unrolled by two with a
# single accumulator (%xmm0); the result is stored to *dest (%rsi).
# Register roles: %r12 = vector handle, %r13 = dest, %rbx = length,
# %rbp = limit (length - 1), %rax = element base, %rdx = index i.
# .LC0 is defined outside this listing (presumably 1.0f -- TODO confirm).
combine5:
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	subq	$8, %rsp		# padding: keeps %rsp 16-byte aligned at the calls
	movq	%rdi, %r12
	movq	%rsi, %r13
	call	vec_length
	movq	%rax, %rbx		# length
	leaq	-1(%rax), %rbp		# limit = length - 1
	movq	%r12, %rdi
	call	get_vec_start		# %rax = base address of the elements
	testq	%rbp, %rbp
	jle	.L38			# limit <= 0: skip the unrolled loop
	movss	.LC0(%rip), %xmm0	# acc = initial value
	movl	$0, %edx		# i = 0
.L35:
	mulss	(%rax,%rdx,4), %xmm0	# acc *= data[i]
	mulss	4(%rax,%rdx,4), %xmm0	# acc *= data[i+1]
	addq	$2, %rdx
	cmpq	%rdx, %rbp
	jg	.L35			# continue while limit > i
	jmp	.L34
.L38:
	movss	.LC0(%rip), %xmm0	# short-vector path: same initial state
	movl	$0, %edx
.L34:
	cmpq	%rdx, %rbx
	jle	.L36			# no leftover elements
.L37:
	mulss	(%rax,%rdx,4), %xmm0	# leftover elements, one at a time
	addq	$1, %rdx
	cmpq	%rbx, %rdx
	jne	.L37
.L36:
	movss	%xmm0, 0(%r13)		# *dest = acc
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	ret

# unroll3a_combine: product of the vector elements, main loop unrolled by
# three with a single accumulator (%xmm0); result stored to *dest (%rsi).
# Register roles: %r12 = vector handle, %r13 = dest, %rbx = length,
# %rbp = limit (length - 2), %rax = element base, %rdx = index i.
# .LC0 is defined outside this listing (presumably 1.0f -- TODO confirm).
unroll3a_combine:
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	subq	$8, %rsp		# padding: keeps %rsp 16-byte aligned at the calls
	movq	%rdi, %r12
	movq	%rsi, %r13
	call	vec_length
	movq	%rax, %rbx		# length
	leaq	-2(%rax), %rbp		# limit = length - 2
	movq	%r12, %rdi
	call	get_vec_start		# %rax = base address of the elements
	testq	%rbp, %rbp
	jle	.L45			# limit <= 0: skip the unrolled loop
	movss	.LC0(%rip), %xmm0	# acc = initial value
	movl	$0, %edx		# i = 0
.L42:
	mulss	(%rax,%rdx,4), %xmm0	# acc *= data[i]
	mulss	4(%rax,%rdx,4), %xmm0	# acc *= data[i+1]
	mulss	8(%rax,%rdx,4), %xmm0	# acc *= data[i+2]
	addq	$3, %rdx
	cmpq	%rdx, %rbp
	jg	.L42			# continue while limit > i
	jmp	.L41
.L45:
	movss	.LC0(%rip), %xmm0	# short-vector path: same initial state
	movl	$0, %edx
.L41:
	cmpq	%rdx, %rbx
	jle	.L43			# no leftover elements
.L44:
	mulss	(%rax,%rdx,4), %xmm0	# leftover elements, one at a time
	addq	$1, %rdx
	cmpq	%rbx, %rdx
	jne	.L44
.L43:
	movss	%xmm0, 0(%r13)		# *dest = acc
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	ret

# combine5p: pointer-based product of the vector elements, main loop
# unrolled by two with a single accumulator (%xmm0); result stored to
# *dest (%rsi). get_vec_start is called before vec_length here.
# Register roles: %rbp = vector handle, %r12 = dest, %rbx = start (later the
# tail cursor), %rax = end address, %rcx = end - 4, %rdx = main-loop cursor.
# .LC0 is defined outside this listing (presumably 1.0f -- TODO confirm).
combine5p:
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	movq	%rdi, %rbp
	movq	%rsi, %r12
	call	get_vec_start
	movq	%rax, %rbx		# start of the elements
	movq	%rbp, %rdi
	call	vec_length
	leaq	(%rbx,%rax,4), %rax	# end = start + 4 * length
	leaq	-4(%rax), %rcx		# main-loop bound = end - 4 (one float short)
	cmpq	%rcx, %rbx
	jnb	.L53			# start >= bound (unsigned): skip main loop
	movq	%rbx, %rdx		# cursor = start
	movss	.LC0(%rip), %xmm0	# acc = initial value
.L50:
	mulss	(%rdx), %xmm0		# acc *= cursor[0]
	mulss	4(%rdx), %xmm0		# acc *= cursor[1]
	addq	$8, %rdx
	cmpq	%rdx, %rcx
	ja	.L50			# continue while bound > cursor (unsigned)
	movq	%rax, %rdx		# The next five instructions recompute where
	subq	%rbx, %rdx		# the main loop stopped, in closed form:
	leaq	-5(%rdx), %rdx		# start + 8 * (((end - start - 5) >> 3) + 1)
	shrq	$3, %rdx
	leaq	8(%rbx,%rdx,8), %rbx	# %rbx = tail cursor
	jmp	.L48
.L53:
	movss	.LC0(%rip), %xmm0	# short-vector path: acc = initial value
.L48:
	cmpq	%rbx, %rax
	jbe	.L51			# no leftover elements
.L52:
	mulss	(%rbx), %xmm0		# leftover elements, one at a time
	addq	$4, %rbx
	cmpq	%rbx, %rax
	ja	.L52
.L51:
	movss	%xmm0, (%r12)		# *dest = acc
	popq	%rbx
	popq	%rbp
	popq	%r12
	ret

# unroll2aw_combine: product of the vector elements, main loop unrolled by
# two with a single accumulator (%xmm0); result stored to *dest (%rsi).
# The index is advanced between the two multiplies, so the second element
# is addressed with a -4 displacement.
# Register roles: %r12 = vector handle, %r13 = dest, %rbx = length,
# %rbp = limit (length - 1), %rax = element base, %rdx = index i.
# .LC0 is defined outside this listing (presumably 1.0f -- TODO confirm).
unroll2aw_combine:
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	subq	$8, %rsp		# padding: keeps %rsp 16-byte aligned at the calls
	movq	%rdi, %r12
	movq	%rsi, %r13
	call	vec_length
	movq	%rax, %rbx		# length
	leaq	-1(%rax), %rbp		# limit = length - 1
	movq	%r12, %rdi
	call	get_vec_start		# %rax = base address of the elements
	testq	%rbp, %rbp
	jle	.L60			# limit <= 0: skip the unrolled loop
	movss	.LC0(%rip), %xmm0	# acc = initial value
	movl	$0, %edx		# i = 0
.L57:
	mulss	(%rax,%rdx,4), %xmm0	# acc *= data[i]
	addq	$2, %rdx		# i += 2 before the second multiply
	mulss	-4(%rax,%rdx,4), %xmm0	# acc *= data[i-1], the old data[i+1]
	cmpq	%rdx, %rbp
	jg	.L57			# continue while limit > i
	jmp	.L56
.L60:
	movss	.LC0(%rip), %xmm0	# short-vector path: same initial state
	movl	$0, %edx
.L56:
	cmpq	%rdx, %rbx
	jle	.L58			# no leftover elements
.L59:
	mulss	(%rax,%rdx,4), %xmm0	# leftover elements, one at a time
	addq	$1, %rdx
	cmpq	%rbx, %rdx
	jne	.L59
.L58:
	movss	%xmm0, 0(%r13)		# *dest = acc
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	ret

# unroll4a_combine: product of the vector elements, main loop unrolled by
# four with a single accumulator (%xmm0); result stored to *dest (%rsi).
# Register roles: %r12 = vector handle, %r13 = dest, %rbx = length,
# %rbp = limit (length - 3), %rax = element base, %rdx = index i.
# .LC0 is defined outside this listing (presumably 1.0f -- TODO confirm).
unroll4a_combine:
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	subq	$8, %rsp		# padding: keeps %rsp 16-byte aligned at the calls
	movq	%rdi, %r12
	movq	%rsi, %r13
	call	vec_length
	movq	%rax, %rbx		# length
	leaq	-3(%rax), %rbp		# limit = length - 3
	movq	%r12, %rdi
	call	get_vec_start		# %rax = base address of the elements
	testq	%rbp, %rbp
	jle	.L67			# limit <= 0: skip the unrolled loop
	movss	.LC0(%rip), %xmm0	# acc = initial value
	movl	$0, %edx		# i = 0
.L64:
	mulss	(%rax,%rdx,4), %xmm0	# acc *= data[i] .. data[i+3], in order
	mulss	4(%rax,%rdx,4), %xmm0
	mulss	8(%rax,%rdx,4), %xmm0
	mulss	12(%rax,%rdx,4), %xmm0
	addq	$4, %rdx
	cmpq	%rdx, %rbp
	jg	.L64			# continue while limit > i
	jmp	.L63
.L67:
	movss	.LC0(%rip), %xmm0	# short-vector path: same initial state
	movl	$0, %edx
.L63:
	cmpq	%rdx, %rbx
	jle	.L65			# no leftover elements
.L66:
	mulss	(%rax,%rdx,4), %xmm0	# leftover elements, one at a time
	addq	$1, %rdx
	cmpq	%rbx, %rdx
	jne	.L66
.L65:
	movss	%xmm0, 0(%r13)		# *dest = acc
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	ret

# unroll5a_combine: product of the vector elements, main loop unrolled by
# five with a single accumulator (%xmm0); result stored to *dest (%rsi).
# The main loop advances both an index (%rdx) and a pointer (%rcx); the
# tail loop indexes from the base in %rax.
# Register roles: %r13 = vector handle, %r12 = dest, %rbx = length,
# %rbp = limit (length - 4).
# .LC0 is defined outside this listing (presumably 1.0f -- TODO confirm).
unroll5a_combine:
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	subq	$8, %rsp		# padding: keeps %rsp 16-byte aligned at the calls
	movq	%rdi, %r13
	movq	%rsi, %r12
	call	vec_length
	movq	%rax, %rbx		# length
	leaq	-4(%rax), %rbp		# limit = length - 4
	movq	%r13, %rdi
	call	get_vec_start		# %rax = base address of the elements
	testq	%rbp, %rbp
	jle	.L74			# limit <= 0: skip the unrolled loop
	movq	%rax, %rcx		# pointer cursor = base
	movss	.LC0(%rip), %xmm0	# acc = initial value
	movl	$0, %edx		# i = 0
.L71:
	mulss	(%rcx), %xmm0		# acc *= five consecutive elements, in order
	mulss	4(%rcx), %xmm0
	mulss	8(%rcx), %xmm0
	mulss	12(%rcx), %xmm0
	mulss	16(%rcx), %xmm0
	addq	$5, %rdx		# index and pointer advance together
	addq	$20, %rcx
	cmpq	%rdx, %rbp
	jg	.L71			# continue while limit > i
	jmp	.L70
.L74:
	movss	.LC0(%rip), %xmm0	# short-vector path: same initial state
	movl	$0, %edx
.L70:
	cmpq	%rdx, %rbx
	jle	.L72			# no leftover elements
.L73:
	mulss	(%rax,%rdx,4), %xmm0	# leftover elements, one at a time
	addq	$1, %rdx
	cmpq	%rbx, %rdx
	jne	.L73
.L72:
	movss	%xmm0, (%r12)		# *dest = acc
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	ret

# unroll6a_combine: product of the vector elements, main loop unrolled by
# six with a single accumulator (%xmm0); result stored to *dest (%rsi).
# The main loop advances both an index (%rcx) and a pointer (%rdx); the
# tail loop indexes from the base in %rax.
# Register roles: %r13 = vector handle, %r12 = dest, %rbx = length,
# %rbp = limit (length - 5).
# .LC0 is defined outside this listing (presumably 1.0f -- TODO confirm).
unroll6a_combine:
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	subq	$8, %rsp		# padding: keeps %rsp 16-byte aligned at the calls
	movq	%rdi, %r13
	movq	%rsi, %r12
	call	vec_length
	movq	%rax, %rbx		# length
	leaq	-5(%rax), %rbp		# limit = length - 5
	movq	%r13, %rdi
	call	get_vec_start		# %rax = base address of the elements
	testq	%rbp, %rbp
	jle	.L81			# limit <= 0: skip the unrolled loop
	movq	%rax, %rdx		# pointer cursor = base
	movss	.LC0(%rip), %xmm0	# acc = initial value
	movl	$0, %ecx		# i = 0
.L78:
	mulss	(%rdx), %xmm0		# acc *= six consecutive elements, in order
	mulss	4(%rdx), %xmm0
	mulss	8(%rdx), %xmm0
	mulss	12(%rdx), %xmm0
	mulss	16(%rdx), %xmm0
	mulss	20(%rdx), %xmm0
	addq	$6, %rcx		# index and pointer advance together
	addq	$24, %rdx
	cmpq	%rcx, %rbp
	jg	.L78			# continue while limit > i
	jmp	.L77
.L81:
	movss	.LC0(%rip), %xmm0	# short-vector path: same initial state
	movl	$0, %ecx
.L77:
	cmpq	%rcx, %rbx
	jle	.L79			# no leftover elements
.L80:
	mulss	(%rax,%rcx,4), %xmm0	# leftover elements, one at a time
	addq	$1, %rcx
	cmpq	%rbx, %rcx
	jne	.L80
.L79:
	movss	%xmm0, (%r12)		# *dest = acc
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	ret

# unroll7a_combine: product of the vector elements, main loop unrolled by
# seven with a single accumulator (%xmm0); result stored to *dest (%rsi).
# The main loop advances both an index (%rcx) and a pointer (%rdx); the
# tail loop indexes from the base in %rax.
# Register roles: %r13 = vector handle, %r12 = dest, %rbx = length,
# %rbp = limit (length - 6).
# .LC0 is defined outside this listing (presumably 1.0f -- TODO confirm).
unroll7a_combine:
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	subq	$8, %rsp		# padding: keeps %rsp 16-byte aligned at the calls
	movq	%rdi, %r13
	movq	%rsi, %r12
	call	vec_length
	movq	%rax, %rbx		# length
	leaq	-6(%rax), %rbp		# limit = length - 6
	movq	%r13, %rdi
	call	get_vec_start		# %rax = base address of the elements
	testq	%rbp, %rbp
	jle	.L88			# limit <= 0: skip the unrolled loop
	movq	%rax, %rdx		# pointer cursor = base
	movss	.LC0(%rip), %xmm0	# acc = initial value
	movl	$0, %ecx		# i = 0
.L85:
	mulss	(%rdx), %xmm0		# acc *= seven consecutive elements, in order
	mulss	4(%rdx), %xmm0
	mulss	8(%rdx), %xmm0
	mulss	12(%rdx), %xmm0
	mulss	16(%rdx), %xmm0
	mulss	20(%rdx), %xmm0
	mulss	24(%rdx), %xmm0
	addq	$7, %rcx		# index and pointer advance together
	addq	$28, %rdx
	cmpq	%rcx, %rbp
	jg	.L85			# continue while limit > i
	jmp	.L84
.L88:
	movss	.LC0(%rip), %xmm0	# short-vector path: same initial state
	movl	$0, %ecx
.L84:
	cmpq	%rcx, %rbx
	jle	.L86			# no leftover elements
.L87:
	mulss	(%rax,%rcx,4), %xmm0	# leftover elements, one at a time
	addq	$1, %rcx
	cmpq	%rbx, %rcx
	jne	.L87
.L86:
	movss	%xmm0, (%r12)		# *dest = acc
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	ret

# unroll8a_combine: product of the vector elements, main loop unrolled by
# eight with a single accumulator (%xmm0); result stored to *dest (%rsi).
# The main loop advances both an index (%rcx) and a pointer (%rdx); the
# tail loop indexes from the base in %rax.
# Register roles: %r13 = vector handle, %r12 = dest, %rbx = length,
# %rbp = limit (length - 7).
# .LC0 is defined outside this listing (presumably 1.0f -- TODO confirm).
unroll8a_combine:
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	subq	$8, %rsp		# padding: keeps %rsp 16-byte aligned at the calls
	movq	%rdi, %r13
	movq	%rsi, %r12
	call	vec_length
	movq	%rax, %rbx		# length
	leaq	-7(%rax), %rbp		# limit = length - 7
	movq	%r13, %rdi
	call	get_vec_start		# %rax = base address of the elements
	testq	%rbp, %rbp
	jle	.L95			# limit <= 0: skip the unrolled loop
	movq	%rax, %rdx		# pointer cursor = base
	movss	.LC0(%rip), %xmm0	# acc = initial value
	movl	$0, %ecx		# i = 0
.L92:
	mulss	(%rdx), %xmm0		# acc *= eight consecutive elements, in order
	mulss	4(%rdx), %xmm0
	mulss	8(%rdx), %xmm0
	mulss	12(%rdx), %xmm0
	mulss	16(%rdx), %xmm0
	mulss	20(%rdx), %xmm0
	mulss	24(%rdx), %xmm0
	mulss	28(%rdx), %xmm0
	addq	$8, %rcx		# index and pointer advance together
	addq	$32, %rdx
	cmpq	%rcx, %rbp
	jg	.L92			# continue while limit > i
	jmp	.L91
.L95:
	movss	.LC0(%rip), %xmm0	# short-vector path: same initial state
	movl	$0, %ecx
.L91:
	cmpq	%rcx, %rbx
	jle	.L93			# no leftover elements
.L94:
	mulss	(%rax,%rcx,4), %xmm0	# leftover elements, one at a time
	addq	$1, %rcx
	cmpq	%rbx, %rcx
	jne	.L94
.L93:
	movss	%xmm0, (%r12)		# *dest = acc
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	ret

# unroll9a_combine: product of the vector elements, main loop unrolled by
# nine with a single accumulator (%xmm0); result stored to *dest (%rsi).
# The main loop advances both an index (%rcx) and a pointer (%rdx); the
# tail loop indexes from the base in %rax.
# Register roles: %r13 = vector handle, %r12 = dest, %rbx = length,
# %rbp = limit (length - 8).
# .LC0 is defined outside this listing (presumably 1.0f -- TODO confirm).
unroll9a_combine:
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	subq	$8, %rsp		# padding: keeps %rsp 16-byte aligned at the calls
	movq	%rdi, %r13
	movq	%rsi, %r12
	call	vec_length
	movq	%rax, %rbx		# length
	leaq	-8(%rax), %rbp		# limit = length - 8
	movq	%r13, %rdi
	call	get_vec_start		# %rax = base address of the elements
	testq	%rbp, %rbp
	jle	.L102			# limit <= 0: skip the unrolled loop
	movq	%rax, %rdx		# pointer cursor = base
	movss	.LC0(%rip), %xmm0	# acc = initial value
	movl	$0, %ecx		# i = 0
.L99:
	mulss	(%rdx), %xmm0		# acc *= nine consecutive elements, in order
	mulss	4(%rdx), %xmm0
	mulss	8(%rdx), %xmm0
	mulss	12(%rdx), %xmm0
	mulss	16(%rdx), %xmm0
	mulss	20(%rdx), %xmm0
	mulss	24(%rdx), %xmm0
	mulss	28(%rdx), %xmm0
	mulss	32(%rdx), %xmm0
	addq	$9, %rcx		# index and pointer advance together
	addq	$36, %rdx
	cmpq	%rcx, %rbp
	jg	.L99			# continue while limit > i
	jmp	.L98
.L102:
	movss	.LC0(%rip), %xmm0	# short-vector path: same initial state
	movl	$0, %ecx
.L98:
	cmpq	%rcx, %rbx
	jle	.L100			# no leftover elements
.L101:
	mulss	(%rax,%rcx,4), %xmm0	# leftover elements, one at a time
	addq	$1, %rcx
	cmpq	%rbx, %rcx
	jne	.L101
.L100:
	movss	%xmm0, (%r12)		# *dest = acc
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	ret

# unroll10a_combine: product of the vector elements, main loop unrolled by
# ten with a single accumulator (%xmm0); result stored to *dest (%rsi).
# The main loop advances both an index (%rcx) and a pointer (%rdx); the
# tail loop indexes from the base in %rax.
# Register roles: %r13 = vector handle, %r12 = dest, %rbx = length,
# %rbp = limit (length - 9).
# .LC0 is defined outside this listing (presumably 1.0f -- TODO confirm).
unroll10a_combine:
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	subq	$8, %rsp		# padding: keeps %rsp 16-byte aligned at the calls
	movq	%rdi, %r13
	movq	%rsi, %r12
	call	vec_length
	movq	%rax, %rbx		# length
	leaq	-9(%rax), %rbp		# limit = length - 9
	movq	%r13, %rdi
	call	get_vec_start		# %rax = base address of the elements
	testq	%rbp, %rbp
	jle	.L109			# limit <= 0: skip the unrolled loop
	movq	%rax, %rdx		# pointer cursor = base
	movss	.LC0(%rip), %xmm0	# acc = initial value
	movl	$0, %ecx		# i = 0
.L106:
	mulss	(%rdx), %xmm0		# acc *= ten consecutive elements, in order
	mulss	4(%rdx), %xmm0
	mulss	8(%rdx), %xmm0
	mulss	12(%rdx), %xmm0
	mulss	16(%rdx), %xmm0
	mulss	20(%rdx), %xmm0
	mulss	24(%rdx), %xmm0
	mulss	28(%rdx), %xmm0
	mulss	32(%rdx), %xmm0
	mulss	36(%rdx), %xmm0
	addq	$10, %rcx		# index and pointer advance together
	addq	$40, %rdx
	cmpq	%rcx, %rbp
	jg	.L106			# continue while limit > i
	jmp	.L105
.L109:
	movss	.LC0(%rip), %xmm0	# short-vector path: same initial state
	movl	$0, %ecx
.L105:
	cmpq	%rcx, %rbx
	jle	.L107			# no leftover elements
.L108:
	mulss	(%rax,%rcx,4), %xmm0	# leftover elements, one at a time
	addq	$1, %rcx
	cmpq	%rbx, %rcx
	jne	.L108
.L107:
	movss	%xmm0, (%r12)		# *dest = acc
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	ret

# unroll16a_combine: product of the vector elements, main loop unrolled by
# sixteen with a single accumulator (%xmm0); result stored to *dest (%rsi).
# The main loop advances both an index (%rcx) and a pointer (%rdx); the
# tail loop indexes from the base in %rax.
# Register roles: %r13 = vector handle, %r12 = dest, %rbx = length,
# %rbp = limit (length - 15).
# .LC0 is defined outside this listing (presumably 1.0f -- TODO confirm).
unroll16a_combine:
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	subq	$8, %rsp		# padding: keeps %rsp 16-byte aligned at the calls
	movq	%rdi, %r13
	movq	%rsi, %r12
	call	vec_length
	movq	%rax, %rbx		# length
	leaq	-15(%rax), %rbp		# limit = length - 15
	movq	%r13, %rdi
	call	get_vec_start		# %rax = base address of the elements
	testq	%rbp, %rbp
	jle	.L116			# limit <= 0: skip the unrolled loop
	movq	%rax, %rdx		# pointer cursor = base
	movss	.LC0(%rip), %xmm0	# acc = initial value
	movl	$0, %ecx		# i = 0
.L113:
	mulss	(%rdx), %xmm0		# acc *= sixteen consecutive elements, in order
	mulss	4(%rdx), %xmm0
	mulss	8(%rdx), %xmm0
	mulss	12(%rdx), %xmm0
	mulss	16(%rdx), %xmm0
	mulss	20(%rdx), %xmm0
	mulss	24(%rdx), %xmm0
	mulss	28(%rdx), %xmm0
	mulss	32(%rdx), %xmm0
	mulss	36(%rdx), %xmm0
	mulss	40(%rdx), %xmm0
	mulss	44(%rdx), %xmm0
	mulss	48(%rdx), %xmm0
	mulss	52(%rdx), %xmm0
	mulss	56(%rdx), %xmm0
	mulss	60(%rdx), %xmm0
	addq	$16, %rcx		# index and pointer advance together
	addq	$64, %rdx
	cmpq	%rcx, %rbp
	jg	.L113			# continue while limit > i
	jmp	.L112
.L116:
	movss	.LC0(%rip), %xmm0	# short-vector path: same initial state
	movl	$0, %ecx
.L112:
	cmpq	%rcx, %rbx
	jle	.L114			# no leftover elements
.L115:
	mulss	(%rax,%rcx,4), %xmm0	# leftover elements, one at a time
	addq	$1, %rcx
	cmpq	%rbx, %rcx
	jne	.L115
.L114:
	movss	%xmm0, (%r12)		# *dest = acc
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	ret

# unroll2_combine: pointer-based product of the vector elements, main loop
# unrolled by two with a single accumulator (%xmm0); result stored to
# *dest (%rsi). The main loop covers length minus (length % 2) elements;
# the tail loop covers the remainder.
# Register roles: %r12 = vector handle, %rbp = dest, %rbx = length,
# %rcx = start (later the tail cursor), %rsi = remainder,
# %rax = main-loop end (later the overall end), %rdx = main-loop cursor.
# .LC0 is defined outside this listing (presumably 1.0f -- TODO confirm).
unroll2_combine:
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	movq	%rdi, %r12
	movq	%rsi, %rbp
	call	vec_length
	movq	%rax, %rbx		# length
	movq	%r12, %rdi
	call	get_vec_start
	movq	%rax, %rcx		# start of the elements
	movq	%rbx, %rax
	shrq	$63, %rax		# bias = sign bit of length (0 or 1)
	leaq	(%rbx,%rax), %rsi	# Signed remainder without a divide:
	andl	$1, %esi		# ((length + bias) & 1) - bias, whose sign
	subq	%rax, %rsi		# follows the dividend.
	movslq	%esi, %rsi		# low 32 bits of the remainder, sign-extended
	subq	%rsi, %rbx		# length - remainder
	leaq	(%rcx,%rbx,4), %rax	# main-loop end address
	cmpq	%rax, %rcx
	jnb	.L124			# start >= end (unsigned): skip main loop
	movq	%rcx, %rdx		# cursor = start
	movss	.LC0(%rip), %xmm0	# acc = initial value
.L121:
	mulss	(%rdx), %xmm0		# acc *= cursor[0]
	mulss	4(%rdx), %xmm0		# acc *= cursor[1]
	addq	$8, %rdx
	cmpq	%rdx, %rax
	ja	.L121			# continue while end > cursor (unsigned)
	movq	%rcx, %rdx		# The next four instructions recompute where
	notq	%rdx			# the main loop stopped, in closed form:
	addq	%rax, %rdx		# start + 8 * (((end - start - 1) >> 3) + 1)
	shrq	$3, %rdx
	leaq	8(%rcx,%rdx,8), %rcx	# %rcx = tail cursor
	jmp	.L119
.L124:
	movss	.LC0(%rip), %xmm0	# short-vector path: acc = initial value
.L119:
	leaq	(%rax,%rsi,4), %rax	# overall end = main-loop end + 4 * remainder
	cmpq	%rcx, %rax
	jbe	.L122			# no leftover elements
.L123:
	mulss	(%rcx), %xmm0		# leftover elements, one at a time
	addq	$4, %rcx
	cmpq	%rcx, %rax
	ja	.L123
.L122:
	movss	%xmm0, 0(%rbp)		# *dest = acc
	popq	%rbx
	popq	%rbp
	popq	%r12
	ret

# unroll3_combine: pointer-based product of the vector elements, main loop
# unrolled by three with a single accumulator (%xmm0); result stored to
# *dest (%rsi). The main loop runs while the cursor is below end - 8 bytes;
# the tail loop continues from the same cursor up to the real end.
# Register roles: %rbx = vector handle, %rbp = dest, %r12 = length,
# %rdx = cursor, %rax = main-loop bound (later the overall end).
# .LC0 is defined outside this listing (presumably 1.0f -- TODO confirm).
unroll3_combine:
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	movq	%rdi, %rbx
	movq	%rsi, %rbp
	call	vec_length
	movq	%rax, %r12		# length
	movq	%rbx, %rdi
	call	get_vec_start
	movq	%rax, %rdx		# cursor = start of the elements
	leaq	-8(%rax,%r12,4), %rax	# bound = start + 4 * length - 8
	cmpq	%rax, %rdx
	jnb	.L131			# start >= bound (unsigned): skip main loop
	movss	.LC0(%rip), %xmm0	# acc = initial value
.L128:
	mulss	(%rdx), %xmm0		# acc *= cursor[0], cursor[1], cursor[2]
	mulss	4(%rdx), %xmm0
	mulss	8(%rdx), %xmm0
	addq	$12, %rdx
	cmpq	%rdx, %rax
	ja	.L128			# continue while bound > cursor (unsigned)
	jmp	.L127
.L131:
	movss	.LC0(%rip), %xmm0	# short-vector path: acc = initial value
.L127:
	addq	$8, %rax		# undo the -8 bias: %rax = overall end
	cmpq	%rdx, %rax
	jbe	.L129			# no leftover elements
.L130:
	mulss	(%rdx), %xmm0		# leftover elements, one at a time
	addq	$4, %rdx
	cmpq	%rdx, %rax
	ja	.L130
.L129:
	movss	%xmm0, 0(%rbp)		# *dest = acc
	popq	%rbx
	popq	%rbp
	popq	%r12
	ret

# unroll4_combine: pointer-based product of the vector elements, main loop
# unrolled by four with a single accumulator (%xmm0); result stored to
# *dest (%rsi). The main loop runs while the cursor is below end - 12 bytes.
# Register roles: %rbx = vector handle, %rbp = dest, %r12 = length,
# %rcx = start (later the tail cursor), %rdx = main-loop cursor,
# %rax = main-loop bound (later the overall end).
# .LC0 is defined outside this listing (presumably 1.0f -- TODO confirm).
unroll4_combine:
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	movq	%rdi, %rbx
	movq	%rsi, %rbp
	call	vec_length
	movq	%rax, %r12		# length
	movq	%rbx, %rdi
	call	get_vec_start
	movq	%rax, %rcx		# start of the elements
	leaq	-12(%rax,%r12,4), %rax	# bound = start + 4 * length - 12
	cmpq	%rax, %rcx
	jnb	.L139			# start >= bound (unsigned): skip main loop
	movq	%rcx, %rdx		# cursor = start
	movss	.LC0(%rip), %xmm0	# acc = initial value
.L136:
	mulss	(%rdx), %xmm0		# acc *= cursor[0] .. cursor[3], in order
	mulss	4(%rdx), %xmm0
	mulss	8(%rdx), %xmm0
	mulss	12(%rdx), %xmm0
	addq	$16, %rdx
	cmpq	%rdx, %rax
	ja	.L136			# continue while bound > cursor (unsigned)
	movq	%rcx, %rdx		# The next four instructions recompute where
	notq	%rdx			# the main loop stopped, in closed form:
	addq	%rax, %rdx		# start + 16 + ((bound - start - 1) & -16)
	andq	$-16, %rdx
	leaq	16(%rcx,%rdx), %rcx	# %rcx = tail cursor
	jmp	.L134
.L139:
	movss	.LC0(%rip), %xmm0	# short-vector path: acc = initial value
.L134:
	addq	$12, %rax		# undo the -12 bias: %rax = overall end
	cmpq	%rcx, %rax
	jbe	.L137			# no leftover elements
.L138:
	mulss	(%rcx), %xmm0		# leftover elements, one at a time
	addq	$4, %rcx
	cmpq	%rcx, %rax
	ja	.L138
.L137:
	movss	%xmm0, 0(%rbp)		# *dest = acc
	popq	%rbx
	popq	%rbp
	popq	%r12
	ret

# unroll8_combine: pointer-based product of the vector elements, main loop
# unrolled by eight with a single accumulator (%xmm0); result stored to
# *dest (%rsi). The main loop covers length minus (length % 8) elements;
# the tail loop covers the remainder.
# Register roles: %r12 = vector handle, %rbp = dest, %rbx = length,
# %rcx = start (later the tail cursor), %rsi = remainder,
# %rax = main-loop end (later the overall end), %rdx = main-loop cursor.
# .LC0 is defined outside this listing (presumably 1.0f -- TODO confirm).
unroll8_combine:
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	movq	%rdi, %r12
	movq	%rsi, %rbp
	call	vec_length
	movq	%rax, %rbx		# length
	movq	%r12, %rdi
	call	get_vec_start
	movq	%rax, %rcx		# start of the elements
	movq	%rbx, %rax
	sarq	$63, %rax		# all ones when length is negative, else zero
	shrq	$61, %rax		# bias = 7 when length is negative, else 0
	leaq	(%rbx,%rax), %rsi	# Signed remainder without a divide:
	andl	$7, %esi		# ((length + bias) & 7) - bias, whose sign
	subq	%rax, %rsi		# follows the dividend.
	movslq	%esi, %rsi		# low 32 bits of the remainder, sign-extended
	subq	%rsi, %rbx		# length - remainder
	leaq	(%rcx,%rbx,4), %rax	# main-loop end address
	cmpq	%rax, %rcx
	jnb	.L147			# start >= end (unsigned): skip main loop
	movq	%rcx, %rdx		# cursor = start
	movss	.LC0(%rip), %xmm0	# acc = initial value
.L144:
	mulss	(%rdx), %xmm0		# acc *= eight consecutive elements, in order
	mulss	4(%rdx), %xmm0
	mulss	8(%rdx), %xmm0
	mulss	12(%rdx), %xmm0
	mulss	16(%rdx), %xmm0
	mulss	20(%rdx), %xmm0
	mulss	24(%rdx), %xmm0
	mulss	28(%rdx), %xmm0
	addq	$32, %rdx
	cmpq	%rdx, %rax
	ja	.L144			# continue while end > cursor (unsigned)
	movq	%rcx, %rdx		# The next four instructions recompute where
	notq	%rdx			# the main loop stopped, in closed form:
	addq	%rax, %rdx		# start + 32 + ((end - start - 1) & -32)
	andq	$-32, %rdx
	leaq	32(%rcx,%rdx), %rcx	# %rcx = tail cursor
	jmp	.L142
.L147:
	movss	.LC0(%rip), %xmm0	# short-vector path: acc = initial value
.L142:
	leaq	(%rax,%rsi,4), %rax	# overall end = main-loop end + 4 * remainder
	cmpq	%rcx, %rax
	jbe	.L145			# no leftover elements
.L146:
	mulss	(%rcx), %xmm0		# leftover elements, one at a time
	addq	$4, %rcx
	cmpq	%rcx, %rax
	ja	.L146
.L145:
	movss	%xmm0, 0(%rbp)		# *dest = acc
	popq	%rbx
	popq	%rbp
	popq	%r12
	ret

# unroll16_combine: pointer-based product of the vector elements, main loop
# unrolled by sixteen with a single accumulator (%xmm0); result stored to
# *dest (%rsi). The main loop covers length minus (length % 16) elements;
# the tail loop covers the remainder.
# Register roles: %r12 = vector handle, %rbp = dest, %rbx = length,
# %rcx = start (later the tail cursor), %rsi = remainder,
# %rax = main-loop end (later the overall end), %rdx = main-loop cursor.
# .LC0 is defined outside this listing (presumably 1.0f -- TODO confirm).
unroll16_combine:
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	movq	%rdi, %r12
	movq	%rsi, %rbp
	call	vec_length
	movq	%rax, %rbx		# length
	movq	%r12, %rdi
	call	get_vec_start
	movq	%rax, %rcx		# start of the elements
	movq	%rbx, %rax
	sarq	$63, %rax		# all ones when length is negative, else zero
	shrq	$60, %rax		# bias = 15 when length is negative, else 0
	leaq	(%rbx,%rax), %rsi	# Signed remainder without a divide:
	andl	$15, %esi		# ((length + bias) & 15) - bias, whose sign
	subq	%rax, %rsi		# follows the dividend.
	movslq	%esi, %rsi		# low 32 bits of the remainder, sign-extended
	subq	%rsi, %rbx		# length - remainder
	leaq	(%rcx,%rbx,4), %rax	# main-loop end address
	cmpq	%rax, %rcx
	jnb	.L155			# start >= end (unsigned): skip main loop
	movq	%rcx, %rdx		# cursor = start
	movss	.LC0(%rip), %xmm0	# acc = initial value
.L152:
	mulss	(%rdx), %xmm0		# acc *= sixteen consecutive elements, in order
	mulss	4(%rdx), %xmm0
	mulss	8(%rdx), %xmm0
	mulss	12(%rdx), %xmm0
	mulss	16(%rdx), %xmm0
	mulss	20(%rdx), %xmm0
	mulss	24(%rdx), %xmm0
	mulss	28(%rdx), %xmm0
	mulss	32(%rdx), %xmm0
	mulss	36(%rdx), %xmm0
	mulss	40(%rdx), %xmm0
	mulss	44(%rdx), %xmm0
	mulss	48(%rdx), %xmm0
	mulss	52(%rdx), %xmm0
	mulss	56(%rdx), %xmm0
	mulss	60(%rdx), %xmm0
	addq	$64, %rdx
	cmpq	%rdx, %rax
	ja	.L152			# continue while end > cursor (unsigned)
	movq	%rcx, %rdx		# The next four instructions recompute where
	notq	%rdx			# the main loop stopped, in closed form:
	addq	%rax, %rdx		# start + 64 + ((end - start - 1) & -64)
	andq	$-64, %rdx
	leaq	64(%rcx,%rdx), %rcx	# %rcx = tail cursor
	jmp	.L150
.L155:
	movss	.LC0(%rip), %xmm0	# short-vector path: acc = initial value
.L150:
	leaq	(%rax,%rsi,4), %rax	# overall end = main-loop end + 4 * remainder
	cmpq	%rcx, %rax
	jbe	.L153			# no leftover elements
.L154:
	mulss	(%rcx), %xmm0		# leftover elements, one at a time
	addq	$4, %rcx
	cmpq	%rcx, %rax
	ja	.L154
.L153:
	movss	%xmm0, 0(%rbp)		# *dest = acc
	popq	%rbx
	popq	%rbp
	popq	%r12
	ret

# combine6: product of the vector elements, main loop unrolled by two with
# two independent accumulators: %xmm0 takes data[i], %xmm1 takes data[i+1].
# Leftover elements go into %xmm0; the accumulators are multiplied together
# at the end and the result is stored to *dest (%rsi).
# Register roles: %r12 = vector handle, %r13 = dest, %rbx = length,
# %rbp = limit (length - 1), %rax = element base, %rdx = index i.
# .LC0 is defined outside this listing (presumably 1.0f -- TODO confirm).
combine6:
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	subq	$8, %rsp		# padding: keeps %rsp 16-byte aligned at the calls
	movq	%rdi, %r12
	movq	%rsi, %r13
	call	vec_length
	movq	%rax, %rbx		# length
	leaq	-1(%rax), %rbp		# limit = length - 1
	movq	%r12, %rdi
	call	get_vec_start		# %rax = base address of the elements
	testq	%rbp, %rbp
	jle	.L162			# limit <= 0: skip the unrolled loop
	movss	.LC0(%rip), %xmm1	# acc1 = initial value
	movaps	%xmm1, %xmm0		# acc0 = same initial value
	movl	$0, %edx		# i = 0
.L159:
	mulss	(%rax,%rdx,4), %xmm0	# acc0 *= data[i]
	mulss	4(%rax,%rdx,4), %xmm1	# acc1 *= data[i+1]
	addq	$2, %rdx
	cmpq	%rdx, %rbp
	jg	.L159			# continue while limit > i
	jmp	.L158
.L162:
	movss	.LC0(%rip), %xmm1	# short-vector path: same initial state
	movaps	%xmm1, %xmm0
	movl	$0, %edx
.L158:
	cmpq	%rdx, %rbx
	jle	.L160			# no leftover elements
.L161:
	mulss	(%rax,%rdx,4), %xmm0	# leftover elements go into acc0
	addq	$1, %rdx
	cmpq	%rbx, %rdx
	jne	.L161
.L160:
	mulss	%xmm1, %xmm0		# merge: acc0 * acc1
	movss	%xmm0, 0(%r13)		# *dest = merged product
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	ret

# unroll4x2a_combine: product of the vector elements, main loop unrolled by
# four with two accumulators: %xmm0 takes data[i] and data[i+2], %xmm1 takes
# data[i+1] and data[i+3]. Leftover elements go into %xmm0; the accumulators
# are multiplied together at the end and stored to *dest (%rsi).
# Register roles: %r12 = vector handle, %r13 = dest, %rbx = length,
# %rbp = limit (length - 3), %rax = element base, %rdx = index i.
# .LC0 is defined outside this listing (presumably 1.0f -- TODO confirm).
unroll4x2a_combine:
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	subq	$8, %rsp		# padding: keeps %rsp 16-byte aligned at the calls
	movq	%rdi, %r12
	movq	%rsi, %r13
	call	vec_length
	movq	%rax, %rbx		# length
	leaq	-3(%rax), %rbp		# limit = length - 3
	movq	%r12, %rdi
	call	get_vec_start		# %rax = base address of the elements
	testq	%rbp, %rbp
	jle	.L169			# limit <= 0: skip the unrolled loop
	movss	.LC0(%rip), %xmm1	# acc1 = initial value
	movaps	%xmm1, %xmm0		# acc0 = same initial value
	movl	$0, %edx		# i = 0
.L166:
	mulss	(%rax,%rdx,4), %xmm0	# acc0 *= data[i]
	mulss	4(%rax,%rdx,4), %xmm1	# acc1 *= data[i+1]
	mulss	8(%rax,%rdx,4), %xmm0	# acc0 *= data[i+2]
	mulss	12(%rax,%rdx,4), %xmm1	# acc1 *= data[i+3]
	addq	$4, %rdx
	cmpq	%rdx, %rbp
	jg	.L166			# continue while limit > i
	jmp	.L165
.L169:
	movss	.LC0(%rip), %xmm1	# short-vector path: same initial state
	movaps	%xmm1, %xmm0
	movl	$0, %edx
.L165:
	cmpq	%rdx, %rbx
	jle	.L167			# no leftover elements
.L168:
	mulss	(%rax,%rdx,4), %xmm0	# leftover elements go into acc0
	addq	$1, %rdx
	cmpq	%rbx, %rdx
	jne	.L168
.L167:
	mulss	%xmm1, %xmm0		# merge: acc0 * acc1
	movss	%xmm0, 0(%r13)		# *dest = merged product
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	ret

# unroll8x2a_combine: product of the vector elements, main loop unrolled by
# eight with two accumulators: %xmm0 takes the even offsets and %xmm1 the
# odd offsets within each group of eight. Leftover elements go into %xmm0;
# the accumulators are multiplied together and stored to *dest (%rsi).
# The main loop advances both an index (%rcx) and a pointer (%rdx).
# Register roles: %r13 = vector handle, %r12 = dest, %rbx = length,
# %rbp = limit (length - 7), %rax = element base.
# .LC0 is defined outside this listing (presumably 1.0f -- TODO confirm).
unroll8x2a_combine:
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	subq	$8, %rsp		# padding: keeps %rsp 16-byte aligned at the calls
	movq	%rdi, %r13
	movq	%rsi, %r12
	call	vec_length
	movq	%rax, %rbx		# length
	leaq	-7(%rax), %rbp		# limit = length - 7
	movq	%r13, %rdi
	call	get_vec_start		# %rax = base address of the elements
	testq	%rbp, %rbp
	jle	.L176			# limit <= 0: skip the unrolled loop
	movq	%rax, %rdx		# pointer cursor = base
	movss	.LC0(%rip), %xmm1	# acc1 = initial value
	movaps	%xmm1, %xmm0		# acc0 = same initial value
	movl	$0, %ecx		# i = 0
.L173:
	mulss	(%rdx), %xmm0		# even offsets feed acc0, odd offsets acc1
	mulss	4(%rdx), %xmm1
	mulss	8(%rdx), %xmm0
	mulss	12(%rdx), %xmm1
	mulss	16(%rdx), %xmm0
	mulss	20(%rdx), %xmm1
	mulss	24(%rdx), %xmm0
	mulss	28(%rdx), %xmm1
	addq	$8, %rcx		# index and pointer advance together
	addq	$32, %rdx
	cmpq	%rcx, %rbp
	jg	.L173			# continue while limit > i
	jmp	.L172
.L176:
	movss	.LC0(%rip), %xmm1	# short-vector path: same initial state
	movaps	%xmm1, %xmm0
	movl	$0, %ecx
.L172:
	cmpq	%rcx, %rbx
	jle	.L174			# no leftover elements
.L175:
	mulss	(%rax,%rcx,4), %xmm0	# leftover elements go into acc0
	addq	$1, %rcx
	cmpq	%rbx, %rcx
	jne	.L175
.L174:
	mulss	%xmm1, %xmm0		# merge: acc0 * acc1
	movss	%xmm0, (%r12)		# *dest = merged product
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	ret

# unroll3x3a_combine: product of the vector elements, main loop unrolled by
# three with three accumulators: %xmm0 takes data[i], %xmm2 takes data[i+1],
# %xmm1 takes data[i+2]. Leftover elements go into %xmm0; the accumulators
# are merged as (%xmm0 * %xmm2) * %xmm1 and stored to *dest (%rsi).
# Register roles: %r12 = vector handle, %r13 = dest, %rbx = length,
# %rbp = limit (length - 2), %rax = element base, %rdx = index i.
# .LC0 is defined outside this listing (presumably 1.0f -- TODO confirm).
unroll3x3a_combine:
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	subq	$8, %rsp		# padding: keeps %rsp 16-byte aligned at the calls
	movq	%rdi, %r12
	movq	%rsi, %r13
	call	vec_length
	movq	%rax, %rbx		# length
	leaq	-2(%rax), %rbp		# limit = length - 2
	movq	%r12, %rdi
	call	get_vec_start		# %rax = base address of the elements
	testq	%rbp, %rbp
	jle	.L183			# limit <= 0: skip the unrolled loop
	movss	.LC0(%rip), %xmm1	# all three accumulators = initial value
	movaps	%xmm1, %xmm2
	movaps	%xmm1, %xmm0
	movl	$0, %edx		# i = 0
.L180:
	mulss	(%rax,%rdx,4), %xmm0	# acc0 *= data[i]
	mulss	4(%rax,%rdx,4), %xmm2	# acc2 *= data[i+1]
	mulss	8(%rax,%rdx,4), %xmm1	# acc1 *= data[i+2]
	addq	$3, %rdx
	cmpq	%rdx, %rbp
	jg	.L180			# continue while limit > i
	jmp	.L179
.L183:
	movss	.LC0(%rip), %xmm1	# short-vector path: same initial state
	movaps	%xmm1, %xmm2
	movaps	%xmm1, %xmm0
	movl	$0, %edx
.L179:
	cmpq	%rdx, %rbx
	jle	.L181			# no leftover elements
.L182:
	mulss	(%rax,%rdx,4), %xmm0	# leftover elements go into acc0
	addq	$1, %rdx
	cmpq	%rbx, %rdx
	jne	.L182
.L181:
	mulss	%xmm2, %xmm0		# merge step: acc0 * acc2
	mulss	%xmm0, %xmm1		# merge step: result lands in %xmm1
	movss	%xmm1, 0(%r13)		# *dest = merged product
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	ret

# unroll4x4a_combine: product of the vector elements, main loop unrolled by
# four with four accumulators: %xmm0, %xmm3, %xmm2, %xmm1 take data[i],
# data[i+1], data[i+2], data[i+3] respectively. Leftover elements go into
# %xmm0; the merge is (%xmm0 * %xmm3) * (%xmm1 * %xmm2), stored to *dest.
# Register roles: %r12 = vector handle, %r13 = dest, %rbx = length,
# %rbp = limit (length - 3), %rax = element base, %rdx = index i.
# .LC0 is defined outside this listing (presumably 1.0f -- TODO confirm).
unroll4x4a_combine:
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	subq	$8, %rsp		# padding: keeps %rsp 16-byte aligned at the calls
	movq	%rdi, %r12
	movq	%rsi, %r13
	call	vec_length
	movq	%rax, %rbx		# length
	leaq	-3(%rax), %rbp		# limit = length - 3
	movq	%r12, %rdi
	call	get_vec_start		# %rax = base address of the elements
	testq	%rbp, %rbp
	jle	.L190			# limit <= 0: skip the unrolled loop
	movss	.LC0(%rip), %xmm1	# all four accumulators = initial value
	movaps	%xmm1, %xmm2
	movaps	%xmm1, %xmm3
	movaps	%xmm1, %xmm0
	movl	$0, %edx		# i = 0
.L187:
	mulss	(%rax,%rdx,4), %xmm0	# acc0 *= data[i]
	mulss	4(%rax,%rdx,4), %xmm3	# acc3 *= data[i+1]
	mulss	8(%rax,%rdx,4), %xmm2	# acc2 *= data[i+2]
	mulss	12(%rax,%rdx,4), %xmm1	# acc1 *= data[i+3]
	addq	$4, %rdx
	cmpq	%rdx, %rbp
	jg	.L187			# continue while limit > i
	jmp	.L186
.L190:
	movss	.LC0(%rip), %xmm1	# short-vector path: same initial state
	movaps	%xmm1, %xmm2
	movaps	%xmm1, %xmm3
	movaps	%xmm1, %xmm0
	movl	$0, %edx
.L186:
	cmpq	%rdx, %rbx
	jle	.L188			# no leftover elements
.L189:
	mulss	(%rax,%rdx,4), %xmm0	# leftover elements go into acc0
	addq	$1, %rdx
	cmpq	%rbx, %rdx
	jne	.L189
.L188:
	mulss	%xmm3, %xmm0		# merge step: acc0 * acc3
	mulss	%xmm2, %xmm1		# merge step: acc1 * acc2
	mulss	%xmm1, %xmm0		# merge step: combine the two halves
	movss	%xmm0, 0(%r13)		# *dest = merged product
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	ret

# unroll8x4a_combine: product of the vector elements, main loop unrolled by
# eight with four accumulators. Within each group of eight, offsets 0 and 4
# feed %xmm0, 1 and 5 feed %xmm3, 2 and 6 feed %xmm2, 3 and 7 feed %xmm1.
# Leftover elements go into %xmm0; the merge is ((%xmm0 * %xmm3) * %xmm2)
# * %xmm1, stored to *dest (%rsi).
# The main loop advances both an index (%rcx) and a pointer (%rdx).
# Register roles: %r13 = vector handle, %r12 = dest, %rbx = length,
# %rbp = limit (length - 7), %rax = element base.
# .LC0 is defined outside this listing (presumably 1.0f -- TODO confirm).
unroll8x4a_combine:
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	subq	$8, %rsp		# padding: keeps %rsp 16-byte aligned at the calls
	movq	%rdi, %r13
	movq	%rsi, %r12
	call	vec_length
	movq	%rax, %rbx		# length
	leaq	-7(%rax), %rbp		# limit = length - 7
	movq	%r13, %rdi
	call	get_vec_start		# %rax = base address of the elements
	testq	%rbp, %rbp
	jle	.L197			# limit <= 0: skip the unrolled loop
	movq	%rax, %rdx		# pointer cursor = base
	movss	.LC0(%rip), %xmm1	# all four accumulators = initial value
	movaps	%xmm1, %xmm2
	movaps	%xmm1, %xmm3
	movaps	%xmm1, %xmm0
	movl	$0, %ecx		# i = 0
.L194:
	mulss	(%rdx), %xmm0		# first half of the group: one element each
	mulss	4(%rdx), %xmm3
	mulss	8(%rdx), %xmm2
	mulss	12(%rdx), %xmm1
	mulss	16(%rdx), %xmm0		# second half: same accumulator order
	mulss	20(%rdx), %xmm3
	mulss	24(%rdx), %xmm2
	mulss	28(%rdx), %xmm1
	addq	$8, %rcx		# index and pointer advance together
	addq	$32, %rdx
	cmpq	%rcx, %rbp
	jg	.L194			# continue while limit > i
	jmp	.L193
.L197:
	movss	.LC0(%rip), %xmm1	# short-vector path: same initial state
	movaps	%xmm1, %xmm2
	movaps	%xmm1, %xmm3
	movaps	%xmm1, %xmm0
	movl	$0, %ecx
.L193:
	cmpq	%rcx, %rbx
	jle	.L195			# no leftover elements
.L196:
	mulss	(%rax,%rcx,4), %xmm0	# leftover elements go into acc0
	addq	$1, %rcx
	cmpq	%rbx, %rcx
	jne	.L196
.L195:
	mulss	%xmm3, %xmm0		# merge chain: acc0 * acc3
	mulss	%xmm0, %xmm2		# then times acc2
	mulss	%xmm2, %xmm1		# then times acc1; result lands in %xmm1
	movss	%xmm1, (%r12)		# *dest = merged product
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	ret

# unroll12x6a_combine: product of the vector elements, main loop unrolled by
# twelve with six accumulators. Each accumulator takes two elements that are
# six positions apart: %xmm0 gets offsets 0 and 6, %xmm5 gets 1 and 7,
# %xmm4 gets 2 and 8, %xmm3 gets 3 and 9, %xmm2 gets 4 and 10, %xmm1 gets
# 5 and 11. Leftover elements go into %xmm0; the merged product is stored
# to *dest (%rsi).
# The main loop advances both an index (%rcx) and a pointer (%rdx).
# Register roles: %r13 = vector handle, %r12 = dest, %rbx = length,
# %rbp = limit (length - 11), %rax = element base.
# .LC0 is defined outside this listing (presumably 1.0f -- TODO confirm).
unroll12x6a_combine:
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	subq	$8, %rsp		# padding: keeps %rsp 16-byte aligned at the calls
	movq	%rdi, %r13
	movq	%rsi, %r12
	call	vec_length
	movq	%rax, %rbx		# length
	leaq	-11(%rax), %rbp		# limit = length - 11
	movq	%r13, %rdi
	call	get_vec_start		# %rax = base address of the elements
	testq	%rbp, %rbp
	jle	.L204			# limit <= 0: skip the unrolled loop
	movq	%rax, %rdx		# pointer cursor = base
	movss	.LC0(%rip), %xmm1	# all six accumulators = initial value
	movaps	%xmm1, %xmm2
	movaps	%xmm1, %xmm3
	movaps	%xmm1, %xmm4
	movaps	%xmm1, %xmm5
	movaps	%xmm1, %xmm0
	movl	$0, %ecx		# i = 0
.L201:
	mulss	(%rdx), %xmm0		# acc0: offsets 0 and 6
	mulss	24(%rdx), %xmm0
	mulss	4(%rdx), %xmm5		# acc5: offsets 1 and 7
	mulss	28(%rdx), %xmm5
	mulss	8(%rdx), %xmm4		# acc4: offsets 2 and 8
	mulss	32(%rdx), %xmm4
	mulss	12(%rdx), %xmm3		# acc3: offsets 3 and 9
	mulss	36(%rdx), %xmm3
	mulss	16(%rdx), %xmm2		# acc2: offsets 4 and 10
	mulss	40(%rdx), %xmm2
	mulss	20(%rdx), %xmm1		# acc1: offsets 5 and 11
	mulss	44(%rdx), %xmm1
	addq	$12, %rcx		# index and pointer advance together
	addq	$48, %rdx
	cmpq	%rcx, %rbp
	jg	.L201			# continue while limit > i
	jmp	.L200
.L204:
	movss	.LC0(%rip), %xmm1	# short-vector path: same initial state
	movaps	%xmm1, %xmm2
	movaps	%xmm1, %xmm3
	movaps	%xmm1, %xmm4
	movaps	%xmm1, %xmm5
	movaps	%xmm1, %xmm0
	movl	$0, %ecx
.L200:
	cmpq	%rcx, %rbx
	jle	.L202			# no leftover elements
.L203:
	mulss	(%rax,%rcx,4), %xmm0	# leftover elements go into acc0
	addq	$1, %rcx
	cmpq	%rbx, %rcx
	jne	.L203
.L202:
	mulss	%xmm5, %xmm0		# Merge: ((acc0 * acc5) * (acc3 * acc4))
	mulss	%xmm4, %xmm3		#        * (acc2 * acc1)
	mulss	%xmm3, %xmm0
	mulss	%xmm1, %xmm2
	movaps	%xmm0, %xmm1
	mulss	%xmm2, %xmm1		# final product lands in %xmm1
	movss	%xmm1, (%r12)		# *dest = merged product
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	ret

# unroll12x12a_combine: product of the vector elements, main loop unrolled
# by twelve with twelve accumulators (%xmm0 .. %xmm11), one element per
# accumulator per iteration. Offsets 0..5 feed %xmm0, %xmm11, %xmm10, %xmm9,
# %xmm8, %xmm7; offsets 6..11 feed %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1.
# Leftover elements go into %xmm0; the merged product is stored to *dest.
# The main loop advances both an index (%rcx) and a pointer (%rdx).
# Register roles: %r13 = vector handle, %r12 = dest, %rbx = length,
# %rbp = limit (length - 11), %rax = element base.
# .LC0 is defined outside this listing (presumably 1.0f -- TODO confirm).
unroll12x12a_combine:
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	subq	$8, %rsp		# padding: keeps %rsp 16-byte aligned at the calls
	movq	%rdi, %r13
	movq	%rsi, %r12
	call	vec_length
	movq	%rax, %rbx		# length
	leaq	-11(%rax), %rbp		# limit = length - 11
	movq	%r13, %rdi
	call	get_vec_start		# %rax = base address of the elements
	testq	%rbp, %rbp
	jle	.L211			# limit <= 0: skip the unrolled loop
	movq	%rax, %rdx		# pointer cursor = base
	movss	.LC0(%rip), %xmm1	# all twelve accumulators = initial value
	movaps	%xmm1, %xmm2
	movaps	%xmm1, %xmm3
	movaps	%xmm1, %xmm4
	movaps	%xmm1, %xmm5
	movaps	%xmm1, %xmm6
	movaps	%xmm1, %xmm7
	movaps	%xmm1, %xmm8
	movaps	%xmm1, %xmm9
	movaps	%xmm1, %xmm10
	movaps	%xmm1, %xmm11
	movaps	%xmm1, %xmm0
	movl	$0, %ecx		# i = 0
.L208:
	mulss	(%rdx), %xmm0		# one element per accumulator; the two
	mulss	24(%rdx), %xmm6		# halves of the group are interleaved
	mulss	4(%rdx), %xmm11
	mulss	28(%rdx), %xmm5
	mulss	8(%rdx), %xmm10
	mulss	32(%rdx), %xmm4
	mulss	12(%rdx), %xmm9
	mulss	36(%rdx), %xmm3
	mulss	16(%rdx), %xmm8
	mulss	40(%rdx), %xmm2
	mulss	20(%rdx), %xmm7
	mulss	44(%rdx), %xmm1
	addq	$12, %rcx		# index and pointer advance together
	addq	$48, %rdx
	cmpq	%rcx, %rbp
	jg	.L208			# continue while limit > i
	jmp	.L207
.L211:
	movss	.LC0(%rip), %xmm1	# short-vector path: same initial state
	movaps	%xmm1, %xmm2
	movaps	%xmm1, %xmm3
	movaps	%xmm1, %xmm4
	movaps	%xmm1, %xmm5
	movaps	%xmm1, %xmm6
	movaps	%xmm1, %xmm7
	movaps	%xmm1, %xmm8
	movaps	%xmm1, %xmm9
	movaps	%xmm1, %xmm10
	movaps	%xmm1, %xmm11
	movaps	%xmm1, %xmm0
	movl	$0, %ecx
.L207:
	cmpq	%rcx, %rbx
	jle	.L209			# no leftover elements
.L210:
	mulss	(%rax,%rcx,4), %xmm0	# leftover elements go into acc0
	addq	$1, %rcx
	cmpq	%rbx, %rcx
	jne	.L210
.L209:
	mulss	%xmm11, %xmm0		# Merge: pair products are folded one after
	mulss	%xmm10, %xmm9		# another into a running result that moves
	mulss	%xmm9, %xmm0		# from %xmm0 to %xmm7, %xmm5 and %xmm3.
	mulss	%xmm7, %xmm8
	movaps	%xmm0, %xmm7
	mulss	%xmm8, %xmm7
	mulss	%xmm5, %xmm6
	movaps	%xmm7, %xmm5
	mulss	%xmm6, %xmm5
	mulss	%xmm3, %xmm4
	movaps	%xmm5, %xmm3
	mulss	%xmm4, %xmm3
	mulss	%xmm1, %xmm2
	mulss	%xmm2, %xmm3		# final product lands in %xmm3
	movss	%xmm3, (%r12)		# *dest = merged product
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	ret

# unroll16x16a_combine: product of the vector elements, main loop unrolled
# by sixteen with sixteen accumulators (%xmm0 .. %xmm15), one element per
# accumulator per iteration. Leftover elements go into %xmm0; the merged
# product is stored to *dest (%rsi).
# The main loop advances both an index (%rcx) and a pointer (%rdx).
# Register roles: %r13 = vector handle, %r12 = dest, %rbx = length,
# %rbp = limit (length - 15), %rax = element base.
# .LC0 is defined outside this listing (presumably 1.0f -- TODO confirm).
unroll16x16a_combine:
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	subq	$8, %rsp		# padding: keeps %rsp 16-byte aligned at the calls
	movq	%rdi, %r13
	movq	%rsi, %r12
	call	vec_length
	movq	%rax, %rbx		# length
	leaq	-15(%rax), %rbp		# limit = length - 15
	movq	%r13, %rdi
	call	get_vec_start		# %rax = base address of the elements
	testq	%rbp, %rbp
	jle	.L218			# limit <= 0: skip the unrolled loop
	movq	%rax, %rdx		# pointer cursor = base
	movss	.LC0(%rip), %xmm1	# all sixteen accumulators = initial value
	movaps	%xmm1, %xmm2
	movaps	%xmm1, %xmm3
	movaps	%xmm1, %xmm4
	movaps	%xmm1, %xmm5
	movaps	%xmm1, %xmm6
	movaps	%xmm1, %xmm7
	movaps	%xmm1, %xmm8
	movaps	%xmm1, %xmm9
	movaps	%xmm1, %xmm10
	movaps	%xmm1, %xmm11
	movaps	%xmm1, %xmm12
	movaps	%xmm1, %xmm13
	movaps	%xmm1, %xmm14
	movaps	%xmm1, %xmm15
	movaps	%xmm1, %xmm0
	movl	$0, %ecx		# i = 0
.L215:
	mulss	(%rdx), %xmm0		# one element per accumulator; offsets 0..5
	mulss	24(%rdx), %xmm10	# are interleaved with offsets 6..11
	mulss	4(%rdx), %xmm15
	mulss	28(%rdx), %xmm9
	mulss	8(%rdx), %xmm14
	mulss	32(%rdx), %xmm8
	mulss	12(%rdx), %xmm13
	mulss	36(%rdx), %xmm7
	mulss	16(%rdx), %xmm12
	mulss	40(%rdx), %xmm6
	mulss	20(%rdx), %xmm11
	mulss	44(%rdx), %xmm5
	mulss	48(%rdx), %xmm4		# offsets 12..15 follow in order
	mulss	52(%rdx), %xmm3
	mulss	56(%rdx), %xmm2
	mulss	60(%rdx), %xmm1
	addq	$16, %rcx		# index and pointer advance together
	addq	$64, %rdx
	cmpq	%rcx, %rbp
	jg	.L215			# continue while limit > i
	jmp	.L214
.L218:
	movss	.LC0(%rip), %xmm1	# short-vector path: same initial state
	movaps	%xmm1, %xmm2
	movaps	%xmm1, %xmm3
	movaps	%xmm1, %xmm4
	movaps	%xmm1, %xmm5
	movaps	%xmm1, %xmm6
	movaps	%xmm1, %xmm7
	movaps	%xmm1, %xmm8
	movaps	%xmm1, %xmm9
	movaps	%xmm1, %xmm10
	movaps	%xmm1, %xmm11
	movaps	%xmm1, %xmm12
	movaps	%xmm1, %xmm13
	movaps	%xmm1, %xmm14
	movaps	%xmm1, %xmm15
	movaps	%xmm1, %xmm0
	movl	$0, %ecx
.L214:
	cmpq	%rcx, %rbx
	jle	.L216			# no leftover elements
.L217:
	mulss	(%rax,%rcx,4), %xmm0	# leftover elements go into acc0
	addq	$1, %rcx
	cmpq	%rbx, %rcx
	jne	.L217
.L216:
	mulss	%xmm15, %xmm0		# Merge, group A: %xmm0 collects accumulators
	mulss	%xmm14, %xmm13		# 0, 11, 12, 13, 14 and 15.
	mulss	%xmm13, %xmm0
	mulss	%xmm11, %xmm12
	mulss	%xmm12, %xmm0
	mulss	%xmm10, %xmm9		# Group B: %xmm5 collects accumulators
	mulss	%xmm8, %xmm7		# 5 through 10.
	mulss	%xmm9, %xmm7
	mulss	%xmm5, %xmm6
	movaps	%xmm7, %xmm5
	mulss	%xmm6, %xmm5
	mulss	%xmm0, %xmm5		# fold group A into %xmm5
	mulss	%xmm4, %xmm3		# Group C: %xmm3 collects accumulators
	mulss	%xmm2, %xmm1		# 1 through 4.
	mulss	%xmm1, %xmm3
	mulss	%xmm3, %xmm5		# final product lands in %xmm5
	movss	%xmm5, (%r12)		# *dest = merged product
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	ret

# unroll20x20a_combine: product of the vector elements, main loop unrolled
# by twenty with twenty accumulators. Fifteen accumulators live in %xmm1 ..
# %xmm15; the other five are spilled to the stack slots 12(%rsp), 16(%rsp),
# 20(%rsp), 24(%rsp) and 28(%rsp), with %xmm0 used as scratch to update them
# inside the main loop. After the main loop %xmm0 holds the accumulator from
# 28(%rsp) and also receives the leftover elements. The merged product is
# stored to *dest (%rsi).
# The main loop advances both an index (%rcx) and a pointer (%rdx).
# Register roles: %r13 = vector handle, %r12 = dest, %rbx = length,
# %rbp = limit (length - 19), %rax = element base.
# .LC0 is defined outside this listing (presumably 1.0f -- TODO confirm).
unroll20x20a_combine:
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	subq	$40, %rsp		# spill slots at 12..28(%rsp) plus padding
	movq	%rdi, %r13
	movq	%rsi, %r12
	call	vec_length
	movq	%rax, %rbx		# length
	leaq	-19(%rax), %rbp		# limit = length - 19
	movq	%r13, %rdi
	call	get_vec_start		# %rax = base address of the elements
	testq	%rbp, %rbp
	jle	.L225			# limit <= 0: skip the unrolled loop
	movq	%rax, %rdx		# pointer cursor = base
	movss	.LC0(%rip), %xmm1	# every accumulator = initial value
	movaps	%xmm1, %xmm2
	movaps	%xmm1, %xmm3
	movaps	%xmm1, %xmm4
	movaps	%xmm1, %xmm5
	movaps	%xmm1, %xmm6
	movaps	%xmm1, %xmm7
	movaps	%xmm1, %xmm8
	movaps	%xmm1, %xmm9
	movaps	%xmm1, %xmm10
	movss	%xmm1, 24(%rsp)		# spilled accumulators start at the same value
	movss	%xmm1, 20(%rsp)
	movss	%xmm1, 16(%rsp)
	movss	%xmm1, 12(%rsp)
	movaps	%xmm1, %xmm11
	movaps	%xmm1, %xmm12
	movaps	%xmm1, %xmm13
	movaps	%xmm1, %xmm14
	movaps	%xmm1, %xmm15
	movl	$0, %ecx		# i = 0
	movss	%xmm1, 28(%rsp)		# fifth spilled accumulator (offset 0's)
.L222:
	movss	28(%rsp), %xmm0		# spilled accumulator for offset 0:
	mulss	(%rdx), %xmm0		# load, multiply, store back
	movss	%xmm0, 28(%rsp)
	movss	12(%rsp), %xmm0		# spilled accumulator for offset 6
	mulss	24(%rdx), %xmm0
	movss	%xmm0, 12(%rsp)
	mulss	4(%rdx), %xmm15
	movss	16(%rsp), %xmm0		# spilled accumulator for offset 7
	mulss	28(%rdx), %xmm0
	movss	%xmm0, 16(%rsp)
	mulss	8(%rdx), %xmm14
	movss	20(%rsp), %xmm0		# spilled accumulator for offset 8
	mulss	32(%rdx), %xmm0
	movss	%xmm0, 20(%rsp)
	mulss	12(%rdx), %xmm13
	movss	24(%rsp), %xmm0		# spilled accumulator for offset 9
	mulss	36(%rdx), %xmm0
	movss	%xmm0, 24(%rsp)
	mulss	16(%rdx), %xmm12
	mulss	40(%rdx), %xmm10
	mulss	20(%rdx), %xmm11
	mulss	44(%rdx), %xmm9
	mulss	48(%rdx), %xmm8		# offsets 12..19 use register accumulators
	mulss	52(%rdx), %xmm7
	mulss	56(%rdx), %xmm6
	mulss	60(%rdx), %xmm5
	mulss	64(%rdx), %xmm4
	mulss	68(%rdx), %xmm3
	mulss	72(%rdx), %xmm2
	mulss	76(%rdx), %xmm1
	addq	$20, %rcx		# index and pointer advance together
	addq	$80, %rdx
	cmpq	%rcx, %rbp
	jg	.L222			# continue while limit > i
	movss	28(%rsp), %xmm0		# bring offset 0's accumulator into %xmm0
	jmp	.L221
.L225:
	movss	.LC0(%rip), %xmm1	# short-vector path: same initial state
	movaps	%xmm1, %xmm2
	movaps	%xmm1, %xmm3
	movaps	%xmm1, %xmm4
	movaps	%xmm1, %xmm5
	movaps	%xmm1, %xmm6
	movaps	%xmm1, %xmm7
	movaps	%xmm1, %xmm8
	movaps	%xmm1, %xmm9
	movaps	%xmm1, %xmm10
	movss	%xmm1, 24(%rsp)
	movss	%xmm1, 20(%rsp)
	movss	%xmm1, 16(%rsp)
	movss	%xmm1, 12(%rsp)
	movaps	%xmm1, %xmm11
	movaps	%xmm1, %xmm12
	movaps	%xmm1, %xmm13
	movaps	%xmm1, %xmm14
	movaps	%xmm1, %xmm15
	movaps	%xmm1, %xmm0		# here %xmm0 is set directly, not via 28(%rsp)
	movl	$0, %ecx
.L221:
	cmpq	%rcx, %rbx
	jle	.L223			# no leftover elements
.L224:
	mulss	(%rax,%rcx,4), %xmm0	# leftover elements go into %xmm0
	addq	$1, %rcx
	cmpq	%rbx, %rcx
	jne	.L224
.L223:
	mulss	%xmm15, %xmm0		# Merge, group A: %xmm0 collects its own value
	mulss	%xmm14, %xmm13		# and accumulators 11 through 15.
	mulss	%xmm13, %xmm0
	mulss	%xmm12, %xmm11
	mulss	%xmm11, %xmm0
	movss	12(%rsp), %xmm12	# Group B: the four remaining spilled
	mulss	16(%rsp), %xmm12	# accumulators, then accumulators 9 and 10.
	movss	20(%rsp), %xmm11
	mulss	24(%rsp), %xmm11
	mulss	%xmm12, %xmm11
	mulss	%xmm9, %xmm10
	movaps	%xmm11, %xmm9
	mulss	%xmm10, %xmm9
	mulss	%xmm9, %xmm0		# fold group B into %xmm0
	mulss	%xmm8, %xmm7		# Group C: accumulators 1 through 8
	mulss	%xmm6, %xmm5		# collected into %xmm1.
	mulss	%xmm7, %xmm5
	mulss	%xmm4, %xmm3
	mulss	%xmm2, %xmm1
	mulss	%xmm3, %xmm1
	mulss	%xmm5, %xmm1
	mulss	%xmm1, %xmm0		# final product lands in %xmm0
	movss	%xmm0, (%r12)		# *dest = merged product
	addq	$40, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	ret

# unroll5x5a_combine: product of the vector elements, main loop unrolled by
# five with five accumulators: offsets 0..4 feed %xmm0, %xmm4, %xmm3, %xmm2,
# %xmm1. Leftover elements go into %xmm0; the merge is
# ((%xmm2 * %xmm3) * %xmm1) * (%xmm0 * %xmm4), stored to *dest (%rsi).
# The main loop advances both an index (%rdx) and a pointer (%rcx).
# Register roles: %r13 = vector handle, %r12 = dest, %rbx = length,
# %rbp = limit (length - 4), %rax = element base.
# .LC0 is defined outside this listing (presumably 1.0f -- TODO confirm).
unroll5x5a_combine:
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	subq	$8, %rsp		# padding: keeps %rsp 16-byte aligned at the calls
	movq	%rdi, %r13
	movq	%rsi, %r12
	call	vec_length
	movq	%rax, %rbx		# length
	leaq	-4(%rax), %rbp		# limit = length - 4
	movq	%r13, %rdi
	call	get_vec_start		# %rax = base address of the elements
	testq	%rbp, %rbp
	jle	.L232			# limit <= 0: skip the unrolled loop
	movq	%rax, %rcx		# pointer cursor = base
	movss	.LC0(%rip), %xmm1	# all five accumulators = initial value
	movaps	%xmm1, %xmm2
	movaps	%xmm1, %xmm3
	movaps	%xmm1, %xmm4
	movaps	%xmm1, %xmm0
	movl	$0, %edx		# i = 0
.L229:
	mulss	(%rcx), %xmm0		# one element per accumulator
	mulss	4(%rcx), %xmm4
	mulss	8(%rcx), %xmm3
	mulss	12(%rcx), %xmm2
	mulss	16(%rcx), %xmm1
	addq	$5, %rdx		# index and pointer advance together
	addq	$20, %rcx
	cmpq	%rdx, %rbp
	jg	.L229			# continue while limit > i
	jmp	.L228
.L232:
	movss	.LC0(%rip), %xmm1	# short-vector path: same initial state
	movaps	%xmm1, %xmm2
	movaps	%xmm1, %xmm3
	movaps	%xmm1, %xmm4
	movaps	%xmm1, %xmm0
	movl	$0, %edx
.L228:
	cmpq	%rdx, %rbx
	jle	.L230			# no leftover elements
.L231:
	mulss	(%rax,%rdx,4), %xmm0	# leftover elements go into acc0
	addq	$1, %rdx
	cmpq	%rbx, %rdx
	jne	.L231
.L230:
	mulss	%xmm4, %xmm0		# merge step: acc0 * acc4
	mulss	%xmm3, %xmm2		# merge step: acc2 * acc3
	mulss	%xmm2, %xmm1		# merge step: times acc1
	mulss	%xmm0, %xmm1		# final product lands in %xmm1
	movss	%xmm1, (%r12)		# *dest = merged product
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	ret

# unroll6x6a_combine: product of the vector elements, main loop unrolled by
# six with six accumulators: offsets 0..5 feed %xmm0, %xmm5, %xmm4, %xmm3,
# %xmm2, %xmm1. Leftover elements go into %xmm0; the merged product is
# stored to *dest (%rsi).
# The main loop advances both an index (%rcx) and a pointer (%rdx).
# Register roles: %r13 = vector handle, %r12 = dest, %rbx = length,
# %rbp = limit (length - 5), %rax = element base.
# .LC0 is defined outside this listing (presumably 1.0f -- TODO confirm).
unroll6x6a_combine:
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	subq	$8, %rsp		# padding: keeps %rsp 16-byte aligned at the calls
	movq	%rdi, %r13
	movq	%rsi, %r12
	call	vec_length
	movq	%rax, %rbx		# length
	leaq	-5(%rax), %rbp		# limit = length - 5
	movq	%r13, %rdi
	call	get_vec_start		# %rax = base address of the elements
	testq	%rbp, %rbp
	jle	.L239			# limit <= 0: skip the unrolled loop
	movq	%rax, %rdx		# pointer cursor = base
	movss	.LC0(%rip), %xmm1	# all six accumulators = initial value
	movaps	%xmm1, %xmm2
	movaps	%xmm1, %xmm3
	movaps	%xmm1, %xmm4
	movaps	%xmm1, %xmm5
	movaps	%xmm1, %xmm0
	movl	$0, %ecx		# i = 0
.L236:
	mulss	(%rdx), %xmm0		# one element per accumulator
	mulss	4(%rdx), %xmm5
	mulss	8(%rdx), %xmm4
	mulss	12(%rdx), %xmm3
	mulss	16(%rdx), %xmm2
	mulss	20(%rdx), %xmm1
	addq	$6, %rcx		# index and pointer advance together
	addq	$24, %rdx
	cmpq	%rcx, %rbp
	jg	.L236			# continue while limit > i
	jmp	.L235
.L239:
	movss	.LC0(%rip), %xmm1	# short-vector path: same initial state
	movaps	%xmm1, %xmm2
	movaps	%xmm1, %xmm3
	movaps	%xmm1, %xmm4
	movaps	%xmm1, %xmm5
	movaps	%xmm1, %xmm0
	movl	$0, %ecx
.L235:
	cmpq	%rcx, %rbx
	jle	.L237			# no leftover elements
.L238:
	mulss	(%rax,%rcx,4), %xmm0	# leftover elements go into acc0
	addq	$1, %rcx
	cmpq	%rbx, %rcx
	jne	.L238
.L237:
	mulss	%xmm5, %xmm0		# Merge: ((acc0 * acc5) * (acc3 * acc4))
	mulss	%xmm4, %xmm3		#        * (acc2 * acc1)
	mulss	%xmm3, %xmm0
	mulss	%xmm1, %xmm2
	movaps	%xmm0, %xmm1
	mulss	%xmm2, %xmm1		# final product lands in %xmm1
	movss	%xmm1, (%r12)		# *dest = merged product
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	ret

# unroll7x7a_combine: product of the vector elements, main loop unrolled by
# seven with seven accumulators: offsets 0..6 feed %xmm0, %xmm6, %xmm5,
# %xmm4, %xmm3, %xmm2, %xmm1. Leftover elements go into %xmm0; the merged
# product is stored to *dest (%rsi).
# The main loop advances both an index (%rcx) and a pointer (%rdx).
# Register roles: %r13 = vector handle, %r12 = dest, %rbx = length,
# %rbp = limit (length - 6), %rax = element base.
# .LC0 is defined outside this listing (presumably 1.0f -- TODO confirm).
unroll7x7a_combine:
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	subq	$8, %rsp		# padding: keeps %rsp 16-byte aligned at the calls
	movq	%rdi, %r13
	movq	%rsi, %r12
	call	vec_length
	movq	%rax, %rbx		# length
	leaq	-6(%rax), %rbp		# limit = length - 6
	movq	%r13, %rdi
	call	get_vec_start		# %rax = base address of the elements
	testq	%rbp, %rbp
	jle	.L246			# limit <= 0: skip the unrolled loop
	movq	%rax, %rdx		# pointer cursor = base
	movss	.LC0(%rip), %xmm1	# all seven accumulators = initial value
	movaps	%xmm1, %xmm2
	movaps	%xmm1, %xmm3
	movaps	%xmm1, %xmm4
	movaps	%xmm1, %xmm5
	movaps	%xmm1, %xmm6
	movaps	%xmm1, %xmm0
	movl	$0, %ecx		# i = 0
.L243:
	mulss	(%rdx), %xmm0		# one element per accumulator
	mulss	4(%rdx), %xmm6
	mulss	8(%rdx), %xmm5
	mulss	12(%rdx), %xmm4
	mulss	16(%rdx), %xmm3
	mulss	20(%rdx), %xmm2
	mulss	24(%rdx), %xmm1
	addq	$7, %rcx		# index and pointer advance together
	addq	$28, %rdx
	cmpq	%rcx, %rbp
	jg	.L243			# continue while limit > i
	jmp	.L242
.L246:
	movss	.LC0(%rip), %xmm1	# short-vector path: same initial state
	movaps	%xmm1, %xmm2
	movaps	%xmm1, %xmm3
	movaps	%xmm1, %xmm4
	movaps	%xmm1, %xmm5
	movaps	%xmm1, %xmm6
	movaps	%xmm1, %xmm0
	movl	$0, %ecx
.L242:
	cmpq	%rcx, %rbx
	jle	.L244			# no leftover elements
.L245:
	mulss	(%rax,%rcx,4), %xmm0	# leftover elements go into acc0
	addq	$1, %rcx
	cmpq	%rbx, %rcx
	jne	.L245
.L244:
	mulss	%xmm6, %xmm0		# Merge: (acc0 * acc6) * (acc4 * acc5) is
	mulss	%xmm5, %xmm4		# built in %xmm0, (acc2 * acc3) * acc1 in
	mulss	%xmm4, %xmm0		# %xmm1, then the two are multiplied.
	mulss	%xmm3, %xmm2
	mulss	%xmm2, %xmm1
	mulss	%xmm0, %xmm1		# final product lands in %xmm1
	movss	%xmm1, (%r12)		# *dest = merged product
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	ret

# unroll8x8a_combine(v: %rdi, dest: %rsi)
# Product of all floats in the vector, written to *dest.  Eight independent
# accumulators (%xmm0-%xmm7) take one element each per main-loop pass;
# leftovers are multiplied into %xmm0 and the partial products are then folded
# in a fixed order.  .LC0 is presumably 1.0f (defined outside this block).
# Roles: %r13 = v, %r12 = dest, %rbx = length, %rbp = length - 7.
unroll8x8a_combine:
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	subq	$8, %rsp		# keeps %rsp 16-byte aligned at the calls
	movq	%rsi, %r12
	movq	%rdi, %r13
	call	vec_length
	leaq	-7(%rax), %rbp		# main loop runs while i < length - 7
	movq	%rax, %rbx
	movq	%r13, %rdi
	call	get_vec_start		# %rax = base of the float data
	movss	.LC0(%rip), %xmm0	# seed all eight accumulators
	movaps	%xmm0, %xmm1
	movaps	%xmm0, %xmm2
	movaps	%xmm0, %xmm3
	movaps	%xmm0, %xmm4
	movaps	%xmm0, %xmm5
	movaps	%xmm0, %xmm6
	movaps	%xmm0, %xmm7
	xorl	%ecx, %ecx		# i = 0
	testq	%rbp, %rbp
	jle	.Lu8x8a_tail_chk
	movq	%rax, %rdx		# %rdx walks the data, 32 bytes per pass
.Lu8x8a_main:
	mulss	(%rdx), %xmm0
	mulss	4(%rdx), %xmm7
	mulss	8(%rdx), %xmm6
	mulss	12(%rdx), %xmm5
	mulss	16(%rdx), %xmm4
	mulss	20(%rdx), %xmm3
	mulss	24(%rdx), %xmm2
	mulss	28(%rdx), %xmm1
	addq	$8, %rcx
	addq	$32, %rdx
	cmpq	%rbp, %rcx
	jl	.Lu8x8a_main
.Lu8x8a_tail_chk:
	cmpq	%rbx, %rcx
	jge	.Lu8x8a_fold
.Lu8x8a_tail:
	mulss	(%rax,%rcx,4), %xmm0	# leftovers, one element at a time
	addq	$1, %rcx
	cmpq	%rbx, %rcx
	jne	.Lu8x8a_tail
.Lu8x8a_fold:
	# Fold order is significant for the float result; do not reorder.
	mulss	%xmm7, %xmm0
	mulss	%xmm6, %xmm5
	mulss	%xmm5, %xmm0
	mulss	%xmm4, %xmm3
	mulss	%xmm2, %xmm1
	mulss	%xmm3, %xmm1
	mulss	%xmm1, %xmm0
	movss	%xmm0, (%r12)
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	ret

# unroll9x9a_combine(v: %rdi, dest: %rsi)
# Product of all floats in the vector, written to *dest.  Nine independent
# accumulators (%xmm0-%xmm8) take one element each per main-loop pass;
# leftovers are multiplied into %xmm0 and the partial products are then folded
# in a fixed order.  .LC0 is presumably 1.0f (defined outside this block).
# Roles: %r13 = v, %r12 = dest, %rbx = length, %rbp = length - 8.
unroll9x9a_combine:
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	subq	$8, %rsp		# keeps %rsp 16-byte aligned at the calls
	movq	%rsi, %r12
	movq	%rdi, %r13
	call	vec_length
	leaq	-8(%rax), %rbp		# main loop runs while i < length - 8
	movq	%rax, %rbx
	movq	%r13, %rdi
	call	get_vec_start		# %rax = base of the float data
	movss	.LC0(%rip), %xmm0	# seed all nine accumulators
	movaps	%xmm0, %xmm1
	movaps	%xmm0, %xmm2
	movaps	%xmm0, %xmm3
	movaps	%xmm0, %xmm4
	movaps	%xmm0, %xmm5
	movaps	%xmm0, %xmm6
	movaps	%xmm0, %xmm7
	movaps	%xmm0, %xmm8
	xorl	%ecx, %ecx		# i = 0
	testq	%rbp, %rbp
	jle	.Lu9x9a_tail_chk
	movq	%rax, %rdx		# %rdx walks the data, 36 bytes per pass
.Lu9x9a_main:
	mulss	(%rdx), %xmm0
	mulss	4(%rdx), %xmm8
	mulss	8(%rdx), %xmm7
	mulss	12(%rdx), %xmm6
	mulss	16(%rdx), %xmm5
	mulss	20(%rdx), %xmm4
	mulss	24(%rdx), %xmm3
	mulss	28(%rdx), %xmm2
	mulss	32(%rdx), %xmm1
	addq	$9, %rcx
	addq	$36, %rdx
	cmpq	%rbp, %rcx
	jl	.Lu9x9a_main
.Lu9x9a_tail_chk:
	cmpq	%rbx, %rcx
	jge	.Lu9x9a_fold
.Lu9x9a_tail:
	mulss	(%rax,%rcx,4), %xmm0	# leftovers, one element at a time
	addq	$1, %rcx
	cmpq	%rbx, %rcx
	jne	.Lu9x9a_tail
.Lu9x9a_fold:
	# Fold order is significant for the float result; do not reorder.
	mulss	%xmm8, %xmm0
	mulss	%xmm7, %xmm6
	mulss	%xmm6, %xmm0
	mulss	%xmm5, %xmm4
	mulss	%xmm3, %xmm2
	mulss	%xmm4, %xmm2
	mulss	%xmm2, %xmm1
	mulss	%xmm0, %xmm1
	movss	%xmm1, (%r12)
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	ret

# unroll10x10a_combine(v: %rdi, dest: %rsi)
# Product of all floats in the vector, written to *dest.  Ten independent
# accumulators (%xmm0-%xmm9) take one element each per main-loop pass;
# leftovers are multiplied into %xmm0 and the partial products are then folded
# in a fixed order.  .LC0 is presumably 1.0f (defined outside this block).
# Roles: %r13 = v, %r12 = dest, %rbx = length, %rbp = length - 9.
unroll10x10a_combine:
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	subq	$8, %rsp		# keeps %rsp 16-byte aligned at the calls
	movq	%rsi, %r12
	movq	%rdi, %r13
	call	vec_length
	leaq	-9(%rax), %rbp		# main loop runs while i < length - 9
	movq	%rax, %rbx
	movq	%r13, %rdi
	call	get_vec_start		# %rax = base of the float data
	movss	.LC0(%rip), %xmm0	# seed all ten accumulators
	movaps	%xmm0, %xmm1
	movaps	%xmm0, %xmm2
	movaps	%xmm0, %xmm3
	movaps	%xmm0, %xmm4
	movaps	%xmm0, %xmm5
	movaps	%xmm0, %xmm6
	movaps	%xmm0, %xmm7
	movaps	%xmm0, %xmm8
	movaps	%xmm0, %xmm9
	xorl	%ecx, %ecx		# i = 0
	testq	%rbp, %rbp
	jle	.Lu10x10a_tail_chk
	movq	%rax, %rdx		# %rdx walks the data, 40 bytes per pass
.Lu10x10a_main:
	mulss	(%rdx), %xmm0
	mulss	4(%rdx), %xmm9
	mulss	8(%rdx), %xmm8
	mulss	12(%rdx), %xmm7
	mulss	16(%rdx), %xmm6
	mulss	20(%rdx), %xmm5
	mulss	24(%rdx), %xmm4
	mulss	28(%rdx), %xmm3
	mulss	32(%rdx), %xmm2
	mulss	36(%rdx), %xmm1
	addq	$10, %rcx
	addq	$40, %rdx
	cmpq	%rbp, %rcx
	jl	.Lu10x10a_main
.Lu10x10a_tail_chk:
	cmpq	%rbx, %rcx
	jge	.Lu10x10a_fold
.Lu10x10a_tail:
	mulss	(%rax,%rcx,4), %xmm0	# leftovers, one element at a time
	addq	$1, %rcx
	cmpq	%rbx, %rcx
	jne	.Lu10x10a_tail
.Lu10x10a_fold:
	# Fold order is significant for the float result; do not reorder.
	mulss	%xmm9, %xmm0
	mulss	%xmm8, %xmm7
	mulss	%xmm7, %xmm0
	mulss	%xmm6, %xmm5
	mulss	%xmm4, %xmm3
	mulss	%xmm5, %xmm3
	mulss	%xmm3, %xmm0
	mulss	%xmm1, %xmm2
	movaps	%xmm0, %xmm1
	mulss	%xmm2, %xmm1
	movss	%xmm1, (%r12)
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	ret

# unrollx2as_combine(v: %rdi, dest: %rsi)
# Product of all floats in the vector, written to *dest.  The data is split
# into two halves of length/2 elements that are walked in lockstep, one
# accumulator per half (%xmm1 = first half, %xmm0 = second half).  Any element
# at index >= 2*(length/2) is folded into %xmm0 afterwards.
# .LC0 is presumably 1.0f (defined outside this block).
# Roles: %r14 = v, %r12 = dest, %rbx = length, %r13 = length / 2.
unrollx2as_combine:
	pushq	%r14
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx			# five pushes leave %rsp 16-byte aligned
	movq	%rsi, %r12
	movq	%rdi, %r14
	call	vec_length
	movq	%rax, %r13
	movq	%rax, %rbx
	shrq	$63, %r13		# sign bit: bias so the shift rounds toward 0
	addq	%rax, %r13
	sarq	%r13			# %r13 = length / 2
	movq	%r14, %rdi
	call	get_vec_start
	movq	%rax, %rcx		# %rcx = base of first half
	leaq	(%rax,%r13,4), %rax	# %rax = base of second half
	movss	.LC0(%rip), %xmm0	# seed both accumulators
	movaps	%xmm0, %xmm1
	testq	%r13, %r13
	jle	.Lx2as_tail_setup
	movq	%r13, %rbp		# loop bound
	xorl	%edx, %edx		# i = 0
.Lx2as_halves:
	mulss	(%rcx,%rdx,4), %xmm1
	mulss	(%rax,%rdx,4), %xmm0
	addq	$1, %rdx
	cmpq	%rbp, %rdx
	jne	.Lx2as_halves
.Lx2as_tail_setup:
	leaq	(%r13,%r13), %rdx	# i = 2 * (length / 2)
	cmpq	%rbx, %rdx
	jge	.Lx2as_store
.Lx2as_tail:
	mulss	(%rcx,%rdx,4), %xmm0
	addq	$1, %rdx
	cmpq	%rbx, %rdx
	jne	.Lx2as_tail
.Lx2as_store:
	mulss	%xmm1, %xmm0		# combine the two partial products
	movss	%xmm0, (%r12)
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	popq	%r14
	ret

# unroll8x2_combine(v: %rdi, dest: %rsi)
# Product of all floats in the vector, written to *dest.  Pointer-driven loop
# that consumes eight elements per pass, alternating between two accumulators
# (%xmm0 for even offsets, %xmm1 for odd offsets); leftovers go into %xmm0.
# .LC0 is presumably 1.0f (defined outside this block).
# Roles: %rbx = v, %rbp = dest, %r12 = length.
# NOTE(review): the bounds checks are unsigned pointer compares on
# data + 4*length - 28; this assumes that expression does not wrap (e.g. a NULL
# base with a short vector) -- confirm what get_vec_start returns for an empty
# vector.
unroll8x2_combine:
	pushq	%r12
	pushq	%rbp
	pushq	%rbx			# three pushes leave %rsp 16-byte aligned
	movq	%rsi, %rbp
	movq	%rdi, %rbx
	call	vec_length
	movq	%rax, %r12
	movq	%rbx, %rdi
	call	get_vec_start
	movq	%rax, %rcx		# %rcx = data
	leaq	-28(%rax,%r12,4), %rax	# %rax = data + length - 7 (main-loop end)
	movss	.LC0(%rip), %xmm1	# seed both accumulators
	movaps	%xmm1, %xmm0
	cmpq	%rcx, %rax
	jbe	.Lu8x2_tail_setup
	movq	%rcx, %rdx		# %rdx = running pointer
.Lu8x2_main:
	mulss	(%rdx), %xmm0
	mulss	4(%rdx), %xmm1
	mulss	8(%rdx), %xmm0
	mulss	12(%rdx), %xmm1
	mulss	16(%rdx), %xmm0
	mulss	20(%rdx), %xmm1
	mulss	24(%rdx), %xmm0
	mulss	28(%rdx), %xmm1
	addq	$32, %rdx
	cmpq	%rax, %rdx
	jb	.Lu8x2_main
	# Recompute where the main loop stopped:
	# %rcx = data + 32 * (((end - data - 1) / 32) + 1)
	movq	%rcx, %rdx
	notq	%rdx
	addq	%rax, %rdx
	andq	$-32, %rdx
	leaq	32(%rcx,%rdx), %rcx
.Lu8x2_tail_setup:
	addq	$28, %rax		# %rax = data + length (true end)
	cmpq	%rax, %rcx
	jnb	.Lu8x2_store
.Lu8x2_tail:
	mulss	(%rcx), %xmm0
	addq	$4, %rcx
	cmpq	%rax, %rcx
	jb	.Lu8x2_tail
.Lu8x2_store:
	mulss	%xmm1, %xmm0		# combine the two partial products
	movss	%xmm0, (%rbp)
	popq	%rbx
	popq	%rbp
	popq	%r12
	ret

# unroll9x3_combine(v: %rdi, dest: %rsi)
# Product of all floats in the vector, written to *dest.  Pointer-driven loop
# that consumes nine elements per pass, rotating through three accumulators
# (%xmm0, %xmm2, %xmm1); leftovers go into %xmm0.
# .LC0 is presumably 1.0f (defined outside this block).
# Roles: %rbx = v, %rbp = dest, %r12 = length.
# NOTE(review): the bounds checks are unsigned pointer compares on
# data + 4*length - 32; this assumes that expression does not wrap -- confirm
# what get_vec_start returns for an empty vector.
unroll9x3_combine:
	pushq	%r12
	pushq	%rbp
	pushq	%rbx			# three pushes leave %rsp 16-byte aligned
	movq	%rsi, %rbp
	movq	%rdi, %rbx
	call	vec_length
	movq	%rax, %r12
	movq	%rbx, %rdi
	call	get_vec_start
	movq	%rax, %rdx		# %rdx = running data pointer
	leaq	-32(%rax,%r12,4), %rax	# %rax = data + length - 8 (main-loop end)
	movss	.LC0(%rip), %xmm1	# seed the three accumulators
	movaps	%xmm1, %xmm2
	movaps	%xmm1, %xmm0
	cmpq	%rdx, %rax
	jbe	.Lu9x3_tail_setup
.Lu9x3_main:
	mulss	(%rdx), %xmm0
	mulss	4(%rdx), %xmm2
	mulss	8(%rdx), %xmm1
	mulss	12(%rdx), %xmm0
	mulss	16(%rdx), %xmm2
	mulss	20(%rdx), %xmm1
	mulss	24(%rdx), %xmm0
	mulss	28(%rdx), %xmm2
	mulss	32(%rdx), %xmm1
	addq	$36, %rdx
	cmpq	%rax, %rdx
	jb	.Lu9x3_main
.Lu9x3_tail_setup:
	addq	$32, %rax		# %rax = data + length (true end)
	cmpq	%rax, %rdx
	jnb	.Lu9x3_store
.Lu9x3_tail:
	mulss	(%rdx), %xmm0
	addq	$4, %rdx
	cmpq	%rax, %rdx
	jb	.Lu9x3_tail
.Lu9x3_store:
	mulss	%xmm2, %xmm0		# fold order matters for the float result
	mulss	%xmm0, %xmm1
	movss	%xmm1, (%rbp)
	popq	%rbx
	popq	%rbp
	popq	%r12
	ret

# unroll8x4_combine(v: %rdi, dest: %rsi)
# Product of all floats in the vector, written to *dest.  Pointer-driven loop
# that consumes eight elements per pass, rotating through four accumulators
# (%xmm3, %xmm2, %xmm1, %xmm0); leftovers go into %xmm0.
# .LC0 is presumably 1.0f (defined outside this block).
# Roles: %rbx = v, %rbp = dest, %r12 = length.
# NOTE(review): the bounds checks are unsigned pointer compares on
# data + 4*length - 28; this assumes that expression does not wrap -- confirm
# what get_vec_start returns for an empty vector.
unroll8x4_combine:
	pushq	%r12
	pushq	%rbp
	pushq	%rbx			# three pushes leave %rsp 16-byte aligned
	movq	%rsi, %rbp
	movq	%rdi, %rbx
	call	vec_length
	movq	%rax, %r12
	movq	%rbx, %rdi
	call	get_vec_start
	movq	%rax, %rcx		# %rcx = data
	leaq	-28(%rax,%r12,4), %rax	# %rax = data + length - 7 (main-loop end)
	movss	.LC0(%rip), %xmm0	# seed the four accumulators
	movaps	%xmm0, %xmm1
	movaps	%xmm0, %xmm2
	movaps	%xmm0, %xmm3
	cmpq	%rcx, %rax
	jbe	.Lu8x4_tail_setup
	movq	%rcx, %rdx		# %rdx = running pointer
.Lu8x4_main:
	mulss	(%rdx), %xmm3
	mulss	4(%rdx), %xmm2
	mulss	8(%rdx), %xmm1
	mulss	12(%rdx), %xmm0
	mulss	16(%rdx), %xmm3
	mulss	20(%rdx), %xmm2
	mulss	24(%rdx), %xmm1
	mulss	28(%rdx), %xmm0
	addq	$32, %rdx
	cmpq	%rax, %rdx
	jb	.Lu8x4_main
	# Recompute where the main loop stopped:
	# %rcx = data + 32 * (((end - data - 1) / 32) + 1)
	movq	%rcx, %rdx
	notq	%rdx
	addq	%rax, %rdx
	andq	$-32, %rdx
	leaq	32(%rcx,%rdx), %rcx
.Lu8x4_tail_setup:
	addq	$28, %rax		# %rax = data + length (true end)
	cmpq	%rax, %rcx
	jnb	.Lu8x4_store
.Lu8x4_tail:
	mulss	(%rcx), %xmm0
	addq	$4, %rcx
	cmpq	%rax, %rcx
	jb	.Lu8x4_tail
.Lu8x4_store:
	mulss	%xmm3, %xmm0		# fold order matters for the float result
	mulss	%xmm0, %xmm2
	mulss	%xmm2, %xmm1
	movss	%xmm1, (%rbp)
	popq	%rbx
	popq	%rbp
	popq	%r12
	ret

# unroll8x8_combine(v: %rdi, dest: %rsi)
# Product of all floats in the vector, written to *dest.  Pointer-driven loop
# that consumes eight elements per pass, one per accumulator (%xmm0-%xmm7);
# leftovers go into %xmm0, then the partials are folded as a single chain.
# .LC0 is presumably 1.0f (defined outside this block).
# Roles: %rbx = v, %rbp = dest, %r12 = length.
# NOTE(review): the bounds checks are unsigned pointer compares on
# data + 4*length - 28; this assumes that expression does not wrap -- confirm
# what get_vec_start returns for an empty vector.
unroll8x8_combine:
	pushq	%r12
	pushq	%rbp
	pushq	%rbx			# three pushes leave %rsp 16-byte aligned
	movq	%rsi, %rbp
	movq	%rdi, %rbx
	call	vec_length
	movq	%rax, %r12
	movq	%rbx, %rdi
	call	get_vec_start
	movq	%rax, %rcx		# %rcx = data
	leaq	-28(%rax,%r12,4), %rax	# %rax = data + length - 7 (main-loop end)
	movss	.LC0(%rip), %xmm0	# seed the eight accumulators
	movaps	%xmm0, %xmm1
	movaps	%xmm0, %xmm2
	movaps	%xmm0, %xmm3
	movaps	%xmm0, %xmm4
	movaps	%xmm0, %xmm5
	movaps	%xmm0, %xmm6
	movaps	%xmm0, %xmm7
	cmpq	%rcx, %rax
	jbe	.Lu8x8_tail_setup
	movq	%rcx, %rdx		# %rdx = running pointer
.Lu8x8_main:
	mulss	(%rdx), %xmm0
	mulss	4(%rdx), %xmm7
	mulss	8(%rdx), %xmm6
	mulss	12(%rdx), %xmm5
	mulss	16(%rdx), %xmm4
	mulss	20(%rdx), %xmm3
	mulss	24(%rdx), %xmm2
	mulss	28(%rdx), %xmm1
	addq	$32, %rdx
	cmpq	%rax, %rdx
	jb	.Lu8x8_main
	# Recompute where the main loop stopped:
	# %rcx = data + 32 * (((end - data - 1) / 32) + 1)
	movq	%rcx, %rdx
	notq	%rdx
	addq	%rax, %rdx
	andq	$-32, %rdx
	leaq	32(%rcx,%rdx), %rcx
.Lu8x8_tail_setup:
	addq	$28, %rax		# %rax = data + length (true end)
	cmpq	%rax, %rcx
	jnb	.Lu8x8_store
.Lu8x8_tail:
	mulss	(%rcx), %xmm0
	addq	$4, %rcx
	cmpq	%rax, %rcx
	jb	.Lu8x8_tail
.Lu8x8_store:
	# Serial fold %xmm0 -> %xmm7 -> ... -> %xmm1; order matters for floats.
	mulss	%xmm7, %xmm0
	mulss	%xmm0, %xmm6
	mulss	%xmm6, %xmm5
	mulss	%xmm5, %xmm4
	mulss	%xmm4, %xmm3
	mulss	%xmm3, %xmm2
	mulss	%xmm2, %xmm1
	movss	%xmm1, (%rbp)
	popq	%rbx
	popq	%rbp
	popq	%r12
	ret

# combine7(v: %rdi, dest: %rsi)
# Product of all floats in the vector, written to *dest.  Single accumulator
# %xmm0; each main-loop pass first multiplies two neighbouring elements
# together and then folds that pair product into the accumulator.
# .LC0 is presumably 1.0f (defined outside this block).
# Roles: %r12 = v, %r13 = dest, %rbx = length, %rbp = length - 1.
combine7:
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	subq	$8, %rsp		# keeps %rsp 16-byte aligned at the calls
	movq	%rsi, %r13
	movq	%rdi, %r12
	call	vec_length
	leaq	-1(%rax), %rbp		# main loop runs while i < length - 1
	movq	%rax, %rbx
	movq	%r12, %rdi
	call	get_vec_start		# %rax = base of the float data
	movss	.LC0(%rip), %xmm0	# acc
	xorl	%edx, %edx		# i = 0
	testq	%rbp, %rbp
	jle	.Lc7_tail_chk
.Lc7_pairs:
	movss	(%rax,%rdx,4), %xmm1
	mulss	4(%rax,%rdx,4), %xmm1	# data[i] * data[i+1]
	mulss	%xmm1, %xmm0
	addq	$2, %rdx
	cmpq	%rbp, %rdx
	jl	.Lc7_pairs
.Lc7_tail_chk:
	cmpq	%rbx, %rdx
	jge	.Lc7_store
.Lc7_tail:
	mulss	(%rax,%rdx,4), %xmm0
	addq	$1, %rdx
	cmpq	%rbx, %rdx
	jne	.Lc7_tail
.Lc7_store:
	movss	%xmm0, (%r13)
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	ret

# unroll3aa_combine(v: %rdi, dest: %rsi)
# Product of all floats in the vector, written to *dest.  Single accumulator
# %xmm0; each main-loop pass multiplies three consecutive elements together
# in %xmm1 before folding the result into the accumulator.
# .LC0 is presumably 1.0f (defined outside this block).
# Roles: %r12 = v, %r13 = dest, %rbx = length, %rbp = length - 2.
unroll3aa_combine:
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	subq	$8, %rsp		# keeps %rsp 16-byte aligned at the calls
	movq	%rsi, %r13
	movq	%rdi, %r12
	call	vec_length
	leaq	-2(%rax), %rbp		# main loop runs while i < length - 2
	movq	%rax, %rbx
	movq	%r12, %rdi
	call	get_vec_start		# %rax = base of the float data
	movss	.LC0(%rip), %xmm0	# acc
	xorl	%edx, %edx		# i = 0
	testq	%rbp, %rbp
	jle	.Lu3aa_tail_chk
.Lu3aa_main:
	movss	(%rax,%rdx,4), %xmm1
	mulss	4(%rax,%rdx,4), %xmm1
	mulss	8(%rax,%rdx,4), %xmm1
	mulss	%xmm1, %xmm0
	addq	$3, %rdx
	cmpq	%rbp, %rdx
	jl	.Lu3aa_main
.Lu3aa_tail_chk:
	cmpq	%rbx, %rdx
	jge	.Lu3aa_store
.Lu3aa_tail:
	mulss	(%rax,%rdx,4), %xmm0
	addq	$1, %rdx
	cmpq	%rbx, %rdx
	jne	.Lu3aa_tail
.Lu3aa_store:
	movss	%xmm0, (%r13)
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	ret

# unroll4aa_combine(v: %rdi, dest: %rsi)
# Product of all floats in the vector, written to *dest.  Single accumulator
# %xmm0; each main-loop pass forms two pair products, multiplies them
# together, and folds that four-element product into the accumulator.
# .LC0 is presumably 1.0f (defined outside this block).
# Roles: %r12 = v, %r13 = dest, %rbx = length, %rbp = length - 3.
unroll4aa_combine:
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	subq	$8, %rsp		# keeps %rsp 16-byte aligned at the calls
	movq	%rsi, %r13
	movq	%rdi, %r12
	call	vec_length
	leaq	-3(%rax), %rbp		# main loop runs while i < length - 3
	movq	%rax, %rbx
	movq	%r12, %rdi
	call	get_vec_start		# %rax = base of the float data
	movss	.LC0(%rip), %xmm0	# acc
	xorl	%edx, %edx		# i = 0
	testq	%rbp, %rbp
	jle	.Lu4aa_tail_chk
.Lu4aa_main:
	movss	(%rax,%rdx,4), %xmm2
	mulss	4(%rax,%rdx,4), %xmm2	# d[i]   * d[i+1]
	movss	8(%rax,%rdx,4), %xmm1
	mulss	12(%rax,%rdx,4), %xmm1	# d[i+2] * d[i+3]
	mulss	%xmm2, %xmm1
	mulss	%xmm1, %xmm0
	addq	$4, %rdx
	cmpq	%rbp, %rdx
	jl	.Lu4aa_main
.Lu4aa_tail_chk:
	cmpq	%rbx, %rdx
	jge	.Lu4aa_store
.Lu4aa_tail:
	mulss	(%rax,%rdx,4), %xmm0
	addq	$1, %rdx
	cmpq	%rbx, %rdx
	jne	.Lu4aa_tail
.Lu4aa_store:
	movss	%xmm0, (%r13)
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	ret

# unroll5aa_combine(v: %rdi, dest: %rsi)
# Product of all floats in the vector, written to *dest.  Single accumulator
# %xmm0; each main-loop pass multiplies five consecutive elements together
# (two pair products, combined, times the fifth) before folding into %xmm0.
# .LC0 is presumably 1.0f (defined outside this block).
# Roles: %r13 = v, %r12 = dest, %rbx = length, %rbp = length - 4.
unroll5aa_combine:
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	subq	$8, %rsp		# keeps %rsp 16-byte aligned at the calls
	movq	%rsi, %r12
	movq	%rdi, %r13
	call	vec_length
	leaq	-4(%rax), %rbp		# main loop runs while i < length - 4
	movq	%rax, %rbx
	movq	%r13, %rdi
	call	get_vec_start		# %rax = base of the float data
	movss	.LC0(%rip), %xmm0	# acc
	xorl	%edx, %edx		# i = 0
	testq	%rbp, %rbp
	jle	.Lu5aa_tail_chk
	movq	%rax, %rcx		# %rcx walks the data, 20 bytes per pass
.Lu5aa_main:
	movss	(%rcx), %xmm2
	mulss	4(%rcx), %xmm2
	movss	8(%rcx), %xmm1
	mulss	12(%rcx), %xmm1
	mulss	%xmm2, %xmm1
	mulss	16(%rcx), %xmm1
	mulss	%xmm1, %xmm0
	addq	$5, %rdx
	addq	$20, %rcx
	cmpq	%rbp, %rdx
	jl	.Lu5aa_main
.Lu5aa_tail_chk:
	cmpq	%rbx, %rdx
	jge	.Lu5aa_store
.Lu5aa_tail:
	mulss	(%rax,%rdx,4), %xmm0
	addq	$1, %rdx
	cmpq	%rbx, %rdx
	jne	.Lu5aa_tail
.Lu5aa_store:
	movss	%xmm0, (%r12)
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	ret

# unroll6aa_combine(v: %rdi, dest: %rsi)
# Product of all floats in the vector, written to *dest.  Single accumulator
# %xmm0; each main-loop pass builds three pair products, multiplies them
# together, and folds the six-element product into the accumulator.
# .LC0 is presumably 1.0f (defined outside this block).
# Roles: %r13 = v, %r12 = dest, %rbx = length, %rbp = length - 5.
unroll6aa_combine:
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	subq	$8, %rsp		# keeps %rsp 16-byte aligned at the calls
	movq	%rsi, %r12
	movq	%rdi, %r13
	call	vec_length
	leaq	-5(%rax), %rbp		# main loop runs while i < length - 5
	movq	%rax, %rbx
	movq	%r13, %rdi
	call	get_vec_start		# %rax = base of the float data
	movss	.LC0(%rip), %xmm0	# acc
	xorl	%ecx, %ecx		# i = 0
	testq	%rbp, %rbp
	jle	.Lu6aa_tail_chk
	movq	%rax, %rdx		# %rdx walks the data, 24 bytes per pass
.Lu6aa_main:
	movss	(%rdx), %xmm2
	mulss	4(%rdx), %xmm2
	movss	8(%rdx), %xmm1
	mulss	12(%rdx), %xmm1
	mulss	%xmm2, %xmm1
	movss	16(%rdx), %xmm2
	mulss	20(%rdx), %xmm2
	mulss	%xmm2, %xmm1
	mulss	%xmm1, %xmm0
	addq	$6, %rcx
	addq	$24, %rdx
	cmpq	%rbp, %rcx
	jl	.Lu6aa_main
.Lu6aa_tail_chk:
	cmpq	%rbx, %rcx
	jge	.Lu6aa_store
.Lu6aa_tail:
	mulss	(%rax,%rcx,4), %xmm0
	addq	$1, %rcx
	cmpq	%rbx, %rcx
	jne	.Lu6aa_tail
.Lu6aa_store:
	movss	%xmm0, (%r12)
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	ret

# unroll7aa_combine(v: %rdi, dest: %rsi)
# Product of all floats in the vector, written to *dest.  Single accumulator
# %xmm0; each main-loop pass multiplies seven consecutive elements together
# (a four-element group times a three-element group) before folding into
# %xmm0.  .LC0 is presumably 1.0f (defined outside this block).
# Roles: %r13 = v, %r12 = dest, %rbx = length, %rbp = length - 6.
unroll7aa_combine:
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	subq	$8, %rsp		# keeps %rsp 16-byte aligned at the calls
	movq	%rsi, %r12
	movq	%rdi, %r13
	call	vec_length
	leaq	-6(%rax), %rbp		# main loop runs while i < length - 6
	movq	%rax, %rbx
	movq	%r13, %rdi
	call	get_vec_start		# %rax = base of the float data
	movss	.LC0(%rip), %xmm0	# acc
	xorl	%ecx, %ecx		# i = 0
	testq	%rbp, %rbp
	jle	.Lu7aa_tail_chk
	movq	%rax, %rdx		# %rdx walks the data, 28 bytes per pass
.Lu7aa_main:
	movss	(%rdx), %xmm2
	mulss	4(%rdx), %xmm2
	movss	8(%rdx), %xmm1
	mulss	12(%rdx), %xmm1
	mulss	%xmm2, %xmm1
	movss	16(%rdx), %xmm2
	mulss	20(%rdx), %xmm2
	mulss	24(%rdx), %xmm2
	mulss	%xmm2, %xmm1
	mulss	%xmm1, %xmm0
	addq	$7, %rcx
	addq	$28, %rdx
	cmpq	%rbp, %rcx
	jl	.Lu7aa_main
.Lu7aa_tail_chk:
	cmpq	%rbx, %rcx
	jge	.Lu7aa_store
.Lu7aa_tail:
	mulss	(%rax,%rcx,4), %xmm0
	addq	$1, %rcx
	cmpq	%rbx, %rcx
	jne	.Lu7aa_tail
.Lu7aa_store:
	movss	%xmm0, (%r12)
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	ret

# unroll8aa_combine(v: %rdi, dest: %rsi)
# Product of all floats in the vector, written to *dest.  Single accumulator
# %xmm0; each main-loop pass reduces eight consecutive elements as a tree
# (four pair products -> two quad products -> one) before folding into %xmm0.
# .LC0 is presumably 1.0f (defined outside this block).
# Roles: %r13 = v, %r12 = dest, %rbx = length, %rbp = length - 7.
unroll8aa_combine:
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	subq	$8, %rsp		# keeps %rsp 16-byte aligned at the calls
	movq	%rsi, %r12
	movq	%rdi, %r13
	call	vec_length
	leaq	-7(%rax), %rbp		# main loop runs while i < length - 7
	movq	%rax, %rbx
	movq	%r13, %rdi
	call	get_vec_start		# %rax = base of the float data
	movss	.LC0(%rip), %xmm0	# acc
	xorl	%ecx, %ecx		# i = 0
	testq	%rbp, %rbp
	jle	.Lu8aa_tail_chk
	movq	%rax, %rdx		# %rdx walks the data, 32 bytes per pass
.Lu8aa_main:
	movss	(%rdx), %xmm2
	mulss	4(%rdx), %xmm2
	movss	8(%rdx), %xmm1
	mulss	12(%rdx), %xmm1
	mulss	%xmm1, %xmm2
	movss	16(%rdx), %xmm3
	mulss	20(%rdx), %xmm3
	movss	24(%rdx), %xmm1
	mulss	28(%rdx), %xmm1
	mulss	%xmm3, %xmm1
	mulss	%xmm2, %xmm1
	mulss	%xmm1, %xmm0
	addq	$8, %rcx
	addq	$32, %rdx
	cmpq	%rbp, %rcx
	jl	.Lu8aa_main
.Lu8aa_tail_chk:
	cmpq	%rbx, %rcx
	jge	.Lu8aa_store
.Lu8aa_tail:
	mulss	(%rax,%rcx,4), %xmm0
	addq	$1, %rcx
	cmpq	%rbx, %rcx
	jne	.Lu8aa_tail
.Lu8aa_store:
	movss	%xmm0, (%r12)
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	ret

# unroll9aa_combine(v: %rdi, dest: %rsi)
# Product of all floats in the vector, written to *dest.  Single accumulator
# %xmm0; each main-loop pass multiplies nine consecutive elements together
# (a four-element group times a five-element group) before folding into
# %xmm0.  .LC0 is presumably 1.0f (defined outside this block).
# Roles: %r13 = v, %r12 = dest, %rbx = length, %rbp = length - 8.
unroll9aa_combine:
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	subq	$8, %rsp		# keeps %rsp 16-byte aligned at the calls
	movq	%rsi, %r12
	movq	%rdi, %r13
	call	vec_length
	leaq	-8(%rax), %rbp		# main loop runs while i < length - 8
	movq	%rax, %rbx
	movq	%r13, %rdi
	call	get_vec_start		# %rax = base of the float data
	movss	.LC0(%rip), %xmm0	# acc
	xorl	%ecx, %ecx		# i = 0
	testq	%rbp, %rbp
	jle	.Lu9aa_tail_chk
	movq	%rax, %rdx		# %rdx walks the data, 36 bytes per pass
.Lu9aa_main:
	movss	(%rdx), %xmm2
	mulss	4(%rdx), %xmm2
	movss	8(%rdx), %xmm1
	mulss	12(%rdx), %xmm1
	mulss	%xmm2, %xmm1
	movss	16(%rdx), %xmm3
	mulss	20(%rdx), %xmm3
	movss	24(%rdx), %xmm2
	mulss	28(%rdx), %xmm2
	mulss	%xmm3, %xmm2
	mulss	32(%rdx), %xmm2
	mulss	%xmm2, %xmm1
	mulss	%xmm1, %xmm0
	addq	$9, %rcx
	addq	$36, %rdx
	cmpq	%rbp, %rcx
	jl	.Lu9aa_main
.Lu9aa_tail_chk:
	cmpq	%rbx, %rcx
	jge	.Lu9aa_store
.Lu9aa_tail:
	mulss	(%rax,%rcx,4), %xmm0
	addq	$1, %rcx
	cmpq	%rbx, %rcx
	jne	.Lu9aa_tail
.Lu9aa_store:
	movss	%xmm0, (%r12)
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	ret

# unroll10aa_combine(v: %rdi, dest: %rsi)
# Product of all floats in the vector, written to *dest.  Single accumulator
# %xmm0; each main-loop pass multiplies ten consecutive elements together
# (five pair products combined in the order shown) before folding into %xmm0.
# .LC0 is presumably 1.0f (defined outside this block).
# Roles: %r13 = v, %r12 = dest, %rbx = length, %rbp = length - 9.
unroll10aa_combine:
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	subq	$8, %rsp		# keeps %rsp 16-byte aligned at the calls
	movq	%rsi, %r12
	movq	%rdi, %r13
	call	vec_length
	leaq	-9(%rax), %rbp		# main loop runs while i < length - 9
	movq	%rax, %rbx
	movq	%r13, %rdi
	call	get_vec_start		# %rax = base of the float data
	movss	.LC0(%rip), %xmm0	# acc
	xorl	%ecx, %ecx		# i = 0
	testq	%rbp, %rbp
	jle	.Lu10aa_tail_chk
	movq	%rax, %rdx		# %rdx walks the data, 40 bytes per pass
.Lu10aa_main:
	movss	(%rdx), %xmm2
	mulss	4(%rdx), %xmm2
	movss	8(%rdx), %xmm1
	mulss	12(%rdx), %xmm1
	mulss	%xmm1, %xmm2
	movss	16(%rdx), %xmm3
	mulss	20(%rdx), %xmm3
	movss	24(%rdx), %xmm1
	mulss	28(%rdx), %xmm1
	mulss	%xmm3, %xmm1
	movss	32(%rdx), %xmm3
	mulss	36(%rdx), %xmm3
	mulss	%xmm3, %xmm1
	mulss	%xmm2, %xmm1
	mulss	%xmm1, %xmm0
	addq	$10, %rcx
	addq	$40, %rdx
	cmpq	%rbp, %rcx
	jl	.Lu10aa_main
.Lu10aa_tail_chk:
	cmpq	%rbx, %rcx
	jge	.Lu10aa_store
.Lu10aa_tail:
	mulss	(%rax,%rcx,4), %xmm0
	addq	$1, %rcx
	cmpq	%rbx, %rcx
	jne	.Lu10aa_tail
.Lu10aa_store:
	movss	%xmm0, (%r12)
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	ret

# unroll12aa_combine(v: %rdi, dest: %rsi)
# Product of all floats in the vector, written to *dest.  Single accumulator
# %xmm0; each main-loop pass multiplies twelve consecutive elements together
# (six pair products combined in the order shown) before folding into %xmm0.
# .LC0 is presumably 1.0f (defined outside this block).
# Roles: %r13 = v, %r12 = dest, %rbx = length, %rbp = length - 11.
unroll12aa_combine:
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	subq	$8, %rsp		# keeps %rsp 16-byte aligned at the calls
	movq	%rsi, %r12
	movq	%rdi, %r13
	call	vec_length
	leaq	-11(%rax), %rbp		# main loop runs while i < length - 11
	movq	%rax, %rbx
	movq	%r13, %rdi
	call	get_vec_start		# %rax = base of the float data
	movss	.LC0(%rip), %xmm0	# acc
	xorl	%ecx, %ecx		# i = 0
	testq	%rbp, %rbp
	jle	.Lu12aa_tail_chk
	movq	%rax, %rdx		# %rdx walks the data, 48 bytes per pass
.Lu12aa_main:
	movss	(%rdx), %xmm2
	mulss	4(%rdx), %xmm2
	movss	8(%rdx), %xmm1
	mulss	12(%rdx), %xmm1
	mulss	%xmm1, %xmm2
	movss	16(%rdx), %xmm3
	mulss	20(%rdx), %xmm3
	movss	24(%rdx), %xmm1
	mulss	28(%rdx), %xmm1
	mulss	%xmm3, %xmm1
	mulss	%xmm2, %xmm1
	movss	32(%rdx), %xmm3
	mulss	36(%rdx), %xmm3
	movss	40(%rdx), %xmm2
	mulss	44(%rdx), %xmm2
	mulss	%xmm3, %xmm2
	mulss	%xmm2, %xmm1
	mulss	%xmm1, %xmm0
	addq	$12, %rcx
	addq	$48, %rdx
	cmpq	%rbp, %rcx
	jl	.Lu12aa_main
.Lu12aa_tail_chk:
	cmpq	%rbx, %rcx
	jge	.Lu12aa_store
.Lu12aa_tail:
	mulss	(%rax,%rcx,4), %xmm0
	addq	$1, %rcx
	cmpq	%rbx, %rcx
	jne	.Lu12aa_tail
.Lu12aa_store:
	movss	%xmm0, (%r12)
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	ret

# simd_v1_combine(v: %rdi, dest: %rsi)
# Product of all floats in the vector, written to *dest, using one 8-float
# (32-byte) vector accumulator that is processed as two 16-byte mulps halves.
# Phases: (1) scalar peel until the data pointer is 32-byte aligned,
# (2) vector loop over 8 floats per pass, (3) scalar tail, (4) multiply the
# eight accumulator lanes into the scalar result.
# .LC0 is presumably 1.0f (defined outside this block).
# Roles: %r13 = v, %r12 = dest, %rbx = data pointer, %edx = remaining count
# (only the low 32 bits of vec_length's result are kept), %xmm0 = scalar result.
# Stack slots (offsets from the 32-byte-aligned %rsp):
#   0..47    scratch used to split an xmm register into 8-byte halves
#   48..79   8-float transfer array (seeded from .LC0, later the accumulator)
#   80..111  copy of the final accumulator (not read again in this function)
#   112..143 accumulator fed into mulps
#   144..175 accumulator produced by mulps
#   176..207 current 32-byte chunk copied from the data
simd_v1_combine:
	pushq	%rbp
	movq	%rsp, %rbp
	pushq	%r13
	pushq	%r12
	pushq	%rbx
	andq	$-32, %rsp
	subq	$208, %rsp
	movq	%rdi, %r13
	movq	%rsi, %r12
	call	get_vec_start
	movq	%rax, %rbx
	movq	%r13, %rdi
	call	vec_length
	movl	%eax, %edx
	movl	$0, %esi
	movl	$0, %ecx
	movss	.LC0(%rip), %xmm0
	# Seed the 8-float transfer array at 48(%rsp).
.L379:
	movss	%xmm0, 48(%rsp,%rsi,4)
	addq	$1, %rcx
	movq	%rcx, %rsi
	cmpq	$8, %rcx
	jne	.L379
	# Already 32-byte aligned?  Then skip the scalar peel.
	testb	$31, %bl
	je	.L392
	testl	%eax, %eax
	je	.L393
	movss	.LC0(%rip), %xmm0
	# Scalar peel: consume one float at a time until aligned or out of data.
.L385:
	addq	$4, %rbx
	mulss	-4(%rbx), %xmm0
	subl	$1, %edx
	testb	$31, %bl
	jne	.L382
	jmp	.L380
.L392:
	movss	.LC0(%rip), %xmm0
.L380:
	# %edi remembers the count at vector-loop entry; need at least 8 floats.
	movl	%edx, %edi
	cmpl	$7, %edx
	ja	.L383
	jmp	.L384
.L382:
	testl	%edx, %edx
	jne	.L385
	jmp	.L384
.L383:
	# accumulator (112..143) = transfer array
	movq	48(%rsp), %rax
	movq	%rax, 112(%rsp)
	movq	56(%rsp), %rax
	movq	%rax, 120(%rsp)
	movq	64(%rsp), %rax
	movq	%rax, 128(%rsp)
	movq	72(%rsp), %rax
	movq	%rax, 136(%rsp)
	movq	%rbx, %rax
	# Vector loop: %rax = chunk pointer, %edx = floats remaining.
.L387:
	# Copy the next 32 bytes of data to 176..207.
	movq	(%rax), %rcx
	movq	%rcx, 176(%rsp)
	movq	8(%rax), %rcx
	movq	%rcx, 184(%rsp)
	movq	16(%rax), %rcx
	movq	%rcx, 192(%rsp)
	movq	24(%rax), %rcx
	movq	%rcx, 200(%rsp)
	# Low four lanes: accumulator * chunk.
	movaps	112(%rsp), %xmm1
	mulps	176(%rsp), %xmm1
	movaps	%xmm1, (%rsp)
	movq	(%rsp), %rsi
	movaps	%xmm1, 16(%rsp)
	# High four lanes: accumulator * chunk.
	movaps	128(%rsp), %xmm1
	mulps	192(%rsp), %xmm1
	movaps	%xmm1, (%rsp)
	movq	(%rsp), %rcx
	movaps	%xmm1, 32(%rsp)
	# Store the new accumulator to 144..175 in 8-byte pieces.
	movq	%rsi, 144(%rsp)
	movq	24(%rsp), %rsi
	movq	%rsi, 152(%rsp)
	movq	%rcx, 160(%rsp)
	movq	40(%rsp), %rcx
	movq	%rcx, 168(%rsp)
	addq	$32, %rax
	subl	$8, %edx
	cmpl	$7, %edx
	jbe	.L386
	# Another full chunk remains: feed the result back as the accumulator.
	# (%rsi still holds the value just written to 152(%rsp).)
	movq	144(%rsp), %rcx
	movq	%rcx, 112(%rsp)
	movq	%rsi, 120(%rsp)
	movq	160(%rsp), %rcx
	movq	%rcx, 128(%rsp)
	movq	168(%rsp), %rcx
	movq	%rcx, 136(%rsp)
	jmp	.L387
.L386:
	# Vector loop done: publish the accumulator to 80..111 and to the
	# transfer array, then advance %rbx past the consumed chunks.
	movq	144(%rsp), %r8
	movq	%r8, 80(%rsp)
	movq	152(%rsp), %rsi
	movq	%rsi, 88(%rsp)
	movq	160(%rsp), %rcx
	movq	%rcx, 96(%rsp)
	movq	168(%rsp), %rdx
	movq	%rdx, 104(%rsp)
	# %rbx += 32 * (((entry_count - 8) >> 3) + 1)
	subl	$8, %edi
	movl	%edi, %eax
	shrl	$3, %eax
	movl	%eax, %r9d
	addq	$1, %r9
	salq	$5, %r9
	addq	%r9, %rbx
	movq	%r8, 48(%rsp)
	movq	%rsi, 56(%rsp)
	movq	%rcx, 64(%rsp)
	movq	%rdx, 72(%rsp)
	# %edx = (entry_count - 8) - 8 * ((entry_count - 8) >> 3): floats left over
	negl	%eax
	leal	(%rdi,%rax,8), %edx
.L384:
	testl	%edx, %edx
	je	.L388
	# Scalar tail: remaining floats go into the scalar result.
.L389:
	addq	$4, %rbx
	mulss	-4(%rbx), %xmm0
	subl	$1, %edx
	jne	.L389
	jmp	.L388
	# Final reduction: multiply the eight transfer-array lanes into %xmm0.
.L391:
	mulss	48(%rsp,%rdx,4), %xmm0
	addq	$1, %rax
	movq	%rax, %rdx
	cmpq	$8, %rax
	jne	.L391
	movss	%xmm0, (%r12)
	jmp	.L394
.L393:
	# Unaligned pointer but zero count: result starts from .LC0.
	movss	.LC0(%rip), %xmm0
	jmp	.L384
.L388:
	movl	$0, %edx
	movl	$0, %eax
	jmp	.L391
.L394:
	# Undo the stack realignment via the frame pointer, then restore registers.
	leaq	-24(%rbp), %rsp
	popq	%rbx
	popq	%r12
	popq	%r13
	popq	%rbp
	ret

# simd_v2_combine(v: %rdi, dest: %rsi)
# Product of all floats in the vector, written to *dest, using two 8-float
# (32-byte) vector accumulators; each is processed as two 16-byte mulps
# halves and the vector loop consumes 16 floats (64 bytes) per pass.
# Phases: scalar peel to 32-byte alignment, vector loop, scalar tail,
# accumulator0 * accumulator1, then multiply the eight lanes into the scalar.
# .LC0 is presumably 1.0f (defined outside this block).
# Roles: %r13 = v, %r12 = dest, %rbx = data pointer, %edx = remaining count
# (only the low 32 bits of vec_length's result are kept), %xmm0 = scalar result.
# Stack slots (offsets from the 32-byte-aligned %rsp):
#   0..63    scratch used to split xmm registers into 8-byte halves
#   64..95   8-float transfer array (seeded from .LC0, later accumulator 0)
#   96..127  copy of final accumulator 0 (not read again in this function)
#   128..159 accumulator 1 (updated in place)
#   160..191 accumulator 0 as produced by mulps
#   192..223 accumulator 0 as fed into mulps
#   224..255 second 32-byte chunk, 256..287 first 32-byte chunk
#   288..319 temporary copy of the transfer array for the final combine
simd_v2_combine:
	pushq	%rbp
	movq	%rsp, %rbp
	pushq	%r13
	pushq	%r12
	pushq	%rbx
	andq	$-32, %rsp
	subq	$352, %rsp
	movq	%rdi, %r13
	movq	%rsi, %r12
	call	get_vec_start
	movq	%rax, %rbx
	movq	%r13, %rdi
	call	vec_length
	movl	%eax, %edx
	movl	$0, %esi
	movl	$0, %ecx
	movss	.LC0(%rip), %xmm0
	# Seed the 8-float transfer array at 64(%rsp).
.L398:
	movss	%xmm0, 64(%rsp,%rsi,4)
	addq	$1, %rcx
	movq	%rcx, %rsi
	cmpq	$8, %rcx
	jne	.L398
	# accumulator 1 (128..159) = transfer array
	movq	64(%rsp), %rcx
	movq	%rcx, 128(%rsp)
	movq	72(%rsp), %rcx
	movq	%rcx, 136(%rsp)
	movq	80(%rsp), %rcx
	movq	%rcx, 144(%rsp)
	movq	88(%rsp), %rcx
	movq	%rcx, 152(%rsp)
	# Already 32-byte aligned?  Then skip the scalar peel.
	testb	$31, %bl
	je	.L411
	testl	%eax, %eax
	je	.L412
	movss	.LC0(%rip), %xmm0
	# Scalar peel: consume one float at a time until aligned or out of data.
.L404:
	addq	$4, %rbx
	mulss	-4(%rbx), %xmm0
	subl	$1, %edx
	testb	$31, %bl
	jne	.L401
	jmp	.L399
.L411:
	movss	.LC0(%rip), %xmm0
.L399:
	# %edi remembers the count at vector-loop entry; need at least 16 floats.
	movl	%edx, %edi
	cmpl	$15, %edx
	ja	.L402
	jmp	.L403
.L401:
	testl	%edx, %edx
	jne	.L404
	jmp	.L403
.L402:
	# accumulator 0 input (192..223) = accumulator 1's initial contents
	movq	128(%rsp), %rax
	movq	%rax, 192(%rsp)
	movq	136(%rsp), %rax
	movq	%rax, 200(%rsp)
	movq	144(%rsp), %rax
	movq	%rax, 208(%rsp)
	movq	152(%rsp), %rax
	movq	%rax, 216(%rsp)
	movl	%edx, %eax
	movq	%rbx, %rdx
	# Vector loop: %rdx = chunk pointer, %eax = floats remaining.
.L406:
	# Copy 64 bytes of data: first chunk -> 256..287, second -> 224..255.
	movq	(%rdx), %rcx
	movq	%rcx, 256(%rsp)
	movq	8(%rdx), %rcx
	movq	%rcx, 264(%rsp)
	movq	16(%rdx), %rcx
	movq	%rcx, 272(%rsp)
	movq	24(%rdx), %rcx
	movq	%rcx, 280(%rsp)
	movq	32(%rdx), %rcx
	movq	%rcx, 224(%rsp)
	movq	40(%rdx), %rcx
	movq	%rcx, 232(%rsp)
	movq	48(%rdx), %rcx
	movq	%rcx, 240(%rsp)
	movq	56(%rdx), %rcx
	movq	%rcx, 248(%rsp)
	# accumulator 0 *= first chunk; result goes to 160..191.
	movaps	192(%rsp), %xmm1
	mulps	256(%rsp), %xmm1
	movaps	%xmm1, 16(%rsp)
	movq	16(%rsp), %rsi
	movaps	%xmm1, 32(%rsp)
	movaps	208(%rsp), %xmm1
	mulps	272(%rsp), %xmm1
	movaps	%xmm1, 16(%rsp)
	movq	16(%rsp), %rcx
	movaps	%xmm1, 48(%rsp)
	movq	%rsi, 160(%rsp)
	movq	40(%rsp), %rsi
	movq	%rsi, 168(%rsp)
	movq	%rcx, 176(%rsp)
	movq	56(%rsp), %rcx
	movq	%rcx, 184(%rsp)
	# accumulator 1 *= second chunk; result written back to 128..159.
	movaps	224(%rsp), %xmm1
	mulps	128(%rsp), %xmm1
	movaps	%xmm1, 16(%rsp)
	movaps	144(%rsp), %xmm1
	mulps	240(%rsp), %xmm1
	movaps	%xmm1, (%rsp)
	movq	16(%rsp), %rsi
	movaps	16(%rsp), %xmm4
	movaps	%xmm4, 32(%rsp)
	movq	(%rsp), %rcx
	movaps	%xmm1, 48(%rsp)
	movq	%rsi, 128(%rsp)
	movq	40(%rsp), %rsi
	movq	%rsi, 136(%rsp)
	movq	%rcx, 144(%rsp)
	movq	56(%rsp), %rcx
	movq	%rcx, 152(%rsp)
	addq	$64, %rdx
	subl	$16, %eax
	cmpl	$15, %eax
	jbe	.L405
	# Another full pass remains: feed accumulator 0's result back as input.
	movq	160(%rsp), %rcx
	movq	%rcx, 192(%rsp)
	movq	168(%rsp), %rcx
	movq	%rcx, 200(%rsp)
	movq	176(%rsp), %rcx
	movq	%rcx, 208(%rsp)
	movq	184(%rsp), %rcx
	movq	%rcx, 216(%rsp)
	jmp	.L406
.L405:
	# Vector loop done: publish accumulator 0 to 96..127 and to the
	# transfer array, then advance %rbx past the consumed data.
	movq	160(%rsp), %r9
	movq	%r9, 96(%rsp)
	movq	168(%rsp), %r8
	movq	%r8, 104(%rsp)
	movq	176(%rsp), %rsi
	movq	%rsi, 112(%rsp)
	movq	184(%rsp), %rcx
	movq	%rcx, 120(%rsp)
	# %rbx += 64 * (((entry_count - 16) >> 4) + 1)
	leal	-16(%rdi), %edx
	movl	%edx, %eax
	shrl	$4, %eax
	movl	%eax, %edi
	addq	$1, %rdi
	salq	$6, %rdi
	addq	%rdi, %rbx
	movq	%r9, 64(%rsp)
	movq	%r8, 72(%rsp)
	movq	%rsi, 80(%rsp)
	movq	%rcx, 88(%rsp)
	# %edx = (entry_count - 16) - 16 * ((entry_count - 16) >> 4): floats left
	sall	$4, %eax
	subl	%eax, %edx
.L403:
	testl	%edx, %edx
	je	.L407
	# Scalar tail: remaining floats go into the scalar result.
.L408:
	addq	$4, %rbx
	mulss	-4(%rbx), %xmm0
	subl	$1, %edx
	jne	.L408
	jmp	.L407
	# Final reduction: multiply the eight transfer-array lanes into %xmm0.
.L410:
	mulss	64(%rsp,%rdx,4), %xmm0
	addq	$1, %rax
	movq	%rax, %rdx
	cmpq	$8, %rax
	jne	.L410
	movss	%xmm0, (%r12)
	jmp	.L413
.L412:
	# Unaligned pointer but zero count: result starts from .LC0.
	movss	.LC0(%rip), %xmm0
	jmp	.L403
.L407:
	# transfer array = transfer array (accumulator 0) * accumulator 1
	movq	64(%rsp), %rax
	movq	%rax, 288(%rsp)
	movq	72(%rsp), %rax
	movq	%rax, 296(%rsp)
	movq	80(%rsp), %rax
	movq	%rax, 304(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 312(%rsp)
	movaps	288(%rsp), %xmm1
	mulps	128(%rsp), %xmm1
	movaps	%xmm1, 16(%rsp)
	movq	16(%rsp), %rdx
	movaps	%xmm1, 32(%rsp)
	movaps	304(%rsp), %xmm1
	mulps	144(%rsp), %xmm1
	movaps	%xmm1, 16(%rsp)
	movq	16(%rsp), %rax
	movaps	%xmm1, 48(%rsp)
	movq	%rdx, 64(%rsp)
	movq	40(%rsp), %rdx
	movq	%rdx, 72(%rsp)
	movq	%rax, 80(%rsp)
	movq	56(%rsp), %rax
	movq	%rax, 88(%rsp)
	movl	$0, %edx
	movl	$0, %eax
	jmp	.L410
.L413:
	# Undo the stack realignment via the frame pointer, then restore registers.
	leaq	-24(%rbp), %rsp
	popq	%rbx
	popq	%r12
	popq	%r13
	popq	%rbp
	ret

# simd_v4_combine(v: %rdi, dest: %rsi)
# Product of all floats in the vector, written to *dest, using four 8-float
# (32-byte) vector accumulators; each is processed as two 16-byte mulps
# halves and the vector loop consumes 32 floats (128 bytes) per pass.
# Phases: scalar peel to 32-byte alignment, vector loop, scalar tail,
# pairwise combine of the four accumulators, then multiply the eight lanes
# into the scalar result.
# .LC0 is presumably 1.0f (defined outside this block).
# Roles: %r13 = v, %r12 = dest, %rbx = data pointer, %edx = remaining count
# (only the low 32 bits of vec_length's result are kept), %xmm0 = scalar result.
# Stack slots (offsets from the 32-byte-aligned %rsp):
#   0..95    scratch used to split xmm registers into 8-byte halves
#   96..127  8-float transfer array (seeded from .LC0, later accumulator 0)
#   128..159 transfer * accumulator 1, 160..191 accumulator 2 * accumulator 3
#   224..255 copy of final accumulator 0 (not read again in this function)
#   256..287 accumulator 3, 288..319 accumulator 2, 320..351 accumulator 1
#   352..383 accumulator 0 as produced by mulps, 384..415 as fed into mulps
#   416..543 the four 32-byte data chunks (512, 480, 448, 416 in data order)
#   544..575 temporary copy of the transfer array for the final combine
simd_v4_combine:
	pushq	%rbp
	movq	%rsp, %rbp
	pushq	%r13
	pushq	%r12
	pushq	%rbx
	andq	$-32, %rsp
	subq	$576, %rsp
	movq	%rdi, %r13
	movq	%rsi, %r12
	call	get_vec_start
	movq	%rax, %rbx
	movq	%r13, %rdi
	call	vec_length
	movl	%eax, %edx
	movl	$0, %esi
	movl	$0, %ecx
	movss	.LC0(%rip), %xmm0
	# Seed the 8-float transfer array at 96(%rsp).
.L417:
	movss	%xmm0, 96(%rsp,%rsi,4)
	addq	$1, %rcx
	movq	%rcx, %rsi
	cmpq	$8, %rcx
	jne	.L417
	# accumulator 1 (320..351) = transfer array
	movq	96(%rsp), %rcx
	movq	%rcx, 320(%rsp)
	movq	104(%rsp), %rcx
	movq	%rcx, 328(%rsp)
	movq	112(%rsp), %rcx
	movq	%rcx, 336(%rsp)
	movq	120(%rsp), %rcx
	movq	%rcx, 344(%rsp)
	# Already 32-byte aligned?  Then skip the scalar peel.
	testb	$31, %bl
	je	.L430
	testl	%eax, %eax
	je	.L431
	movss	.LC0(%rip), %xmm0
	# Scalar peel: consume one float at a time until aligned or out of data.
.L423:
	addq	$4, %rbx
	mulss	-4(%rbx), %xmm0
	subl	$1, %edx
	testb	$31, %bl
	jne	.L420
	jmp	.L418
.L430:
	movss	.LC0(%rip), %xmm0
.L418:
	# %edi remembers the count at vector-loop entry; need at least 32 floats.
	movl	%edx, %edi
	cmpl	$31, %edx
	ja	.L421
	# Too few floats for the vector loop: seed accumulators 3 and 2 from
	# accumulator 1 and go straight to the scalar tail.
	movq	320(%rsp), %rdi
	movq	%rdi, 256(%rsp)
	movq	328(%rsp), %rsi
	movq	%rsi, 264(%rsp)
	movq	336(%rsp), %rcx
	movq	%rcx, 272(%rsp)
	movq	344(%rsp), %rax
	movq	%rax, 280(%rsp)
	movq	%rdi, 288(%rsp)
	movq	%rsi, 296(%rsp)
	movq	%rcx, 304(%rsp)
	movq	%rax, 312(%rsp)
	jmp	.L422
.L420:
	testl	%edx, %edx
	jne	.L423
	jmp	.L419
.L421:
	# Seed accumulator 0 input (384..415), accumulator 3 (256..287) and
	# accumulator 2 (288..319) from accumulator 1's initial contents.
	movq	320(%rsp), %r9
	movq	%r9, 384(%rsp)
	movq	328(%rsp), %r8
	movq	%r8, 392(%rsp)
	movq	336(%rsp), %rsi
	movq	%rsi, 400(%rsp)
	movq	344(%rsp), %rcx
	movq	%rcx, 408(%rsp)
	movl	%edx, %eax
	movq	%r9, 256(%rsp)
	movq	%r8, 264(%rsp)
	movq	%rsi, 272(%rsp)
	movq	%rcx, 280(%rsp)
	movq	%r9, 288(%rsp)
	movq	%r8, 296(%rsp)
	movq	%rsi, 304(%rsp)
	movq	%rcx, 312(%rsp)
	movq	%rbx, %rdx
	# Vector loop: %rdx = chunk pointer, %eax = floats remaining.
.L425:
	# Copy 128 bytes of data into the four chunk slots.
	movq	(%rdx), %rcx
	movq	%rcx, 512(%rsp)
	movq	8(%rdx), %rcx
	movq	%rcx, 520(%rsp)
	movq	16(%rdx), %rcx
	movq	%rcx, 528(%rsp)
	movq	24(%rdx), %rcx
	movq	%rcx, 536(%rsp)
	movq	32(%rdx), %rcx
	movq	%rcx, 480(%rsp)
	movq	40(%rdx), %rcx
	movq	%rcx, 488(%rsp)
	movq	48(%rdx), %rcx
	movq	%rcx, 496(%rsp)
	movq	56(%rdx), %rcx
	movq	%rcx, 504(%rsp)
	movq	64(%rdx), %rcx
	movq	%rcx, 448(%rsp)
	movq	72(%rdx), %rcx
	movq	%rcx, 456(%rsp)
	movq	80(%rdx), %rcx
	movq	%rcx, 464(%rsp)
	movq	88(%rdx), %rcx
	movq	%rcx, 472(%rsp)
	movq	96(%rdx), %rcx
	movq	%rcx, 416(%rsp)
	movq	104(%rdx), %rcx
	movq	%rcx, 424(%rsp)
	movq	112(%rdx), %rcx
	movq	%rcx, 432(%rsp)
	movq	120(%rdx), %rcx
	movq	%rcx, 440(%rsp)
	# accumulator 0 *= chunk at 512; result goes to 352..383.
	movaps	384(%rsp), %xmm1
	mulps	512(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movq	48(%rsp), %rsi
	movaps	%xmm1, 64(%rsp)
	movaps	400(%rsp), %xmm1
	mulps	528(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movq	48(%rsp), %rcx
	movaps	%xmm1, 80(%rsp)
	movq	%rsi, 352(%rsp)
	movq	72(%rsp), %rsi
	movq	%rsi, 360(%rsp)
	movq	%rcx, 368(%rsp)
	movq	88(%rsp), %rcx
	movq	%rcx, 376(%rsp)
	# accumulator 1 *= chunk at 480; written back to 320..351.
	movaps	480(%rsp), %xmm1
	mulps	320(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movaps	336(%rsp), %xmm1
	mulps	496(%rsp), %xmm1
	movaps	%xmm1, 32(%rsp)
	movq	48(%rsp), %rsi
	movaps	48(%rsp), %xmm4
	movaps	%xmm4, 64(%rsp)
	movq	32(%rsp), %rcx
	movaps	%xmm1, 80(%rsp)
	movq	%rsi, 320(%rsp)
	movq	72(%rsp), %rsi
	movq	%rsi, 328(%rsp)
	movq	%rcx, 336(%rsp)
	movq	88(%rsp), %rcx
	movq	%rcx, 344(%rsp)
	# accumulator 2 *= chunk at 448; written back to 288..319.
	movaps	288(%rsp), %xmm1
	mulps	448(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movaps	304(%rsp), %xmm1
	mulps	464(%rsp), %xmm1
	movaps	%xmm1, 32(%rsp)
	movq	48(%rsp), %rsi
	movaps	48(%rsp), %xmm6
	movaps	%xmm6, 64(%rsp)
	movq	32(%rsp), %rcx
	movaps	%xmm1, 80(%rsp)
	movq	%rsi, 288(%rsp)
	movq	72(%rsp), %rsi
	movq	%rsi, 296(%rsp)
	movq	%rcx, 304(%rsp)
	movq	88(%rsp), %rcx
	movq	%rcx, 312(%rsp)
	# accumulator 3 *= chunk at 416; written back to 256..287.
	movaps	256(%rsp), %xmm1
	mulps	416(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movaps	272(%rsp), %xmm1
	mulps	432(%rsp), %xmm1
	movaps	%xmm1, 32(%rsp)
	movq	48(%rsp), %rsi
	movaps	48(%rsp), %xmm2
	movaps	%xmm2, 64(%rsp)
	movq	32(%rsp), %rcx
	movaps	%xmm1, 80(%rsp)
	movq	%rsi, 256(%rsp)
	movq	72(%rsp), %rsi
	movq	%rsi, 264(%rsp)
	movq	%rcx, 272(%rsp)
	movq	88(%rsp), %rcx
	movq	%rcx, 280(%rsp)
	# Advance 128 bytes (subq $-128 is the same as adding 128).
	subq	$-128, %rdx
	subl	$32, %eax
	cmpl	$31, %eax
	jbe	.L424
	# Another full pass remains: feed accumulator 0's result back as input.
	movq	352(%rsp), %rcx
	movq	%rcx, 384(%rsp)
	movq	360(%rsp), %rcx
	movq	%rcx, 392(%rsp)
	movq	368(%rsp), %rcx
	movq	%rcx, 400(%rsp)
	movq	376(%rsp), %rcx
	movq	%rcx, 408(%rsp)
	jmp	.L425
.L424:
	# Vector loop done: publish accumulator 0 to 224..255 and to the
	# transfer array, then advance %rbx past the consumed data.
	movq	352(%rsp), %r9
	movq	%r9, 224(%rsp)
	movq	360(%rsp), %r8
	movq	%r8, 232(%rsp)
	movq	368(%rsp), %rsi
	movq	%rsi, 240(%rsp)
	movq	376(%rsp), %rcx
	movq	%rcx, 248(%rsp)
	# %rbx += 128 * (((entry_count - 32) >> 5) + 1)
	leal	-32(%rdi), %edx
	movl	%edx, %eax
	shrl	$5, %eax
	movl	%eax, %edi
	addq	$1, %rdi
	salq	$7, %rdi
	addq	%rdi, %rbx
	movq	%r9, 96(%rsp)
	movq	%r8, 104(%rsp)
	movq	%rsi, 112(%rsp)
	movq	%rcx, 120(%rsp)
	# %edx = (entry_count - 32) - 32 * ((entry_count - 32) >> 5): floats left
	sall	$5, %eax
	subl	%eax, %edx
.L422:
	testl	%edx, %edx
	je	.L426
	# Scalar tail: remaining floats go into the scalar result.
.L427:
	addq	$4, %rbx
	mulss	-4(%rbx), %xmm0
	subl	$1, %edx
	jne	.L427
	jmp	.L426
	# Final reduction: multiply the eight transfer-array lanes into %xmm0.
.L429:
	mulss	96(%rsp,%rdx,4), %xmm0
	addq	$1, %rax
	movq	%rax, %rdx
	cmpq	$8, %rax
	jne	.L429
	movss	%xmm0, (%r12)
	jmp	.L432
.L431:
	# Unaligned pointer but zero count: result starts from .LC0.
	movss	.LC0(%rip), %xmm0
.L419:
	# Count ran out before the vector loop: seed accumulators 3 and 2 from
	# accumulator 1, then continue at the scalar-tail check.
	movq	320(%rsp), %rdi
	movq	%rdi, 256(%rsp)
	movq	328(%rsp), %rsi
	movq	%rsi, 264(%rsp)
	movq	336(%rsp), %rcx
	movq	%rcx, 272(%rsp)
	movq	344(%rsp), %rax
	movq	%rax, 280(%rsp)
	movq	%rdi, 288(%rsp)
	movq	%rsi, 296(%rsp)
	movq	%rcx, 304(%rsp)
	movq	%rax, 312(%rsp)
	jmp	.L422
.L426:
	# Combine step 1: (transfer array, i.e. accumulator 0) * accumulator 1,
	# kept in 48(%rsp)/32(%rsp) and copied to 128..159.
	movq	96(%rsp), %rax
	movq	%rax, 544(%rsp)
	movq	104(%rsp), %rax
	movq	%rax, 552(%rsp)
	movq	112(%rsp), %rax
	movq	%rax, 560(%rsp)
	movq	120(%rsp), %rax
	movq	%rax, 568(%rsp)
	movaps	544(%rsp), %xmm1
	mulps	320(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movaps	560(%rsp), %xmm1
	mulps	336(%rsp), %xmm1
	movaps	%xmm1, 32(%rsp)
	movq	48(%rsp), %rdx
	movaps	48(%rsp), %xmm6
	movaps	%xmm6, 64(%rsp)
	movq	32(%rsp), %rax
	movaps	%xmm1, 80(%rsp)
	movq	%rdx, 128(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 136(%rsp)
	movq	%rax, 144(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 152(%rsp)
	# Combine step 2: accumulator 2 * accumulator 3, left in %xmm6 (low
	# lanes) and %xmm1 (high lanes) and copied to 160..191.
	movaps	288(%rsp), %xmm1
	mulps	256(%rsp), %xmm1
	movaps	%xmm1, 16(%rsp)
	movaps	304(%rsp), %xmm1
	mulps	272(%rsp), %xmm1
	movaps	%xmm1, (%rsp)
	movq	16(%rsp), %rdx
	movaps	16(%rsp), %xmm6
	movaps	%xmm6, 64(%rsp)
	movq	(%rsp), %rax
	movaps	%xmm1, 80(%rsp)
	movq	%rdx, 160(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 168(%rsp)
	movq	%rax, 176(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 184(%rsp)
	# Combine step 3: product of steps 1 and 2 becomes the transfer array.
	movaps	48(%rsp), %xmm4
	mulps	%xmm6, %xmm4
	movaps	%xmm4, 48(%rsp)
	movq	48(%rsp), %rdx
	movaps	%xmm4, 64(%rsp)
	movaps	32(%rsp), %xmm5
	mulps	%xmm1, %xmm5
	movaps	%xmm5, 48(%rsp)
	movq	48(%rsp), %rax
	movaps	%xmm5, 80(%rsp)
	movq	%rdx, 96(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 104(%rsp)
	movq	%rax, 112(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 120(%rsp)
	movl	$0, %edx
	movl	$0, %eax
	jmp	.L429
.L432:
	# Undo the stack realignment via the frame pointer, then restore registers.
	leaq	-24(%rbp), %rsp
	popq	%rbx
	popq	%r12
	popq	%r13
	popq	%rbp
	ret

simd_v8_combine:
	pushq	%rbp
	movq	%rsp, %rbp
	pushq	%r13
	pushq	%r12
	pushq	%rbx
	andq	$-32, %rsp
	subq	$960, %rsp
	movq	%rdi, %r13
	movq	%rsi, %r12
	call	get_vec_start
	movq	%rax, %rbx
	movq	%r13, %rdi
	call	vec_length
	movl	%eax, %edx
	movl	$0, %esi
	movl	$0, %ecx
	movss	.LC0(%rip), %xmm0
.L436:
	movss	%xmm0, 96(%rsp,%rsi,4)
	addq	$1, %rcx
	movq	%rcx, %rsi
	cmpq	$8, %rcx
	jne	.L436
	movq	96(%rsp), %rcx
	movq	%rcx, 544(%rsp)
	movq	104(%rsp), %rcx
	movq	%rcx, 552(%rsp)
	movq	112(%rsp), %rcx
	movq	%rcx, 560(%rsp)
	movq	120(%rsp), %rcx
	movq	%rcx, 568(%rsp)
	testb	$31, %bl
	je	.L449
	testl	%eax, %eax
	je	.L450
	movss	.LC0(%rip), %xmm0
.L442:
	addq	$4, %rbx
	mulss	-4(%rbx), %xmm0
	subl	$1, %edx
	testb	$31, %bl
	jne	.L439
	jmp	.L437
.L449:
	movss	.LC0(%rip), %xmm0
.L437:
	movl	%edx, %edi
	cmpl	$63, %edx
	ja	.L440
	movq	544(%rsp), %rdi
	movq	%rdi, 352(%rsp)
	movq	552(%rsp), %rsi
	movq	%rsi, 360(%rsp)
	movq	560(%rsp), %rcx
	movq	%rcx, 368(%rsp)
	movq	568(%rsp), %rax
	movq	%rax, 376(%rsp)
	movq	%rdi, 384(%rsp)
	movq	%rsi, 392(%rsp)
	movq	%rcx, 400(%rsp)
	movq	%rax, 408(%rsp)
	movq	%rdi, 416(%rsp)
	movq	%rsi, 424(%rsp)
	movq	%rcx, 432(%rsp)
	movq	%rax, 440(%rsp)
	movq	%rdi, 448(%rsp)
	movq	%rsi, 456(%rsp)
	movq	%rcx, 464(%rsp)
	movq	%rax, 472(%rsp)
	movq	%rdi, 480(%rsp)
	movq	%rsi, 488(%rsp)
	movq	%rcx, 496(%rsp)
	movq	%rax, 504(%rsp)
	movq	%rdi, 512(%rsp)
	movq	%rsi, 520(%rsp)
	movq	%rcx, 528(%rsp)
	movq	%rax, 536(%rsp)
	jmp	.L441
.L439:
	testl	%edx, %edx
	jne	.L442
	jmp	.L438
.L440:
	movq	544(%rsp), %r9
	movq	%r9, 608(%rsp)
	movq	552(%rsp), %r8
	movq	%r8, 616(%rsp)
	movq	560(%rsp), %rsi
	movq	%rsi, 624(%rsp)
	movq	568(%rsp), %rcx
	movq	%rcx, 632(%rsp)
	movl	%edx, %eax
	movq	%rbx, %rdx
	movq	%r9, 352(%rsp)
	movq	%r8, 360(%rsp)
	movq	%rsi, 368(%rsp)
	movq	%rcx, 376(%rsp)
	movq	%r9, 384(%rsp)
	movq	%r8, 392(%rsp)
	movq	%rsi, 400(%rsp)
	movq	%rcx, 408(%rsp)
	movq	%r9, 416(%rsp)
	movq	%r8, 424(%rsp)
	movq	%rsi, 432(%rsp)
	movq	%rcx, 440(%rsp)
	movq	%r9, 448(%rsp)
	movq	%r8, 456(%rsp)
	movq	%rsi, 464(%rsp)
	movq	%rcx, 472(%rsp)
	movq	%r9, 480(%rsp)
	movq	%r8, 488(%rsp)
	movq	%rsi, 496(%rsp)
	movq	%rcx, 504(%rsp)
	movq	%r9, 512(%rsp)
	movq	%r8, 520(%rsp)
	movq	%rsi, 528(%rsp)
	movq	%rcx, 536(%rsp)
.L444:
	movq	(%rdx), %rcx
	movq	%rcx, 864(%rsp)
	movq	8(%rdx), %rcx
	movq	%rcx, 872(%rsp)
	movq	16(%rdx), %rcx
	movq	%rcx, 880(%rsp)
	movq	24(%rdx), %rcx
	movq	%rcx, 888(%rsp)
	movq	32(%rdx), %rcx
	movq	%rcx, 832(%rsp)
	movq	40(%rdx), %rcx
	movq	%rcx, 840(%rsp)
	movq	48(%rdx), %rcx
	movq	%rcx, 848(%rsp)
	movq	56(%rdx), %rcx
	movq	%rcx, 856(%rsp)
	movq	64(%rdx), %rcx
	movq	%rcx, 800(%rsp)
	movq	72(%rdx), %rcx
	movq	%rcx, 808(%rsp)
	movq	80(%rdx), %rcx
	movq	%rcx, 816(%rsp)
	movq	88(%rdx), %rcx
	movq	%rcx, 824(%rsp)
	movq	96(%rdx), %rcx
	movq	%rcx, 768(%rsp)
	movq	104(%rdx), %rcx
	movq	%rcx, 776(%rsp)
	movq	112(%rdx), %rcx
	movq	%rcx, 784(%rsp)
	movq	120(%rdx), %rcx
	movq	%rcx, 792(%rsp)
	movq	128(%rdx), %rcx
	movq	%rcx, 736(%rsp)
	movq	136(%rdx), %rcx
	movq	%rcx, 744(%rsp)
	movq	144(%rdx), %rcx
	movq	%rcx, 752(%rsp)
	movq	152(%rdx), %rcx
	movq	%rcx, 760(%rsp)
	movq	160(%rdx), %rcx
	movq	%rcx, 704(%rsp)
	movq	168(%rdx), %rcx
	movq	%rcx, 712(%rsp)
	movq	176(%rdx), %rcx
	movq	%rcx, 720(%rsp)
	movq	184(%rdx), %rcx
	movq	%rcx, 728(%rsp)
	movq	192(%rdx), %rcx
	movq	%rcx, 672(%rsp)
	movq	200(%rdx), %rcx
	movq	%rcx, 680(%rsp)
	movq	208(%rdx), %rcx
	movq	%rcx, 688(%rsp)
	movq	216(%rdx), %rcx
	movq	%rcx, 696(%rsp)
	movq	224(%rdx), %rcx
	movq	%rcx, 640(%rsp)
	movq	232(%rdx), %rcx
	movq	%rcx, 648(%rsp)
	movq	240(%rdx), %rcx
	movq	%rcx, 656(%rsp)
	movq	248(%rdx), %rcx
	movq	%rcx, 664(%rsp)
	movaps	608(%rsp), %xmm1
	mulps	864(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movq	48(%rsp), %rsi
	movaps	%xmm1, 64(%rsp)
	movaps	624(%rsp), %xmm1
	mulps	880(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movq	48(%rsp), %rcx
	movaps	%xmm1, 80(%rsp)
	movq	%rsi, 576(%rsp)
	movq	72(%rsp), %rsi
	movq	%rsi, 584(%rsp)
	movq	%rcx, 592(%rsp)
	movq	88(%rsp), %rcx
	movq	%rcx, 600(%rsp)
	movaps	832(%rsp), %xmm1
	mulps	544(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movaps	848(%rsp), %xmm1
	mulps	560(%rsp), %xmm1
	movaps	%xmm1, 32(%rsp)
	movq	48(%rsp), %rsi
	movaps	48(%rsp), %xmm4
	movaps	%xmm4, 64(%rsp)
	movq	32(%rsp), %rcx
	movaps	%xmm1, 80(%rsp)
	movq	%rsi, 544(%rsp)
	movq	72(%rsp), %rsi
	movq	%rsi, 552(%rsp)
	movq	%rcx, 560(%rsp)
	movq	88(%rsp), %rcx
	movq	%rcx, 568(%rsp)
	movaps	800(%rsp), %xmm1
	mulps	512(%rsp), %xmm1
	movaps	%xmm1, 32(%rsp)
	movaps	816(%rsp), %xmm1
	mulps	528(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movq	32(%rsp), %rsi
	movaps	32(%rsp), %xmm6
	movaps	%xmm6, 64(%rsp)
	movq	48(%rsp), %rcx
	movaps	%xmm1, 80(%rsp)
	movq	%rsi, 512(%rsp)
	movq	72(%rsp), %rsi
	movq	%rsi, 520(%rsp)
	movq	%rcx, 528(%rsp)
	movq	88(%rsp), %rcx
	movq	%rcx, 536(%rsp)
	movaps	480(%rsp), %xmm1
	mulps	768(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movaps	496(%rsp), %xmm1
	mulps	784(%rsp), %xmm1
	movaps	%xmm1, 32(%rsp)
	movq	48(%rsp), %rsi
	movaps	48(%rsp), %xmm2
	movaps	%xmm2, 64(%rsp)
	movq	32(%rsp), %rcx
	movaps	%xmm1, 80(%rsp)
	movq	%rsi, 480(%rsp)
	movq	72(%rsp), %rsi
	movq	%rsi, 488(%rsp)
	movq	%rcx, 496(%rsp)
	movq	88(%rsp), %rcx
	movq	%rcx, 504(%rsp)
	movaps	448(%rsp), %xmm1
	mulps	736(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movaps	464(%rsp), %xmm1
	mulps	752(%rsp), %xmm1
	movaps	%xmm1, 32(%rsp)
	movq	48(%rsp), %rsi
	movaps	48(%rsp), %xmm4
	movaps	%xmm4, 64(%rsp)
	movq	32(%rsp), %rcx
	movaps	%xmm1, 80(%rsp)
	movq	%rsi, 448(%rsp)
	movq	72(%rsp), %rsi
	movq	%rsi, 456(%rsp)
	movq	%rcx, 464(%rsp)
	movq	88(%rsp), %rcx
	movq	%rcx, 472(%rsp)
	movaps	416(%rsp), %xmm1
	mulps	704(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movaps	432(%rsp), %xmm1
	mulps	720(%rsp), %xmm1
	movaps	%xmm1, 32(%rsp)
	movq	48(%rsp), %rsi
	movaps	48(%rsp), %xmm6
	movaps	%xmm6, 64(%rsp)
	movq	32(%rsp), %rcx
	movaps	%xmm1, 80(%rsp)
	movq	%rsi, 416(%rsp)
	movq	72(%rsp), %rsi
	movq	%rsi, 424(%rsp)
	movq	%rcx, 432(%rsp)
	movq	88(%rsp), %rcx
	movq	%rcx, 440(%rsp)
	movaps	384(%rsp), %xmm1
	mulps	672(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movaps	400(%rsp), %xmm1
	mulps	688(%rsp), %xmm1
	movaps	%xmm1, 32(%rsp)
	movq	48(%rsp), %rsi
	movaps	48(%rsp), %xmm2
	movaps	%xmm2, 64(%rsp)
	movq	32(%rsp), %rcx
	movaps	%xmm1, 80(%rsp)
	movq	%rsi, 384(%rsp)
	movq	72(%rsp), %rsi
	movq	%rsi, 392(%rsp)
	movq	%rcx, 400(%rsp)
	movq	88(%rsp), %rcx
	movq	%rcx, 408(%rsp)
	movaps	352(%rsp), %xmm1
	mulps	640(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movaps	368(%rsp), %xmm1
	mulps	656(%rsp), %xmm1
	movaps	%xmm1, 32(%rsp)
	movq	48(%rsp), %rsi
	movaps	48(%rsp), %xmm4
	movaps	%xmm4, 64(%rsp)
	movq	32(%rsp), %rcx
	movaps	%xmm1, 80(%rsp)
	movq	%rsi, 352(%rsp)
	movq	72(%rsp), %rsi
	movq	%rsi, 360(%rsp)
	movq	%rcx, 368(%rsp)
	movq	88(%rsp), %rcx
	movq	%rcx, 376(%rsp)
	addq	$256, %rdx
	subl	$64, %eax
	cmpl	$63, %eax
	jbe	.L443
	movq	576(%rsp), %rcx
	movq	%rcx, 608(%rsp)
	movq	584(%rsp), %rcx
	movq	%rcx, 616(%rsp)
	movq	592(%rsp), %rcx
	movq	%rcx, 624(%rsp)
	movq	600(%rsp), %rcx
	movq	%rcx, 632(%rsp)
	jmp	.L444
.L443:
	movq	576(%rsp), %r9
	movq	%r9, 320(%rsp)
	movq	584(%rsp), %r8
	movq	%r8, 328(%rsp)
	movq	592(%rsp), %rsi
	movq	%rsi, 336(%rsp)
	movq	600(%rsp), %rcx
	movq	%rcx, 344(%rsp)
	leal	-64(%rdi), %edx
	movl	%edx, %eax
	shrl	$6, %eax
	movl	%eax, %edi
	addq	$1, %rdi
	salq	$8, %rdi
	addq	%rdi, %rbx
	movq	%r9, 96(%rsp)
	movq	%r8, 104(%rsp)
	movq	%rsi, 112(%rsp)
	movq	%rcx, 120(%rsp)
	sall	$6, %eax
	subl	%eax, %edx
.L441:
	testl	%edx, %edx
	je	.L445
.L446:
	addq	$4, %rbx
	mulss	-4(%rbx), %xmm0
	subl	$1, %edx
	jne	.L446
	jmp	.L445
.L448:
	mulss	96(%rsp,%rdx,4), %xmm0
	addq	$1, %rax
	movq	%rax, %rdx
	cmpq	$8, %rax
	jne	.L448
	movss	%xmm0, (%r12)
	jmp	.L451
.L450:
	movss	.LC0(%rip), %xmm0
.L438:
	movq	544(%rsp), %rdi
	movq	%rdi, 352(%rsp)
	movq	552(%rsp), %rsi
	movq	%rsi, 360(%rsp)
	movq	560(%rsp), %rcx
	movq	%rcx, 368(%rsp)
	movq	568(%rsp), %rax
	movq	%rax, 376(%rsp)
	movq	%rdi, 384(%rsp)
	movq	%rsi, 392(%rsp)
	movq	%rcx, 400(%rsp)
	movq	%rax, 408(%rsp)
	movq	%rdi, 416(%rsp)
	movq	%rsi, 424(%rsp)
	movq	%rcx, 432(%rsp)
	movq	%rax, 440(%rsp)
	movq	%rdi, 448(%rsp)
	movq	%rsi, 456(%rsp)
	movq	%rcx, 464(%rsp)
	movq	%rax, 472(%rsp)
	movq	%rdi, 480(%rsp)
	movq	%rsi, 488(%rsp)
	movq	%rcx, 496(%rsp)
	movq	%rax, 504(%rsp)
	movq	%rdi, 512(%rsp)
	movq	%rsi, 520(%rsp)
	movq	%rcx, 528(%rsp)
	movq	%rax, 536(%rsp)
	jmp	.L441
.L445:
	movq	96(%rsp), %rax
	movq	%rax, 128(%rsp)
	movq	104(%rsp), %rax
	movq	%rax, 136(%rsp)
	movq	112(%rsp), %rax
	movq	%rax, 144(%rsp)
	movq	120(%rsp), %rax
	movq	%rax, 152(%rsp)
	movaps	128(%rsp), %xmm1
	mulps	544(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movaps	144(%rsp), %xmm1
	mulps	560(%rsp), %xmm1
	movaps	%xmm1, 32(%rsp)
	movq	48(%rsp), %rdx
	movaps	48(%rsp), %xmm6
	movaps	%xmm6, 64(%rsp)
	movq	32(%rsp), %rax
	movaps	%xmm1, 80(%rsp)
	movq	%rdx, 160(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 168(%rsp)
	movq	%rax, 176(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 184(%rsp)
	movaps	512(%rsp), %xmm1
	mulps	480(%rsp), %xmm1
	movaps	%xmm1, 16(%rsp)
	movaps	528(%rsp), %xmm1
	mulps	496(%rsp), %xmm1
	movaps	%xmm1, (%rsp)
	movq	16(%rsp), %rdx
	movaps	16(%rsp), %xmm6
	movaps	%xmm6, 64(%rsp)
	movq	(%rsp), %rax
	movaps	%xmm1, 80(%rsp)
	movq	%rdx, 192(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 200(%rsp)
	movq	%rax, 208(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 216(%rsp)
	movaps	48(%rsp), %xmm6
	mulps	16(%rsp), %xmm6
	movaps	%xmm6, 48(%rsp)
	movaps	32(%rsp), %xmm7
	mulps	%xmm1, %xmm7
	movaps	%xmm7, 32(%rsp)
	movq	48(%rsp), %rdx
	movaps	%xmm6, 64(%rsp)
	movq	32(%rsp), %rax
	movaps	%xmm7, 80(%rsp)
	movq	%rdx, 224(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 232(%rsp)
	movq	%rax, 240(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 248(%rsp)
	movaps	448(%rsp), %xmm1
	mulps	416(%rsp), %xmm1
	movaps	%xmm1, 16(%rsp)
	movaps	464(%rsp), %xmm1
	mulps	432(%rsp), %xmm1
	movaps	%xmm1, (%rsp)
	movq	16(%rsp), %rdx
	movaps	16(%rsp), %xmm6
	movaps	%xmm6, 64(%rsp)
	movq	(%rsp), %rax
	movaps	%xmm1, 80(%rsp)
	movq	%rdx, 256(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 264(%rsp)
	movq	%rax, 272(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 280(%rsp)
	movaps	48(%rsp), %xmm6
	mulps	16(%rsp), %xmm6
	movaps	%xmm6, 48(%rsp)
	movaps	32(%rsp), %xmm7
	mulps	%xmm1, %xmm7
	movaps	%xmm7, 32(%rsp)
	movq	48(%rsp), %rdx
	movaps	%xmm6, 64(%rsp)
	movq	32(%rsp), %rax
	movaps	%xmm7, 80(%rsp)
	movq	%rdx, 288(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 296(%rsp)
	movq	%rax, 304(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 312(%rsp)
	movaps	384(%rsp), %xmm1
	mulps	352(%rsp), %xmm1
	movaps	%xmm1, 16(%rsp)
	movaps	400(%rsp), %xmm1
	mulps	368(%rsp), %xmm1
	movaps	%xmm1, (%rsp)
	movq	16(%rsp), %rdx
	movaps	16(%rsp), %xmm6
	movaps	%xmm6, 64(%rsp)
	movq	(%rsp), %rax
	movaps	%xmm1, 80(%rsp)
	movq	%rdx, 896(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 904(%rsp)
	movq	%rax, 912(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 920(%rsp)
	movaps	48(%rsp), %xmm6
	mulps	16(%rsp), %xmm6
	movaps	%xmm6, 48(%rsp)
	movq	48(%rsp), %rdx
	movaps	%xmm6, 64(%rsp)
	movaps	32(%rsp), %xmm7
	mulps	%xmm1, %xmm7
	movaps	%xmm7, 48(%rsp)
	movq	48(%rsp), %rax
	movaps	%xmm7, 80(%rsp)
	movq	%rdx, 96(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 104(%rsp)
	movq	%rax, 112(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 120(%rsp)
	movl	$0, %edx
	movl	$0, %eax
	jmp	.L448
.L451:
	leaq	-24(%rbp), %rsp
	popq	%rbx
	popq	%r12
	popq	%r13
	popq	%rbp
	ret

# simd_v10_combine: multiply together all 4-byte float elements of a vector
# and store the single-float product through the second argument.
#
# Syntax:  AT&T / GAS, x86-64, SSE (mulss / mulps / movaps).
# In:      %rdi = vector handle (forwarded to get_vec_start and vec_length)
#          %rsi = address that receives the float result
# Out:     result written to (%rsi); nothing is placed in %rax on purpose
# Saved:   %rbp, %rbx, %r12, %r13 are pushed here and restored before ret
#
# Register roles after the prologue:
#   %r13 = saved vector handle        %r12 = saved result address
#   %rbx = cursor into the element data (value returned by get_vec_start)
#   %edx / %esi = elements still to process (only the low 32 bits of the
#                 vec_length return value are kept)
#   %xmm0 = scalar running product for elements handled one at a time
#
# Stack frame (offsets from the 32-byte aligned %rsp):
#   bytes 0..95   scratch for the 16-byte halves of each 32-byte product
#   96            eight-float seed vector; later the first accumulator's
#                 value and finally the fully combined vector
#   480,512,...,736  nine further 32-byte accumulators
#   768 / 800     input / output copy of the first accumulator in the loop
#   832..1151     staging copy of the 320 data bytes consumed per iteration
#
# Every 32-byte vector product is carried out as two 16-byte mulps, one for
# the low half and one for the high half.
# .LC0 is the starting value of every product; its definition is outside
# this view -- presumably 1.0f, verify against the constant pool.
simd_v10_combine:
	pushq	%rbp
	movq	%rsp, %rbp
	pushq	%r13
	pushq	%r12
	pushq	%rbx
	# Align the frame to 32 bytes so the movaps/mulps stack operands below
	# are always 16-byte aligned, then reserve 1152 bytes of locals.
	andq	$-32, %rsp
	subq	$1152, %rsp
	movq	%rdi, %r13
	movq	%rsi, %r12
	call	get_vec_start
	movq	%rax, %rbx
	movq	%r13, %rdi
	call	vec_length
	# Keep only the low 32 bits of the length as the remaining count.
	movl	%eax, %edx
	movl	$0, %esi
	movl	$0, %ecx
	movss	.LC0(%rip), %xmm0
.L455:
	# Fill the eight float lanes at 96(%rsp) with the .LC0 value.
	movss	%xmm0, 96(%rsp,%rsi,4)
	addq	$1, %rcx
	movq	%rcx, %rsi
	cmpq	$8, %rcx
	jne	.L455
	# Seed the accumulator at 736(%rsp) from the freshly filled vector.
	movq	96(%rsp), %rcx
	movq	%rcx, 736(%rsp)
	movq	104(%rsp), %rcx
	movq	%rcx, 744(%rsp)
	movq	112(%rsp), %rcx
	movq	%rcx, 752(%rsp)
	movq	120(%rsp), %rcx
	movq	%rcx, 760(%rsp)
	# Alignment peel: while the data cursor is not a multiple of 32 bytes
	# and elements remain, fold one float at a time into %xmm0.
	testb	$31, %bl
	je	.L468
	# Unaligned cursor but zero length: skip straight to the seeding code.
	testl	%eax, %eax
	je	.L469
	movss	.LC0(%rip), %xmm0
.L461:
	addq	$4, %rbx
	mulss	-4(%rbx), %xmm0
	subl	$1, %edx
	testb	$31, %bl
	jne	.L458
	jmp	.L456
.L468:
	# Cursor was already 32-byte aligned: start the scalar product at .LC0.
	movss	.LC0(%rip), %xmm0
.L456:
	# Cursor is aligned here. %esi keeps the count for the vector loop.
	# The compare is unsigned: more than 79 elements means at least one
	# full 80-element iteration is possible.
	movl	%edx, %esi
	cmpl	$79, %edx
	ja	.L459
	# Fewer than 80 elements left: no vector iteration will run. Seed the
	# eight accumulators at 480..704(%rsp) from 736(%rsp) so the final
	# reduction still sees initialised values, then go to the scalar tail.
	# (%rsi is reused as copy scratch; the count is still live in %edx.)
	movq	736(%rsp), %rdi
	movq	%rdi, 480(%rsp)
	movq	744(%rsp), %rsi
	movq	%rsi, 488(%rsp)
	movq	752(%rsp), %rcx
	movq	%rcx, 496(%rsp)
	movq	760(%rsp), %rax
	movq	%rax, 504(%rsp)
	movq	%rdi, 512(%rsp)
	movq	%rsi, 520(%rsp)
	movq	%rcx, 528(%rsp)
	movq	%rax, 536(%rsp)
	movq	%rdi, 544(%rsp)
	movq	%rsi, 552(%rsp)
	movq	%rcx, 560(%rsp)
	movq	%rax, 568(%rsp)
	movq	%rdi, 576(%rsp)
	movq	%rsi, 584(%rsp)
	movq	%rcx, 592(%rsp)
	movq	%rax, 600(%rsp)
	movq	%rdi, 608(%rsp)
	movq	%rsi, 616(%rsp)
	movq	%rcx, 624(%rsp)
	movq	%rax, 632(%rsp)
	movq	%rdi, 640(%rsp)
	movq	%rsi, 648(%rsp)
	movq	%rcx, 656(%rsp)
	movq	%rax, 664(%rsp)
	movq	%rdi, 672(%rsp)
	movq	%rsi, 680(%rsp)
	movq	%rcx, 688(%rsp)
	movq	%rax, 696(%rsp)
	movq	%rdi, 704(%rsp)
	movq	%rsi, 712(%rsp)
	movq	%rcx, 720(%rsp)
	movq	%rax, 728(%rsp)
	jmp	.L460
.L458:
	# Bottom test of the alignment peel: continue while elements remain;
	# if the data ran out before the cursor became aligned, go to .L457.
	testl	%edx, %edx
	jne	.L461
	jmp	.L457
.L459:
	# At least 80 elements remain. Seed the loop's working copy of the
	# first accumulator (768) and the eight accumulators at 480..704 from
	# 736(%rsp). %rdx and %rax are clobbered as copy scratch here; the
	# element count stays live in %esi.
	movq	736(%rsp), %rdi
	movq	%rdi, 768(%rsp)
	movq	744(%rsp), %rcx
	movq	%rcx, 776(%rsp)
	movq	752(%rsp), %rdx
	movq	%rdx, 784(%rsp)
	movq	760(%rsp), %rax
	movq	%rax, 792(%rsp)
	movq	%rdi, 480(%rsp)
	movq	%rcx, 488(%rsp)
	movq	%rdx, 496(%rsp)
	movq	%rax, 504(%rsp)
	movq	%rdi, 512(%rsp)
	movq	%rcx, 520(%rsp)
	movq	%rdx, 528(%rsp)
	movq	%rax, 536(%rsp)
	movq	%rdi, 544(%rsp)
	movq	%rcx, 552(%rsp)
	movq	%rdx, 560(%rsp)
	movq	%rax, 568(%rsp)
	movq	%rdi, 576(%rsp)
	movq	%rcx, 584(%rsp)
	movq	%rdx, 592(%rsp)
	movq	%rax, 600(%rsp)
	movq	%rdi, 608(%rsp)
	movq	%rcx, 616(%rsp)
	movq	%rdx, 624(%rsp)
	movq	%rax, 632(%rsp)
	movq	%rdi, 640(%rsp)
	movq	%rcx, 648(%rsp)
	movq	%rdx, 656(%rsp)
	movq	%rax, 664(%rsp)
	movq	%rdi, 672(%rsp)
	movq	%rcx, 680(%rsp)
	movq	%rdx, 688(%rsp)
	movq	%rax, 696(%rsp)
	movq	%rdi, 704(%rsp)
	movq	%rcx, 712(%rsp)
	movq	%rdx, 720(%rsp)
	movq	%rax, 728(%rsp)
.L463:
	# Main loop, 80 floats (320 bytes) per iteration.
	# Step A: copy ten 32-byte chunks from the data cursor to the stack
	# using 8-byte integer moves. Chunk k (k = 0..9) lands at
	# 1120 - 32*k (%rsp), i.e. 1120, 1088, ..., 832.
	movq	(%rbx), %rax
	movq	%rax, 1120(%rsp)
	movq	8(%rbx), %rax
	movq	%rax, 1128(%rsp)
	movq	16(%rbx), %rax
	movq	%rax, 1136(%rsp)
	movq	24(%rbx), %rax
	movq	%rax, 1144(%rsp)
	movq	32(%rbx), %rax
	movq	%rax, 1088(%rsp)
	movq	40(%rbx), %rax
	movq	%rax, 1096(%rsp)
	movq	48(%rbx), %rax
	movq	%rax, 1104(%rsp)
	movq	56(%rbx), %rax
	movq	%rax, 1112(%rsp)
	movq	64(%rbx), %rax
	movq	%rax, 1056(%rsp)
	movq	72(%rbx), %rax
	movq	%rax, 1064(%rsp)
	movq	80(%rbx), %rax
	movq	%rax, 1072(%rsp)
	movq	88(%rbx), %rax
	movq	%rax, 1080(%rsp)
	movq	96(%rbx), %rax
	movq	%rax, 1024(%rsp)
	movq	104(%rbx), %rax
	movq	%rax, 1032(%rsp)
	movq	112(%rbx), %rax
	movq	%rax, 1040(%rsp)
	movq	120(%rbx), %rax
	movq	%rax, 1048(%rsp)
	movq	128(%rbx), %rax
	movq	%rax, 992(%rsp)
	movq	136(%rbx), %rax
	movq	%rax, 1000(%rsp)
	movq	144(%rbx), %rax
	movq	%rax, 1008(%rsp)
	movq	152(%rbx), %rax
	movq	%rax, 1016(%rsp)
	movq	160(%rbx), %rax
	movq	%rax, 960(%rsp)
	movq	168(%rbx), %rax
	movq	%rax, 968(%rsp)
	movq	176(%rbx), %rax
	movq	%rax, 976(%rsp)
	movq	184(%rbx), %rax
	movq	%rax, 984(%rsp)
	movq	192(%rbx), %rax
	movq	%rax, 928(%rsp)
	movq	200(%rbx), %rax
	movq	%rax, 936(%rsp)
	movq	208(%rbx), %rax
	movq	%rax, 944(%rsp)
	movq	216(%rbx), %rax
	movq	%rax, 952(%rsp)
	movq	224(%rbx), %rax
	movq	%rax, 896(%rsp)
	movq	232(%rbx), %rax
	movq	%rax, 904(%rsp)
	movq	240(%rbx), %rax
	movq	%rax, 912(%rsp)
	movq	248(%rbx), %rax
	movq	%rax, 920(%rsp)
	movq	256(%rbx), %rax
	movq	%rax, 864(%rsp)
	movq	264(%rbx), %rax
	movq	%rax, 872(%rsp)
	movq	272(%rbx), %rax
	movq	%rax, 880(%rsp)
	movq	280(%rbx), %rax
	movq	%rax, 888(%rsp)
	movq	288(%rbx), %rax
	movq	%rax, 832(%rsp)
	movq	296(%rbx), %rax
	movq	%rax, 840(%rsp)
	movq	304(%rbx), %rax
	movq	%rax, 848(%rsp)
	movq	312(%rbx), %rax
	movq	%rax, 856(%rsp)
	# Step B: multiply each accumulator by its chunk. Each group below
	# does the low 16 bytes and the high 16 bytes separately, bounces the
	# halves through the scratch slots at 32..95(%rsp), and writes the
	# 32-byte result back with four 8-byte moves.
	# First accumulator: [800] = [768] * chunk at 1120.
	movaps	768(%rsp), %xmm1
	mulps	1120(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movq	48(%rsp), %rdx
	movaps	%xmm1, 64(%rsp)
	movaps	1136(%rsp), %xmm1
	mulps	784(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movq	48(%rsp), %rax
	movaps	%xmm1, 80(%rsp)
	movq	%rdx, 800(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 808(%rsp)
	movq	%rax, 816(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 824(%rsp)
	# [736] *= chunk at 1088.
	movaps	1088(%rsp), %xmm1
	mulps	736(%rsp), %xmm1
	movaps	%xmm1, 32(%rsp)
	movaps	1104(%rsp), %xmm1
	mulps	752(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movq	32(%rsp), %rdx
	movaps	32(%rsp), %xmm4
	movaps	%xmm4, 64(%rsp)
	movq	48(%rsp), %rax
	movaps	%xmm1, 80(%rsp)
	movq	%rdx, 736(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 744(%rsp)
	movq	%rax, 752(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 760(%rsp)
	# [704] *= chunk at 1056.
	movaps	704(%rsp), %xmm1
	mulps	1056(%rsp), %xmm1
	movaps	%xmm1, 32(%rsp)
	movaps	1072(%rsp), %xmm1
	mulps	720(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movq	32(%rsp), %rdx
	movaps	32(%rsp), %xmm6
	movaps	%xmm6, 64(%rsp)
	movq	48(%rsp), %rax
	movaps	%xmm1, 80(%rsp)
	movq	%rdx, 704(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 712(%rsp)
	movq	%rax, 720(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 728(%rsp)
	# [672] *= chunk at 1024.
	movaps	1024(%rsp), %xmm1
	mulps	672(%rsp), %xmm1
	movaps	%xmm1, 32(%rsp)
	movaps	1040(%rsp), %xmm1
	mulps	688(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movq	32(%rsp), %rdx
	movaps	32(%rsp), %xmm2
	movaps	%xmm2, 64(%rsp)
	movq	48(%rsp), %rax
	movaps	%xmm1, 80(%rsp)
	movq	%rdx, 672(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 680(%rsp)
	movq	%rax, 688(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 696(%rsp)
	# [640] *= chunk at 992.
	movaps	992(%rsp), %xmm1
	mulps	640(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movaps	1008(%rsp), %xmm1
	mulps	656(%rsp), %xmm1
	movaps	%xmm1, 32(%rsp)
	movq	48(%rsp), %rdx
	movaps	48(%rsp), %xmm4
	movaps	%xmm4, 64(%rsp)
	movq	32(%rsp), %rax
	movaps	%xmm1, 80(%rsp)
	movq	%rdx, 640(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 648(%rsp)
	movq	%rax, 656(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 664(%rsp)
	# [608] *= chunk at 960.
	movaps	960(%rsp), %xmm1
	mulps	608(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movaps	976(%rsp), %xmm1
	mulps	624(%rsp), %xmm1
	movaps	%xmm1, 32(%rsp)
	movq	48(%rsp), %rdx
	movaps	48(%rsp), %xmm6
	movaps	%xmm6, 64(%rsp)
	movq	32(%rsp), %rax
	movaps	%xmm1, 80(%rsp)
	movq	%rdx, 608(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 616(%rsp)
	movq	%rax, 624(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 632(%rsp)
	# [576] *= chunk at 928.
	movaps	576(%rsp), %xmm1
	mulps	928(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movaps	592(%rsp), %xmm1
	mulps	944(%rsp), %xmm1
	movaps	%xmm1, 32(%rsp)
	movq	48(%rsp), %rdx
	movaps	48(%rsp), %xmm2
	movaps	%xmm2, 64(%rsp)
	movq	32(%rsp), %rax
	movaps	%xmm1, 80(%rsp)
	movq	%rdx, 576(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 584(%rsp)
	movq	%rax, 592(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 600(%rsp)
	# [544] *= chunk at 896.
	movaps	544(%rsp), %xmm1
	mulps	896(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movaps	560(%rsp), %xmm1
	mulps	912(%rsp), %xmm1
	movaps	%xmm1, 32(%rsp)
	movq	48(%rsp), %rdx
	movaps	48(%rsp), %xmm4
	movaps	%xmm4, 64(%rsp)
	movq	32(%rsp), %rax
	movaps	%xmm1, 80(%rsp)
	movq	%rdx, 544(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 552(%rsp)
	movq	%rax, 560(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 568(%rsp)
	# [512] *= chunk at 864.
	movaps	512(%rsp), %xmm1
	mulps	864(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movaps	528(%rsp), %xmm1
	mulps	880(%rsp), %xmm1
	movaps	%xmm1, 32(%rsp)
	movq	48(%rsp), %rcx
	movaps	48(%rsp), %xmm6
	movaps	%xmm6, 64(%rsp)
	movq	32(%rsp), %rax
	movaps	%xmm1, 80(%rsp)
	movq	%rcx, 512(%rsp)
	movq	72(%rsp), %rcx
	movq	%rcx, 520(%rsp)
	movq	%rax, 528(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 536(%rsp)
	# [480] *= chunk at 832.
	movaps	480(%rsp), %xmm1
	mulps	832(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movaps	496(%rsp), %xmm1
	mulps	848(%rsp), %xmm1
	movaps	%xmm1, 32(%rsp)
	movq	48(%rsp), %rcx
	movaps	48(%rsp), %xmm2
	movaps	%xmm2, 64(%rsp)
	movq	32(%rsp), %rax
	movaps	%xmm1, 80(%rsp)
	movq	%rcx, 480(%rsp)
	movq	72(%rsp), %rcx
	movq	%rcx, 488(%rsp)
	movq	%rax, 496(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 504(%rsp)
	# Step C: advance the cursor by 320 bytes and the count by 80. %edx
	# receives the new remaining count; leave the loop once fewer than 80
	# elements are left (unsigned compare).
	addq	$320, %rbx
	leal	-80(%rsi), %edx
	cmpl	$79, %edx
	jbe	.L462
	# Another full iteration follows: commit the count to %esi and feed
	# the first accumulator's result (800) back into its input slot (768).
	movl	%edx, %esi
	movq	800(%rsp), %rax
	movq	%rax, 768(%rsp)
	movq	808(%rsp), %rax
	movq	%rax, 776(%rsp)
	movq	816(%rsp), %rax
	movq	%rax, 784(%rsp)
	movq	824(%rsp), %rax
	movq	%rax, 792(%rsp)
	jmp	.L463
.L462:
	# Loop exit: publish the first accumulator's final value (800) to
	# 96(%rsp), where the reduction reads it, and also to 128(%rsp).
	# NOTE(review): the copy at 128(%rsp) does not appear to be read again
	# in this function.
	movq	800(%rsp), %rdi
	movq	%rdi, 128(%rsp)
	movq	808(%rsp), %rsi
	movq	%rsi, 136(%rsp)
	movq	816(%rsp), %rcx
	movq	%rcx, 144(%rsp)
	movq	824(%rsp), %rax
	movq	%rax, 152(%rsp)
	movq	%rdi, 96(%rsp)
	movq	%rsi, 104(%rsp)
	movq	%rcx, 112(%rsp)
	movq	%rax, 120(%rsp)
.L460:
	# Scalar tail: fold the remaining %edx elements (fewer than 80 when
	# arriving from the vector path) into %xmm0 one float at a time.
	testl	%edx, %edx
	je	.L464
.L465:
	addq	$4, %rbx
	mulss	-4(%rbx), %xmm0
	subl	$1, %edx
	jne	.L465
.L464:
	# Reduction of the ten accumulators into one 32-byte vector. With
	# [n] meaning the vector at n(%rsp), the sequence below computes
	#   [416] = [96]  * [736]      [384] = [704] * [672]
	#   [352] = [416] * [384]
	#   [320] = [640] * [608]      [288] = [352] * [320]
	#   [256] = [576] * [544]      [224] = [288] * [256]
	#   [192] = [512] * [480]      [96]  = [224] * [192]
	# Running low/high halves are carried in %xmm4/%xmm5 (and
	# %xmm6/%xmm7); the numbered slots receive a copy of each step.
	movq	96(%rsp), %rax
	movq	%rax, 448(%rsp)
	movq	104(%rsp), %rax
	movq	%rax, 456(%rsp)
	movq	112(%rsp), %rax
	movq	%rax, 464(%rsp)
	movq	120(%rsp), %rax
	movq	%rax, 472(%rsp)
	movaps	448(%rsp), %xmm1
	mulps	736(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movaps	464(%rsp), %xmm1
	mulps	752(%rsp), %xmm1
	movaps	%xmm1, 32(%rsp)
	movq	48(%rsp), %rdx
	movaps	48(%rsp), %xmm4
	movaps	%xmm4, 64(%rsp)
	movq	32(%rsp), %rax
	movaps	%xmm1, %xmm5
	movaps	%xmm1, 80(%rsp)
	movq	%rdx, 416(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 424(%rsp)
	movq	%rax, 432(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 440(%rsp)
	movaps	704(%rsp), %xmm1
	mulps	672(%rsp), %xmm1
	movaps	%xmm1, 16(%rsp)
	movaps	720(%rsp), %xmm1
	mulps	688(%rsp), %xmm1
	movaps	%xmm1, (%rsp)
	movq	16(%rsp), %rdx
	movaps	16(%rsp), %xmm6
	movaps	%xmm6, 64(%rsp)
	movq	(%rsp), %rax
	movaps	%xmm1, 80(%rsp)
	movq	%rdx, 384(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 392(%rsp)
	movq	%rax, 400(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 408(%rsp)
	mulps	%xmm6, %xmm4
	movaps	%xmm4, 48(%rsp)
	mulps	%xmm1, %xmm5
	movaps	%xmm5, 32(%rsp)
	movq	48(%rsp), %rdx
	movaps	%xmm4, 64(%rsp)
	movq	32(%rsp), %rax
	movaps	%xmm5, 80(%rsp)
	movq	%rdx, 352(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 360(%rsp)
	movq	%rax, 368(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 376(%rsp)
	movaps	640(%rsp), %xmm1
	mulps	608(%rsp), %xmm1
	movaps	%xmm1, 16(%rsp)
	movaps	656(%rsp), %xmm1
	mulps	624(%rsp), %xmm1
	movaps	%xmm1, (%rsp)
	movq	16(%rsp), %rdx
	movaps	16(%rsp), %xmm6
	movaps	%xmm6, 64(%rsp)
	movq	(%rsp), %rax
	movaps	%xmm1, 80(%rsp)
	movq	%rdx, 320(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 328(%rsp)
	movq	%rax, 336(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 344(%rsp)
	movaps	%xmm4, %xmm6
	mulps	16(%rsp), %xmm6
	movaps	%xmm6, 48(%rsp)
	movaps	%xmm5, %xmm7
	mulps	%xmm1, %xmm7
	movaps	%xmm7, 32(%rsp)
	movq	48(%rsp), %rdx
	movaps	%xmm6, %xmm4
	movaps	%xmm6, 64(%rsp)
	movq	32(%rsp), %rax
	movaps	%xmm7, %xmm5
	movaps	%xmm7, 80(%rsp)
	movq	%rdx, 288(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 296(%rsp)
	movq	%rax, 304(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 312(%rsp)
	movaps	576(%rsp), %xmm1
	mulps	544(%rsp), %xmm1
	movaps	%xmm1, 16(%rsp)
	movaps	592(%rsp), %xmm1
	mulps	560(%rsp), %xmm1
	movaps	%xmm1, (%rsp)
	movq	16(%rsp), %rdx
	movaps	16(%rsp), %xmm6
	movaps	%xmm6, 64(%rsp)
	movq	(%rsp), %rax
	movaps	%xmm1, 80(%rsp)
	movq	%rdx, 256(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 264(%rsp)
	movq	%rax, 272(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 280(%rsp)
	mulps	%xmm6, %xmm4
	movaps	%xmm4, 48(%rsp)
	mulps	%xmm1, %xmm5
	movaps	%xmm5, 32(%rsp)
	movq	48(%rsp), %rdx
	movaps	%xmm4, 64(%rsp)
	movq	32(%rsp), %rax
	movaps	%xmm5, 80(%rsp)
	movq	%rdx, 224(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 232(%rsp)
	movq	%rax, 240(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 248(%rsp)
	movaps	512(%rsp), %xmm1
	mulps	480(%rsp), %xmm1
	movaps	%xmm1, 16(%rsp)
	movaps	528(%rsp), %xmm1
	mulps	496(%rsp), %xmm1
	movaps	%xmm1, (%rsp)
	movq	16(%rsp), %rdx
	movaps	16(%rsp), %xmm6
	movaps	%xmm6, 64(%rsp)
	movq	(%rsp), %rax
	movaps	%xmm1, 80(%rsp)
	movq	%rdx, 192(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 200(%rsp)
	movq	%rax, 208(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 216(%rsp)
	movaps	%xmm4, %xmm6
	mulps	16(%rsp), %xmm6
	movaps	%xmm6, 48(%rsp)
	movq	48(%rsp), %rdx
	movaps	%xmm6, 64(%rsp)
	movaps	%xmm5, %xmm7
	mulps	%xmm1, %xmm7
	movaps	%xmm7, 48(%rsp)
	movq	48(%rsp), %rax
	movaps	%xmm7, 80(%rsp)
	movq	%rdx, 96(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 104(%rsp)
	movq	%rax, 112(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 120(%rsp)
	movl	$0, %edx
	movl	$0, %eax
.L467:
	# Horizontal step: fold the eight float lanes of the combined vector
	# at 96(%rsp) into the scalar product, then store the result.
	mulss	96(%rsp,%rdx,4), %xmm0
	addq	$1, %rax
	movq	%rax, %rdx
	cmpq	$8, %rax
	jne	.L467
	movss	%xmm0, (%r12)
	jmp	.L470
.L469:
	# Entered when the cursor is unaligned and the length is zero.
	movss	.LC0(%rip), %xmm0
.L457:
	# Also entered when the data ran out during the alignment peel
	# (%edx is zero in both cases). Seeds the eight accumulators at
	# 480..704(%rsp) from 736(%rsp); same copy sequence as the short
	# input path that follows .L456.
	movq	736(%rsp), %rdi
	movq	%rdi, 480(%rsp)
	movq	744(%rsp), %rsi
	movq	%rsi, 488(%rsp)
	movq	752(%rsp), %rcx
	movq	%rcx, 496(%rsp)
	movq	760(%rsp), %rax
	movq	%rax, 504(%rsp)
	movq	%rdi, 512(%rsp)
	movq	%rsi, 520(%rsp)
	movq	%rcx, 528(%rsp)
	movq	%rax, 536(%rsp)
	movq	%rdi, 544(%rsp)
	movq	%rsi, 552(%rsp)
	movq	%rcx, 560(%rsp)
	movq	%rax, 568(%rsp)
	movq	%rdi, 576(%rsp)
	movq	%rsi, 584(%rsp)
	movq	%rcx, 592(%rsp)
	movq	%rax, 600(%rsp)
	movq	%rdi, 608(%rsp)
	movq	%rsi, 616(%rsp)
	movq	%rcx, 624(%rsp)
	movq	%rax, 632(%rsp)
	movq	%rdi, 640(%rsp)
	movq	%rsi, 648(%rsp)
	movq	%rcx, 656(%rsp)
	movq	%rax, 664(%rsp)
	movq	%rdi, 672(%rsp)
	movq	%rsi, 680(%rsp)
	movq	%rcx, 688(%rsp)
	movq	%rax, 696(%rsp)
	movq	%rdi, 704(%rsp)
	movq	%rsi, 712(%rsp)
	movq	%rcx, 720(%rsp)
	movq	%rax, 728(%rsp)
	jmp	.L460
.L470:
	# Epilogue: %rbp-24 is the address of the saved %rbx, so this undoes
	# both the alignment and the local allocation before the pops.
	leaq	-24(%rbp), %rsp
	popq	%rbx
	popq	%r12
	popq	%r13
	popq	%rbp
	ret

simd_v12_combine:
	pushq	%rbp
	movq	%rsp, %rbp
	pushq	%r13
	pushq	%r12
	pushq	%rbx
	andq	$-32, %rsp
	subq	$1344, %rsp
	movq	%rdi, %r13
	movq	%rsi, %r12
	call	get_vec_start
	movq	%rax, %rbx
	movq	%r13, %rdi
	call	vec_length
	movl	%eax, %edx
	movl	$0, %esi
	movl	$0, %ecx
	movss	.LC0(%rip), %xmm0
.L474:
	movss	%xmm0, 96(%rsp,%rsi,4)
	addq	$1, %rcx
	movq	%rcx, %rsi
	cmpq	$8, %rcx
	jne	.L474
	movq	96(%rsp), %rcx
	movq	%rcx, 864(%rsp)
	movq	104(%rsp), %rcx
	movq	%rcx, 872(%rsp)
	movq	112(%rsp), %rcx
	movq	%rcx, 880(%rsp)
	movq	120(%rsp), %rcx
	movq	%rcx, 888(%rsp)
	testb	$31, %bl
	je	.L487
	testl	%eax, %eax
	je	.L488
	movss	.LC0(%rip), %xmm0
.L480:
	addq	$4, %rbx
	mulss	-4(%rbx), %xmm0
	subl	$1, %edx
	testb	$31, %bl
	jne	.L477
	jmp	.L475
.L487:
	movss	.LC0(%rip), %xmm0
.L475:
	movl	%edx, %esi
	cmpl	$95, %edx
	ja	.L478
	movq	864(%rsp), %rdi
	movq	%rdi, 544(%rsp)
	movq	872(%rsp), %rsi
	movq	%rsi, 552(%rsp)
	movq	880(%rsp), %rcx
	movq	%rcx, 560(%rsp)
	movq	888(%rsp), %rax
	movq	%rax, 568(%rsp)
	movq	%rdi, 576(%rsp)
	movq	%rsi, 584(%rsp)
	movq	%rcx, 592(%rsp)
	movq	%rax, 600(%rsp)
	movq	%rdi, 608(%rsp)
	movq	%rsi, 616(%rsp)
	movq	%rcx, 624(%rsp)
	movq	%rax, 632(%rsp)
	movq	%rdi, 640(%rsp)
	movq	%rsi, 648(%rsp)
	movq	%rcx, 656(%rsp)
	movq	%rax, 664(%rsp)
	movq	%rdi, 672(%rsp)
	movq	%rsi, 680(%rsp)
	movq	%rcx, 688(%rsp)
	movq	%rax, 696(%rsp)
	movq	%rdi, 704(%rsp)
	movq	%rsi, 712(%rsp)
	movq	%rcx, 720(%rsp)
	movq	%rax, 728(%rsp)
	movq	%rdi, 736(%rsp)
	movq	%rsi, 744(%rsp)
	movq	%rcx, 752(%rsp)
	movq	%rax, 760(%rsp)
	movq	%rdi, 768(%rsp)
	movq	%rsi, 776(%rsp)
	movq	%rcx, 784(%rsp)
	movq	%rax, 792(%rsp)
	movq	%rdi, 800(%rsp)
	movq	%rsi, 808(%rsp)
	movq	%rcx, 816(%rsp)
	movq	%rax, 824(%rsp)
	movq	%rdi, 832(%rsp)
	movq	%rsi, 840(%rsp)
	movq	%rcx, 848(%rsp)
	movq	%rax, 856(%rsp)
	jmp	.L479
.L477:
	testl	%edx, %edx
	jne	.L480
	jmp	.L476
.L478:
	movq	864(%rsp), %rdi
	movq	%rdi, 896(%rsp)
	movq	872(%rsp), %rcx
	movq	%rcx, 904(%rsp)
	movq	880(%rsp), %rdx
	movq	%rdx, 912(%rsp)
	movq	888(%rsp), %rax
	movq	%rax, 920(%rsp)
	movq	%rdi, 544(%rsp)
	movq	%rcx, 552(%rsp)
	movq	%rdx, 560(%rsp)
	movq	%rax, 568(%rsp)
	movq	%rdi, 576(%rsp)
	movq	%rcx, 584(%rsp)
	movq	%rdx, 592(%rsp)
	movq	%rax, 600(%rsp)
	movq	%rdi, 608(%rsp)
	movq	%rcx, 616(%rsp)
	movq	%rdx, 624(%rsp)
	movq	%rax, 632(%rsp)
	movq	%rdi, 640(%rsp)
	movq	%rcx, 648(%rsp)
	movq	%rdx, 656(%rsp)
	movq	%rax, 664(%rsp)
	movq	%rdi, 672(%rsp)
	movq	%rcx, 680(%rsp)
	movq	%rdx, 688(%rsp)
	movq	%rax, 696(%rsp)
	movq	%rdi, 704(%rsp)
	movq	%rcx, 712(%rsp)
	movq	%rdx, 720(%rsp)
	movq	%rax, 728(%rsp)
	movq	%rdi, 736(%rsp)
	movq	%rcx, 744(%rsp)
	movq	%rdx, 752(%rsp)
	movq	%rax, 760(%rsp)
	movq	%rdi, 768(%rsp)
	movq	%rcx, 776(%rsp)
	movq	%rdx, 784(%rsp)
	movq	%rax, 792(%rsp)
	movq	%rdi, 800(%rsp)
	movq	%rcx, 808(%rsp)
	movq	%rdx, 816(%rsp)
	movq	%rax, 824(%rsp)
	movq	%rdi, 832(%rsp)
	movq	%rcx, 840(%rsp)
	movq	%rdx, 848(%rsp)
	movq	%rax, 856(%rsp)
.L482:
	movq	(%rbx), %rax
	movq	%rax, 1312(%rsp)
	movq	8(%rbx), %rax
	movq	%rax, 1320(%rsp)
	movq	16(%rbx), %rax
	movq	%rax, 1328(%rsp)
	movq	24(%rbx), %rax
	movq	%rax, 1336(%rsp)
	movq	32(%rbx), %rax
	movq	%rax, 1280(%rsp)
	movq	40(%rbx), %rax
	movq	%rax, 1288(%rsp)
	movq	48(%rbx), %rax
	movq	%rax, 1296(%rsp)
	movq	56(%rbx), %rax
	movq	%rax, 1304(%rsp)
	movq	64(%rbx), %rax
	movq	%rax, 1248(%rsp)
	movq	72(%rbx), %rax
	movq	%rax, 1256(%rsp)
	movq	80(%rbx), %rax
	movq	%rax, 1264(%rsp)
	movq	88(%rbx), %rax
	movq	%rax, 1272(%rsp)
	movq	96(%rbx), %rax
	movq	%rax, 1216(%rsp)
	movq	104(%rbx), %rax
	movq	%rax, 1224(%rsp)
	movq	112(%rbx), %rax
	movq	%rax, 1232(%rsp)
	movq	120(%rbx), %rax
	movq	%rax, 1240(%rsp)
	movq	128(%rbx), %rax
	movq	%rax, 1184(%rsp)
	movq	136(%rbx), %rax
	movq	%rax, 1192(%rsp)
	movq	144(%rbx), %rax
	movq	%rax, 1200(%rsp)
	movq	152(%rbx), %rax
	movq	%rax, 1208(%rsp)
	movq	160(%rbx), %rax
	movq	%rax, 1152(%rsp)
	movq	168(%rbx), %rax
	movq	%rax, 1160(%rsp)
	movq	176(%rbx), %rax
	movq	%rax, 1168(%rsp)
	movq	184(%rbx), %rax
	movq	%rax, 1176(%rsp)
	movq	192(%rbx), %rax
	movq	%rax, 1120(%rsp)
	movq	200(%rbx), %rax
	movq	%rax, 1128(%rsp)
	movq	208(%rbx), %rax
	movq	%rax, 1136(%rsp)
	movq	216(%rbx), %rax
	movq	%rax, 1144(%rsp)
	movq	224(%rbx), %rax
	movq	%rax, 1088(%rsp)
	movq	232(%rbx), %rax
	movq	%rax, 1096(%rsp)
	movq	240(%rbx), %rax
	movq	%rax, 1104(%rsp)
	movq	248(%rbx), %rax
	movq	%rax, 1112(%rsp)
	movq	256(%rbx), %rax
	movq	%rax, 1056(%rsp)
	movq	264(%rbx), %rax
	movq	%rax, 1064(%rsp)
	movq	272(%rbx), %rax
	movq	%rax, 1072(%rsp)
	movq	280(%rbx), %rax
	movq	%rax, 1080(%rsp)
	movq	288(%rbx), %rax
	movq	%rax, 1024(%rsp)
	movq	296(%rbx), %rax
	movq	%rax, 1032(%rsp)
	movq	304(%rbx), %rax
	movq	%rax, 1040(%rsp)
	movq	312(%rbx), %rax
	movq	%rax, 1048(%rsp)
	movq	320(%rbx), %rax
	movq	%rax, 992(%rsp)
	movq	328(%rbx), %rax
	movq	%rax, 1000(%rsp)
	movq	336(%rbx), %rax
	movq	%rax, 1008(%rsp)
	movq	344(%rbx), %rax
	movq	%rax, 1016(%rsp)
	movq	352(%rbx), %rax
	movq	%rax, 960(%rsp)
	movq	360(%rbx), %rax
	movq	%rax, 968(%rsp)
	movq	368(%rbx), %rax
	movq	%rax, 976(%rsp)
	movq	376(%rbx), %rax
	movq	%rax, 984(%rsp)
	movaps	896(%rsp), %xmm1
	mulps	1312(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movq	48(%rsp), %rdx
	movaps	%xmm1, 64(%rsp)
	movaps	912(%rsp), %xmm1
	mulps	1328(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movq	48(%rsp), %rax
	movaps	%xmm1, 80(%rsp)
	movq	%rdx, 928(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 936(%rsp)
	movq	%rax, 944(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 952(%rsp)
	movaps	1280(%rsp), %xmm1
	mulps	864(%rsp), %xmm1
	movaps	%xmm1, 32(%rsp)
	movaps	1296(%rsp), %xmm1
	mulps	880(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movq	32(%rsp), %rdx
	movaps	32(%rsp), %xmm4
	movaps	%xmm4, 64(%rsp)
	movq	48(%rsp), %rax
	movaps	%xmm1, 80(%rsp)
	movq	%rdx, 864(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 872(%rsp)
	movq	%rax, 880(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 888(%rsp)
	movaps	1248(%rsp), %xmm1
	mulps	832(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movaps	1264(%rsp), %xmm1
	mulps	848(%rsp), %xmm1
	movaps	%xmm1, 32(%rsp)
	movq	48(%rsp), %rdx
	movaps	48(%rsp), %xmm6
	movaps	%xmm6, 64(%rsp)
	movq	32(%rsp), %rax
	movaps	%xmm1, 80(%rsp)
	movq	%rdx, 832(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 840(%rsp)
	movq	%rax, 848(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 856(%rsp)
	movaps	1216(%rsp), %xmm1
	mulps	800(%rsp), %xmm1
	movaps	%xmm1, 32(%rsp)
	movaps	1232(%rsp), %xmm1
	mulps	816(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movq	32(%rsp), %rdx
	movaps	32(%rsp), %xmm2
	movaps	%xmm2, 64(%rsp)
	movq	48(%rsp), %rax
	movaps	%xmm1, 80(%rsp)
	movq	%rdx, 800(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 808(%rsp)
	movq	%rax, 816(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 824(%rsp)
	movaps	1184(%rsp), %xmm1
	mulps	768(%rsp), %xmm1
	movaps	%xmm1, 32(%rsp)
	movaps	1200(%rsp), %xmm1
	mulps	784(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movq	32(%rsp), %rdx
	movaps	32(%rsp), %xmm4
	movaps	%xmm4, 64(%rsp)
	movq	48(%rsp), %rax
	movaps	%xmm1, 80(%rsp)
	movq	%rdx, 768(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 776(%rsp)
	movq	%rax, 784(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 792(%rsp)
	movaps	1152(%rsp), %xmm1
	mulps	736(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movaps	1168(%rsp), %xmm1
	mulps	752(%rsp), %xmm1
	movaps	%xmm1, 32(%rsp)
	movq	48(%rsp), %rdx
	movaps	48(%rsp), %xmm6
	movaps	%xmm6, 64(%rsp)
	movq	32(%rsp), %rax
	movaps	%xmm1, 80(%rsp)
	movq	%rdx, 736(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 744(%rsp)
	movq	%rax, 752(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 760(%rsp)
	movaps	1120(%rsp), %xmm1
	mulps	704(%rsp), %xmm1
	movaps	%xmm1, 32(%rsp)
	movaps	1136(%rsp), %xmm1
	mulps	720(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movq	32(%rsp), %rdx
	movaps	32(%rsp), %xmm2
	movaps	%xmm2, 64(%rsp)
	movq	48(%rsp), %rax
	movaps	%xmm1, 80(%rsp)
	movq	%rdx, 704(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 712(%rsp)
	movq	%rax, 720(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 728(%rsp)
	movaps	672(%rsp), %xmm1
	mulps	1088(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movaps	688(%rsp), %xmm1
	mulps	1104(%rsp), %xmm1
	movaps	%xmm1, 32(%rsp)
	movq	48(%rsp), %rdx
	movaps	48(%rsp), %xmm4
	movaps	%xmm4, 64(%rsp)
	movq	32(%rsp), %rax
	movaps	%xmm1, 80(%rsp)
	movq	%rdx, 672(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 680(%rsp)
	movq	%rax, 688(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 696(%rsp)
	movaps	640(%rsp), %xmm1
	mulps	1056(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movaps	656(%rsp), %xmm1
	mulps	1072(%rsp), %xmm1
	movaps	%xmm1, 32(%rsp)
	movq	48(%rsp), %rdx
	movaps	48(%rsp), %xmm6
	movaps	%xmm6, 64(%rsp)
	movq	32(%rsp), %rax
	movaps	%xmm1, 80(%rsp)
	movq	%rdx, 640(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 648(%rsp)
	movq	%rax, 656(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 664(%rsp)
	movaps	608(%rsp), %xmm1
	mulps	1024(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movaps	624(%rsp), %xmm1
	mulps	1040(%rsp), %xmm1
	movaps	%xmm1, 32(%rsp)
	movq	48(%rsp), %rdx
	movaps	48(%rsp), %xmm2
	movaps	%xmm2, 64(%rsp)
	movq	32(%rsp), %rax
	movaps	%xmm1, 80(%rsp)
	movq	%rdx, 608(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 616(%rsp)
	movq	%rax, 624(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 632(%rsp)
	movaps	576(%rsp), %xmm1
	mulps	992(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movaps	592(%rsp), %xmm1
	mulps	1008(%rsp), %xmm1
	movaps	%xmm1, 32(%rsp)
	movq	48(%rsp), %rcx
	movaps	48(%rsp), %xmm4
	movaps	%xmm4, 64(%rsp)
	movq	32(%rsp), %rax
	movaps	%xmm1, 80(%rsp)
	movq	%rcx, 576(%rsp)
	movq	72(%rsp), %rcx
	movq	%rcx, 584(%rsp)
	movq	%rax, 592(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 600(%rsp)
	movaps	544(%rsp), %xmm1
	mulps	960(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movaps	560(%rsp), %xmm1
	mulps	976(%rsp), %xmm1
	movaps	%xmm1, 32(%rsp)
	movq	48(%rsp), %rcx
	movaps	48(%rsp), %xmm6
	movaps	%xmm6, 64(%rsp)
	movq	32(%rsp), %rax
	movaps	%xmm1, 80(%rsp)
	movq	%rcx, 544(%rsp)
	movq	72(%rsp), %rcx
	movq	%rcx, 552(%rsp)
	movq	%rax, 560(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 568(%rsp)
	addq	$384, %rbx
	leal	-96(%rsi), %edx
	cmpl	$95, %edx
	jbe	.L481
	movl	%edx, %esi
	movq	928(%rsp), %rax
	movq	%rax, 896(%rsp)
	movq	936(%rsp), %rax
	movq	%rax, 904(%rsp)
	movq	944(%rsp), %rax
	movq	%rax, 912(%rsp)
	movq	952(%rsp), %rax
	movq	%rax, 920(%rsp)
	jmp	.L482
.L481:
	movq	928(%rsp), %rdi
	movq	%rdi, 128(%rsp)
	movq	936(%rsp), %rsi
	movq	%rsi, 136(%rsp)
	movq	944(%rsp), %rcx
	movq	%rcx, 144(%rsp)
	movq	952(%rsp), %rax
	movq	%rax, 152(%rsp)
	movq	%rdi, 96(%rsp)
	movq	%rsi, 104(%rsp)
	movq	%rcx, 112(%rsp)
	movq	%rax, 120(%rsp)
.L479:
	testl	%edx, %edx
	je	.L483
.L484:
	addq	$4, %rbx
	mulss	-4(%rbx), %xmm0
	subl	$1, %edx
	jne	.L484
.L483:
	movq	96(%rsp), %rax
	movq	%rax, 512(%rsp)
	movq	104(%rsp), %rax
	movq	%rax, 520(%rsp)
	movq	112(%rsp), %rax
	movq	%rax, 528(%rsp)
	movq	120(%rsp), %rax
	movq	%rax, 536(%rsp)
	movaps	512(%rsp), %xmm1
	mulps	864(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movaps	528(%rsp), %xmm1
	mulps	880(%rsp), %xmm1
	movaps	%xmm1, 32(%rsp)
	movq	48(%rsp), %rdx
	movaps	48(%rsp), %xmm6
	movaps	%xmm6, 64(%rsp)
	movq	32(%rsp), %rax
	movaps	%xmm1, 80(%rsp)
	movq	%rdx, 480(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 488(%rsp)
	movq	%rax, 496(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 504(%rsp)
	movaps	832(%rsp), %xmm1
	mulps	800(%rsp), %xmm1
	movaps	%xmm1, 16(%rsp)
	movaps	848(%rsp), %xmm1
	mulps	816(%rsp), %xmm1
	movaps	%xmm1, (%rsp)
	movq	16(%rsp), %rdx
	movaps	16(%rsp), %xmm6
	movaps	%xmm6, 64(%rsp)
	movq	(%rsp), %rax
	movaps	%xmm1, 80(%rsp)
	movq	%rdx, 448(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 456(%rsp)
	movq	%rax, 464(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 472(%rsp)
	movaps	48(%rsp), %xmm2
	mulps	%xmm6, %xmm2
	movaps	%xmm2, 48(%rsp)
	movaps	32(%rsp), %xmm3
	mulps	%xmm1, %xmm3
	movaps	%xmm3, 32(%rsp)
	movq	48(%rsp), %rdx
	movaps	%xmm2, 64(%rsp)
	movq	32(%rsp), %rax
	movaps	%xmm3, 80(%rsp)
	movq	%rdx, 416(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 424(%rsp)
	movq	%rax, 432(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 440(%rsp)
	movaps	768(%rsp), %xmm1
	mulps	736(%rsp), %xmm1
	movaps	%xmm1, 16(%rsp)
	movaps	784(%rsp), %xmm1
	mulps	752(%rsp), %xmm1
	movaps	%xmm1, (%rsp)
	movq	16(%rsp), %rdx
	movaps	16(%rsp), %xmm4
	movaps	%xmm4, 64(%rsp)
	movq	(%rsp), %rax
	movaps	%xmm1, 80(%rsp)
	movq	%rdx, 384(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 392(%rsp)
	movq	%rax, 400(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 408(%rsp)
	movaps	%xmm2, %xmm4
	mulps	16(%rsp), %xmm4
	movaps	%xmm4, 48(%rsp)
	movaps	%xmm3, %xmm5
	mulps	%xmm1, %xmm5
	movaps	%xmm5, 32(%rsp)
	movq	48(%rsp), %rdx
	movaps	%xmm4, %xmm6
	movaps	%xmm4, 64(%rsp)
	movq	32(%rsp), %rax
	movaps	%xmm5, %xmm7
	movaps	%xmm5, 80(%rsp)
	movq	%rdx, 352(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 360(%rsp)
	movq	%rax, 368(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 376(%rsp)
	movaps	704(%rsp), %xmm1
	mulps	672(%rsp), %xmm1
	movaps	%xmm1, 16(%rsp)
	movaps	720(%rsp), %xmm1
	mulps	688(%rsp), %xmm1
	movaps	%xmm1, (%rsp)
	movq	16(%rsp), %rdx
	movaps	16(%rsp), %xmm2
	movaps	%xmm2, 64(%rsp)
	movq	(%rsp), %rax
	movaps	%xmm1, 80(%rsp)
	movq	%rdx, 320(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 328(%rsp)
	movq	%rax, 336(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 344(%rsp)
	mulps	%xmm2, %xmm6
	movaps	%xmm6, 48(%rsp)
	mulps	%xmm1, %xmm7
	movaps	%xmm7, 32(%rsp)
	movq	48(%rsp), %rdx
	movaps	%xmm6, %xmm4
	movaps	%xmm6, 64(%rsp)
	movq	32(%rsp), %rax
	movaps	%xmm7, %xmm5
	movaps	%xmm7, 80(%rsp)
	movq	%rdx, 288(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 296(%rsp)
	movq	%rax, 304(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 312(%rsp)
	movaps	640(%rsp), %xmm1
	mulps	608(%rsp), %xmm1
	movaps	%xmm1, 16(%rsp)
	movaps	656(%rsp), %xmm1
	mulps	624(%rsp), %xmm1
	movaps	%xmm1, (%rsp)
	movq	16(%rsp), %rdx
	movaps	16(%rsp), %xmm6
	movaps	%xmm6, 64(%rsp)
	movq	(%rsp), %rax
	movaps	%xmm1, 80(%rsp)
	movq	%rdx, 256(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 264(%rsp)
	movq	%rax, 272(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 280(%rsp)
	movaps	%xmm4, %xmm2
	mulps	%xmm6, %xmm2
	movaps	%xmm2, 48(%rsp)
	movaps	%xmm5, %xmm3
	mulps	%xmm1, %xmm3
	movaps	%xmm3, 32(%rsp)
	movq	48(%rsp), %rdx
	movaps	%xmm2, 64(%rsp)
	movq	32(%rsp), %rax
	movaps	%xmm3, 80(%rsp)
	movq	%rdx, 224(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 232(%rsp)
	movq	%rax, 240(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 248(%rsp)
	movaps	576(%rsp), %xmm1
	mulps	544(%rsp), %xmm1
	movaps	%xmm1, 16(%rsp)
	movaps	592(%rsp), %xmm1
	mulps	560(%rsp), %xmm1
	movaps	%xmm1, (%rsp)
	movq	16(%rsp), %rdx
	movaps	16(%rsp), %xmm4
	movaps	%xmm4, 64(%rsp)
	movq	(%rsp), %rax
	movaps	%xmm1, 80(%rsp)
	movq	%rdx, 192(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 200(%rsp)
	movq	%rax, 208(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 216(%rsp)
	movaps	%xmm2, %xmm4
	mulps	16(%rsp), %xmm4
	movaps	%xmm4, 48(%rsp)
	movq	48(%rsp), %rdx
	movaps	%xmm4, 64(%rsp)
	movaps	%xmm3, %xmm5
	mulps	%xmm1, %xmm5
	movaps	%xmm5, 48(%rsp)
	movq	48(%rsp), %rax
	movaps	%xmm5, 80(%rsp)
	movq	%rdx, 96(%rsp)
	movq	72(%rsp), %rdx
	movq	%rdx, 104(%rsp)
	movq	%rax, 112(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 120(%rsp)
	movl	$0, %edx
	movl	$0, %eax
.L486:
	mulss	96(%rsp,%rdx,4), %xmm0
	addq	$1, %rax
	movq	%rax, %rdx
	cmpq	$8, %rax
	jne	.L486
	movss	%xmm0, (%r12)
	jmp	.L489
.L488:
	movss	.LC0(%rip), %xmm0
.L476:
	movq	864(%rsp), %rdi
	movq	%rdi, 544(%rsp)
	movq	872(%rsp), %rsi
	movq	%rsi, 552(%rsp)
	movq	880(%rsp), %rcx
	movq	%rcx, 560(%rsp)
	movq	888(%rsp), %rax
	movq	%rax, 568(%rsp)
	movq	%rdi, 576(%rsp)
	movq	%rsi, 584(%rsp)
	movq	%rcx, 592(%rsp)
	movq	%rax, 600(%rsp)
	movq	%rdi, 608(%rsp)
	movq	%rsi, 616(%rsp)
	movq	%rcx, 624(%rsp)
	movq	%rax, 632(%rsp)
	movq	%rdi, 640(%rsp)
	movq	%rsi, 648(%rsp)
	movq	%rcx, 656(%rsp)
	movq	%rax, 664(%rsp)
	movq	%rdi, 672(%rsp)
	movq	%rsi, 680(%rsp)
	movq	%rcx, 688(%rsp)
	movq	%rax, 696(%rsp)
	movq	%rdi, 704(%rsp)
	movq	%rsi, 712(%rsp)
	movq	%rcx, 720(%rsp)
	movq	%rax, 728(%rsp)
	movq	%rdi, 736(%rsp)
	movq	%rsi, 744(%rsp)
	movq	%rcx, 752(%rsp)
	movq	%rax, 760(%rsp)
	movq	%rdi, 768(%rsp)
	movq	%rsi, 776(%rsp)
	movq	%rcx, 784(%rsp)
	movq	%rax, 792(%rsp)
	movq	%rdi, 800(%rsp)
	movq	%rsi, 808(%rsp)
	movq	%rcx, 816(%rsp)
	movq	%rax, 824(%rsp)
	movq	%rdi, 832(%rsp)
	movq	%rsi, 840(%rsp)
	movq	%rcx, 848(%rsp)
	movq	%rax, 856(%rsp)
	jmp	.L479
.L489:
	leaq	-24(%rbp), %rsp
	popq	%rbx
	popq	%r12
	popq	%r13
	popq	%rbp
	ret

# simd_v2a_combine
#   Multiplies together a run of single-precision floats and stores the
#   scalar product through the second argument.
#   In:   %rdi = first argument; forwarded unchanged to get_vec_start and
#                vec_length (kept in %r13 across the calls)
#         %rsi = destination pointer (kept in %r12); written once at the end
#   Out:  product stored with movss to (%r12)
#   Only the low 32 bits of vec_length's result are used as the element count.
#   The starting value comes from .LC0, which is defined outside this view
#   (presumably 1.0f, the multiplicative identity - confirm).
#
#   Phases:
#     a) scalar multiplies until the data pointer is 32-byte aligned
#     b) vector loop: 16 floats (64 bytes) per pass folded into an 8-float
#        accumulator
#     c) scalar multiplies for the elements left over
#     d) the 8 accumulator lanes are multiplied into the scalar
#
#   Frame, as offsets from the 32-byte-aligned %rsp:
#       0..31    scratch for freshly computed products
#      32..63    spill area used to read back the upper quadword of a product
#      64..95    8-float accumulator consumed by phase d
#      96..127   copy of the accumulator made on loop exit (not read again
#                in this function)
#     128..159   accumulator carried from one loop pass to the next
#     160..191   accumulator produced by the current pass
#     192..223   product of the two input chunks (not read again here)
#     224..255   second 32-byte input chunk
#     256..287   first 32-byte input chunk
simd_v2a_combine:
	pushq	%rbp
	movq	%rsp, %rbp
	pushq	%r13
	pushq	%r12
	pushq	%rbx
	# Align the frame to 32 bytes; the epilogue restores %rsp from %rbp.
	andq	$-32, %rsp
	subq	$288, %rsp
	movq	%rdi, %r13
	movq	%rsi, %r12
	call	get_vec_start
	# %rbx = current data pointer, %edx = elements still to process.
	movq	%rax, %rbx
	movq	%r13, %rdi
	call	vec_length
	movl	%eax, %edx
	movl	$0, %esi
	movl	$0, %ecx
	movss	.LC0(%rip), %xmm0
	# Fill the 8-lane accumulator at 64(%rsp) with the .LC0 value.
.L493:
	movss	%xmm0, 64(%rsp,%rsi,4)
	addq	$1, %rcx
	movq	%rcx, %rsi
	cmpq	$8, %rcx
	jne	.L493
	# Phase a: skip straight to the vector part if already 32-byte aligned,
	# or to the tail if there are no elements at all.
	testb	$31, %bl
	je	.L506
	testl	%eax, %eax
	je	.L507
	movss	.LC0(%rip), %xmm0
.L499:
	addq	$4, %rbx
	mulss	-4(%rbx), %xmm0
	subl	$1, %edx
	testb	$31, %bl
	jne	.L496
	jmp	.L494
.L506:
	movss	.LC0(%rip), %xmm0
.L494:
	# %edi remembers the count at entry to the vector part; the vector loop
	# needs at least 16 elements (unsigned compare).
	movl	%edx, %edi
	cmpl	$15, %edx
	ja	.L497
	jmp	.L498
.L496:
	# Still unaligned: continue the scalar loop while elements remain.
	testl	%edx, %edx
	jne	.L499
	jmp	.L498
.L497:
	# Phase b setup: seed the loop-carried accumulator from 64(%rsp).
	movq	64(%rsp), %rax
	movq	%rax, 128(%rsp)
	movq	72(%rsp), %rax
	movq	%rax, 136(%rsp)
	movq	80(%rsp), %rax
	movq	%rax, 144(%rsp)
	movq	88(%rsp), %rax
	movq	%rax, 152(%rsp)
	# From here %eax counts remaining elements and %rdx walks the data.
	movl	%edx, %eax
	movq	%rbx, %rdx
.L501:
	# Copy two 32-byte chunks of input into the frame with integer moves.
	movq	(%rdx), %rcx
	movq	%rcx, 256(%rsp)
	movq	8(%rdx), %rcx
	movq	%rcx, 264(%rsp)
	movq	16(%rdx), %rcx
	movq	%rcx, 272(%rsp)
	movq	24(%rdx), %rcx
	movq	%rcx, 280(%rsp)
	movq	32(%rdx), %rcx
	movq	%rcx, 224(%rsp)
	movq	40(%rdx), %rcx
	movq	%rcx, 232(%rsp)
	movq	48(%rdx), %rcx
	movq	%rcx, 240(%rsp)
	movq	56(%rdx), %rcx
	movq	%rcx, 248(%rsp)
	# Lane-wise product of the two chunks: low half -> %xmm4, high half -> %xmm1.
	movaps	224(%rsp), %xmm1
	mulps	256(%rsp), %xmm1
	movaps	%xmm1, (%rsp)
	movaps	240(%rsp), %xmm1
	mulps	272(%rsp), %xmm1
	movaps	%xmm1, 16(%rsp)
	movq	(%rsp), %rsi
	movaps	(%rsp), %xmm4
	movaps	%xmm4, 32(%rsp)
	movq	16(%rsp), %rcx
	movaps	%xmm1, 48(%rsp)
	movq	%rsi, 192(%rsp)
	movq	40(%rsp), %rsi
	movq	%rsi, 200(%rsp)
	movq	%rcx, 208(%rsp)
	movq	56(%rsp), %rcx
	movq	%rcx, 216(%rsp)
	# Multiply the chunk product into the carried accumulator; the result
	# is written to 160..191(%rsp).
	movaps	%xmm4, %xmm2
	mulps	128(%rsp), %xmm2
	movaps	%xmm2, (%rsp)
	movq	(%rsp), %rsi
	movaps	%xmm2, 32(%rsp)
	movaps	%xmm1, %xmm3
	mulps	144(%rsp), %xmm3
	movaps	%xmm3, 16(%rsp)
	movq	16(%rsp), %rcx
	movaps	%xmm3, 48(%rsp)
	movq	%rsi, 160(%rsp)
	movq	40(%rsp), %rsi
	movq	%rsi, 168(%rsp)
	movq	%rcx, 176(%rsp)
	movq	56(%rsp), %rcx
	movq	%rcx, 184(%rsp)
	addq	$64, %rdx
	subl	$16, %eax
	cmpl	$15, %eax
	jbe	.L500
	# Another full pass follows: the new accumulator becomes the carried
	# one. %rsi still holds the quadword that was just stored at 168(%rsp).
	movq	160(%rsp), %rcx
	movq	%rcx, 128(%rsp)
	movq	%rsi, 136(%rsp)
	movq	176(%rsp), %rcx
	movq	%rcx, 144(%rsp)
	movq	184(%rsp), %rcx
	movq	%rcx, 152(%rsp)
	jmp	.L501
.L500:
	# Loop exit: publish the accumulator to 96(%rsp) and 64(%rsp), then
	# recompute the data pointer and leftover count from the entry count
	# in %edi: passes = ((cnt - 16) >> 4) + 1, %rbx += passes * 64,
	# %edx = (cnt - 16) - 16 * ((cnt - 16) >> 4).
	movq	160(%rsp), %r9
	movq	%r9, 96(%rsp)
	movq	168(%rsp), %r8
	movq	%r8, 104(%rsp)
	movq	176(%rsp), %rsi
	movq	%rsi, 112(%rsp)
	movq	184(%rsp), %rcx
	movq	%rcx, 120(%rsp)
	leal	-16(%rdi), %edx
	movl	%edx, %eax
	shrl	$4, %eax
	movl	%eax, %edi
	addq	$1, %rdi
	salq	$6, %rdi
	addq	%rdi, %rbx
	movq	%r9, 64(%rsp)
	movq	%r8, 72(%rsp)
	movq	%rsi, 80(%rsp)
	movq	%rcx, 88(%rsp)
	sall	$4, %eax
	subl	%eax, %edx
.L498:
	# Phase c: scalar multiplies for the remaining %edx elements.
	testl	%edx, %edx
	je	.L502
.L503:
	addq	$4, %rbx
	mulss	-4(%rbx), %xmm0
	subl	$1, %edx
	jne	.L503
	jmp	.L502
.L505:
	# Phase d: multiply the 8 accumulator lanes into %xmm0, then store.
	mulss	64(%rsp,%rdx,4), %xmm0
	addq	$1, %rax
	movq	%rax, %rdx
	cmpq	$8, %rax
	jne	.L505
	movss	%xmm0, (%r12)
	jmp	.L508
.L507:
	# Zero-length input reached from the alignment check.
	movss	.LC0(%rip), %xmm0
	jmp	.L498
.L502:
	movl	$0, %edx
	movl	$0, %eax
	jmp	.L505
.L508:
	# Undo the frame alignment: the three saved registers sit just below %rbp.
	leaq	-24(%rbp), %rsp
	popq	%rbx
	popq	%r12
	popq	%r13
	popq	%rbp
	ret

# simd_v4a_combine
#   Multiplies together a run of single-precision floats and stores the
#   scalar product through the second argument.
#   In:   %rdi = first argument; forwarded unchanged to get_vec_start and
#                vec_length (kept in %r13 across the calls)
#         %rsi = destination pointer (kept in %r12); written once at the end
#   Out:  product stored with movss to (%r12)
#   Only the low 32 bits of vec_length's result are used as the element count.
#   The starting value comes from .LC0, which is defined outside this view
#   (presumably 1.0f, the multiplicative identity - confirm).
#
#   Phases:
#     a) scalar multiplies until the data pointer is 32-byte aligned
#     b) vector loop: 32 floats (128 bytes, four 32-byte chunks) per pass,
#        combined pairwise and folded into an 8-float accumulator
#     c) scalar multiplies for the elements left over
#     d) the 8 accumulator lanes are multiplied into the scalar
#
#   Frame, as offsets from the 32-byte-aligned %rsp:
#       0..63    scratch for freshly computed products
#      64..95    spill area used to read back the upper quadword of a product
#      96..127   8-float accumulator consumed by phase d
#     128..159   copy of the accumulator made on loop exit (not read again
#                in this function)
#     160..191   accumulator carried from one loop pass to the next
#     192..223   accumulator produced by the current pass
#     224..319   intermediate chunk products (stored, not read again here)
#     320..447   the four input chunks; chunk 0 at 416, chunk 3 at 320
simd_v4a_combine:
	pushq	%rbp
	movq	%rsp, %rbp
	pushq	%r13
	pushq	%r12
	pushq	%rbx
	# Align the frame to 32 bytes; the epilogue restores %rsp from %rbp.
	andq	$-32, %rsp
	subq	$448, %rsp
	movq	%rdi, %r13
	movq	%rsi, %r12
	call	get_vec_start
	# %rbx = current data pointer, %edx = elements still to process.
	movq	%rax, %rbx
	movq	%r13, %rdi
	call	vec_length
	movl	%eax, %edx
	movl	$0, %esi
	movl	$0, %ecx
	movss	.LC0(%rip), %xmm0
	# Fill the 8-lane accumulator at 96(%rsp) with the .LC0 value.
.L512:
	movss	%xmm0, 96(%rsp,%rsi,4)
	addq	$1, %rcx
	movq	%rcx, %rsi
	cmpq	$8, %rcx
	jne	.L512
	# Phase a: skip straight to the vector part if already 32-byte aligned,
	# or to the tail if there are no elements at all.
	testb	$31, %bl
	je	.L525
	testl	%eax, %eax
	je	.L526
	movss	.LC0(%rip), %xmm0
.L518:
	addq	$4, %rbx
	mulss	-4(%rbx), %xmm0
	subl	$1, %edx
	testb	$31, %bl
	jne	.L515
	jmp	.L513
.L525:
	movss	.LC0(%rip), %xmm0
.L513:
	# %edi remembers the count at entry to the vector part; the vector loop
	# needs at least 32 elements (unsigned compare).
	movl	%edx, %edi
	cmpl	$31, %edx
	ja	.L516
	jmp	.L517
.L515:
	# Still unaligned: continue the scalar loop while elements remain.
	testl	%edx, %edx
	jne	.L518
	jmp	.L517
.L516:
	# Phase b setup: seed the loop-carried accumulator from 96(%rsp).
	movq	96(%rsp), %rax
	movq	%rax, 160(%rsp)
	movq	104(%rsp), %rax
	movq	%rax, 168(%rsp)
	movq	112(%rsp), %rax
	movq	%rax, 176(%rsp)
	movq	120(%rsp), %rax
	movq	%rax, 184(%rsp)
	# From here %eax counts remaining elements and %rdx walks the data.
	movl	%edx, %eax
	movq	%rbx, %rdx
.L520:
	# Copy four 32-byte chunks of input into the frame with integer moves.
	movq	(%rdx), %rcx
	movq	%rcx, 416(%rsp)
	movq	8(%rdx), %rcx
	movq	%rcx, 424(%rsp)
	movq	16(%rdx), %rcx
	movq	%rcx, 432(%rsp)
	movq	24(%rdx), %rcx
	movq	%rcx, 440(%rsp)
	movq	32(%rdx), %rcx
	movq	%rcx, 384(%rsp)
	movq	40(%rdx), %rcx
	movq	%rcx, 392(%rsp)
	movq	48(%rdx), %rcx
	movq	%rcx, 400(%rsp)
	movq	56(%rdx), %rcx
	movq	%rcx, 408(%rsp)
	movq	64(%rdx), %rcx
	movq	%rcx, 352(%rsp)
	movq	72(%rdx), %rcx
	movq	%rcx, 360(%rsp)
	movq	80(%rdx), %rcx
	movq	%rcx, 368(%rsp)
	movq	88(%rdx), %rcx
	movq	%rcx, 376(%rsp)
	movq	96(%rdx), %rcx
	movq	%rcx, 320(%rsp)
	movq	104(%rdx), %rcx
	movq	%rcx, 328(%rsp)
	movq	112(%rdx), %rcx
	movq	%rcx, 336(%rsp)
	movq	120(%rdx), %rcx
	movq	%rcx, 344(%rsp)
	# Product of chunks 0 and 1: low half at 32(%rsp), high half at 48(%rsp).
	movaps	384(%rsp), %xmm1
	mulps	416(%rsp), %xmm1
	movaps	%xmm1, 32(%rsp)
	movaps	400(%rsp), %xmm1
	mulps	432(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movq	32(%rsp), %rsi
	movaps	32(%rsp), %xmm6
	movaps	%xmm6, 64(%rsp)
	movq	48(%rsp), %rcx
	movaps	%xmm1, 80(%rsp)
	movq	%rsi, 288(%rsp)
	movq	72(%rsp), %rsi
	movq	%rsi, 296(%rsp)
	movq	%rcx, 304(%rsp)
	movq	88(%rsp), %rcx
	movq	%rcx, 312(%rsp)
	# Product of chunks 2 and 3: low half -> %xmm6, high half -> %xmm1.
	movaps	320(%rsp), %xmm1
	mulps	352(%rsp), %xmm1
	movaps	%xmm1, 16(%rsp)
	movaps	368(%rsp), %xmm1
	mulps	336(%rsp), %xmm1
	movaps	%xmm1, (%rsp)
	movq	16(%rsp), %rsi
	movaps	16(%rsp), %xmm6
	movaps	%xmm6, 64(%rsp)
	movq	(%rsp), %rcx
	movaps	%xmm1, 80(%rsp)
	movq	%rsi, 256(%rsp)
	movq	72(%rsp), %rsi
	movq	%rsi, 264(%rsp)
	movq	%rcx, 272(%rsp)
	movq	88(%rsp), %rcx
	movq	%rcx, 280(%rsp)
	# Combine the two pair products: all four chunks -> %xmm2 (low), %xmm3 (high).
	movaps	32(%rsp), %xmm2
	mulps	%xmm6, %xmm2
	movaps	%xmm2, 32(%rsp)
	movaps	48(%rsp), %xmm3
	mulps	%xmm1, %xmm3
	movaps	%xmm3, 48(%rsp)
	movq	32(%rsp), %rsi
	movaps	%xmm2, 64(%rsp)
	movq	48(%rsp), %rcx
	movaps	%xmm3, 80(%rsp)
	movq	%rsi, 224(%rsp)
	movq	72(%rsp), %rsi
	movq	%rsi, 232(%rsp)
	movq	%rcx, 240(%rsp)
	movq	88(%rsp), %rcx
	movq	%rcx, 248(%rsp)
	# Multiply into the carried accumulator; the result is written to
	# 192..223(%rsp).
	movaps	%xmm2, %xmm4
	mulps	160(%rsp), %xmm4
	movaps	%xmm4, 32(%rsp)
	movq	32(%rsp), %rsi
	movaps	%xmm4, 64(%rsp)
	movaps	%xmm3, %xmm5
	mulps	176(%rsp), %xmm5
	movaps	%xmm5, 48(%rsp)
	movq	48(%rsp), %rcx
	movaps	%xmm5, 80(%rsp)
	movq	%rsi, 192(%rsp)
	movq	72(%rsp), %rsi
	movq	%rsi, 200(%rsp)
	movq	%rcx, 208(%rsp)
	movq	88(%rsp), %rcx
	movq	%rcx, 216(%rsp)
	# Advance by 128 bytes (written as subtracting -128) and 32 elements.
	subq	$-128, %rdx
	subl	$32, %eax
	cmpl	$31, %eax
	jbe	.L519
	# Another full pass follows: the new accumulator becomes the carried
	# one. %rsi still holds the quadword that was just stored at 200(%rsp).
	movq	192(%rsp), %rcx
	movq	%rcx, 160(%rsp)
	movq	%rsi, 168(%rsp)
	movq	208(%rsp), %rcx
	movq	%rcx, 176(%rsp)
	movq	216(%rsp), %rcx
	movq	%rcx, 184(%rsp)
	jmp	.L520
.L519:
	# Loop exit: publish the accumulator to 128(%rsp) and 96(%rsp), then
	# recompute the data pointer and leftover count from the entry count
	# in %edi: passes = ((cnt - 32) >> 5) + 1, %rbx += passes * 128,
	# %edx = (cnt - 32) - 32 * ((cnt - 32) >> 5).
	movq	192(%rsp), %r9
	movq	%r9, 128(%rsp)
	movq	200(%rsp), %r8
	movq	%r8, 136(%rsp)
	movq	208(%rsp), %rsi
	movq	%rsi, 144(%rsp)
	movq	216(%rsp), %rcx
	movq	%rcx, 152(%rsp)
	leal	-32(%rdi), %edx
	movl	%edx, %eax
	shrl	$5, %eax
	movl	%eax, %edi
	addq	$1, %rdi
	salq	$7, %rdi
	addq	%rdi, %rbx
	movq	%r9, 96(%rsp)
	movq	%r8, 104(%rsp)
	movq	%rsi, 112(%rsp)
	movq	%rcx, 120(%rsp)
	sall	$5, %eax
	subl	%eax, %edx
.L517:
	# Phase c: scalar multiplies for the remaining %edx elements.
	testl	%edx, %edx
	je	.L521
.L522:
	addq	$4, %rbx
	mulss	-4(%rbx), %xmm0
	subl	$1, %edx
	jne	.L522
	jmp	.L521
.L524:
	# Phase d: multiply the 8 accumulator lanes into %xmm0, then store.
	mulss	96(%rsp,%rdx,4), %xmm0
	addq	$1, %rax
	movq	%rax, %rdx
	cmpq	$8, %rax
	jne	.L524
	movss	%xmm0, (%r12)
	jmp	.L527
.L526:
	# Zero-length input reached from the alignment check.
	movss	.LC0(%rip), %xmm0
	jmp	.L517
.L521:
	movl	$0, %edx
	movl	$0, %eax
	jmp	.L524
.L527:
	# Undo the frame alignment: the three saved registers sit just below %rbp.
	leaq	-24(%rbp), %rsp
	popq	%rbx
	popq	%r12
	popq	%r13
	popq	%rbp
	ret

# simd_v8a_combine
#   Multiplies together a run of single-precision floats and stores the
#   scalar product through the second argument.
#   In:   %rdi = first argument; forwarded unchanged to get_vec_start and
#                vec_length (kept in %r13 across the calls)
#         %rsi = destination pointer (kept in %r12); written once at the end
#   Out:  product stored with movss to (%r12)
#   Only the low 32 bits of vec_length's result are used as the element count.
#   The starting value comes from .LC0, which is defined outside this view
#   (presumably 1.0f, the multiplicative identity - confirm).
#
#   Phases:
#     a) scalar multiplies until the data pointer is 32-byte aligned
#     b) vector loop: 64 floats (256 bytes, eight 32-byte chunks) per pass,
#        combined as a pairwise tree and folded into an 8-float accumulator
#     c) scalar multiplies for the elements left over
#     d) the 8 accumulator lanes are multiplied into the scalar
#
#   Frame, as offsets from the 32-byte-aligned %rsp:
#       0..95    scratch for freshly computed products
#      96..127   spill area used to read back the upper quadword of a product
#     128..159   8-float accumulator consumed by phase d
#     160..191   copy of the accumulator made on loop exit (not read again
#                in this function)
#     192..223   accumulator carried from one loop pass to the next
#     224..255   accumulator produced by the current pass
#     256..479   intermediate chunk products (stored, not read again here)
#     480..735   the eight input chunks; chunk 0 at 704, chunk 7 at 480
simd_v8a_combine:
	pushq	%rbp
	movq	%rsp, %rbp
	pushq	%r13
	pushq	%r12
	pushq	%rbx
	# Align the frame to 32 bytes; the epilogue restores %rsp from %rbp.
	andq	$-32, %rsp
	subq	$736, %rsp
	movq	%rdi, %r13
	movq	%rsi, %r12
	call	get_vec_start
	# %rbx = current data pointer, %edx = elements still to process.
	movq	%rax, %rbx
	movq	%r13, %rdi
	call	vec_length
	movl	%eax, %edx
	movl	$0, %esi
	movl	$0, %ecx
	movss	.LC0(%rip), %xmm0
	# Fill the 8-lane accumulator at 128(%rsp) with the .LC0 value.
.L531:
	movss	%xmm0, 128(%rsp,%rsi,4)
	addq	$1, %rcx
	movq	%rcx, %rsi
	cmpq	$8, %rcx
	jne	.L531
	# Phase a: skip straight to the vector part if already 32-byte aligned,
	# or to the tail if there are no elements at all.
	testb	$31, %bl
	je	.L544
	testl	%eax, %eax
	je	.L545
	movss	.LC0(%rip), %xmm0
.L537:
	addq	$4, %rbx
	mulss	-4(%rbx), %xmm0
	subl	$1, %edx
	testb	$31, %bl
	jne	.L534
	jmp	.L532
.L544:
	movss	.LC0(%rip), %xmm0
.L532:
	# %edi remembers the count at entry to the vector part; the vector loop
	# needs at least 64 elements (unsigned compare).
	movl	%edx, %edi
	cmpl	$63, %edx
	ja	.L535
	jmp	.L536
.L534:
	# Still unaligned: continue the scalar loop while elements remain.
	testl	%edx, %edx
	jne	.L537
	jmp	.L536
.L535:
	# Phase b setup: seed the loop-carried accumulator from 128(%rsp).
	movq	128(%rsp), %rax
	movq	%rax, 192(%rsp)
	movq	136(%rsp), %rax
	movq	%rax, 200(%rsp)
	movq	144(%rsp), %rax
	movq	%rax, 208(%rsp)
	movq	152(%rsp), %rax
	movq	%rax, 216(%rsp)
	# From here %eax counts remaining elements and %rdx walks the data.
	movl	%edx, %eax
	movq	%rbx, %rdx
.L539:
	# Copy eight 32-byte chunks of input into the frame with integer moves.
	movq	(%rdx), %rcx
	movq	%rcx, 704(%rsp)
	movq	8(%rdx), %rcx
	movq	%rcx, 712(%rsp)
	movq	16(%rdx), %rcx
	movq	%rcx, 720(%rsp)
	movq	24(%rdx), %rcx
	movq	%rcx, 728(%rsp)
	movq	32(%rdx), %rcx
	movq	%rcx, 672(%rsp)
	movq	40(%rdx), %rcx
	movq	%rcx, 680(%rsp)
	movq	48(%rdx), %rcx
	movq	%rcx, 688(%rsp)
	movq	56(%rdx), %rcx
	movq	%rcx, 696(%rsp)
	movq	64(%rdx), %rcx
	movq	%rcx, 640(%rsp)
	movq	72(%rdx), %rcx
	movq	%rcx, 648(%rsp)
	movq	80(%rdx), %rcx
	movq	%rcx, 656(%rsp)
	movq	88(%rdx), %rcx
	movq	%rcx, 664(%rsp)
	movq	96(%rdx), %rcx
	movq	%rcx, 608(%rsp)
	movq	104(%rdx), %rcx
	movq	%rcx, 616(%rsp)
	movq	112(%rdx), %rcx
	movq	%rcx, 624(%rsp)
	movq	120(%rdx), %rcx
	movq	%rcx, 632(%rsp)
	movq	128(%rdx), %rcx
	movq	%rcx, 576(%rsp)
	movq	136(%rdx), %rcx
	movq	%rcx, 584(%rsp)
	movq	144(%rdx), %rcx
	movq	%rcx, 592(%rsp)
	movq	152(%rdx), %rcx
	movq	%rcx, 600(%rsp)
	movq	160(%rdx), %rcx
	movq	%rcx, 544(%rsp)
	movq	168(%rdx), %rcx
	movq	%rcx, 552(%rsp)
	movq	176(%rdx), %rcx
	movq	%rcx, 560(%rsp)
	movq	184(%rdx), %rcx
	movq	%rcx, 568(%rsp)
	movq	192(%rdx), %rcx
	movq	%rcx, 512(%rsp)
	movq	200(%rdx), %rcx
	movq	%rcx, 520(%rsp)
	movq	208(%rdx), %rcx
	movq	%rcx, 528(%rsp)
	movq	216(%rdx), %rcx
	movq	%rcx, 536(%rsp)
	movq	224(%rdx), %rcx
	movq	%rcx, 480(%rsp)
	movq	232(%rdx), %rcx
	movq	%rcx, 488(%rsp)
	movq	240(%rdx), %rcx
	movq	%rcx, 496(%rsp)
	movq	248(%rdx), %rcx
	movq	%rcx, 504(%rsp)
	# Product of chunks 0 and 1: low half -> %xmm4, high half -> %xmm5.
	movaps	672(%rsp), %xmm1
	mulps	704(%rsp), %xmm1
	movaps	%xmm1, 64(%rsp)
	movaps	688(%rsp), %xmm1
	mulps	720(%rsp), %xmm1
	movaps	%xmm1, 80(%rsp)
	movq	64(%rsp), %rsi
	movaps	64(%rsp), %xmm4
	movaps	%xmm4, 96(%rsp)
	movq	80(%rsp), %rcx
	movaps	%xmm1, %xmm5
	movaps	%xmm1, 112(%rsp)
	movq	%rsi, 448(%rsp)
	movq	104(%rsp), %rsi
	movq	%rsi, 456(%rsp)
	movq	%rcx, 464(%rsp)
	movq	120(%rsp), %rcx
	movq	%rcx, 472(%rsp)
	# Product of chunks 2 and 3: low half -> %xmm6, high half -> %xmm1.
	movaps	608(%rsp), %xmm1
	mulps	640(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movaps	656(%rsp), %xmm1
	mulps	624(%rsp), %xmm1
	movaps	%xmm1, 32(%rsp)
	movq	48(%rsp), %rsi
	movaps	48(%rsp), %xmm6
	movaps	%xmm6, 96(%rsp)
	movq	32(%rsp), %rcx
	movaps	%xmm1, 112(%rsp)
	movq	%rsi, 416(%rsp)
	movq	104(%rsp), %rsi
	movq	%rsi, 424(%rsp)
	movq	%rcx, 432(%rsp)
	movq	120(%rsp), %rcx
	movq	%rcx, 440(%rsp)
	# Chunks 0..3 combined: kept at 64(%rsp) (low) and 80(%rsp) (high).
	movaps	%xmm4, %xmm2
	mulps	%xmm6, %xmm2
	movaps	%xmm2, 64(%rsp)
	movaps	%xmm5, %xmm3
	mulps	%xmm1, %xmm3
	movaps	%xmm3, 80(%rsp)
	movq	64(%rsp), %rsi
	movaps	%xmm2, 96(%rsp)
	movq	80(%rsp), %rcx
	movaps	%xmm3, 112(%rsp)
	movq	%rsi, 384(%rsp)
	movq	104(%rsp), %rsi
	movq	%rsi, 392(%rsp)
	movq	%rcx, 400(%rsp)
	movq	120(%rsp), %rcx
	movq	%rcx, 408(%rsp)
	# Product of chunks 4 and 5: low half -> %xmm6, high half -> %xmm7.
	movaps	576(%rsp), %xmm1
	mulps	544(%rsp), %xmm1
	movaps	%xmm1, 48(%rsp)
	movaps	592(%rsp), %xmm1
	mulps	560(%rsp), %xmm1
	movaps	%xmm1, 32(%rsp)
	movq	48(%rsp), %rsi
	movaps	48(%rsp), %xmm6
	movaps	%xmm6, 96(%rsp)
	movq	32(%rsp), %rcx
	movaps	%xmm1, %xmm7
	movaps	%xmm1, 112(%rsp)
	movq	%rsi, 352(%rsp)
	movq	104(%rsp), %rsi
	movq	%rsi, 360(%rsp)
	movq	%rcx, 368(%rsp)
	movq	120(%rsp), %rcx
	movq	%rcx, 376(%rsp)
	# Product of chunks 6 and 7: low half -> %xmm2, high half -> %xmm1.
	movaps	512(%rsp), %xmm1
	mulps	480(%rsp), %xmm1
	movaps	%xmm1, 16(%rsp)
	movaps	528(%rsp), %xmm1
	mulps	496(%rsp), %xmm1
	movaps	%xmm1, (%rsp)
	movq	16(%rsp), %rsi
	movaps	16(%rsp), %xmm2
	movaps	%xmm2, 96(%rsp)
	movq	(%rsp), %rcx
	movaps	%xmm1, 112(%rsp)
	movq	%rsi, 320(%rsp)
	movq	104(%rsp), %rsi
	movq	%rsi, 328(%rsp)
	movq	%rcx, 336(%rsp)
	movq	120(%rsp), %rcx
	movq	%rcx, 344(%rsp)
	# Chunks 4..7 combined: low half -> %xmm4, high half -> %xmm5.
	movaps	%xmm6, %xmm4
	mulps	%xmm2, %xmm4
	movaps	%xmm4, 48(%rsp)
	movaps	%xmm7, %xmm5
	mulps	%xmm1, %xmm5
	movaps	%xmm5, 32(%rsp)
	movq	48(%rsp), %rsi
	movaps	%xmm4, 96(%rsp)
	movq	32(%rsp), %rcx
	movaps	%xmm5, 112(%rsp)
	movq	%rsi, 288(%rsp)
	movq	104(%rsp), %rsi
	movq	%rsi, 296(%rsp)
	movq	%rcx, 304(%rsp)
	movq	120(%rsp), %rcx
	movq	%rcx, 312(%rsp)
	# All eight chunks combined: low half -> %xmm6, high half -> %xmm7.
	movaps	64(%rsp), %xmm6
	mulps	%xmm4, %xmm6
	movaps	%xmm6, 64(%rsp)
	movaps	80(%rsp), %xmm7
	mulps	%xmm5, %xmm7
	movaps	%xmm7, 80(%rsp)
	movq	64(%rsp), %rsi
	movaps	%xmm6, 96(%rsp)
	movq	80(%rsp), %rcx
	movaps	%xmm7, 112(%rsp)
	movq	%rsi, 256(%rsp)
	movq	104(%rsp), %rsi
	movq	%rsi, 264(%rsp)
	movq	%rcx, 272(%rsp)
	movq	120(%rsp), %rcx
	movq	%rcx, 280(%rsp)
	# Multiply into the carried accumulator; the result is written to
	# 224..255(%rsp).
	movaps	%xmm6, %xmm2
	mulps	192(%rsp), %xmm2
	movaps	%xmm2, 64(%rsp)
	movq	64(%rsp), %rsi
	movaps	%xmm2, 96(%rsp)
	movaps	%xmm7, %xmm3
	mulps	208(%rsp), %xmm3
	movaps	%xmm3, 80(%rsp)
	movq	80(%rsp), %rcx
	movaps	%xmm3, 112(%rsp)
	movq	%rsi, 224(%rsp)
	movq	104(%rsp), %rsi
	movq	%rsi, 232(%rsp)
	movq	%rcx, 240(%rsp)
	movq	120(%rsp), %rcx
	movq	%rcx, 248(%rsp)
	addq	$256, %rdx
	subl	$64, %eax
	cmpl	$63, %eax
	jbe	.L538
	# Another full pass follows: the new accumulator becomes the carried
	# one. %rsi still holds the quadword that was just stored at 232(%rsp).
	movq	224(%rsp), %rcx
	movq	%rcx, 192(%rsp)
	movq	%rsi, 200(%rsp)
	movq	240(%rsp), %rcx
	movq	%rcx, 208(%rsp)
	movq	248(%rsp), %rcx
	movq	%rcx, 216(%rsp)
	jmp	.L539
.L538:
	# Loop exit: publish the accumulator to 160(%rsp) and 128(%rsp), then
	# recompute the data pointer and leftover count from the entry count
	# in %edi: passes = ((cnt - 64) >> 6) + 1, %rbx += passes * 256,
	# %edx = (cnt - 64) - 64 * ((cnt - 64) >> 6).
	movq	224(%rsp), %r9
	movq	%r9, 160(%rsp)
	movq	232(%rsp), %r8
	movq	%r8, 168(%rsp)
	movq	240(%rsp), %rsi
	movq	%rsi, 176(%rsp)
	movq	248(%rsp), %rcx
	movq	%rcx, 184(%rsp)
	leal	-64(%rdi), %edx
	movl	%edx, %eax
	shrl	$6, %eax
	movl	%eax, %edi
	addq	$1, %rdi
	salq	$8, %rdi
	addq	%rdi, %rbx
	movq	%r9, 128(%rsp)
	movq	%r8, 136(%rsp)
	movq	%rsi, 144(%rsp)
	movq	%rcx, 152(%rsp)
	sall	$6, %eax
	subl	%eax, %edx
.L536:
	# Phase c: scalar multiplies for the remaining %edx elements.
	testl	%edx, %edx
	je	.L540
.L541:
	addq	$4, %rbx
	mulss	-4(%rbx), %xmm0
	subl	$1, %edx
	jne	.L541
	jmp	.L540
.L543:
	# Phase d: multiply the 8 accumulator lanes into %xmm0, then store.
	mulss	128(%rsp,%rdx,4), %xmm0
	addq	$1, %rax
	movq	%rax, %rdx
	cmpq	$8, %rax
	jne	.L543
	movss	%xmm0, (%r12)
	jmp	.L546
.L545:
	# Zero-length input reached from the alignment check.
	movss	.LC0(%rip), %xmm0
	jmp	.L536
.L540:
	movl	$0, %edx
	movl	$0, %eax
	jmp	.L543
.L546:
	# Undo the frame alignment: the three saved registers sit just below %rbp.
	leaq	-24(%rbp), %rsp
	popq	%rbx
	popq	%r12
	popq	%r13
	popq	%rbp
	ret

# unroll4x2as_combine
#   Multiplies the floats of a vector using two independent running
#   products: one walks the first half of the data, the other walks the
#   second half in lock-step. Any element past 2 * (length / 2) is folded
#   into the second product, and the two are multiplied at the end.
#   ABI:  System V AMD64
#   In:   %rdi = vector handle, forwarded to vec_length and get_vec_start
#         %rsi = destination; receives the scalar product via movss
#   Regs: %r14 = handle, %r12 = destination, %rbx = length,
#         %r13 = length / 2 (signed, rounded toward zero),
#         %rcx = data start, %rax = start of the second half,
#         %xmm1 = first-half product, %xmm0 = second-half product
#   .LC0 supplies the starting value for both products (defined outside
#   this view; presumably 1.0f - confirm).
unroll4x2as_combine:
	pushq	%r14
	pushq	%r13
	pushq	%r12
	pushq	%rbp			# not used below; keeps %rsp 16-byte aligned at the calls
	pushq	%rbx
	movq	%rsi, %r12
	movq	%rdi, %r14
	call	vec_length
	movq	%rax, %rbx
	# half = length / 2 with C semantics: add the sign bit before shifting.
	movq	%rax, %r13
	shrq	$63, %r13
	addq	%rbx, %r13
	sarq	%r13
	movq	%r14, %rdi
	call	get_vec_start
	movq	%rax, %rcx
	leaq	(%rcx,%r13,4), %rax
	# Both products start from the same constant on every path.
	movss	.LC0(%rip), %xmm0
	movaps	%xmm0, %xmm1
	testq	%r13, %r13
	jle	.Lu4x2as_leftover
	xorl	%edx, %edx
.Lu4x2as_halves:
	# Element i of the first half and element i of the second half.
	mulss	(%rcx,%rdx,4), %xmm1
	mulss	(%rax,%rdx,4), %xmm0
	addq	$1, %rdx
	cmpq	%r13, %rdx
	jne	.Lu4x2as_halves
.Lu4x2as_leftover:
	# Elements from index 2 * half up to length go into the second product.
	leaq	(%r13,%r13), %rdx
	cmpq	%rdx, %rbx
	jle	.Lu4x2as_store
.Lu4x2as_odd:
	mulss	(%rcx,%rdx,4), %xmm0
	addq	$1, %rdx
	cmpq	%rbx, %rdx
	jne	.Lu4x2as_odd
.Lu4x2as_store:
	mulss	%xmm1, %xmm0
	movss	%xmm0, (%r12)
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	popq	%r14
	ret

# REGISTER_COMBINER fn, descr
#   Expands to add_combiner(fn, combine1, descr) under the System V AMD64
#   convention (%rdi, %rsi, %rdx). Addresses are formed RIP-relative so the
#   object carries no 32-bit absolute relocations and can be linked into a
#   position-independent executable.
#   Clobbers: whatever add_combiner clobbers, plus %rdi, %rsi, %rdx.
	.macro	REGISTER_COMBINER fn, descr
	leaq	\descr(%rip), %rdx
	leaq	combine1(%rip), %rsi
	leaq	\fn(%rip), %rdi
	call	add_combiner
	.endm

# register_combiners
#   Calls add_combiner once per routine listed below, always passing
#   combine1 as the second argument, then calls log_combiner for
#   simd_v8a_combine with the doubles at .LC2 (%xmm0) and .LC1 (%xmm1).
#   Takes no arguments and produces no result of its own.
#   NOTE(review): unroll4x2as_combine is defined in this file but is not
#   registered here - confirm that the omission is intentional.
register_combiners:
	subq	$8, %rsp		# keep %rsp 16-byte aligned at every call
	REGISTER_COMBINER combine1, combine1_descr
	REGISTER_COMBINER combine2, combine2_descr
	REGISTER_COMBINER combine3, combine3_descr
	REGISTER_COMBINER combine3w, combine3w_descr
	REGISTER_COMBINER combine4, combine4_descr
	REGISTER_COMBINER combine4b, combine4b_descr
	REGISTER_COMBINER combine4p, combine4p_descr
	REGISTER_COMBINER combine5, combine5_descr
	REGISTER_COMBINER combine5p, combine5p_descr
	REGISTER_COMBINER unroll2aw_combine, unroll2aw_descr
	REGISTER_COMBINER unroll3a_combine, unroll3a_descr
	REGISTER_COMBINER unroll4a_combine, unroll4a_descr
	REGISTER_COMBINER unroll5a_combine, unroll5a_descr
	REGISTER_COMBINER unroll6a_combine, unroll6a_descr
	REGISTER_COMBINER unroll7a_combine, unroll7a_descr
	REGISTER_COMBINER unroll8a_combine, unroll8a_descr
	REGISTER_COMBINER unroll9a_combine, unroll9a_descr
	REGISTER_COMBINER unroll10a_combine, unroll10a_descr
	REGISTER_COMBINER unroll16a_combine, unroll16a_descr
	REGISTER_COMBINER unroll2_combine, unroll2_descr
	REGISTER_COMBINER unroll3_combine, unroll3_descr
	REGISTER_COMBINER unroll4_combine, unroll4_descr
	REGISTER_COMBINER unroll8_combine, unroll8_descr
	REGISTER_COMBINER unroll16_combine, unroll16_descr
	REGISTER_COMBINER combine6, combine6_descr
	REGISTER_COMBINER unroll4x2a_combine, unroll4x2a_descr
	REGISTER_COMBINER unroll8x2a_combine, unroll8x2a_descr
	REGISTER_COMBINER unroll3x3a_combine, unroll3x3a_descr
	REGISTER_COMBINER unroll4x4a_combine, unroll4x4a_descr
	REGISTER_COMBINER unroll5x5a_combine, unroll5x5a_descr
	REGISTER_COMBINER unroll6x6a_combine, unroll6x6a_descr
	REGISTER_COMBINER unroll7x7a_combine, unroll7x7a_descr
	REGISTER_COMBINER unroll8x4a_combine, unroll8x4a_descr
	REGISTER_COMBINER unroll8x8a_combine, unroll8x8a_descr
	REGISTER_COMBINER unroll9x9a_combine, unroll9x9a_descr
	REGISTER_COMBINER unroll10x10a_combine, unroll10x10a_descr
	REGISTER_COMBINER unroll12x6a_combine, unroll12x6a_descr
	REGISTER_COMBINER unroll12x12a_combine, unroll12x12a_descr
	REGISTER_COMBINER unroll16x16a_combine, unroll16x16a_descr
	REGISTER_COMBINER unroll20x20a_combine, unroll20x20a_descr
	REGISTER_COMBINER unroll8x2_combine, unroll8x2_descr
	REGISTER_COMBINER unroll8x4_combine, unroll8x4_descr
	REGISTER_COMBINER unroll8x8_combine, unroll8x8_descr
	REGISTER_COMBINER unroll9x3_combine, unroll9x3_descr
	REGISTER_COMBINER unrollx2as_combine, unrollx2as_descr
	REGISTER_COMBINER combine7, combine7_descr
	REGISTER_COMBINER unroll3aa_combine, unroll3aa_descr
	REGISTER_COMBINER unroll4aa_combine, unroll4aa_descr
	REGISTER_COMBINER unroll5aa_combine, unroll5aa_descr
	REGISTER_COMBINER unroll6aa_combine, unroll6aa_descr
	REGISTER_COMBINER unroll7aa_combine, unroll7aa_descr
	REGISTER_COMBINER unroll8aa_combine, unroll8aa_descr
	REGISTER_COMBINER unroll9aa_combine, unroll9aa_descr
	REGISTER_COMBINER unroll10aa_combine, unroll10aa_descr
	REGISTER_COMBINER unroll12aa_combine, unroll12aa_descr
	REGISTER_COMBINER simd_v1_combine, simd_v1_descr
	REGISTER_COMBINER simd_v2_combine, simd_v2_descr
	REGISTER_COMBINER simd_v4_combine, simd_v4_descr
	REGISTER_COMBINER simd_v8_combine, simd_v8_descr
	REGISTER_COMBINER simd_v10_combine, simd_v10_descr
	REGISTER_COMBINER simd_v12_combine, simd_v12_descr
	REGISTER_COMBINER simd_v2a_combine, simd_v2a_descr
	REGISTER_COMBINER simd_v4a_combine, simd_v4a_descr
	REGISTER_COMBINER simd_v8a_combine, simd_v8a_descr
	# log_combiner(simd_v8a_combine, <double at .LC2>, <double at .LC1>)
	movsd	.LC1(%rip), %xmm1
	movsd	.LC2(%rip), %xmm0
	leaq	simd_v8a_combine(%rip), %rdi
	call	log_combiner
	addq	$8, %rsp
	ret
	.purgem	REGISTER_COMBINER

# Description symbols. Most of these are passed as the third argument to
# add_combiner by register_combiners.
# NOTE(review): no data directives follow any of these labels in this
# listing, so as written they all resolve to the same address; presumably
# the string data was stripped along with the assembler directives -
# confirm against the original compiler output.
simd_v8a_descr:
simd_v4a_descr:
simd_v2a_descr:
simd_v12_descr:
simd_v10_descr:
simd_v8_descr:
simd_v4_descr:
simd_v2_descr:
simd_v1_descr:
unroll12aa_descr:
unroll10aa_descr:
unroll9aa_descr:
unroll8aa_descr:
unroll7aa_descr:
unroll6aa_descr:
unroll5aa_descr:
unroll4aa_descr:
unroll3aa_descr:
combine7_descr:
unroll8x8_descr:
unroll8x4_descr:
unroll9x3_descr:
unroll8x2_descr:
unroll4x2as_descr:
unrollx2as_descr:
unroll10x10a_descr:
unroll9x9a_descr:
unroll8x8a_descr:
unroll7x7a_descr:
unroll6x6a_descr:
unroll5x5a_descr:
unroll20x20a_descr:
unroll16x16a_descr:
unroll12x12a_descr:
unroll12x6a_descr:
unroll8x4a_descr:
unroll4x4a_descr:
unroll3x3a_descr:
unroll8x2a_descr:
unroll4x2a_descr:
combine6_descr:
unroll16_descr:
unroll8_descr:
unroll4_descr:
unroll3_descr:
unroll2_descr:
unroll16a_descr:
unroll10a_descr:
unroll9a_descr:
unroll8a_descr:
unroll7a_descr:
unroll6a_descr:
unroll5a_descr:
unroll4a_descr:
unroll2aw_descr:
combine5p_descr:
unroll3a_descr:
combine5_descr:
combine4p_descr:
combine4b_descr:
combine4_descr:
combine3w_descr:
combine3_descr:
combine2_descr:
combine1_descr:
