X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fx86-64%2Fxform4.S;h=b0aca19c8b0963d00e22c4e42d608e97fc2e4c4e;hb=73f1e33d34b2044f2252a73e0fdd827d39724505;hp=e52a6118c31bdd3624cd90314ce4863541fa6b18;hpb=0ae2f59c0287f4baec6c7de5f2f0fdf736fba26d;p=mesa.git diff --git a/src/mesa/x86-64/xform4.S b/src/mesa/x86-64/xform4.S index e52a6118c31..b0aca19c8b0 100644 --- a/src/mesa/x86-64/xform4.S +++ b/src/mesa/x86-64/xform4.S @@ -1,6 +1,5 @@ /* * Mesa 3-D graphics library - * Version: 7.1 * * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. * @@ -17,9 +16,10 @@ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN - * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. */ #ifdef USE_X86_64_ASM @@ -55,7 +55,7 @@ _mesa_x86_64_transform_points4_general: * rdx = source */ movl V4F_COUNT(%rdx), %ecx /* count */ - movzx V4F_STRIDE(%rdx), %eax /* stride */ + movzbl V4F_STRIDE(%rdx), %eax /* stride */ movl %ecx, V4F_COUNT(%rdi) /* set dest count */ movl $4, V4F_SIZE(%rdi) /* set dest size */ @@ -69,7 +69,7 @@ _mesa_x86_64_transform_points4_general: movq V4F_START(%rdx), %rdx /* ptr to first src vertex */ movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */ - prefetch 16(%rdx) + prefetcht1 16(%rdx) movaps 0(%rsi), %xmm4 /* m3 | m2 | m1 | m0 */ movaps 16(%rsi), %xmm5 /* m7 | m6 | m5 | m4 */ @@ -80,7 +80,7 @@ _mesa_x86_64_transform_points4_general: p4_general_loop: movups (%rdx), %xmm8 /* ox | oy | oz | ow */ - prefetchw 16(%rdi) + prefetcht1 16(%rdi) pshufd $0x00, %xmm8, %xmm0 /* ox | ox | ox | ox */ addq %rax, %rdx @@ -93,7 +93,7 @@ p4_general_loop: addps %xmm1, %xmm0 /* ox*m3+oy*m7 | ... */ mulps %xmm7, %xmm3 /* ow*m15 | ow*m14 | ow*m13 | ow*m12 */ addps %xmm2, %xmm0 /* ox*m3+oy*m7+oz*m11 | ... */ - prefetch 16(%rdx) + prefetcht1 16(%rdx) addps %xmm3, %xmm0 /* ox*m3+oy*m7+oz*m11+ow*m15 | ... */ movaps %xmm0, (%rdi) /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */ @@ -118,7 +118,7 @@ p4_constants: .byte 0x00, 0x00, 0x00, 0x00 .byte 0x00, 0x00, 0x00, 0x00 .byte 0x00, 0x00, 0x00, 0x00 -.float 0f+1.0 +.float 1.0 .text .align 16 @@ -138,7 +138,7 @@ _mesa_x86_64_transform_points4_3d: movaps 16(%rax), %xmm10 movl V4F_COUNT(%rdx), %ecx /* count */ - movzx V4F_STRIDE(%rdx), %eax /* stride */ + movzbl V4F_STRIDE(%rdx), %eax /* stride */ movl %ecx, V4F_COUNT(%rdi) /* set dest count */ movl $4, V4F_SIZE(%rdi) /* set dest size */ @@ -150,7 +150,7 @@ _mesa_x86_64_transform_points4_3d: movq V4F_START(%rdx), %rdx /* ptr to first src vertex */ movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */ - prefetch 16(%rdx) + prefetcht1 16(%rdx) movaps 0(%rsi), %xmm4 /* m3 | m2 | m1 | m0 */ movaps 16(%rsi), %xmm5 /* m7 | m6 | m5 | m4 */ @@ -166,7 +166,7 @@ _mesa_x86_64_transform_points4_3d: p4_3d_loop: movups (%rdx), %xmm8 /* ox | oy | oz | ow */ - prefetchw 16(%rdi) + prefetcht1 16(%rdi) pshufd $0x00, %xmm8, %xmm0 /* ox | ox | ox | ox */ addq %rax, %rdx @@ -179,7 +179,7 @@ p4_3d_loop: addps %xmm1, %xmm0 /* ox*m3+oy*m7 | ... */ mulps %xmm7, %xmm3 /* ow*m15 | ow*m14 | ow*m13 | ow*m12 */ addps %xmm2, %xmm0 /* ox*m3+oy*m7+oz*m11 | ... */ - prefetch 16(%rdx) + prefetcht1 16(%rdx) addps %xmm3, %xmm0 /* ox*m3+oy*m7+oz*m11+ow*m15 | ... */ movaps %xmm0, (%rdi) /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */ @@ -199,7 +199,7 @@ p4_3d_done: _mesa_x86_64_transform_points4_identity: movl V4F_COUNT(%rdx), %ecx /* count */ - movzx V4F_STRIDE(%rdx), %eax /* stride */ + movzbl V4F_STRIDE(%rdx), %eax /* stride */ movl %ecx, V4F_COUNT(%rdi) /* set dest count */ movl $4, V4F_SIZE(%rdi) /* set dest size */ @@ -210,8 +210,8 @@ _mesa_x86_64_transform_points4_identity: movq V4F_START(%rdx), %rsi /* ptr to first src vertex */ movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */ - prefetch 64(%rsi) - prefetchw 64(%rdi) + prefetcht1 64(%rsi) + prefetcht1 64(%rdi) add %ecx, %ecx @@ -228,7 +228,7 @@ p4_identity_done: _mesa_3dnow_transform_points4_3d_no_rot: movl V4F_COUNT(%rdx), %ecx /* count */ - movzx V4F_STRIDE(%rdx), %eax /* stride */ + movzbl V4F_STRIDE(%rdx), %eax /* stride */ movl %ecx, V4F_COUNT(%rdi) /* set dest count */ movl $4, V4F_SIZE(%rdi) /* set dest size */ @@ -242,7 +242,7 @@ _mesa_3dnow_transform_points4_3d_no_rot: movq V4F_START(%rdx), %rdx /* ptr to first src vertex */ movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */ - prefetch (%rdx) + prefetcht1 (%rdx) movd (%rsi), %mm0 /* | m00 */ .byte 0x66, 0x66, 0x90 /* manual align += 3 */ @@ -255,7 +255,7 @@ _mesa_3dnow_transform_points4_3d_no_rot: p4_3d_no_rot_loop: - prefetchw 32(%rdi) + prefetcht1 32(%rdi) movq (%rdx), %mm4 /* x1 | x0 */ movq 8(%rdx), %mm5 /* x3 | x2 */ @@ -279,7 +279,7 @@ p4_3d_no_rot_loop: addq $16, %rdi decl %ecx - prefetch 32(%rdx) + prefetcht1 32(%rdx) jnz p4_3d_no_rot_loop p4_3d_no_rot_done: @@ -293,7 +293,7 @@ p4_3d_no_rot_done: _mesa_3dnow_transform_points4_perspective: movl V4F_COUNT(%rdx), %ecx /* count */ - movzx V4F_STRIDE(%rdx), %eax /* stride */ + movzbl V4F_STRIDE(%rdx), %eax /* stride */ movl %ecx, V4F_COUNT(%rdi) /* set dest count */ movl $4, V4F_SIZE(%rdi) /* set dest size */ @@ -311,7 +311,7 @@ _mesa_3dnow_transform_points4_perspective: punpckldq 20(%rsi), %mm0 /* m11 | m00 */ movq 32(%rsi), %mm2 /* m21 | m20 */ - prefetch (%rdx) + prefetcht1 (%rdx) movd 40(%rsi), %mm1 /* | m22 */ @@ -321,7 +321,7 @@ _mesa_3dnow_transform_points4_perspective: p4_perspective_loop: - prefetchw 32(%rdi) /* prefetch 2 vertices ahead */ + prefetcht1 32(%rdi) /* prefetch 2 vertices ahead */ movq (%rdx), %mm4 /* x1 | x0 */ movq 8(%rdx), %mm5 /* x3 | x2 */ @@ -347,7 +347,7 @@ p4_perspective_loop: addq $16, %rdi decl %ecx - prefetch 32(%rdx) /* hopefully stride is zero */ + prefetcht1 32(%rdx) /* hopefully stride is zero */ jnz p4_perspective_loop p4_perspective_done: @@ -360,7 +360,7 @@ p4_perspective_done: _mesa_3dnow_transform_points4_2d_no_rot: movl V4F_COUNT(%rdx), %ecx /* count */ - movzx V4F_STRIDE(%rdx), %eax /* stride */ + movzbl V4F_STRIDE(%rdx), %eax /* stride */ movl %ecx, V4F_COUNT(%rdi) /* set dest count */ movl $4, V4F_SIZE(%rdi) /* set dest size */ @@ -374,14 +374,14 @@ _mesa_3dnow_transform_points4_2d_no_rot: movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */ movd (%rsi), %mm0 /* | m00 */ - prefetch (%rdx) + prefetcht1 (%rdx) punpckldq 20(%rsi), %mm0 /* m11 | m00 */ movq 48(%rsi), %mm1 /* m31 | m30 */ p4_2d_no_rot_loop: - prefetchw 32(%rdi) /* prefetch 2 vertices ahead */ + prefetcht1 32(%rdi) /* prefetch 2 vertices ahead */ movq (%rdx), %mm4 /* x1 | x0 */ movq 8(%rdx), %mm5 /* x3 | x2 */ @@ -394,7 +394,7 @@ p4_2d_no_rot_loop: addq %rax, %rdx pfmul %mm1, %mm6 /* x3*m31 | x3*m30 */ - prefetch 32(%rdx) /* hopefully stride is zero */ + prefetcht1 32(%rdx) /* hopefully stride is zero */ pfadd %mm4, %mm6 /* x1*m11+x3*m31 | x0*m00+x3*m30 */ movq %mm6, (%rdi) /* write r0, r1 */ @@ -416,7 +416,7 @@ p4_2d_no_rot_done: _mesa_3dnow_transform_points4_2d: movl V4F_COUNT(%rdx), %ecx /* count */ - movzx V4F_STRIDE(%rdx), %eax /* stride */ + movzbl V4F_STRIDE(%rdx), %eax /* stride */ movl %ecx, V4F_COUNT(%rdi) /* set dest count */ movl $4, V4F_SIZE(%rdi) /* set dest size */ @@ -433,7 +433,7 @@ _mesa_3dnow_transform_points4_2d: movd (%rsi), %mm0 /* | m00 */ movd 4(%rsi), %mm1 /* | m01 */ - prefetch (%rdx) + prefetcht1 (%rdx) punpckldq 16(%rsi), %mm0 /* m10 | m00 */ .byte 0x66, 0x66, 0x90 /* manual align += 4 */ @@ -443,7 +443,7 @@ _mesa_3dnow_transform_points4_2d: p4_2d_loop: - prefetchw 32(%rdi) /* prefetch 2 vertices ahead */ + prefetcht1 32(%rdi) /* prefetch 2 vertices ahead */ movq (%rdx), %mm3 /* x1 | x0 */ movq 8(%rdx), %mm5 /* x3 | x2 */ @@ -460,7 +460,7 @@ p4_2d_loop: pfacc %mm4, %mm3 /* x0*m01+x1*m11 | x0*m00+x1*m10 */ pfmul %mm2, %mm6 /* x3*m31 | x3*m30 */ - prefetch 32(%rdx) /* hopefully stride is zero */ + prefetcht1 32(%rdx) /* hopefully stride is zero */ pfadd %mm6, %mm3 /* r1 | r0 */