projects
/
mesa.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
mesa: add support for CET to x86/x86-64 asm files.
[mesa.git]
/
src
/
mesa
/
x86-64
/
xform4.S
diff --git a/src/mesa/x86-64/xform4.S b/src/mesa/x86-64/xform4.S
index 805969127db5f4ed2663b5122099da017f1704b2..e36a6276d2ae902f45ab451eb8262f27e9eb00b4 100644
(file)
--- a/src/mesa/x86-64/xform4.S
+++ b/src/mesa/x86-64/xform4.S
@@ -1,6 +1,5 @@
/*
* Mesa 3-D graphics library
/*
* Mesa 3-D graphics library
- * Version: 7.1
*
* Copyright (C) 1999-2007 Brian Paul All Rights Reserved.
*
*
* Copyright (C) 1999-2007 Brian Paul All Rights Reserved.
*
@@ -17,20 +16,29 @@
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
*/
*/
+#ifdef HAVE_CET_H
+#include <cet.h>
+#else
+#define _CET_ENDBR
+#endif
#ifdef USE_X86_64_ASM
#ifdef USE_X86_64_ASM
-#include "matypes.h"
+#define MATH_ASM_PTR_SIZE 8
+#include "math/m_vector_asm.h"
.text
.align 16
.globl _mesa_x86_64_cpuid
.text
.align 16
.globl _mesa_x86_64_cpuid
+.hidden _mesa_x86_64_cpuid
_mesa_x86_64_cpuid:
_mesa_x86_64_cpuid:
+ _CET_ENDBR
pushq %rbx
movl (%rdi), %eax
movl 8(%rdi), %ecx
pushq %rbx
movl (%rdi), %eax
movl 8(%rdi), %ecx
@@ -46,14 +54,16 @@ _mesa_x86_64_cpuid:
.align 16
.globl _mesa_x86_64_transform_points4_general
.align 16
.globl _mesa_x86_64_transform_points4_general
+.hidden _mesa_x86_64_transform_points4_general
_mesa_x86_64_transform_points4_general:
/*
* rdi = dest
* rsi = matrix
* rdx = source
*/
_mesa_x86_64_transform_points4_general:
/*
* rdi = dest
* rsi = matrix
* rdx = source
*/
+ _CET_ENDBR
movl V4F_COUNT(%rdx), %ecx /* count */
movl V4F_COUNT(%rdx), %ecx /* count */
- movzx V4F_STRIDE(%rdx), %eax /* stride */
+ movzbl V4F_STRIDE(%rdx), %eax /* stride */
movl %ecx, V4F_COUNT(%rdi) /* set dest count */
movl $4, V4F_SIZE(%rdi) /* set dest size */
movl %ecx, V4F_COUNT(%rdi) /* set dest count */
movl $4, V4F_SIZE(%rdi) /* set dest size */
@@ -67,7 +77,7 @@ _mesa_x86_64_transform_points4_general:
movq V4F_START(%rdx), %rdx /* ptr to first src vertex */
movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */
movq V4F_START(%rdx), %rdx /* ptr to first src vertex */
movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */
- prefetch 16(%rdx)
+ prefetcht1 16(%rdx)
movaps 0(%rsi), %xmm4 /* m3 | m2 | m1 | m0 */
movaps 16(%rsi), %xmm5 /* m7 | m6 | m5 | m4 */
movaps 0(%rsi), %xmm4 /* m3 | m2 | m1 | m0 */
movaps 16(%rsi), %xmm5 /* m7 | m6 | m5 | m4 */
@@ -78,7 +88,7 @@ _mesa_x86_64_transform_points4_general:
p4_general_loop:
movups (%rdx), %xmm8 /* ox | oy | oz | ow */
p4_general_loop:
movups (%rdx), %xmm8 /* ox | oy | oz | ow */
- prefetchw 16(%rdi)
+ prefetcht1 16(%rdi)
pshufd $0x00, %xmm8, %xmm0 /* ox | ox | ox | ox */
addq %rax, %rdx
pshufd $0x00, %xmm8, %xmm0 /* ox | ox | ox | ox */
addq %rax, %rdx
@@ -91,7 +101,7 @@ p4_general_loop:
addps %xmm1, %xmm0 /* ox*m3+oy*m7 | ... */
mulps %xmm7, %xmm3 /* ow*m15 | ow*m14 | ow*m13 | ow*m12 */
addps %xmm2, %xmm0 /* ox*m3+oy*m7+oz*m11 | ... */
addps %xmm1, %xmm0 /* ox*m3+oy*m7 | ... */
mulps %xmm7, %xmm3 /* ow*m15 | ow*m14 | ow*m13 | ow*m12 */
addps %xmm2, %xmm0 /* ox*m3+oy*m7+oz*m11 | ... */
- prefetch 16(%rdx)
+ prefetcht1 16(%rdx)
addps %xmm3, %xmm0 /* ox*m3+oy*m7+oz*m11+ow*m15 | ... */
movaps %xmm0, (%rdi) /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */
addps %xmm3, %xmm0 /* ox*m3+oy*m7+oz*m11+ow*m15 | ... */
movaps %xmm0, (%rdi) /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */
@@ -116,17 +126,18 @@ p4_constants:
.byte 0x00, 0x00, 0x00, 0x00
.byte 0x00, 0x00, 0x00, 0x00
.byte 0x00, 0x00, 0x00, 0x00
.byte 0x00, 0x00, 0x00, 0x00
.byte 0x00, 0x00, 0x00, 0x00
.byte 0x00, 0x00, 0x00, 0x00
-.float 0f+1.0
+.float 1.0
.text
.align 16
.globl _mesa_x86_64_transform_points4_3d
.text
.align 16
.globl _mesa_x86_64_transform_points4_3d
+.hidden _mesa_x86_64_transform_points4_3d
/*
* this is slower than _mesa_x86_64_transform_points4_general
* because it ensures that the last matrix row (or is it column?) is 0,0,0,1
*/
_mesa_x86_64_transform_points4_3d:
/*
* this is slower than _mesa_x86_64_transform_points4_general
* because it ensures that the last matrix row (or is it column?) is 0,0,0,1
*/
_mesa_x86_64_transform_points4_3d:
-
+ _CET_ENDBR
leaq p4_constants(%rip), %rax
prefetchnta 64(%rsi)
leaq p4_constants(%rip), %rax
prefetchnta 64(%rsi)
@@ -135,7 +146,7 @@ _mesa_x86_64_transform_points4_3d:
movaps 16(%rax), %xmm10
movl V4F_COUNT(%rdx), %ecx /* count */
movaps 16(%rax), %xmm10
movl V4F_COUNT(%rdx), %ecx /* count */
- movzx V4F_STRIDE(%rdx), %eax /* stride */
+ movzbl V4F_STRIDE(%rdx), %eax /* stride */
movl %ecx, V4F_COUNT(%rdi) /* set dest count */
movl $4, V4F_SIZE(%rdi) /* set dest size */
movl %ecx, V4F_COUNT(%rdi) /* set dest count */
movl $4, V4F_SIZE(%rdi) /* set dest size */
@@ -147,7 +158,7 @@ _mesa_x86_64_transform_points4_3d:
movq V4F_START(%rdx), %rdx /* ptr to first src vertex */
movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */
movq V4F_START(%rdx), %rdx /* ptr to first src vertex */
movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */
- prefetch 16(%rdx)
+ prefetcht1 16(%rdx)
movaps 0(%rsi), %xmm4 /* m3 | m2 | m1 | m0 */
movaps 16(%rsi), %xmm5 /* m7 | m6 | m5 | m4 */
movaps 0(%rsi), %xmm4 /* m3 | m2 | m1 | m0 */
movaps 16(%rsi), %xmm5 /* m7 | m6 | m5 | m4 */
@@ -163,7 +174,7 @@ _mesa_x86_64_transform_points4_3d:
p4_3d_loop:
movups (%rdx), %xmm8 /* ox | oy | oz | ow */
p4_3d_loop:
movups (%rdx), %xmm8 /* ox | oy | oz | ow */
- prefetchw 16(%rdi)
+ prefetcht1 16(%rdi)
pshufd $0x00, %xmm8, %xmm0 /* ox | ox | ox | ox */
addq %rax, %rdx
pshufd $0x00, %xmm8, %xmm0 /* ox | ox | ox | ox */
addq %rax, %rdx
@@ -176,7 +187,7 @@ p4_3d_loop:
addps %xmm1, %xmm0 /* ox*m3+oy*m7 | ... */
mulps %xmm7, %xmm3 /* ow*m15 | ow*m14 | ow*m13 | ow*m12 */
addps %xmm2, %xmm0 /* ox*m3+oy*m7+oz*m11 | ... */
addps %xmm1, %xmm0 /* ox*m3+oy*m7 | ... */
mulps %xmm7, %xmm3 /* ow*m15 | ow*m14 | ow*m13 | ow*m12 */
addps %xmm2, %xmm0 /* ox*m3+oy*m7+oz*m11 | ... */
- prefetch 16(%rdx)
+ prefetcht1 16(%rdx)
addps %xmm3, %xmm0 /* ox*m3+oy*m7+oz*m11+ow*m15 | ... */
movaps %xmm0, (%rdi) /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */
addps %xmm3, %xmm0 /* ox*m3+oy*m7+oz*m11+ow*m15 | ... */
movaps %xmm0, (%rdi) /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */
@@ -192,10 +203,11 @@ p4_3d_done:
.align 16
.globl _mesa_x86_64_transform_points4_identity
.align 16
.globl _mesa_x86_64_transform_points4_identity
+.hidden _mesa_x86_64_transform_points4_identity
_mesa_x86_64_transform_points4_identity:
_mesa_x86_64_transform_points4_identity:
-
+ _CET_ENDBR
movl V4F_COUNT(%rdx), %ecx /* count */
movl V4F_COUNT(%rdx), %ecx /* count */
- movzx V4F_STRIDE(%rdx), %eax /* stride */
+ movzbl V4F_STRIDE(%rdx), %eax /* stride */
movl %ecx, V4F_COUNT(%rdi) /* set dest count */
movl $4, V4F_SIZE(%rdi) /* set dest size */
movl %ecx, V4F_COUNT(%rdi) /* set dest count */
movl $4, V4F_SIZE(%rdi) /* set dest size */
@@ -206,8 +218,8 @@ _mesa_x86_64_transform_points4_identity:
movq V4F_START(%rdx), %rsi /* ptr to first src vertex */
movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */
movq V4F_START(%rdx), %rsi /* ptr to first src vertex */
movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */
- prefetch 64(%rsi)
- prefetchw 64(%rdi)
+ prefetcht1 64(%rsi)
+ prefetcht1 64(%rdi)
add %ecx, %ecx
add %ecx, %ecx
@@ -220,10 +232,11 @@ p4_identity_done:
.align 16
.globl _mesa_3dnow_transform_points4_3d_no_rot
.align 16
.globl _mesa_3dnow_transform_points4_3d_no_rot
+.hidden _mesa_3dnow_transform_points4_3d_no_rot
_mesa_3dnow_transform_points4_3d_no_rot:
_mesa_3dnow_transform_points4_3d_no_rot:
-
+ _CET_ENDBR
movl V4F_COUNT(%rdx), %ecx /* count */
movl V4F_COUNT(%rdx), %ecx /* count */
- movzx V4F_STRIDE(%rdx), %eax /* stride */
+ movzbl V4F_STRIDE(%rdx), %eax /* stride */
movl %ecx, V4F_COUNT(%rdi) /* set dest count */
movl $4, V4F_SIZE(%rdi) /* set dest size */
movl %ecx, V4F_COUNT(%rdi) /* set dest count */
movl $4, V4F_SIZE(%rdi) /* set dest size */
@@ -237,7 +250,7 @@ _mesa_3dnow_transform_points4_3d_no_rot:
movq V4F_START(%rdx), %rdx /* ptr to first src vertex */
movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */
movq V4F_START(%rdx), %rdx /* ptr to first src vertex */
movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */
- prefetch (%rdx)
+ prefetcht1 (%rdx)
movd (%rsi), %mm0 /* | m00 */
.byte 0x66, 0x66, 0x90 /* manual align += 3 */
movd (%rsi), %mm0 /* | m00 */
.byte 0x66, 0x66, 0x90 /* manual align += 3 */
@@ -250,7 +263,7 @@ _mesa_3dnow_transform_points4_3d_no_rot:
p4_3d_no_rot_loop:
p4_3d_no_rot_loop:
- prefetchw 32(%rdi)
+ prefetcht1 32(%rdi)
movq (%rdx), %mm4 /* x1 | x0 */
movq 8(%rdx), %mm5 /* x3 | x2 */
movq (%rdx), %mm4 /* x1 | x0 */
movq 8(%rdx), %mm5 /* x3 | x2 */
@@ -274,7 +287,7 @@ p4_3d_no_rot_loop:
addq $16, %rdi
decl %ecx
addq $16, %rdi
decl %ecx
- prefetch 32(%rdx)
+ prefetcht1 32(%rdx)
jnz p4_3d_no_rot_loop
p4_3d_no_rot_done:
jnz p4_3d_no_rot_loop
p4_3d_no_rot_done:
@@ -284,10 +297,11 @@ p4_3d_no_rot_done:
.align 16
.globl _mesa_3dnow_transform_points4_perspective
.align 16
.globl _mesa_3dnow_transform_points4_perspective
+.hidden _mesa_3dnow_transform_points4_perspective
_mesa_3dnow_transform_points4_perspective:
_mesa_3dnow_transform_points4_perspective:
-
+ _CET_ENDBR
movl V4F_COUNT(%rdx), %ecx /* count */
movl V4F_COUNT(%rdx), %ecx /* count */
- movzx V4F_STRIDE(%rdx), %eax /* stride */
+ movzbl V4F_STRIDE(%rdx), %eax /* stride */
movl %ecx, V4F_COUNT(%rdi) /* set dest count */
movl $4, V4F_SIZE(%rdi) /* set dest size */
movl %ecx, V4F_COUNT(%rdi) /* set dest count */
movl $4, V4F_SIZE(%rdi) /* set dest size */
@@ -305,7 +319,7 @@ _mesa_3dnow_transform_points4_perspective:
punpckldq 20(%rsi), %mm0 /* m11 | m00 */
movq 32(%rsi), %mm2 /* m21 | m20 */
punpckldq 20(%rsi), %mm0 /* m11 | m00 */
movq 32(%rsi), %mm2 /* m21 | m20 */
- prefetch (%rdx)
+ prefetcht1 (%rdx)
movd 40(%rsi), %mm1 /* | m22 */
movd 40(%rsi), %mm1 /* | m22 */
@@ -315,7 +329,7 @@ _mesa_3dnow_transform_points4_perspective:
p4_perspective_loop:
p4_perspective_loop:
- prefetchw 32(%rdi) /* prefetch 2 vertices ahead */
+ prefetcht1 32(%rdi) /* prefetch 2 vertices ahead */
movq (%rdx), %mm4 /* x1 | x0 */
movq 8(%rdx), %mm5 /* x3 | x2 */
movq (%rdx), %mm4 /* x1 | x0 */
movq 8(%rdx), %mm5 /* x3 | x2 */
@@ -341,7 +355,7 @@ p4_perspective_loop:
addq $16, %rdi
decl %ecx
addq $16, %rdi
decl %ecx
- prefetch 32(%rdx) /* hopefully stride is zero */
+ prefetcht1 32(%rdx) /* hopefully stride is zero */
jnz p4_perspective_loop
p4_perspective_done:
jnz p4_perspective_loop
p4_perspective_done:
@@ -350,10 +364,11 @@ p4_perspective_done:
.align 16
.globl _mesa_3dnow_transform_points4_2d_no_rot
.align 16
.globl _mesa_3dnow_transform_points4_2d_no_rot
+.hidden _mesa_3dnow_transform_points4_2d_no_rot
_mesa_3dnow_transform_points4_2d_no_rot:
_mesa_3dnow_transform_points4_2d_no_rot:
-
+ _CET_ENDBR
movl V4F_COUNT(%rdx), %ecx /* count */
movl V4F_COUNT(%rdx), %ecx /* count */
- movzx V4F_STRIDE(%rdx), %eax /* stride */
+ movzbl V4F_STRIDE(%rdx), %eax /* stride */
movl %ecx, V4F_COUNT(%rdi) /* set dest count */
movl $4, V4F_SIZE(%rdi) /* set dest size */
movl %ecx, V4F_COUNT(%rdi) /* set dest count */
movl $4, V4F_SIZE(%rdi) /* set dest size */
@@ -367,14 +382,14 @@ _mesa_3dnow_transform_points4_2d_no_rot:
movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */
movd (%rsi), %mm0 /* | m00 */
movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */
movd (%rsi), %mm0 /* | m00 */
- prefetch (%rdx)
+ prefetcht1 (%rdx)
punpckldq 20(%rsi), %mm0 /* m11 | m00 */
movq 48(%rsi), %mm1 /* m31 | m30 */
p4_2d_no_rot_loop:
punpckldq 20(%rsi), %mm0 /* m11 | m00 */
movq 48(%rsi), %mm1 /* m31 | m30 */
p4_2d_no_rot_loop:
- prefetchw 32(%rdi) /* prefetch 2 vertices ahead */
+ prefetcht1 32(%rdi) /* prefetch 2 vertices ahead */
movq (%rdx), %mm4 /* x1 | x0 */
movq 8(%rdx), %mm5 /* x3 | x2 */
movq (%rdx), %mm4 /* x1 | x0 */
movq 8(%rdx), %mm5 /* x3 | x2 */
@@ -387,7 +402,7 @@ p4_2d_no_rot_loop:
addq %rax, %rdx
pfmul %mm1, %mm6 /* x3*m31 | x3*m30 */
addq %rax, %rdx
pfmul %mm1, %mm6 /* x3*m31 | x3*m30 */
- prefetch 32(%rdx) /* hopefully stride is zero */
+ prefetcht1 32(%rdx) /* hopefully stride is zero */
pfadd %mm4, %mm6 /* x1*m11+x3*m31 | x0*m00+x3*m30 */
movq %mm6, (%rdi) /* write r0, r1 */
pfadd %mm4, %mm6 /* x1*m11+x3*m31 | x0*m00+x3*m30 */
movq %mm6, (%rdi) /* write r0, r1 */
@@ -405,10 +420,11 @@ p4_2d_no_rot_done:
.align 16
.globl _mesa_3dnow_transform_points4_2d
.align 16
.globl _mesa_3dnow_transform_points4_2d
+.hidden _mesa_3dnow_transform_points4_2d
_mesa_3dnow_transform_points4_2d:
_mesa_3dnow_transform_points4_2d:
-
+ _CET_ENDBR
movl V4F_COUNT(%rdx), %ecx /* count */
movl V4F_COUNT(%rdx), %ecx /* count */
- movzx V4F_STRIDE(%rdx), %eax /* stride */
+ movzbl V4F_STRIDE(%rdx), %eax /* stride */
movl %ecx, V4F_COUNT(%rdi) /* set dest count */
movl $4, V4F_SIZE(%rdi) /* set dest size */
movl %ecx, V4F_COUNT(%rdi) /* set dest count */
movl $4, V4F_SIZE(%rdi) /* set dest size */
@@ -425,7 +441,7 @@ _mesa_3dnow_transform_points4_2d:
movd (%rsi), %mm0 /* | m00 */
movd 4(%rsi), %mm1 /* | m01 */
movd (%rsi), %mm0 /* | m00 */
movd 4(%rsi), %mm1 /* | m01 */
- prefetch (%rdx)
+ prefetcht1 (%rdx)
punpckldq 16(%rsi), %mm0 /* m10 | m00 */
.byte 0x66, 0x66, 0x90 /* manual align += 4 */
punpckldq 16(%rsi), %mm0 /* m10 | m00 */
.byte 0x66, 0x66, 0x90 /* manual align += 4 */
@@ -435,7 +451,7 @@ _mesa_3dnow_transform_points4_2d:
p4_2d_loop:
p4_2d_loop:
- prefetchw 32(%rdi) /* prefetch 2 vertices ahead */
+ prefetcht1 32(%rdi) /* prefetch 2 vertices ahead */
movq (%rdx), %mm3 /* x1 | x0 */
movq 8(%rdx), %mm5 /* x3 | x2 */
movq (%rdx), %mm3 /* x1 | x0 */
movq 8(%rdx), %mm5 /* x3 | x2 */
@@ -452,7 +468,7 @@ p4_2d_loop:
pfacc %mm4, %mm3 /* x0*m01+x1*m11 | x0*m00+x1*m10 */
pfmul %mm2, %mm6 /* x3*m31 | x3*m30 */
pfacc %mm4, %mm3 /* x0*m01+x1*m11 | x0*m00+x1*m10 */
pfmul %mm2, %mm6 /* x3*m31 | x3*m30 */
- prefetch 32(%rdx) /* hopefully stride is zero */
+ prefetcht1 32(%rdx) /* hopefully stride is zero */
pfadd %mm6, %mm3 /* r1 | r0 */
pfadd %mm6, %mm3 /* r1 | r0 */