/*
 * Clip testing in SPARC assembly
 *
 * Syntax: GNU as for SPARC, run through the C preprocessor first.
 * Exports:
 *   _mesa_sparc_cliptest_points4     clip test plus perspective divide
 *   _mesa_sparc_cliptest_points4_np  clip test only ("no projection")
 *
 * The V4F_* structure offsets are not defined in this file; they are
 * expected to come from math/m_vector_asm.h, which is configured through
 * MATH_ASM_PTR_SIZE.
 */

/*
 * LDPTR           load instruction wide enough for a pointer.
 * CLIP_FRAME_SIZE stack frame allocated by "save".  The frame must be able
 *                 to hold a spilled register window (16 registers): 64 bytes
 *                 with 4-byte registers, 128 bytes with 8-byte registers.
 *                 The 64-bit ABI minimum frame is 176 bytes, rounded up to
 *                 192 here to keep 16-byte alignment.
 */
#if __arch64__
#define LDPTR		ldx
#define MATH_ASM_PTR_SIZE 8
#define CLIP_FRAME_SIZE	192
#include "math/m_vector_asm.h"
#else
#define LDPTR		ld
#define MATH_ASM_PTR_SIZE 4
#define CLIP_FRAME_SIZE	64
#include "math/m_vector_asm.h"
#endif

/* Vector size flag masks; only VEC_SIZE_4 is used in this file. */
#define VEC_SIZE_1	1
#define VEC_SIZE_2	3
#define VEC_SIZE_3	7
#define VEC_SIZE_4	15

	.register	%g2, #scratch
	.register	%g3, #scratch

	.text
	.align		64

	/* Constants live in .text so they can be reached PC-relative from
	 * the functions below.  clip_table must directly follow
	 * one_dot_zero: both functions compute its address as
	 * one_dot_zero + 4.
	 */
one_dot_zero:
	.word		0x3f800000	/* 1.0f */

	/* This trick is shamelessly stolen from the x86
	 * Mesa asm.  Very clever, and we can do it too
	 * since we have the necessary add with carry
	 * instructions on Sparc.
	 *
	 * The table has 128 one-byte entries and maps a 7-bit index to
	 * the clip mask byte of one vertex.  The index is assembled from
	 * carry bits, most significant bit first:
	 *
	 *   bit 6: sign bit of w
	 *   bit 5: sign bit of z
	 *   bit 4: (w << 1) < (z << 1), unsigned compare of the raw bits
	 *   bit 3: sign bit of y
	 *   bit 2: (w << 1) < (y << 1)
	 *   bit 1: sign bit of x
	 *   bit 0: (w << 1) < (x << 1)
	 *
	 * Comparing the sign-stripped bit patterns compares magnitudes
	 * for finite IEEE-754 single precision values.
	 */
clip_table:
	.byte	 0,  1,  0,  2,  4,  5,  4,  6
	.byte	 0,  1,  0,  2,  8,  9,  8, 10
	.byte	32, 33, 32, 34, 36, 37, 36, 38
	.byte	32, 33, 32, 34, 40, 41, 40, 42
	.byte	 0,  1,  0,  2,  4,  5,  4,  6
	.byte	 0,  1,  0,  2,  8,  9,  8, 10
	.byte	16, 17, 16, 18, 20, 21, 20, 22
	.byte	16, 17, 16, 18, 24, 25, 24, 26
	.byte	63, 61, 63, 62, 55, 53, 55, 54
	.byte	63, 61, 63, 62, 59, 57, 59, 58
	.byte	47, 45, 47, 46, 39, 37, 39, 38
	.byte	47, 45, 47, 46, 43, 41, 43, 42
	.byte	63, 61, 63, 62, 55, 53, 55, 54
	.byte	63, 61, 63, 62, 59, 57, 59, 58
	.byte	31, 29, 31, 30, 23, 21, 23, 22
	.byte	31, 29, 31, 30, 27, 25, 27, 26

	/* Helper for PC-relative addressing: "call __pc_tramp" leaves the
	 * address of the call instruction in %o7 and this routine returns
	 * immediately.
	 */
	.align		64
__pc_tramp:
	retl
	 nop

/*
 * _mesa_sparc_cliptest_points4
 *
 * Arguments, as seen after "save":
 *   %i0  GLvector4f *clip_vec       source vertices (x, y, z, w floats)
 *   %i1  GLvector4f *proj_vec       destination, 16 bytes per vertex
 *   %i2  GLubyte clipMask[]         receives one mask byte per vertex
 *   %i3  GLubyte *orMask            in/out: OR of all masks
 *   %i4  GLubyte *andMask           in/out: AND of all masks
 *   %i5  GLboolean viewport_z_enable  never read here; the register is
 *                                     reused as the output pointer
 *
 * Returns proj_vec in %o0.
 *
 * For every vertex the mask byte is looked up in clip_table and stored to
 * clipMask[i].  A vertex with mask 0 is written to proj_vec as
 * (x/w, y/w, z/w, 1/w); any other vertex is written as (0, 0, 0, 1.0).
 * proj_vec gets VEC_SIZE_4 ORed into its flags, size set to 3 and count
 * copied from clip_vec.  On exit *orMask has been ORed with every mask;
 * *andMask is set to 0 if at least one vertex had mask 0, otherwise it is
 * ANDed with every mask.
 *
 * A count of zero leaves clipMask and the vertex data untouched and
 * writes both mask bytes back unchanged.
 */
	.globl		_mesa_sparc_cliptest_points4
_mesa_sparc_cliptest_points4:
	save		%sp, -CLIP_FRAME_SIZE, %sp

	/* The delay slot runs at (call address + 4), so the expression
	 * below equals (call address - one_dot_zero) and the subtraction
	 * leaves %g1 = &one_dot_zero.
	 */
	call		__pc_tramp
	 sub		%o7, (. - one_dot_zero - 4), %g1
	ld		[%g1 + 0x0], %f4	! f4 = 1.0f
	add		%g1, 0x4, %g1		! g1 = clip_table

	ld		[%i0 + V4F_STRIDE], %l1
	ld		[%i0 + V4F_COUNT], %l3
	LDPTR		[%i0 + V4F_START], %i0
	LDPTR		[%i1 + V4F_START], %i5
	ldub		[%i3], %g2		! g2 = *orMask
	ldub		[%i4], %g3		! g3 = *andMask
	sll		%g3, 8, %g3
	or		%g2, %g3, %g2		! g2 = (and << 8) | or

	ld		[%i1 + V4F_FLAGS], %g3
	or		%g3, VEC_SIZE_4, %g3
	st		%g3, [%i1 + V4F_FLAGS]
	mov		3, %g3
	st		%g3, [%i1 + V4F_SIZE]
	st		%l3, [%i1 + V4F_COUNT]
	clr		%l2
	clr		%l0

	/* The loop below is bottom-tested and compares the counter for
	 * equality only, so it must not be entered with count == 0.
	 */
	cmp		%l3, 0
	be		4f
	 nop

	/* l0:	i
	 * l3:	count
	 * l1:	stride
	 * l2:	c	(number of vertices with a non-zero mask)
	 * g2:	(tmpAndMask << 8) | tmpOrMask
	 * g1:	clip_table
	 * i0:	from[stride][i]
	 * i2:	clipMask
	 * i5:	vProj[4][i]
	 * f4:	1.0f
	 * f8:	1.0f / w of the current vertex
	 */

1:	ld		[%i0 + 0x0c], %f3	! LSU	Group
	ld		[%i0 + 0x0c], %g5	! LSU	Group
	ld		[%i0 + 0x08], %g4	! LSU	Group
	fdivs		%f4, %f3, %f8		! FGM
	/* Each addcc shifts a sign bit into the carry, each subcc sets
	 * the carry when (w << 1) is below the doubled coordinate, and
	 * each addx shifts that carry into the table index in g3.
	 */
	addcc		%g5, %g5, %g5		! IEU1	Group
	addx		%g0, 0x0, %g3		! IEU1	Group
	addcc		%g4, %g4, %g4		! IEU1	Group
	addx		%g3, %g3, %g3		! IEU1	Group
	subcc		%g5, %g4, %g0		! IEU1	Group
	ld		[%i0 + 0x04], %g4	! LSU	Group
	addx		%g3, %g3, %g3		! IEU1	Group
	addcc		%g4, %g4, %g4		! IEU1	Group
	addx		%g3, %g3, %g3		! IEU1	Group
	subcc		%g5, %g4, %g0		! IEU1	Group
	ld		[%i0 + 0x00], %g4	! LSU	Group
	addx		%g3, %g3, %g3		! IEU1	Group
	addcc		%g4, %g4, %g4		! IEU1	Group
	addx		%g3, %g3, %g3		! IEU1	Group
	subcc		%g5, %g4, %g0		! IEU1	Group
	addx		%g3, %g3, %g3		! IEU1	Group
	ldub		[%g1 + %g3], %g3	! LSU	Group
	cmp		%g3, 0			! IEU1	Group, stall
	/* The delay slot is not annulled: clipMask[i] is stored on both
	 * paths.
	 */
	be		2f			! CTI
	 stb		%g3, [%i2]		! LSU

	/* mask != 0: count it, fold it into both running masks and emit
	 * (0, 0, 0, 1.0).  ORing 0xff into g4 keeps the low (or) byte of
	 * g2 intact while the high (and) byte is ANDed with the mask.
	 */
	sll		%g3, 8, %g4		! IEU1	Group
	add		%l2, 1, %l2		! IEU0
	st		%g0, [%i5 + 0x00]	! LSU
	or		%g4, 0xff, %g4		! IEU0	Group
	or		%g2, %g3, %g2		! IEU1
	st		%g0, [%i5 + 0x04]	! LSU
	and		%g2, %g4, %g2		! IEU0	Group
	st		%g0, [%i5 + 0x08]	! LSU
	b		3f			! CTI
	 st		%f4, [%i5 + 0x0c]	! LSU	Group

	/* mask == 0: emit (x/w, y/w, z/w, 1/w). */
2:	ld		[%i0 + 0x00], %f0	! LSU	Group
	ld		[%i0 + 0x04], %f1	! LSU	Group
	ld		[%i0 + 0x08], %f2	! LSU	Group
	fmuls		%f0, %f8, %f0		! FGM
	st		%f0, [%i5 + 0x00]	! LSU	Group
	fmuls		%f1, %f8, %f1		! FGM
	st		%f1, [%i5 + 0x04]	! LSU	Group
	fmuls		%f2, %f8, %f2		! FGM
	st		%f2, [%i5 + 0x08]	! LSU	Group
	st		%f8, [%i5 + 0x0c]	! LSU	Group

3:	add		%i5, 0x10, %i5		! IEU1
	add		%l0, 1, %l0		! IEU0	Group
	add		%i2, 1, %i2		! IEU0	Group
	cmp		%l0, %l3		! IEU1	Group
	bne		1b			! CTI
	 add		%i0, %l1, %i0		! IEU0	Group

	/* Write the masks back.  "bl,a" executes its delay slot only
	 * when taken, so the and-mask is cleared only if c < count.
	 */
4:	stb		%g2, [%i3]		! LSU
	srl		%g2, 8, %g3		! IEU0	Group
	cmp		%l2, %l3		! IEU1	Group
	bl,a		1f			! CTI
	 clr		%g3			! IEU0
1:	stb		%g3, [%i4]		! LSU	Group
	ret					! CTI	Group
	 restore	%i1, 0x0, %o0

/*
 * _mesa_sparc_cliptest_points4_np
 *
 * Same arguments and mask computation as _mesa_sparc_cliptest_points4,
 * but nothing is projected: proj_vec (%i1) is never dereferenced and is
 * simply handed back in %o0.  Only clipMask[], *orMask and *andMask are
 * written.
 *
 * A count of zero leaves clipMask untouched and writes both mask bytes
 * back unchanged.
 */
	.globl		_mesa_sparc_cliptest_points4_np
_mesa_sparc_cliptest_points4_np:
	save		%sp, -CLIP_FRAME_SIZE, %sp

	/* Same PC-relative address computation as above; the 1.0f
	 * constant itself is not needed here.
	 */
	call		__pc_tramp
	 sub		%o7, (. - one_dot_zero - 4), %g1
	add		%g1, 0x4, %g1		! g1 = clip_table

	ld		[%i0 + V4F_STRIDE], %l1
	ld		[%i0 + V4F_COUNT], %l3
	LDPTR		[%i0 + V4F_START], %i0
	ldub		[%i3], %g2		! g2 = *orMask
	ldub		[%i4], %g3		! g3 = *andMask
	sll		%g3, 8, %g3
	or		%g2, %g3, %g2		! g2 = (and << 8) | or

	clr		%l2
	clr		%l0

	/* Bottom-tested loop with an equality check: skip it entirely
	 * when there are no vertices.
	 */
	cmp		%l3, 0
	be		4f
	 nop

	/* l0:	i
	 * l3:	count
	 * l1:	stride
	 * l2:	c	(number of vertices with a non-zero mask)
	 * g2:	(tmpAndMask << 8) | tmpOrMask
	 * g1:	clip_table
	 * i0:	from[stride][i]
	 * i2:	clipMask
	 */

1:	ld		[%i0 + 0x0c], %g5	! LSU	Group
	ld		[%i0 + 0x08], %g4	! LSU	Group
	/* Build the 7-bit clip_table index in g3 from carry bits, as
	 * described at clip_table.
	 */
	addcc		%g5, %g5, %g5		! IEU1	Group
	addx		%g0, 0x0, %g3		! IEU1	Group
	addcc		%g4, %g4, %g4		! IEU1	Group
	addx		%g3, %g3, %g3		! IEU1	Group
	subcc		%g5, %g4, %g0		! IEU1	Group
	ld		[%i0 + 0x04], %g4	! LSU	Group
	addx		%g3, %g3, %g3		! IEU1	Group
	addcc		%g4, %g4, %g4		! IEU1	Group
	addx		%g3, %g3, %g3		! IEU1	Group
	subcc		%g5, %g4, %g0		! IEU1	Group
	ld		[%i0 + 0x00], %g4	! LSU	Group
	addx		%g3, %g3, %g3		! IEU1	Group
	addcc		%g4, %g4, %g4		! IEU1	Group
	addx		%g3, %g3, %g3		! IEU1	Group
	subcc		%g5, %g4, %g0		! IEU1	Group
	addx		%g3, %g3, %g3		! IEU1	Group
	ldub		[%g1 + %g3], %g3	! LSU	Group
	cmp		%g3, 0			! IEU1	Group, stall
	/* clipMask[i] is stored on both paths (delay slot not annulled). */
	be		2f			! CTI
	 stb		%g3, [%i2]		! LSU

	/* mask != 0: count it and fold it into both running masks. */
	sll		%g3, 8, %g4		! IEU1	Group
	add		%l2, 1, %l2		! IEU0
	or		%g4, 0xff, %g4		! IEU0	Group
	or		%g2, %g3, %g2		! IEU1
	and		%g2, %g4, %g2		! IEU0	Group

2:	add		%l0, 1, %l0		! IEU0	Group
	add		%i2, 1, %i2		! IEU0	Group
	cmp		%l0, %l3		! IEU1	Group
	bne		1b			! CTI
	 add		%i0, %l1, %i0		! IEU0	Group

	/* Write the masks back; the and-mask is cleared only if
	 * c < count (annulled delay slot).
	 */
4:	stb		%g2, [%i3]		! LSU
	srl		%g2, 8, %g3		! IEU0	Group
	cmp		%l2, %l3		! IEU1	Group
	bl,a		1f			! CTI
	 clr		%g3			! IEU0
1:	stb		%g3, [%i4]		! LSU	Group
	ret					! CTI	Group
	 restore	%i1, 0x0, %o0