/*
 * Copyright (C) 2004 Thomas Hellstrom, All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE CODE SUPPLIER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/* Thomas' original, gutted for Mesa by Keith Whitwell.
 */

#include "via_tex.h"

#if defined( USE_SSE_ASM )

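/* prefetchnta fetches source data with a non-temporal hint, minimizing
 * cache pollution, and sfence guarantees that all preceding non-temporal
 * (movntps) stores are globally visible before the copy returns.
 */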
#define SSE_PREFETCH " prefetchnta "
#define FENCE __asm__ __volatile__ ("sfence":::"memory");

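/* Warm the cache ahead of the copy: prefetch the first 320 bytes of the
 * source in 32-byte steps.
 */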
#define PREFETCH1(arch_prefetch,from)                   \
    __asm__ __volatile__ (                              \
        "1: " arch_prefetch "(%0)\n"                    \
        arch_prefetch "32(%0)\n"                        \
        arch_prefetch "64(%0)\n"                        \
        arch_prefetch "96(%0)\n"                        \
        arch_prefetch "128(%0)\n"                       \
        arch_prefetch "160(%0)\n"                       \
        arch_prefetch "192(%0)\n"                       \
        arch_prefetch "224(%0)\n"                       \
        arch_prefetch "256(%0)\n"                       \
        arch_prefetch "288(%0)\n"                       \
        "2:\n"                                          \
        : : "r" (from) );

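/* Copy the sub-64-byte tail: "rep movsl" moves n>>2 dwords, then bit 1
 * and bit 0 of n select an optional trailing movsw and movsb.
 */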
#define small_memcpy(to,from,n)                         \
{                                                       \
    __asm__ __volatile__(                               \
        "movl %2,%%ecx\n\t"                             \
        "sarl $2,%%ecx\n\t"                             \
        "rep ; movsl\n\t"                               \
        "testb $2,%b2\n\t"                              \
        "je 1f\n\t"                                     \
        "movsw\n"                                       \
        "1:\ttestb $1,%b2\n\t"                          \
        "je 2f\n\t"                                     \
        "movsb\n"                                       \
        "2:"                                            \
        :"=&D" (to), "=&S" (from)                       \
        :"q" (n),"0" ((long) to),"1" ((long) from)      \
        : "%ecx","memory");                             \
}

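/* Copy lcnt 64-byte blocks with SSE: movups or movaps loads depending on
 * source alignment, and movntps non-temporal stores that bypass the cache
 * on the way to the destination.  Note that movntps itself requires a
 * 16-byte aligned destination; only the source may be unaligned here.
 * The "prefetch" argument is either a real prefetch mnemonic or "#",
 * which turns the prefetch lines into assembler comments so that the
 * final blocks don't prefetch past the end of the source.
 */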
#define SSE_CPY(prefetch,from,to,dummy,lcnt)            \
    if ((unsigned long) from & 15) {                    \
        __asm__ __volatile__ (                          \
            "1:\n"                                      \
            prefetch "320(%1)\n"                        \
            " movups (%1), %%xmm0\n"                    \
            " movups 16(%1), %%xmm1\n"                  \
            " movntps %%xmm0, (%0)\n"                   \
            " movntps %%xmm1, 16(%0)\n"                 \
            prefetch "352(%1)\n"                        \
            " movups 32(%1), %%xmm2\n"                  \
            " movups 48(%1), %%xmm3\n"                  \
            " movntps %%xmm2, 32(%0)\n"                 \
            " movntps %%xmm3, 48(%0)\n"                 \
            " addl $64,%0\n"                            \
            " addl $64,%1\n"                            \
            " decl %2\n"                                \
            " jne 1b\n"                                 \
            :"=&D"(to), "=&S"(from), "=&r"(dummy)       \
            :"0" (to), "1" (from), "2" (lcnt): "memory"); \
    } else {                                            \
        __asm__ __volatile__ (                          \
            "2:\n"                                      \
            prefetch "320(%1)\n"                        \
            " movaps (%1), %%xmm0\n"                    \
            " movaps 16(%1), %%xmm1\n"                  \
            " movntps %%xmm0, (%0)\n"                   \
            " movntps %%xmm1, 16(%0)\n"                 \
            prefetch "352(%1)\n"                        \
            " movaps 32(%1), %%xmm2\n"                  \
            " movaps 48(%1), %%xmm3\n"                  \
            " movntps %%xmm2, 32(%0)\n"                 \
            " movntps %%xmm3, 48(%0)\n"                 \
            " addl $64,%0\n"                            \
            " addl $64,%1\n"                            \
            " decl %2\n"                                \
            " jne 2b\n"                                 \
            :"=&D"(to), "=&S"(from), "=&r"(dummy)       \
            :"0" (to), "1" (from), "2" (lcnt): "memory"); \
    }

/* Copy sz bytes from "from" to "to" using SSE non-temporal stores.
 * "to" must be 16-byte aligned (movntps requirement); "from" may be
 * unaligned.
 */
void via_sse_memcpy(void *to,
                    const void *from,
                    size_t sz)
{
   int dummy;
   int lcnt = sz >> 6;        /* number of whole 64-byte blocks */
   int rest = sz & 63;        /* tail bytes */

   PREFETCH1(SSE_PREFETCH,from);

   /* Copy all but the last five blocks with prefetch running 320-352
    * bytes ahead of the loads.
    */
   if (lcnt > 5) {
      lcnt -= 5;
      SSE_CPY(SSE_PREFETCH,from,to,dummy,lcnt);
      lcnt = 5;
   }
   /* Copy the remaining blocks with prefetching disabled ("#" comments
    * out the prefetch lines), so we never prefetch beyond the source.
    */
   if (lcnt) {
      SSE_CPY("#",from,to,dummy,lcnt);
   }
   if (rest) small_memcpy(to, from, rest);
   FENCE;
}
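
/* Usage sketch (a hypothetical caller, not part of this driver):
 * uploading texture data to a 16-byte aligned, write-combined
 * destination mapping.
 *
 *     static void upload_teximage(void *dst_wc, const void *src,
 *                                 size_t bytes)
 *     {
 *         via_sse_memcpy(dst_wc, src, bytes);
 *     }
 *
 * The trailing FENCE (sfence) makes the non-temporal stores globally
 * visible before via_sse_memcpy returns, so the caller may kick off a
 * blit or DMA from the destination immediately afterwards.
 */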

#endif /* defined( USE_SSE_ASM ) */