R6xx/R7xx: WIP r6xx-rewrite code
[mesa.git] / src / mesa / drivers / dri / r600 / r600_emit.c
1 /**************************************************************************
2
3 Copyright 2008, 2009 Advanced Micro Devices Inc. (AMD)
4
5 Copyright (C) Advanced Micro Devices Inc. (AMD) 2009. All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining
8 a copy of this software and associated documentation files (the
9 "Software"), to deal in the Software without restriction, including
10 without limitation the rights to use, copy, modify, merge, publish,
11 distribute, sublicense, and/or sell copies of the Software, and to
12 permit persons to whom the Software is furnished to do so, subject to
13 the following conditions:
14
15 The above copyright notice and this permission notice (including the
16 next paragraph) shall be included in all copies or substantial
17 portions of the Software.
18
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
27 **************************************************************************/
28
29 /*
30 * Authors:
31 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
32 * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
33 */
34
35 #include "main/glheader.h"
36 #include "main/mtypes.h"
37 #include "main/colormac.h"
38 #include "main/imports.h"
39 #include "main/macros.h"
40 #include "main/image.h"
41
42 #include "swrast_setup/swrast_setup.h"
43 #include "math/m_translate.h"
44 #include "tnl/tnl.h"
45 #include "tnl/t_context.h"
46
47 #include "r600_context.h"
48 #include "r600_emit.h"
49
/*
 * COPY_DWORDS(dst, src, nr): copy nr 32-bit words from src to dst.
 *
 * dst must be a modifiable pointer lvalue in both variants: the x86 variant
 * binds it as an asm output operand ("=D"), and the portable variant
 * advances it past the words it has written.
 *
 * In the portable variant every argument is parenthesized and nr is
 * evaluated exactly once, so expression arguments such as "p + 4" or
 * "count * 2" behave as the caller expects.  The loop variables use
 * macro-private names so they cannot shadow identifiers that appear in
 * the arguments.
 */
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )                                     \
do {                                                                    \
    int __tmp;                                                          \
    __asm__ __volatile__( "rep ; movsl"                                 \
                          : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)     \
                          : "0" (nr),                                   \
                            "D" ((long)(dst)),                          \
                            "S" ((long)(src)) );                        \
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )                                     \
do {                                                                    \
    const int copy_dwords_n_ = (nr);                                    \
    int copy_dwords_i_;                                                 \
    for ( copy_dwords_i_ = 0 ;                                          \
          copy_dwords_i_ < copy_dwords_n_ ;                             \
          copy_dwords_i_++ )                                            \
        (dst)[copy_dwords_i_] = ((int *)(src))[copy_dwords_i_];         \
    (dst) += copy_dwords_n_;                                            \
} while (0)
#endif
69
70 static void r600EmitVec4(uint32_t *out, GLvoid * data, int stride, int count)
71 {
72 int i;
73
74 if (RADEON_DEBUG & DEBUG_VERTS)
75 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
76 __FUNCTION__, count, stride, (void *)out, (void *)data);
77
78 if (stride == 4)
79 COPY_DWORDS(out, data, count);
80 else
81 for (i = 0; i < count; i++) {
82 out[0] = *(int *)data;
83 out++;
84 data += stride;
85 }
86 }
87
88 static void r600EmitVec8(uint32_t *out, GLvoid * data, int stride, int count)
89 {
90 int i;
91
92 if (RADEON_DEBUG & DEBUG_VERTS)
93 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
94 __FUNCTION__, count, stride, (void *)out, (void *)data);
95
96 if (stride == 8)
97 COPY_DWORDS(out, data, count * 2);
98 else
99 for (i = 0; i < count; i++) {
100 out[0] = *(int *)data;
101 out[1] = *(int *)(data + 4);
102 out += 2;
103 data += stride;
104 }
105 }
106
107 static void r600EmitVec12(uint32_t *out, GLvoid * data, int stride, int count)
108 {
109 int i;
110
111 if (RADEON_DEBUG & DEBUG_VERTS)
112 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
113 __FUNCTION__, count, stride, (void *)out, (void *)data);
114
115 if (stride == 12) {
116 COPY_DWORDS(out, data, count * 3);
117 }
118 else
119 for (i = 0; i < count; i++) {
120 out[0] = *(int *)data;
121 out[1] = *(int *)(data + 4);
122 out[2] = *(int *)(data + 8);
123 out += 3;
124 data += stride;
125 }
126 }
127
128 static void r600EmitVec16(uint32_t *out, GLvoid * data, int stride, int count)
129 {
130 int i;
131
132 if (RADEON_DEBUG & DEBUG_VERTS)
133 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
134 __FUNCTION__, count, stride, (void *)out, (void *)data);
135
136 if (stride == 16)
137 COPY_DWORDS(out, data, count * 4);
138 else
139 for (i = 0; i < count; i++) {
140 out[0] = *(int *)data;
141 out[1] = *(int *)(data + 4);
142 out[2] = *(int *)(data + 8);
143 out[3] = *(int *)(data + 12);
144 out += 4;
145 data += stride;
146 }
147 }
148
149 /* Emit vertex data to GART memory
150 * Route inputs to the vertex processor
151 * This function should never return R600_FALLBACK_TCL when using software tcl.
152 */
153 int r600EmitArrays(GLcontext * ctx)
154 {
155
156 return R600_FALLBACK_NONE;
157 }
158
159 void r600EmitCacheFlush(r600ContextPtr rmesa)
160 {
161 BATCH_LOCALS(&rmesa->radeon);
162 /*
163 BEGIN_BATCH_NO_AUTOSTATE(4);
164 OUT_BATCH_REGVAL(R600_RB3D_DSTCACHE_CTLSTAT,
165 R600_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
166 R600_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
167 OUT_BATCH_REGVAL(R600_ZB_ZCACHE_CTLSTAT,
168 R600_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
169 R600_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
170 END_BATCH();
171 COMMIT_BATCH();
172 */
173 }
174
175 GLboolean r600EmitShader(GLcontext * ctx,
176 void ** shaderbo,
177 GLvoid * data,
178 int sizeinDWORD)
179 {
180 radeonContextPtr radeonctx = RADEON_CONTEXT(ctx);
181
182 struct radeon_bo * pbo;
183 uint32_t *out;
184
185 shader_again_alloc:
186 pbo = radeon_bo_open(radeonctx->radeonScreen->bom,
187 0,
188 sizeinDWORD * 4,
189 256,
190 RADEON_GEM_DOMAIN_GTT,
191 0);
192
193 if (!pbo)
194 {
195 rcommonFlushCmdBuf(radeonctx, __FUNCTION__);
196 goto shader_again_alloc;
197 }
198
199 radeon_validate_bo(radeonctx, pbo, RADEON_GEM_DOMAIN_GTT, 0);
200
201 if (radeon_revalidate_bos(radeonctx->glCtx) == GL_FALSE)
202 {
203 fprintf(stderr,"failure to revalidate BOs - badness\n");
204 }
205
206 radeon_bo_map(pbo, 1);
207
208 radeon_bo_ref(pbo);
209
210 out = (uint32_t*)(pbo->ptr);
211
212 memcpy(out, data, sizeinDWORD * 4);
213
214 *shaderbo = (void*)pbo;
215
216 return GL_TRUE;
217 }
218
219 GLboolean r600DeleteShader(GLcontext * ctx,
220 void * shaderbo)
221 {
222 struct radeon_bo * pbo = (struct radeon_bo *)shaderbo;
223
224 radeon_bo_unmap(pbo);
225 radeon_bo_unref(pbo); /* when bo->cref <= 0, bo will be bo_free */
226
227 return GL_TRUE;
228 }
229
230 GLboolean r600EmitVec(GLcontext * ctx,
231 struct radeon_aos *aos,
232 GLvoid * data,
233 int size,
234 int stride,
235 int count)
236 {
237 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
238 uint32_t *out;
239
240 if (stride == 0)
241 {
242 radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
243 aos->stride = 0;
244 }
245 else
246 {
247 radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
248 aos->stride = size;
249 }
250
251 aos->components = size;
252 aos->count = count;
253
254 out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
255 switch (size) {
256 case 1: r600EmitVec4(out, data, stride, count); break;
257 case 2: r600EmitVec8(out, data, stride, count); break;
258 case 3: r600EmitVec12(out, data, stride, count); break;
259 case 4: r600EmitVec16(out, data, stride, count); break;
260 default:
261 assert(0);
262 break;
263 }
264
265 return GL_TRUE;
266 }
267
268 void r600ReleaseVec(GLcontext * ctx)
269 {
270 radeonReleaseArrays(ctx, ~0);
271 }
272
273 void r600FreeDmaRegion(context_t *context,
274 void * shaderbo)
275 {
276 struct radeon_bo *pbo = (struct radeon_bo *)shaderbo;
277 if(pbo)
278 {
279 radeon_bo_unref(pbo);
280 }
281 }