r300: Cleaned up t_vir0 and t_vir1 slightly.
[mesa.git] / src / mesa / drivers / dri / r300 / r300_emit.c
1 /*
2 Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
3
4 The Weather Channel (TM) funded Tungsten Graphics to develop the
5 initial release of the Radeon 8500 driver under the XFree86 license.
6 This notice must be preserved.
7
8 Permission is hereby granted, free of charge, to any person obtaining
9 a copy of this software and associated documentation files (the
10 "Software"), to deal in the Software without restriction, including
11 without limitation the rights to use, copy, modify, merge, publish,
12 distribute, sublicense, and/or sell copies of the Software, and to
13 permit persons to whom the Software is furnished to do so, subject to
14 the following conditions:
15
16 The above copyright notice and this permission notice (including the
17 next paragraph) shall be included in all copies or substantial
18 portions of the Software.
19
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
24 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
28 **************************************************************************/
29
30 /**
31 * \file
32 *
33 * \author Keith Whitwell <keith@tungstengraphics.com>
34 */
35
36 #include "glheader.h"
37 #include "mtypes.h"
38 #include "colormac.h"
39 #include "imports.h"
40 #include "macros.h"
41 #include "image.h"
42
43 #include "swrast_setup/swrast_setup.h"
44 #include "math/m_translate.h"
45 #include "tnl/tnl.h"
46 #include "tnl/t_context.h"
47
48 #include "r300_context.h"
49 #include "radeon_ioctl.h"
50 #include "r300_state.h"
51 #include "r300_emit.h"
52 #include "r300_ioctl.h"
53
54 #ifdef USER_BUFFERS
55 #include "r300_mem.h"
56 #endif
57
58 #if SWIZZLE_X != R300_INPUT_ROUTE_SELECT_X || \
59 SWIZZLE_Y != R300_INPUT_ROUTE_SELECT_Y || \
60 SWIZZLE_Z != R300_INPUT_ROUTE_SELECT_Z || \
61 SWIZZLE_W != R300_INPUT_ROUTE_SELECT_W || \
62 SWIZZLE_ZERO != R300_INPUT_ROUTE_SELECT_ZERO || \
63 SWIZZLE_ONE != R300_INPUT_ROUTE_SELECT_ONE
64 #error Cannot change these!
65 #endif
66
67 #define DEBUG_ALL DEBUG_VERTS
68
/*
 * COPY_DWORDS(dst, src, nr): copy nr 32-bit words from src to dst and
 * advance dst past the words that were written.  dst must be a modifiable
 * pointer lvalue; src is not modified.
 */
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int __tmp;							\
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
			      : "0" (nr),				\
			        "D" ((long)(dst)),			\
			        "S" ((long)(src)) );			\
} while (0)
#else
/*
 * Portable fallback.  The word count is evaluated exactly once and every
 * macro argument is parenthesised, so expressions such as "a ? b : c" or
 * "n + 1" can be passed safely.  The locals carry a long prefix to avoid
 * shadowing variables of the caller.
 */
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	const int copy_dwords_n_ = (nr);				\
	int copy_dwords_i_;						\
	for (copy_dwords_i_ = 0; copy_dwords_i_ < copy_dwords_n_;	\
	     copy_dwords_i_++)						\
		(dst)[copy_dwords_i_] =					\
		    ((const int *)(src))[copy_dwords_i_];		\
	(dst) += copy_dwords_n_;					\
} while (0)
#endif
88
89 static void r300EmitVec4(GLcontext * ctx,
90 struct r300_dma_region *rvb,
91 GLvoid * data, int stride, int count)
92 {
93 int i;
94 int *out = (int *)(rvb->address + rvb->start);
95
96 if (RADEON_DEBUG & DEBUG_VERTS)
97 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
98 __FUNCTION__, count, stride, (void *)out, (void *)data);
99
100 if (stride == 4)
101 COPY_DWORDS(out, data, count);
102 else
103 for (i = 0; i < count; i++) {
104 out[0] = *(int *)data;
105 out++;
106 data += stride;
107 }
108 }
109
110 static void r300EmitVec8(GLcontext * ctx,
111 struct r300_dma_region *rvb,
112 GLvoid * data, int stride, int count)
113 {
114 int i;
115 int *out = (int *)(rvb->address + rvb->start);
116
117 if (RADEON_DEBUG & DEBUG_VERTS)
118 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
119 __FUNCTION__, count, stride, (void *)out, (void *)data);
120
121 if (stride == 8)
122 COPY_DWORDS(out, data, count * 2);
123 else
124 for (i = 0; i < count; i++) {
125 out[0] = *(int *)data;
126 out[1] = *(int *)(data + 4);
127 out += 2;
128 data += stride;
129 }
130 }
131
132 static void r300EmitVec12(GLcontext * ctx,
133 struct r300_dma_region *rvb,
134 GLvoid * data, int stride, int count)
135 {
136 int i;
137 int *out = (int *)(rvb->address + rvb->start);
138
139 if (RADEON_DEBUG & DEBUG_VERTS)
140 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
141 __FUNCTION__, count, stride, (void *)out, (void *)data);
142
143 if (stride == 12)
144 COPY_DWORDS(out, data, count * 3);
145 else
146 for (i = 0; i < count; i++) {
147 out[0] = *(int *)data;
148 out[1] = *(int *)(data + 4);
149 out[2] = *(int *)(data + 8);
150 out += 3;
151 data += stride;
152 }
153 }
154
155 static void r300EmitVec16(GLcontext * ctx,
156 struct r300_dma_region *rvb,
157 GLvoid * data, int stride, int count)
158 {
159 int i;
160 int *out = (int *)(rvb->address + rvb->start);
161
162 if (RADEON_DEBUG & DEBUG_VERTS)
163 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
164 __FUNCTION__, count, stride, (void *)out, (void *)data);
165
166 if (stride == 16)
167 COPY_DWORDS(out, data, count * 4);
168 else
169 for (i = 0; i < count; i++) {
170 out[0] = *(int *)data;
171 out[1] = *(int *)(data + 4);
172 out[2] = *(int *)(data + 8);
173 out[3] = *(int *)(data + 12);
174 out += 4;
175 data += stride;
176 }
177 }
178
179 static void r300EmitVec(GLcontext * ctx,
180 struct r300_dma_region *rvb,
181 GLvoid * data, int size, int stride, int count)
182 {
183 r300ContextPtr rmesa = R300_CONTEXT(ctx);
184
185 if (stride == 0) {
186 r300AllocDmaRegion(rmesa, rvb, size * 4, 4);
187 count = 1;
188 rvb->aos_offset = GET_START(rvb);
189 rvb->aos_stride = 0;
190 } else {
191 r300AllocDmaRegion(rmesa, rvb, size * count * 4, 4);
192 rvb->aos_offset = GET_START(rvb);
193 rvb->aos_stride = size;
194 }
195
196 switch (size) {
197 case 1:
198 r300EmitVec4(ctx, rvb, data, stride, count);
199 break;
200 case 2:
201 r300EmitVec8(ctx, rvb, data, stride, count);
202 break;
203 case 3:
204 r300EmitVec12(ctx, rvb, data, stride, count);
205 break;
206 case 4:
207 r300EmitVec16(ctx, rvb, data, stride, count);
208 break;
209 default:
210 assert(0);
211 _mesa_exit(-1);
212 break;
213 }
214
215 }
216
217 /* TODO: explain this... */
218 #define R300_VIR0_AOS_SIZE_SHIFT 0
219 #define R300_VIR0_AOS_INPUT_SHIFT 8
220 #define R300_VIR0_AOS_STOP_SHIFT 13
221 #define R300_VIR0_AOS_TYPE_SHIFT 14
222 #define R300_VIR0_HIGH_SHIFT 16
223
224 /*
225 * pack 4 elemets in a 16 bit integer.
226 *
227 * aos_size first 8
228 * input next 5
229 * 1 stop bit (whild gues)
230 * aos_type last 2
231 */
232 static inline GLuint t_vir_pack(GLvector4f ** dt, int *inputs, int i)
233 {
234 GLuint dw;
235 dw = (dt[i]->size - 1) << R300_VIR0_AOS_SIZE_SHIFT;
236 dw |= inputs[i] << R300_VIR0_AOS_INPUT_SHIFT;
237 #if 0
238 dw |= t_type(&dt[i]) << R300_VIR0_AOS_TYPE_SHIFT;
239 #endif
240 return dw;
241 }
242
243 static GLuint t_vir0(uint32_t * dst, GLvector4f ** dt, int *inputs,
244 GLint * tab, GLuint nr)
245 {
246 GLuint i, dw;
247
248 for (i = 0; i + 1 < nr; i += 2) {
249 dw = t_vir_pack(dt, inputs, tab[i]);
250 dw |= t_vir_pack(dt, inputs, tab[i + 1]) << R300_VIR0_HIGH_SHIFT;
251 if (i + 2 == nr) {
252 dw |= (1 << (R300_VIR0_AOS_STOP_SHIFT + R300_VIR0_HIGH_SHIFT));
253 }
254 dst[i >> 1] = dw;
255 }
256
257 if (nr & 1) {
258 dw = t_vir_pack(dt, inputs, tab[nr - 1]);
259 dw |= 1 << R300_VIR0_AOS_STOP_SHIFT;
260 dst[nr >> 1] = dw;
261 }
262
263 return (nr + 1) >> 1;
264 }
265
266 static GLuint t_swizzle(int swizzle[4])
267 {
268 return (swizzle[0] << R300_INPUT_ROUTE_X_SHIFT) |
269 (swizzle[1] << R300_INPUT_ROUTE_Y_SHIFT) |
270 (swizzle[2] << R300_INPUT_ROUTE_Z_SHIFT) |
271 (swizzle[3] << R300_INPUT_ROUTE_W_SHIFT);
272 }
273
274 static GLuint t_vir1(uint32_t * dst, int swizzle[][4], GLuint nr)
275 {
276 GLuint i;
277
278 for (i = 0; i + 1 < nr; i += 2) {
279 dst[i >> 1] = t_swizzle(swizzle[i]) | R300_INPUT_ROUTE_ENABLE;
280 dst[i >> 1] |= (t_swizzle(swizzle[i + 1]) | R300_INPUT_ROUTE_ENABLE) << 16;
281 }
282
283 if (nr & 1) {
284 dst[nr >> 1] = t_swizzle(swizzle[nr - 1]) | R300_INPUT_ROUTE_ENABLE;
285 }
286
287 return (nr + 1) >> 1;
288 }
289
290 static GLuint t_vic(GLcontext * ctx, GLuint InputsRead)
291 {
292 r300ContextPtr r300 = R300_CONTEXT(ctx);
293 GLuint i, vic_1 = 0;
294
295 if (InputsRead & (1 << VERT_ATTRIB_POS))
296 vic_1 |= R300_INPUT_CNTL_POS;
297
298 if (InputsRead & (1 << VERT_ATTRIB_NORMAL))
299 vic_1 |= R300_INPUT_CNTL_NORMAL;
300
301 if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
302 vic_1 |= R300_INPUT_CNTL_COLOR;
303
304 r300->state.texture.tc_count = 0;
305 for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
306 if (InputsRead & (1 << (VERT_ATTRIB_TEX0 + i))) {
307 r300->state.texture.tc_count++;
308 vic_1 |= R300_INPUT_CNTL_TC0 << i;
309 }
310
311 return vic_1;
312 }
313
314 /* Emit vertex data to GART memory
315 * Route inputs to the vertex processor
316 * This function should never return R300_FALLBACK_TCL when using software tcl.
317 */
318
319 int r300EmitArrays(GLcontext * ctx)
320 {
321 r300ContextPtr rmesa = R300_CONTEXT(ctx);
322 r300ContextPtr r300 = rmesa;
323 TNLcontext *tnl = TNL_CONTEXT(ctx);
324 struct vertex_buffer *vb = &tnl->vb;
325 GLuint nr;
326 GLuint count = vb->Count;
327 GLuint i;
328 GLuint InputsRead = 0, OutputsWritten = 0;
329 int *inputs = NULL;
330 int vir_inputs[VERT_ATTRIB_MAX];
331 GLint tab[VERT_ATTRIB_MAX];
332 int swizzle[VERT_ATTRIB_MAX][4];
333
334 if (hw_tcl_on) {
335 struct r300_vertex_program *prog =
336 (struct r300_vertex_program *)
337 CURRENT_VERTEX_SHADER(ctx);
338 inputs = prog->inputs;
339 InputsRead = CURRENT_VERTEX_SHADER(ctx)->key.InputsRead;
340 OutputsWritten = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten;
341 } else {
342 DECLARE_RENDERINPUTS(inputs_bitset);
343 inputs = r300->state.sw_tcl_inputs;
344
345 RENDERINPUTS_COPY(inputs_bitset,
346 TNL_CONTEXT(ctx)->render_inputs_bitset);
347
348 assert(RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_POS));
349 InputsRead |= 1 << VERT_ATTRIB_POS;
350 OutputsWritten |= 1 << VERT_RESULT_HPOS;
351
352 assert(RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_NORMAL)
353 == 0);
354
355 assert(RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_COLOR0));
356 InputsRead |= 1 << VERT_ATTRIB_COLOR0;
357 OutputsWritten |= 1 << VERT_RESULT_COL0;
358
359 if (RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_COLOR1)) {
360 InputsRead |= 1 << VERT_ATTRIB_COLOR1;
361 OutputsWritten |= 1 << VERT_RESULT_COL1;
362 }
363
364 for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
365 if (RENDERINPUTS_TEST
366 (inputs_bitset, _TNL_ATTRIB_TEX(i))) {
367 InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i);
368 OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
369 }
370
371 for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++)
372 if (InputsRead & (1 << i))
373 inputs[i] = nr++;
374 else
375 inputs[i] = -1;
376
377 if (!
378 (r300->radeon.radeonScreen->
379 chip_flags & RADEON_CHIPSET_TCL)) {
380 /* Fixed, apply to vir0 only */
381 memcpy(vir_inputs, inputs,
382 VERT_ATTRIB_MAX * sizeof(int));
383 inputs = vir_inputs;
384
385 if (InputsRead & VERT_ATTRIB_POS)
386 inputs[VERT_ATTRIB_POS] = 0;
387
388 if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
389 inputs[VERT_ATTRIB_COLOR0] = 2;
390
391 if (InputsRead & (1 << VERT_ATTRIB_COLOR1))
392 inputs[VERT_ATTRIB_COLOR1] = 3;
393
394 for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++)
395 if (InputsRead & (1 << i))
396 inputs[i] = 6 + (i - VERT_ATTRIB_TEX0);
397 }
398
399 RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset,
400 inputs_bitset);
401 }
402
403 assert(InputsRead);
404 assert(OutputsWritten);
405
406 for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++)
407 if (InputsRead & (1 << i))
408 tab[nr++] = i;
409
410 if (nr > R300_MAX_AOS_ARRAYS)
411 return R300_FALLBACK_TCL;
412
413 for (i = 0; i < nr; i++) {
414 int ci;
415 int comp_size, fix, found = 0;
416
417 swizzle[i][0] = SWIZZLE_ZERO;
418 swizzle[i][1] = SWIZZLE_ZERO;
419 swizzle[i][2] = SWIZZLE_ZERO;
420 swizzle[i][3] = SWIZZLE_ONE;
421
422 for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++)
423 swizzle[i][ci] = ci;
424
425 if (r300IsGartMemory(rmesa, vb->AttribPtr[tab[i]]->data, 4)) {
426 if (vb->AttribPtr[tab[i]]->stride % 4)
427 return R300_FALLBACK_TCL;
428
429 rmesa->state.aos[i].address =
430 (void *)(vb->AttribPtr[tab[i]]->data);
431 rmesa->state.aos[i].start = 0;
432 rmesa->state.aos[i].aos_offset =
433 r300GartOffsetFromVirtual(rmesa,
434 vb->
435 AttribPtr[tab[i]]->data);
436 rmesa->state.aos[i].aos_stride =
437 vb->AttribPtr[tab[i]]->stride / 4;
438
439 rmesa->state.aos[i].aos_size =
440 vb->AttribPtr[tab[i]]->size;
441 } else {
442 r300EmitVec(ctx, &rmesa->state.aos[i],
443 vb->AttribPtr[tab[i]]->data,
444 vb->AttribPtr[tab[i]]->size,
445 vb->AttribPtr[tab[i]]->stride, count);
446 }
447
448 rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size;
449
450 comp_size = _mesa_sizeof_type(GL_FLOAT);
451
452 for (fix = 0; fix <= 4 - vb->AttribPtr[tab[i]]->size; fix++) {
453 if ((rmesa->state.aos[i].aos_offset -
454 comp_size * fix) % 4)
455 continue;
456
457 found = 1;
458 break;
459 }
460
461 if (found) {
462 if (fix > 0) {
463 WARN_ONCE("Feeling lucky?\n");
464 }
465
466 rmesa->state.aos[i].aos_offset -= comp_size * fix;
467
468 for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++)
469 swizzle[i][ci] += fix;
470 } else {
471 WARN_ONCE
472 ("Cannot handle offset %x with stride %d, comp %d\n",
473 rmesa->state.aos[i].aos_offset,
474 rmesa->state.aos[i].aos_stride,
475 vb->AttribPtr[tab[i]]->size);
476 return R300_FALLBACK_TCL;
477 }
478 }
479
480 /* setup INPUT_ROUTE */
481 R300_STATECHANGE(r300, vir[0]);
482 ((drm_r300_cmd_header_t *) r300->hw.vir[0].cmd)->packet0.count =
483 t_vir0(&r300->hw.vir[0].cmd[R300_VIR_CNTL_0], vb->AttribPtr,
484 inputs, tab, nr);
485
486 R300_STATECHANGE(r300, vir[1]);
487 ((drm_r300_cmd_header_t *) r300->hw.vir[1].cmd)->packet0.count =
488 t_vir1(&r300->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, nr);
489
490 /* Set up input_cntl */
491 /* I don't think this is needed for vertex buffers, but it doesn't hurt anything */
492 R300_STATECHANGE(r300, vic);
493 r300->hw.vic.cmd[R300_VIC_CNTL_0] = 0x5555; /* Hard coded value, no idea what it means */
494 r300->hw.vic.cmd[R300_VIC_CNTL_1] = t_vic(ctx, InputsRead);
495
496 /* Stage 3: VAP output */
497
498 R300_STATECHANGE(r300, vof);
499
500 r300->hw.vof.cmd[R300_VOF_CNTL_0] = 0;
501 r300->hw.vof.cmd[R300_VOF_CNTL_1] = 0;
502
503 if (OutputsWritten & (1 << VERT_RESULT_HPOS))
504 r300->hw.vof.cmd[R300_VOF_CNTL_0] |=
505 R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
506
507 if (OutputsWritten & (1 << VERT_RESULT_COL0))
508 r300->hw.vof.cmd[R300_VOF_CNTL_0] |=
509 R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT;
510
511 if (OutputsWritten & (1 << VERT_RESULT_COL1))
512 r300->hw.vof.cmd[R300_VOF_CNTL_0] |=
513 R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT;
514
515 #if 0
516 if (OutputsWritten & (1 << VERT_RESULT_BFC0))
517 r300->hw.vof.cmd[R300_VOF_CNTL_0] |=
518 R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT;
519
520 if (OutputsWritten & (1 << VERT_RESULT_BFC1))
521 r300->hw.vof.cmd[R300_VOF_CNTL_0] |=
522 R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT;
523
524 if (OutputsWritten & (1 << VERT_RESULT_FOGC)) ;
525 #endif
526
527 if (OutputsWritten & (1 << VERT_RESULT_PSIZ))
528 r300->hw.vof.cmd[R300_VOF_CNTL_0] |=
529 R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
530
531 for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
532 if (OutputsWritten & (1 << (VERT_RESULT_TEX0 + i)))
533 r300->hw.vof.cmd[R300_VOF_CNTL_1] |= (4 << (3 * i));
534
535 rmesa->state.aos_count = nr;
536
537 return R300_FALLBACK_NONE;
538 }
539
#ifdef USER_BUFFERS
/**
 * Call r300_mem_use() on the element buffer and on every one of the
 * first aos_count array regions that has a buffer attached.
 */
void r300UseArrays(GLcontext * ctx)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	int idx = 0;

	if (rmesa->state.elt_dma.buf != NULL) {
		r300_mem_use(rmesa, rmesa->state.elt_dma.buf->id);
	}

	while (idx < rmesa->state.aos_count) {
		if (rmesa->state.aos[idx].buf != NULL) {
			r300_mem_use(rmesa, rmesa->state.aos[idx].buf->id);
		}
		idx++;
	}
}
#endif
555
556 void r300ReleaseArrays(GLcontext * ctx)
557 {
558 r300ContextPtr rmesa = R300_CONTEXT(ctx);
559 int i;
560
561 r300ReleaseDmaRegion(rmesa, &rmesa->state.elt_dma, __FUNCTION__);
562 for (i = 0; i < rmesa->state.aos_count; i++) {
563 r300ReleaseDmaRegion(rmesa, &rmesa->state.aos[i], __FUNCTION__);
564 }
565 }