r300: Removed the radeon_vertex_buffer structure.
[mesa.git] / src / mesa / drivers / dri / r300 / r300_emit.c
1 /*
2 Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
3
4 The Weather Channel (TM) funded Tungsten Graphics to develop the
5 initial release of the Radeon 8500 driver under the XFree86 license.
6 This notice must be preserved.
7
8 Permission is hereby granted, free of charge, to any person obtaining
9 a copy of this software and associated documentation files (the
10 "Software"), to deal in the Software without restriction, including
11 without limitation the rights to use, copy, modify, merge, publish,
12 distribute, sublicense, and/or sell copies of the Software, and to
13 permit persons to whom the Software is furnished to do so, subject to
14 the following conditions:
15
16 The above copyright notice and this permission notice (including the
17 next paragraph) shall be included in all copies or substantial
18 portions of the Software.
19
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
24 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
28 **************************************************************************/
29
30 /**
31 * \file
32 *
33 * \author Keith Whitwell <keith@tungstengraphics.com>
34 */
35
36 #include "glheader.h"
37 #include "mtypes.h"
38 #include "colormac.h"
39 #include "imports.h"
40 #include "macros.h"
41 #include "image.h"
42
43 #include "swrast_setup/swrast_setup.h"
44 #include "math/m_translate.h"
45 #include "tnl/tnl.h"
46 #include "tnl/t_context.h"
47
48 #include "r300_context.h"
49 #include "radeon_ioctl.h"
50 #include "r300_state.h"
51 #include "r300_emit.h"
52 #include "r300_ioctl.h"
53
54 #ifdef USER_BUFFERS
55 #include "r300_mem.h"
56 #endif
57
58 #if SWIZZLE_X != R300_INPUT_ROUTE_SELECT_X || \
59 SWIZZLE_Y != R300_INPUT_ROUTE_SELECT_Y || \
60 SWIZZLE_Z != R300_INPUT_ROUTE_SELECT_Z || \
61 SWIZZLE_W != R300_INPUT_ROUTE_SELECT_W || \
62 SWIZZLE_ZERO != R300_INPUT_ROUTE_SELECT_ZERO || \
63 SWIZZLE_ONE != R300_INPUT_ROUTE_SELECT_ONE
64 #error Cannot change these!
65 #endif
66
67 #define DEBUG_ALL DEBUG_VERTS
68
/*
 * COPY_DWORDS(dst, src, nr): copy nr 32-bit words from src to dst.
 *
 * dst must be a modifiable pointer lvalue; it is updated by the macro.  In
 * the portable variant it is left pointing just past the last word written;
 * in the x86 variant the destination register is written back into it after
 * the "rep movsl".
 * src may be any pointer type.
 */
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int __tmp;							\
	/* The "memory" clobber tells the compiler that the string	\
	 * instruction reads and writes memory not named as operands. */ \
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
			      : "0" (nr),				\
			        "D" ((long)(dst)),			\
				"S" ((long)(src))			\
			      : "memory" );				\
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	/* Evaluate nr exactly once, and keep the locals out of the	\
	 * way of identifiers used in the macro arguments. */		\
	const int copy_dwords_n_ = (nr);				\
	int copy_dwords_j_;						\
	for ( copy_dwords_j_ = 0 ;					\
	      copy_dwords_j_ < copy_dwords_n_ ;				\
	      copy_dwords_j_++ )					\
		(dst)[copy_dwords_j_] =					\
			((const int *)(src))[copy_dwords_j_];		\
	(dst) += copy_dwords_n_;					\
} while (0)
#endif
88
89 static void r300EmitVec4(GLcontext * ctx,
90 struct r300_dma_region *rvb,
91 GLvoid * data, int stride, int count)
92 {
93 int i;
94 int *out = (int *)(rvb->address + rvb->start);
95
96 if (RADEON_DEBUG & DEBUG_VERTS)
97 fprintf(stderr, "%s count %d stride %d\n",
98 __FUNCTION__, count, stride);
99
100 if (stride == 4)
101 COPY_DWORDS(out, data, count);
102 else
103 for (i = 0; i < count; i++) {
104 out[0] = *(int *)data;
105 out++;
106 data += stride;
107 }
108 }
109
110 static void r300EmitVec8(GLcontext * ctx,
111 struct r300_dma_region *rvb,
112 GLvoid * data, int stride, int count)
113 {
114 int i;
115 int *out = (int *)(rvb->address + rvb->start);
116
117 if (RADEON_DEBUG & DEBUG_VERTS)
118 fprintf(stderr, "%s count %d stride %d\n",
119 __FUNCTION__, count, stride);
120
121 if (stride == 8)
122 COPY_DWORDS(out, data, count * 2);
123 else
124 for (i = 0; i < count; i++) {
125 out[0] = *(int *)data;
126 out[1] = *(int *)(data + 4);
127 out += 2;
128 data += stride;
129 }
130 }
131
132 static void r300EmitVec12(GLcontext * ctx,
133 struct r300_dma_region *rvb,
134 GLvoid * data, int stride, int count)
135 {
136 int i;
137 int *out = (int *)(rvb->address + rvb->start);
138
139 if (RADEON_DEBUG & DEBUG_VERTS)
140 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
141 __FUNCTION__, count, stride, (void *)out, (void *)data);
142
143 if (stride == 12)
144 COPY_DWORDS(out, data, count * 3);
145 else
146 for (i = 0; i < count; i++) {
147 out[0] = *(int *)data;
148 out[1] = *(int *)(data + 4);
149 out[2] = *(int *)(data + 8);
150 out += 3;
151 data += stride;
152 }
153 }
154
155 static void r300EmitVec16(GLcontext * ctx,
156 struct r300_dma_region *rvb,
157 GLvoid * data, int stride, int count)
158 {
159 int i;
160 int *out = (int *)(rvb->address + rvb->start);
161
162 if (RADEON_DEBUG & DEBUG_VERTS)
163 fprintf(stderr, "%s count %d stride %d\n",
164 __FUNCTION__, count, stride);
165
166 if (stride == 16)
167 COPY_DWORDS(out, data, count * 4);
168 else
169 for (i = 0; i < count; i++) {
170 out[0] = *(int *)data;
171 out[1] = *(int *)(data + 4);
172 out[2] = *(int *)(data + 8);
173 out[3] = *(int *)(data + 12);
174 out += 4;
175 data += stride;
176 }
177 }
178
179 static void r300EmitVec(GLcontext * ctx,
180 struct r300_dma_region *rvb,
181 GLvoid * data, int size, int stride, int count)
182 {
183 r300ContextPtr rmesa = R300_CONTEXT(ctx);
184
185 if (RADEON_DEBUG & DEBUG_VERTS)
186 fprintf(stderr, "%s count %d size %d stride %d\n",
187 __FUNCTION__, count, size, stride);
188
189 /* Gets triggered when playing with future_hw_tcl_on ... */
190 //assert(!rvb->buf);
191
192 if (stride == 0) {
193 r300AllocDmaRegion(rmesa, rvb, size * 4, 4);
194 count = 1;
195 rvb->aos_offset = GET_START(rvb);
196 rvb->aos_stride = 0;
197 } else {
198 r300AllocDmaRegion(rmesa, rvb, size * count * 4, 4); /* alignment? */
199 rvb->aos_offset = GET_START(rvb);
200 rvb->aos_stride = size;
201 }
202
203 /* Emit the data
204 */
205 switch (size) {
206 case 1:
207 r300EmitVec4(ctx, rvb, data, stride, count);
208 break;
209 case 2:
210 r300EmitVec8(ctx, rvb, data, stride, count);
211 break;
212 case 3:
213 r300EmitVec12(ctx, rvb, data, stride, count);
214 break;
215 case 4:
216 r300EmitVec16(ctx, rvb, data, stride, count);
217 break;
218 default:
219 assert(0);
220 _mesa_exit(-1);
221 break;
222 }
223
224 }
225
226 #define R300_VIR0_AOS_SIZE_SHIFT 0
227 #define R300_VIR0_AOS_INPUT_SHIFT 8
228 #define R300_VIR0_AOS_STOP_SHIFT 13
229 #define R300_VIR0_AOS_TYPE_SHIFT 14
230 #define R300_VIR0_HIGH_SHIFT 16
231
232 // Pack 4 elemets in a 16 bit (aos_size first 8, input next 5, 1 stop bit(Whild gues), aos_type last 2);
233 static inline GLuint t_vir_pack(GLvector4f ** dt, int *inputs, int i)
234 {
235 GLuint dw;
236 dw = (dt[i]->size - 1) << R300_VIR0_AOS_SIZE_SHIFT;
237 dw |= inputs[i] << R300_VIR0_AOS_INPUT_SHIFT;
238 //dw |= t_type(&dt[i]) << R300_VIR0_AOS_TYPE_SHIFT;
239 return dw;
240 }
241
242 static GLuint t_vir0(uint32_t * dst, GLvector4f ** dt, int *inputs,
243 GLint * tab, GLuint nr)
244 {
245 GLuint i, dw, dwInternel;
246
247 for (i = 0; i + 1 < nr; i += 2) {
248 dw = t_vir_pack(dt, inputs, tab[i]);
249 dwInternel = t_vir_pack(dt, inputs, tab[i + 1]);
250 dw |= dwInternel << R300_VIR0_HIGH_SHIFT;
251
252 if (i + 2 == nr) {
253 dw |=
254 (1 <<
255 (R300_VIR0_AOS_STOP_SHIFT + R300_VIR0_HIGH_SHIFT));
256 }
257 dst[i >> 1] = dw; // Is the same as i/2
258 }
259
260 if (nr & 1) {
261 dw = t_vir_pack(dt, inputs, tab[nr - 1]);
262 dw |= 1 << R300_VIR0_AOS_STOP_SHIFT;
263
264 dst[nr >> 1] = dw;
265 }
266
267 return (nr + 1) >> 1; // Is the same as (nr+1)/2
268 }
269
270 static GLuint t_swizzle(int swizzle[4])
271 {
272 return (swizzle[0] << R300_INPUT_ROUTE_X_SHIFT) |
273 (swizzle[1] << R300_INPUT_ROUTE_Y_SHIFT) |
274 (swizzle[2] << R300_INPUT_ROUTE_Z_SHIFT) |
275 (swizzle[3] << R300_INPUT_ROUTE_W_SHIFT);
276 }
277
278 static GLuint t_vir1(uint32_t * dst, int swizzle[][4], GLuint nr)
279 {
280 GLuint i;
281
282 for (i = 0; i + 1 < nr; i += 2) {
283 dst[i >> 1] = t_swizzle(swizzle[i]) | R300_INPUT_ROUTE_ENABLE;
284 dst[i >> 1] |=
285 (t_swizzle(swizzle[i + 1]) | R300_INPUT_ROUTE_ENABLE)
286 << 16;
287 }
288
289 if (nr & 1)
290 dst[nr >> 1] =
291 t_swizzle(swizzle[nr - 1]) | R300_INPUT_ROUTE_ENABLE;
292
293 return (nr + 1) >> 1;
294 }
295
296 static GLuint t_vic(GLcontext * ctx, GLuint InputsRead)
297 {
298 r300ContextPtr r300 = R300_CONTEXT(ctx);
299 GLuint i, vic_1 = 0;
300
301 if (InputsRead & (1 << VERT_ATTRIB_POS))
302 vic_1 |= R300_INPUT_CNTL_POS;
303
304 if (InputsRead & (1 << VERT_ATTRIB_NORMAL))
305 vic_1 |= R300_INPUT_CNTL_NORMAL;
306
307 if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
308 vic_1 |= R300_INPUT_CNTL_COLOR;
309
310 r300->state.texture.tc_count = 0;
311 for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
312 if (InputsRead & (1 << (VERT_ATTRIB_TEX0 + i))) {
313 r300->state.texture.tc_count++;
314 vic_1 |= R300_INPUT_CNTL_TC0 << i;
315 }
316
317 return vic_1;
318 }
319
320 /* Emit vertex data to GART memory
321 * Route inputs to the vertex processor
322 * This function should never return R300_FALLBACK_TCL when using software tcl.
323 */
324
325 int r300EmitArrays(GLcontext * ctx)
326 {
327 r300ContextPtr rmesa = R300_CONTEXT(ctx);
328 r300ContextPtr r300 = rmesa;
329 TNLcontext *tnl = TNL_CONTEXT(ctx);
330 struct vertex_buffer *vb = &tnl->vb;
331 GLuint nr;
332 GLuint count = vb->Count;
333 GLuint i;
334 GLuint InputsRead = 0, OutputsWritten = 0;
335 int *inputs = NULL;
336 int vir_inputs[VERT_ATTRIB_MAX];
337 GLint tab[VERT_ATTRIB_MAX];
338 int swizzle[VERT_ATTRIB_MAX][4];
339
340 if (hw_tcl_on) {
341 struct r300_vertex_program *prog =
342 (struct r300_vertex_program *)
343 CURRENT_VERTEX_SHADER(ctx);
344 inputs = prog->inputs;
345 InputsRead = CURRENT_VERTEX_SHADER(ctx)->key.InputsRead;
346 OutputsWritten = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten;
347 } else {
348 DECLARE_RENDERINPUTS(inputs_bitset);
349 inputs = r300->state.sw_tcl_inputs;
350
351 RENDERINPUTS_COPY(inputs_bitset,
352 TNL_CONTEXT(ctx)->render_inputs_bitset);
353
354 assert(RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_POS));
355 InputsRead |= 1 << VERT_ATTRIB_POS;
356 OutputsWritten |= 1 << VERT_RESULT_HPOS;
357
358 assert(RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_NORMAL)
359 == 0);
360
361 assert(RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_COLOR0));
362 InputsRead |= 1 << VERT_ATTRIB_COLOR0;
363 OutputsWritten |= 1 << VERT_RESULT_COL0;
364
365 if (RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_COLOR1)) {
366 InputsRead |= 1 << VERT_ATTRIB_COLOR1;
367 OutputsWritten |= 1 << VERT_RESULT_COL1;
368 }
369
370 for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
371 if (RENDERINPUTS_TEST
372 (inputs_bitset, _TNL_ATTRIB_TEX(i))) {
373 InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i);
374 OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
375 }
376
377 for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++)
378 if (InputsRead & (1 << i))
379 inputs[i] = nr++;
380 else
381 inputs[i] = -1;
382
383 if (!
384 (r300->radeon.radeonScreen->
385 chip_flags & RADEON_CHIPSET_TCL)) {
386 /* Fixed, apply to vir0 only */
387 memcpy(vir_inputs, inputs,
388 VERT_ATTRIB_MAX * sizeof(int));
389 inputs = vir_inputs;
390
391 if (InputsRead & VERT_ATTRIB_POS)
392 inputs[VERT_ATTRIB_POS] = 0;
393
394 if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
395 inputs[VERT_ATTRIB_COLOR0] = 2;
396
397 if (InputsRead & (1 << VERT_ATTRIB_COLOR1))
398 inputs[VERT_ATTRIB_COLOR1] = 3;
399
400 for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++)
401 if (InputsRead & (1 << i))
402 inputs[i] = 6 + (i - VERT_ATTRIB_TEX0);
403 }
404
405 RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset,
406 inputs_bitset);
407 }
408 assert(InputsRead);
409 assert(OutputsWritten);
410
411 for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++)
412 if (InputsRead & (1 << i))
413 tab[nr++] = i;
414
415 if (nr > R300_MAX_AOS_ARRAYS)
416 return R300_FALLBACK_TCL;
417
418 for (i = 0; i < nr; i++) {
419 int ci;
420 int comp_size, fix, found = 0;
421
422 swizzle[i][0] = SWIZZLE_ZERO;
423 swizzle[i][1] = SWIZZLE_ZERO;
424 swizzle[i][2] = SWIZZLE_ZERO;
425 swizzle[i][3] = SWIZZLE_ONE;
426
427 for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++)
428 swizzle[i][ci] = ci;
429
430 if (r300IsGartMemory(rmesa, vb->AttribPtr[tab[i]]->data,
431 /*(count-1)*stride */ 4)) {
432 if (vb->AttribPtr[tab[i]]->stride % 4)
433 return R300_FALLBACK_TCL;
434
435 rmesa->state.aos[i].address =
436 (void *)(vb->AttribPtr[tab[i]]->data);
437 rmesa->state.aos[i].start = 0;
438 rmesa->state.aos[i].aos_offset =
439 r300GartOffsetFromVirtual(rmesa,
440 vb->
441 AttribPtr[tab[i]]->data);
442 rmesa->state.aos[i].aos_stride =
443 vb->AttribPtr[tab[i]]->stride / 4;
444
445 rmesa->state.aos[i].aos_size =
446 vb->AttribPtr[tab[i]]->size;
447 } else {
448 r300EmitVec(ctx, &rmesa->state.aos[i],
449 vb->AttribPtr[tab[i]]->data,
450 vb->AttribPtr[tab[i]]->size,
451 vb->AttribPtr[tab[i]]->stride, count);
452 }
453
454 rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size;
455
456 comp_size = _mesa_sizeof_type(GL_FLOAT);
457
458 for (fix = 0; fix <= 4 - vb->AttribPtr[tab[i]]->size; fix++) {
459 if ((rmesa->state.aos[i].aos_offset -
460 comp_size * fix) % 4)
461 continue;
462
463 found = 1;
464 break;
465 }
466
467 if (found) {
468 if (fix > 0) {
469 WARN_ONCE("Feeling lucky?\n");
470 }
471
472 rmesa->state.aos[i].aos_offset -= comp_size * fix;
473
474 for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++)
475 swizzle[i][ci] += fix;
476 } else {
477 WARN_ONCE
478 ("Cannot handle offset %x with stride %d, comp %d\n",
479 rmesa->state.aos[i].aos_offset,
480 rmesa->state.aos[i].aos_stride,
481 vb->AttribPtr[tab[i]]->size);
482 return R300_FALLBACK_TCL;
483 }
484 }
485
486 /* setup INPUT_ROUTE */
487 R300_STATECHANGE(r300, vir[0]);
488 ((drm_r300_cmd_header_t *) r300->hw.vir[0].cmd)->packet0.count =
489 t_vir0(&r300->hw.vir[0].cmd[R300_VIR_CNTL_0], vb->AttribPtr,
490 inputs, tab, nr);
491
492 R300_STATECHANGE(r300, vir[1]);
493 ((drm_r300_cmd_header_t *) r300->hw.vir[1].cmd)->packet0.count =
494 t_vir1(&r300->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, nr);
495
496 /* Set up input_cntl */
497 /* I don't think this is needed for vertex buffers, but it doesn't hurt anything */
498 R300_STATECHANGE(r300, vic);
499 r300->hw.vic.cmd[R300_VIC_CNTL_0] = 0x5555; /* Hard coded value, no idea what it means */
500 r300->hw.vic.cmd[R300_VIC_CNTL_1] = t_vic(ctx, InputsRead);
501
502 /* Stage 3: VAP output */
503
504 R300_STATECHANGE(r300, vof);
505
506 r300->hw.vof.cmd[R300_VOF_CNTL_0] = 0;
507 r300->hw.vof.cmd[R300_VOF_CNTL_1] = 0;
508
509 if (OutputsWritten & (1 << VERT_RESULT_HPOS))
510 r300->hw.vof.cmd[R300_VOF_CNTL_0] |=
511 R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
512
513 if (OutputsWritten & (1 << VERT_RESULT_COL0))
514 r300->hw.vof.cmd[R300_VOF_CNTL_0] |=
515 R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT;
516
517 if (OutputsWritten & (1 << VERT_RESULT_COL1))
518 r300->hw.vof.cmd[R300_VOF_CNTL_0] |=
519 R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT;
520
521 /*if(OutputsWritten & (1 << VERT_RESULT_BFC0))
522 r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT;
523
524 if(OutputsWritten & (1 << VERT_RESULT_BFC1))
525 r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT; */
526 //if(OutputsWritten & (1 << VERT_RESULT_FOGC))
527
528 if (OutputsWritten & (1 << VERT_RESULT_PSIZ))
529 r300->hw.vof.cmd[R300_VOF_CNTL_0] |=
530 R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
531
532 for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
533 if (OutputsWritten & (1 << (VERT_RESULT_TEX0 + i)))
534 r300->hw.vof.cmd[R300_VOF_CNTL_1] |= (4 << (3 * i));
535
536 rmesa->state.aos_count = nr;
537
538 return R300_FALLBACK_NONE;
539 }
540
541 #ifdef USER_BUFFERS
542 void r300UseArrays(GLcontext * ctx)
543 {
544 r300ContextPtr rmesa = R300_CONTEXT(ctx);
545 int i;
546
547 if (rmesa->state.elt_dma.buf)
548 r300_mem_use(rmesa, rmesa->state.elt_dma.buf->id);
549
550 for (i = 0; i < rmesa->state.aos_count; i++) {
551 if (rmesa->state.aos[i].buf)
552 r300_mem_use(rmesa, rmesa->state.aos[i].buf->id);
553 }
554 }
555 #endif
556
557 void r300ReleaseArrays(GLcontext * ctx)
558 {
559 r300ContextPtr rmesa = R300_CONTEXT(ctx);
560 int i;
561
562 r300ReleaseDmaRegion(rmesa, &rmesa->state.elt_dma, __FUNCTION__);
563 for (i = 0; i < rmesa->state.aos_count; i++) {
564 r300ReleaseDmaRegion(rmesa, &rmesa->state.aos[i], __FUNCTION__);
565 }
566 }