pack output vertex attributes in sequential slots
[mesa.git] / src / mesa / pipe / softpipe / sp_draw_arrays.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /* Author:
29 * Brian Paul
30 * Keith Whitwell
31 */
32
33
34 /** TEMP */
35 #include "main/context.h"
36 #include "main/macros.h"
37
38 #include "pipe/p_defines.h"
39 #include "pipe/p_context.h"
40 #include "pipe/p_winsys.h"
41
42
43 #include "sp_context.h"
44 #include "sp_state.h"
45
46 #include "pipe/draw/draw_private.h"
47 #include "pipe/draw/draw_context.h"
48 #include "pipe/draw/draw_prim.h"
49
50 #include "pipe/tgsi/core/tgsi_exec.h"
51 #include "pipe/tgsi/core/tgsi_build.h"
52 #include "pipe/tgsi/core/tgsi_util.h"
53
54
/*
 * Helpers for declaring a local array that is used on a 16-byte boundary.
 *
 * ALIGN16_DECL(TYPE, NAME, SIZE) declares the array and ALIGN16_ASSIGN(P)
 * yields the pointer that should actually be used to access it.
 *
 * With GCC-style attributes the array itself is declared 16-byte aligned
 * and the pointer is used unchanged.  Otherwise the array gets one spare
 * element and the pointer is passed through align16() (defined elsewhere;
 * presumably it rounds the address up -- confirm against its definition).
 */
#if !defined __GNUC__
#define ALIGN16_DECL(TYPE, NAME, SIZE)  TYPE NAME[SIZE + 1]
#define ALIGN16_ASSIGN(P)               align16(P)
#else
#define ALIGN16_DECL(TYPE, NAME, SIZE)  TYPE NAME[SIZE] __attribute__(( aligned( 16 ) ))
#define ALIGN16_ASSIGN(P)               P
#endif
62
63
64
65 static INLINE unsigned
66 compute_clipmask(float cx, float cy, float cz, float cw)
67 {
68 unsigned mask;
69 #if defined(macintosh) || defined(__powerpc__)
70 /* on powerpc cliptest is 17% faster in this way. */
71 mask = (((cw < cx) << CLIP_RIGHT_SHIFT));
72 mask |= (((cw < -cx) << CLIP_LEFT_SHIFT));
73 mask |= (((cw < cy) << CLIP_TOP_SHIFT));
74 mask |= (((cw < -cy) << CLIP_BOTTOM_SHIFT));
75 mask |= (((cw < cz) << CLIP_FAR_SHIFT));
76 mask |= (((cw < -cz) << CLIP_NEAR_SHIFT));
77 #else /* !defined(macintosh)) */
78 mask = 0x0;
79 if (-cx + cw < 0) mask |= CLIP_RIGHT_BIT;
80 if ( cx + cw < 0) mask |= CLIP_LEFT_BIT;
81 if (-cy + cw < 0) mask |= CLIP_TOP_BIT;
82 if ( cy + cw < 0) mask |= CLIP_BOTTOM_BIT;
83 if (-cz + cw < 0) mask |= CLIP_FAR_BIT;
84 if ( cz + cw < 0) mask |= CLIP_NEAR_BIT;
85 #endif /* defined(macintosh) */
86 return mask;
87 }
88
89
90
91 /**
92 * Transform vertices with the current vertex program/shader
93 * Up to four vertices can be shaded at a time.
94 * \param vbuffer the input vertex data
95 * \param elts indexes of four input vertices
96 * \param count number of vertices to shade [1..4]
97 * \param vOut array of pointers to four output vertices
98 */
99 static void
100 run_vertex_program(struct draw_context *draw,
101 const void *vbuffer, unsigned elts[4], unsigned count,
102 struct vertex_header *vOut[])
103 {
104 struct softpipe_context *sp = softpipe_context(draw->pipe);
105 struct tgsi_exec_machine machine;
106 unsigned int j;
107
108 ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_ATTRIB_MAX);
109 ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_ATTRIB_MAX);
110 const float *scale = draw->viewport.scale;
111 const float *trans = draw->viewport.translate;
112
113 assert(count <= 4);
114
115 #ifdef DEBUG
116 memset( &machine, 0, sizeof( machine ) );
117 #endif
118
119 /* init machine state */
120 tgsi_exec_machine_init(
121 &machine,
122 sp->vs.tokens,
123 PIPE_MAX_SAMPLERS,
124 NULL /*samplers*/ );
125
126 /* Consts does not require 16 byte alignment. */
127 machine.Consts = sp->vs.constants->constant;
128
129 machine.Inputs = ALIGN16_ASSIGN(inputs);
130 machine.Outputs = ALIGN16_ASSIGN(outputs);
131
132 /* load machine inputs */
133 for (j = 0; j < count; j++) {
134 unsigned attr;
135 for (attr = 0; attr < 16; attr++) {
136 if (sp->vs.inputs_read & (1 << attr)) {
137 const float *p
138 = (const float *) ((const ubyte *) vbuffer
139 + draw->vertex_buffer[attr].buffer_offset
140 + draw->vertex_element[attr].src_offset
141 + elts[j] * draw->vertex_buffer[attr].pitch);
142
143 machine.Inputs[attr].xyzw[0].f[j] = p[0]; /*X*/
144 machine.Inputs[attr].xyzw[1].f[j] = p[1]; /*Y*/
145 machine.Inputs[attr].xyzw[2].f[j] = p[2]; /*Z*/
146 machine.Inputs[attr].xyzw[3].f[j] = 1.0; /*W*/
147 #if 0
148 if (attr == 0) {
149 printf("Input vertex %d: %f %f %f\n",
150 j, p[0], p[1], p[2]);
151 }
152 #endif
153 }
154 }
155 }
156
157 #if 0
158 printf("Consts:\n");
159 for (i = 0; i < 4; i++) {
160 printf(" %d: %f %f %f %f\n", i,
161 machine.Consts[i][0],
162 machine.Consts[i][1],
163 machine.Consts[i][2],
164 machine.Consts[i][3]);
165 }
166 #endif
167
168 /* run shader */
169 tgsi_exec_machine_run( &machine );
170
171 #if 0
172 printf("VS result: %f %f %f %f\n",
173 outputs[0].xyzw[0].f[0],
174 outputs[0].xyzw[1].f[0],
175 outputs[0].xyzw[2].f[0],
176 outputs[0].xyzw[3].f[0]);
177 #endif
178
179 /* store machine results */
180 assert(sp->vs.outputs_written & (1 << VERT_RESULT_HPOS));
181 for (j = 0; j < count; j++) {
182 unsigned attr, slot;
183 float x, y, z, w;
184
185 /* Handle attr[0] (position) specially: */
186 x = vOut[j]->clip[0] = outputs[0].xyzw[0].f[j];
187 y = vOut[j]->clip[1] = outputs[0].xyzw[1].f[j];
188 z = vOut[j]->clip[2] = outputs[0].xyzw[2].f[j];
189 w = vOut[j]->clip[3] = outputs[0].xyzw[3].f[j];
190
191 vOut[j]->clipmask = compute_clipmask(x, y, z, w);
192 vOut[j]->edgeflag = 1;
193
194 /* divide by w */
195 w = 1.0 / w;
196 x *= w;
197 y *= w;
198 z *= w;
199
200 /* Viewport mapping */
201 vOut[j]->data[0][0] = x * scale[0] + trans[0];
202 vOut[j]->data[0][1] = y * scale[1] + trans[1];
203 vOut[j]->data[0][2] = z * scale[2] + trans[2];
204 vOut[j]->data[0][3] = w;
205 #if 0
206 printf("wincoord: %f %f %f\n",
207 vOut[j]->data[0][0],
208 vOut[j]->data[0][1],
209 vOut[j]->data[0][2]);
210 #endif
211
212 /* remaining attributes: */
213 /* pack into sequential post-transform attrib slots */
214 slot = 1;
215 for (attr = 1; attr < VERT_RESULT_MAX; attr++) {
216 if (sp->vs.outputs_written & (1 << attr)) {
217 assert(slot < draw->nr_attrs - 2);
218 vOut[j]->data[slot][0] = outputs[attr].xyzw[0].f[j];
219 vOut[j]->data[slot][1] = outputs[attr].xyzw[1].f[j];
220 vOut[j]->data[slot][2] = outputs[attr].xyzw[2].f[j];
221 vOut[j]->data[slot][3] = outputs[attr].xyzw[3].f[j];
222 slot++;
223 }
224 }
225 }
226
227 #if 0
228 memcpy(
229 quad->outputs.color,
230 &machine.Outputs[1].xyzw[0].f[0],
231 sizeof( quad->outputs.color ) );
232 #endif
233 }
234
235
/**
 * Stand-in for actual vertex program execution.  Compiled out (#if 0)
 * and kept for reference only: it transforms a single vertex by the
 * context's combined model/projection matrix and copies a color.
 * XXX this will probably live in a new file, like "sp_vs.c"
 * \param draw  the drawing context
 * \param vbuffer  the mapped vertex buffer pointer
 * \param elem  which element of the vertex buffer to use as input
 * \param vOut  the output vertex
 */
#if 0
static void
run_vertex_program(struct draw_context *draw,
                   const void *vbuffer, unsigned elem,
                   struct vertex_header *vOut)
{
   const ubyte *base = (const ubyte *) vbuffer;
   const float *vp_scale = draw->viewport.scale;
   const float *vp_trans = draw->viewport.translate;
   const float *pos, *color;
   float x, y, z, w;

   /* XXX temporary hack: */
   GET_CURRENT_CONTEXT(ctx);
   const float *m = ctx->_ModelProjectMatrix.m;

   /* position comes from vertex buffer/element 0 */
   pos = (const float *) (base
                          + draw->vertex_buffer[0].buffer_offset
                          + draw->vertex_element[0].src_offset
                          + elem * draw->vertex_buffer[0].pitch);

   /* color comes from vertex buffer/element 3 */
   color = (const float *) (base
                            + draw->vertex_buffer[3].buffer_offset
                            + draw->vertex_element[3].src_offset
                            + elem * draw->vertex_buffer[3].pitch);

   x = pos[0];
   y = pos[1];
   z = pos[2];
   w = 1.0;

   vOut->clipmask = 0x0;
   vOut->edgeflag = 0;

   /* MVP */
   vOut->clip[0] = m[0] * x + m[4] * y + m[ 8] * z + m[12] * w;
   vOut->clip[1] = m[1] * x + m[5] * y + m[ 9] * z + m[13] * w;
   vOut->clip[2] = m[2] * x + m[6] * y + m[10] * z + m[14] * w;
   vOut->clip[3] = m[3] * x + m[7] * y + m[11] * z + m[15] * w;

   /* divide by w */
   x = vOut->clip[0] / vOut->clip[3];
   y = vOut->clip[1] / vOut->clip[3];
   z = vOut->clip[2] / vOut->clip[3];
   w = 1.0 / vOut->clip[3];

   /* Viewport */
   vOut->data[0][0] = vp_scale[0] * x + vp_trans[0];
   vOut->data[0][1] = vp_scale[1] * y + vp_trans[1];
   vOut->data[0][2] = vp_scale[2] * z + vp_trans[2];
   vOut->data[0][3] = w;

   /* color */
   vOut->data[1][0] = color[0];
   vOut->data[1][1] = color[1];
   vOut->data[1][2] = color[2];
   vOut->data[1][3] = 1.0;
}
#endif
303
304
305 /**
306 * Called by the draw module when the vertx cache needs to be flushed.
307 * This involves running the vertex shader.
308 */
309 static void vs_flush( struct draw_context *draw )
310 {
311 unsigned i, j;
312
313 /* We're not really running a vertex shader yet, so flushing the vs
314 * queue is just a matter of building the vertices and returning.
315 */
316 /* Actually, I'm cheating even more and pre-building them still
317 * with the mesa/vf module. So it's very easy...
318 */
319 #if 0
320 for (i = 0; i < draw->vs.queue_nr; i++) {
321 #else
322 for (i = 0; i < draw->vs.queue_nr; i+=4) {
323 #endif
324 /* Would do the following steps here:
325 *
326 * 1) Loop over vertex element descriptors, fetch data from each
327 * to build the pre-tnl vertex. This might require a new struct
328 * to represent the pre-tnl vertex.
329 *
330 * 2) Bundle groups of upto 4 pre-tnl vertices together and pass
331 * to vertex shader.
332 *
333 * 3) Do any necessary unswizzling, make sure vertex headers are
334 * correctly populated, store resulting post-transformed
335 * vertices in vcache.
336 *
337 * In this version, just do the last step:
338 */
339 #if 0
340 const unsigned elt = draw->vs.queue[i].elt;
341 struct vertex_header *dest = draw->vs.queue[i].dest;
342
343 run_vertex_program(draw, draw->mapped_vbuffer, elt, dest);
344 #else
345 struct vertex_header *dests[4];
346 unsigned elts[4];
347 int n;
348
349 for (j = 0; j < 4; j++) {
350 elts[j] = draw->vs.queue[i + j].elt;
351 dests[j] = draw->vs.queue[i + j].dest;
352 }
353
354 n = MIN2(4, draw->vs.queue_nr - i);
355 assert(n > 0);
356 assert(n <= 4);
357
358 run_vertex_program(draw, draw->mapped_vbuffer, elts, n, dests);
359 #endif
360 }
361 draw->vs.queue_nr = 0;
362 }
363
364
365
366 void
367 softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
368 unsigned start, unsigned count)
369 {
370 struct softpipe_context *sp = softpipe_context(pipe);
371 struct draw_context *draw = sp->draw;
372 struct pipe_buffer_handle *buf;
373
374 softpipe_map_surfaces(sp);
375
376 /*
377 * Map vertex buffers
378 */
379 buf = sp->vertex_buffer[0].buffer;
380 draw->mapped_vbuffer
381 = pipe->winsys->buffer_map(pipe->winsys, buf, PIPE_BUFFER_FLAG_READ);
382
383
384 /* tell drawing pipeline we're beginning drawing */
385 draw->pipeline.first->begin( draw->pipeline.first );
386
387 draw->vs_flush = vs_flush;
388 draw->pipe = pipe; /* XXX pass pipe to draw_create() */
389
390 draw_invalidate_vcache( draw );
391
392 draw_set_element_buffer(draw, 0, NULL); /* no index/element buffer */
393 draw_set_prim( draw, mode );
394
395 /* XXX draw_prim_info() and TRIM here */
396 draw_prim(draw, start, count);
397
398 /* draw any left-over buffered prims */
399 draw_flush(draw);
400
401 /* tell drawing pipeline we're done drawing */
402 draw->pipeline.first->end( draw->pipeline.first );
403
404 /*
405 * unmap vertex buffer
406 */
407 pipe->winsys->buffer_unmap(pipe->winsys, buf);
408
409 softpipe_unmap_surfaces(sp);
410 }
411
412
413
/**
 * Append one attribute to the vertex layout of the `draw` context that
 * is in scope where the macro is expanded.
 *
 * Records VF_ATTR and STYLE in the next draw->attrs[] entry, bumps
 * draw->nr_attrs and adds SIZE to draw->vertex_size.  Once two
 * attributes have already been emitted, the attribute is also entered in
 * draw->vf_attr_to_slot[] with slot number (nr_attrs - 2).
 *
 * Each argument is evaluated exactly once.
 */
#define EMIT_ATTR( VF_ATTR, STYLE, SIZE )                              \
do {                                                                   \
   const unsigned emit_attr_id_ = (VF_ATTR);                           \
   if (draw->nr_attrs >= 2)                                            \
      draw->vf_attr_to_slot[emit_attr_id_] = draw->nr_attrs - 2;       \
   draw->attrs[draw->nr_attrs].attrib = emit_attr_id_;                 \
   draw->attrs[draw->nr_attrs].format = (STYLE);                       \
   draw->nr_attrs++;                                                   \
   draw->vertex_size += (SIZE);                                        \
} while (0)
423
424
425 /**
426 * XXX very similar to same func in draw_vb.c (which will go away)
427 */
428 void
429 draw_set_vertex_attributes2( struct draw_context *draw,
430 const unsigned *slot_to_vf_attr,
431 unsigned nr_attrs )
432 {
433 unsigned i;
434
435 memset(draw->vf_attr_to_slot, 0, sizeof(draw->vf_attr_to_slot));
436 draw->nr_attrs = 0;
437 draw->vertex_size = 0;
438
439 /*
440 * First three attribs are always the same: header, clip pos, winpos
441 */
442 EMIT_ATTR(VF_ATTRIB_VERTEX_HEADER, EMIT_1F, 1);
443 EMIT_ATTR(VF_ATTRIB_CLIP_POS, EMIT_4F, 4);
444
445 assert(slot_to_vf_attr[0] == VF_ATTRIB_POS);
446 EMIT_ATTR(slot_to_vf_attr[0], EMIT_4F_VIEWPORT, 4);
447
448 /*
449 * Remaining attribs (color, texcoords, etc)
450 */
451 for (i = 1; i < nr_attrs; i++)
452 EMIT_ATTR(slot_to_vf_attr[i], EMIT_4F, 4);
453
454 draw->vertex_size *= 4; /* floats to bytes */
455 }
456
457