draw: turn fse path into a middle end
[mesa.git] / src / gallium / auxiliary / draw / draw_pt_fetch_shade_emit.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Keith Whitwell <keith@tungstengraphics.com>
31 */
32
33
34 #include "pipe/p_util.h"
35 #include "draw/draw_context.h"
36 #include "draw/draw_private.h"
37 #include "draw/draw_vbuf.h"
38 #include "draw/draw_vertex.h"
39 #include "draw/draw_pt.h"
40 #include "draw/draw_vs.h"
41
42 #include "translate/translate.h"
43
44 struct fetch_shade_emit;
45
46 struct fse_shader {
47 struct translate_key key;
48
49 void (*run_linear)( const struct fetch_shade_emit *fse,
50 unsigned start,
51 unsigned count,
52 char *buffer );
53 };
54
55 /* Prototype fetch, shade, emit-hw-verts all in one go.
56 */
57 struct fetch_shade_emit {
58 struct draw_pt_front_end base;
59
60 struct draw_context *draw;
61
62 struct translate_key key;
63
64 /* Temporaries:
65 */
66 const float *constants;
67 unsigned pitch[PIPE_MAX_ATTRIBS];
68 const ubyte *src[PIPE_MAX_ATTRIBS];
69 unsigned prim;
70
71 /* Points to one of the three hardwired example shaders, below:
72 */
73 struct fse_shader *active;
74
75 /* Temporary: A list of hard-wired shaders. Of course the plan
76 * would be to generate these for a given (vertex-shader,
77 * translate-key) pair...
78 */
79 struct fse_shader shader[10];
80 int nr_shaders;
81 };
82
83
84
85 /* Not quite passthrough yet -- we're still running the 'shader' here,
86 * inlined into the vertex fetch function.
87 */
88 static void fetch_xyz_rgb_st( const struct fetch_shade_emit *fse,
89 unsigned start,
90 unsigned count,
91 char *buffer )
92 {
93 unsigned i;
94
95 const float *m = fse->constants;
96 const float m0 = m[0], m4 = m[4], m8 = m[8], m12 = m[12];
97 const float m1 = m[1], m5 = m[5], m9 = m[9], m13 = m[13];
98 const float m2 = m[2], m6 = m[6], m10 = m[10], m14 = m[14];
99 const float m3 = m[3], m7 = m[7], m11 = m[11], m15 = m[15];
100
101 const ubyte *xyz = fse->src[0] + start * fse->pitch[0];
102 const ubyte *st = fse->src[2] + start * fse->pitch[2];
103
104 float *out = (float *)buffer;
105
106
107 assert(fse->pitch[1] == 0);
108
109 /* loop over vertex attributes (vertex shader inputs)
110 */
111 for (i = 0; i < count; i++) {
112 {
113 const float *in = (const float *)xyz;
114 const float ix = in[0], iy = in[1], iz = in[2];
115
116 out[0] = m0 * ix + m4 * iy + m8 * iz + m12;
117 out[1] = m1 * ix + m5 * iy + m9 * iz + m13;
118 out[2] = m2 * ix + m6 * iy + m10 * iz + m14;
119 out[3] = m3 * ix + m7 * iy + m11 * iz + m15;
120 xyz += fse->pitch[0];
121 }
122
123 {
124 out[4] = 1.0f;
125 out[5] = 1.0f;
126 out[6] = 1.0f;
127 out[7] = 1.0f;
128 }
129
130 {
131 const float *in = (const float *)st; st += fse->pitch[2];
132 out[8] = in[0];
133 out[9] = in[1];
134 out[10] = 0.0f;
135 out[11] = 1.0f;
136 }
137
138 out += 12;
139 }
140 }
141
142
143
144 static void fetch_xyz_rgb( const struct fetch_shade_emit *fse,
145 unsigned start,
146 unsigned count,
147 char *buffer )
148 {
149 unsigned i;
150
151 const float *m = (const float *)fse->constants;
152 const float m0 = m[0], m4 = m[4], m8 = m[8], m12 = m[12];
153 const float m1 = m[1], m5 = m[5], m9 = m[9], m13 = m[13];
154 const float m2 = m[2], m6 = m[6], m10 = m[10], m14 = m[14];
155 const float m3 = m[3], m7 = m[7], m11 = m[11], m15 = m[15];
156
157 const ubyte *xyz = fse->src[0] + start * fse->pitch[0];
158 const ubyte *rgb = fse->src[1] + start * fse->pitch[1];
159
160 float *out = (float *)buffer;
161
162 // debug_printf("rgb %f %f %f\n", rgb[0], rgb[1], rgb[2]);
163
164
165 for (i = 0; i < count; i++) {
166 {
167 const float *in = (const float *)xyz;
168 const float ix = in[0], iy = in[1], iz = in[2];
169
170 out[0] = m0 * ix + m4 * iy + m8 * iz + m12;
171 out[1] = m1 * ix + m5 * iy + m9 * iz + m13;
172 out[2] = m2 * ix + m6 * iy + m10 * iz + m14;
173 out[3] = m3 * ix + m7 * iy + m11 * iz + m15;
174 xyz += fse->pitch[0];
175 }
176
177 {
178 const float *in = (const float *)rgb;
179 out[4] = in[0];
180 out[5] = in[1];
181 out[6] = in[2];
182 out[7] = 1.0f;
183 rgb += fse->pitch[1];
184 }
185
186 out += 8;
187 }
188 }
189
190
191
192
193 static void fetch_xyz_rgb_psiz( const struct fetch_shade_emit *fse,
194 unsigned start,
195 unsigned count,
196 char *buffer )
197 {
198 unsigned i;
199
200 const float *m = (const float *)fse->constants;
201 const float m0 = m[0], m4 = m[4], m8 = m[8], m12 = m[12];
202 const float m1 = m[1], m5 = m[5], m9 = m[9], m13 = m[13];
203 const float m2 = m[2], m6 = m[6], m10 = m[10], m14 = m[14];
204 const float m3 = m[3], m7 = m[7], m11 = m[11], m15 = m[15];
205
206 const ubyte *xyz = fse->src[0] + start * fse->pitch[0];
207 const float *rgb = (const float *)(fse->src[1] + start * fse->pitch[1]);
208 const float psiz = 1.0;
209
210 float *out = (float *)buffer;
211
212
213 assert(fse->pitch[1] == 0);
214
215 for (i = 0; i < count; i++) {
216 {
217 const float *in = (const float *)xyz;
218 const float ix = in[0], iy = in[1], iz = in[2];
219
220 out[0] = m0 * ix + m4 * iy + m8 * iz + m12;
221 out[1] = m1 * ix + m5 * iy + m9 * iz + m13;
222 out[2] = m2 * ix + m6 * iy + m10 * iz + m14;
223 out[3] = m3 * ix + m7 * iy + m11 * iz + m15;
224 xyz += fse->pitch[0];
225 }
226
227 {
228 out[4] = rgb[0];
229 out[5] = rgb[1];
230 out[6] = rgb[2];
231 out[7] = 1.0f;
232 }
233
234 {
235 out[8] = psiz;
236 }
237
238 out += 9;
239 }
240 }
241
242
243
244
245 static boolean set_prim( struct fetch_shade_emit *fse,
246 unsigned prim,
247 unsigned count )
248 {
249 struct draw_context *draw = fse->draw;
250
251 fse->prim = prim;
252
253 switch (prim) {
254 case PIPE_PRIM_LINE_LOOP:
255 if (count > 1024)
256 return FALSE;
257 draw->render->set_primitive( draw->render, PIPE_PRIM_LINE_STRIP );
258 break;
259
260 case PIPE_PRIM_TRIANGLE_FAN:
261 case PIPE_PRIM_POLYGON:
262 if (count > 1024)
263 return FALSE;
264 draw->render->set_primitive( draw->render, prim );
265 break;
266
267 case PIPE_PRIM_QUADS:
268 case PIPE_PRIM_QUAD_STRIP:
269 draw->render->set_primitive( draw->render, PIPE_PRIM_TRIANGLES );
270 break;
271
272 default:
273 draw->render->set_primitive( draw->render, prim );
274 break;
275 }
276
277 return TRUE;
278 }
279
280
281
282
283
284
285 static void fse_prepare( struct draw_pt_front_end *fe,
286 unsigned prim,
287 struct draw_pt_middle_end *unused,
288 unsigned opt )
289 {
290 struct fetch_shade_emit *fse = (struct fetch_shade_emit *)fe;
291 struct draw_context *draw = fse->draw;
292 unsigned num_vs_inputs = draw->vertex_shader->info.num_inputs;
293 unsigned num_vs_outputs = draw->vertex_shader->info.num_outputs;
294 const struct vertex_info *vinfo;
295 unsigned i;
296 boolean need_psize = 0;
297
298
299 if (draw->pt.user.elts) {
300 assert(0);
301 return ;
302 }
303
304 if (!set_prim(fse, prim, /*count*/1022 )) {
305 assert(0);
306 return ;
307 }
308
309 /* Must do this after set_primitive() above:
310 */
311 vinfo = draw->render->get_vertex_info(draw->render);
312
313
314
315 fse->key.nr_elements = MAX2(num_vs_outputs, /* outputs - translate to hw format */
316 num_vs_inputs); /* inputs - fetch from api format */
317
318 fse->key.output_stride = vinfo->size * 4;
319 memset(fse->key.element, 0,
320 fse->key.nr_elements * sizeof(fse->key.element[0]));
321
322 for (i = 0; i < num_vs_inputs; i++) {
323 const struct pipe_vertex_element *src = &draw->pt.vertex_element[i];
324 fse->key.element[i].input_format = src->src_format;
325
326 /* Consider ignoring these at this point, ie make generated
327 * programs independent of this state:
328 */
329 fse->key.element[i].input_buffer = 0; //src->vertex_buffer_index;
330 fse->key.element[i].input_offset = 0; //src->src_offset;
331 }
332
333
334 {
335 unsigned dst_offset = 0;
336
337 for (i = 0; i < vinfo->num_attribs; i++) {
338 unsigned emit_sz = 0;
339 unsigned output_format = PIPE_FORMAT_NONE;
340 unsigned vs_output = vinfo->src_index[i];
341
342 switch (vinfo->emit[i]) {
343 case EMIT_4F:
344 output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
345 emit_sz = 4 * sizeof(float);
346 break;
347 case EMIT_3F:
348 output_format = PIPE_FORMAT_R32G32B32_FLOAT;
349 emit_sz = 3 * sizeof(float);
350 break;
351 case EMIT_2F:
352 output_format = PIPE_FORMAT_R32G32_FLOAT;
353 emit_sz = 2 * sizeof(float);
354 break;
355 case EMIT_1F:
356 output_format = PIPE_FORMAT_R32_FLOAT;
357 emit_sz = 1 * sizeof(float);
358 break;
359 case EMIT_1F_PSIZE:
360 need_psize = 1;
361 output_format = PIPE_FORMAT_R32_FLOAT;
362 emit_sz = 1 * sizeof(float);
363 vs_output = num_vs_outputs + 1;
364
365 break;
366 default:
367 assert(0);
368 break;
369 }
370
371 /* The elements in the key correspond to vertex shader output
372 * numbers, not to positions in the hw vertex description --
373 * that's handled by the output_offset field.
374 */
375 fse->key.element[vs_output].output_format = output_format;
376 fse->key.element[vs_output].output_offset = dst_offset;
377
378 dst_offset += emit_sz;
379 assert(fse->key.output_stride >= dst_offset);
380 }
381 }
382
383 /* To make psize work, really need to tell the vertex shader to
384 * copy that value from input->output. For 'translate' this was
385 * implicit for all elements.
386 */
387 #if 0
388 if (need_psize) {
389 unsigned input = num_vs_inputs + 1;
390 const struct pipe_vertex_element *src = &draw->pt.vertex_element[i];
391 fse->key.element[i].input_format = PIPE_FORMAT_R32_FLOAT;
392 fse->key.element[i].input_buffer = 0; //nr_buffers + 1;
393 fse->key.element[i].input_offset = 0;
394
395 fse->key.nr_elements += 1;
396
397 }
398 #endif
399
400 fse->constants = draw->pt.user.constants;
401
402 /* Would normally look up a vertex shader and peruse its list of
403 * varients somehow. We omitted that step and put all the
404 * hardcoded "shaders" into an array. We're just making the
405 * assumption that this happens to be a matching shader... ie
406 * you're running isosurf, aren't you?
407 */
408 fse->active = NULL;
409 for (i = 0; i < fse->nr_shaders; i++) {
410 if (translate_key_compare( &fse->key, &fse->shader[i].key) == 0)
411 fse->active = &fse->shader[i];
412 }
413
414 if (!fse->active) {
415 assert(0);
416 return ;
417 }
418
419 /* Now set buffer pointers:
420 */
421 for (i = 0; i < num_vs_inputs; i++) {
422 unsigned buf = draw->pt.vertex_element[i].vertex_buffer_index;
423
424 fse->src[i] = ((const ubyte *) draw->pt.user.vbuffer[buf] +
425 draw->pt.vertex_buffer[buf].buffer_offset +
426 draw->pt.vertex_element[i].src_offset);
427
428 fse->pitch[i] = draw->pt.vertex_buffer[buf].pitch;
429
430 }
431
432
433 //return TRUE;
434 }
435
436
437
438
439
440
441 #define INDEX(i) (start + (i))
442 static void fse_render_linear( struct vbuf_render *render,
443 unsigned prim,
444 unsigned start,
445 unsigned length )
446 {
447 ushort *tmp = NULL;
448 unsigned i, j;
449
450 switch (prim) {
451 case PIPE_PRIM_LINE_LOOP:
452 tmp = MALLOC( sizeof(ushort) * (length + 1) );
453
454 for (i = 0; i < length; i++)
455 tmp[i] = INDEX(i);
456 tmp[length] = 0;
457
458 render->draw( render,
459 tmp,
460 length+1 );
461 break;
462
463
464 case PIPE_PRIM_QUAD_STRIP:
465 tmp = MALLOC( sizeof(ushort) * (length / 2 * 6) );
466
467 for (j = i = 0; i + 3 < length; i += 2, j += 6) {
468 tmp[j+0] = INDEX(i+0);
469 tmp[j+1] = INDEX(i+1);
470 tmp[j+2] = INDEX(i+3);
471
472 tmp[j+3] = INDEX(i+2);
473 tmp[j+4] = INDEX(i+0);
474 tmp[j+5] = INDEX(i+3);
475 }
476
477 if (j)
478 render->draw( render, tmp, j );
479 break;
480
481 case PIPE_PRIM_QUADS:
482 tmp = MALLOC( sizeof(int) * (length / 4 * 6) );
483
484 for (j = i = 0; i + 3 < length; i += 4, j += 6) {
485 tmp[j+0] = INDEX(i+0);
486 tmp[j+1] = INDEX(i+1);
487 tmp[j+2] = INDEX(i+3);
488
489 tmp[j+3] = INDEX(i+1);
490 tmp[j+4] = INDEX(i+2);
491 tmp[j+5] = INDEX(i+3);
492 }
493
494 if (j)
495 render->draw( render, tmp, j );
496 break;
497
498 default:
499 render->draw_arrays( render,
500 start,
501 length );
502 break;
503 }
504
505 if (tmp)
506 FREE(tmp);
507 }
508
509
510
511 static boolean do_draw( struct fetch_shade_emit *fse,
512 unsigned start, unsigned count )
513 {
514 struct draw_context *draw = fse->draw;
515
516 char *hw_verts =
517 draw->render->allocate_vertices( draw->render,
518 (ushort)fse->key.output_stride,
519 (ushort)count );
520
521 if (!hw_verts)
522 return FALSE;
523
524 /* Single routine to fetch vertices, run shader and emit HW verts.
525 * Clipping and viewport transformation are done on hardware.
526 */
527 fse->active->run_linear( fse,
528 start, count,
529 hw_verts );
530
531 /* Draw arrays path to avoid re-emitting index list again and
532 * again.
533 */
534 fse_render_linear( draw->render,
535 fse->prim,
536 0,
537 count );
538
539
540 draw->render->release_vertices( draw->render,
541 hw_verts,
542 fse->key.output_stride,
543 count );
544
545 return TRUE;
546 }
547
548
549 static void
550 fse_run(struct draw_pt_front_end *fe,
551 pt_elt_func elt_func,
552 const void *elt_ptr,
553 unsigned count)
554 {
555 struct fetch_shade_emit *fse = (struct fetch_shade_emit *)fe;
556 unsigned i = 0;
557 unsigned first, incr;
558 unsigned start = elt_func(elt_ptr, 0);
559
560 //debug_printf("%s prim %d start %d count %d\n", __FUNCTION__, prim, start, count);
561
562 draw_pt_split_prim(fse->prim, &first, &incr);
563
564 count -= (count - first) % incr;
565
566 while (i + first <= count) {
567 int nr = MIN2( count - i, 1024 );
568
569 /* snap to prim boundary
570 */
571 nr -= (nr - first) % incr;
572
573 if (!do_draw( fse, start + i, nr )) {
574 assert(0);
575 return ;
576 }
577
578 /* increment allowing for repeated vertices
579 */
580 i += nr - (first - incr);
581 }
582
583 //return TRUE;
584 }
585
586
/**
 * Front-end 'finish' hook.  This path has nothing to flush or release
 * between draws, so the hook is intentionally empty.
 */
static void fse_finish( struct draw_pt_front_end *frontend )
{
   (void) frontend;
}
590
591
/**
 * Front-end 'destroy' hook: release the front end.
 *
 * \param frontend  the object to free
 */
static void
fse_destroy( struct draw_pt_front_end *frontend )
{
   FREE( frontend );
}
597
598 struct draw_pt_front_end *draw_pt_fetch_shade_emit( struct draw_context *draw )
599 {
600 struct fetch_shade_emit *fse = CALLOC_STRUCT(fetch_shade_emit);
601 if (!fse)
602 return NULL;
603
604 fse->base.prepare = fse_prepare;
605 fse->base.run = fse_run;
606 fse->base.finish = fse_finish;
607 fse->base.destroy = fse_destroy;
608 fse->draw = draw;
609
610 fse->shader[0].run_linear = fetch_xyz_rgb_st;
611 fse->shader[0].key.nr_elements = 3;
612 fse->shader[0].key.output_stride = 12 * sizeof(float);
613
614 fse->shader[0].key.element[0].input_format = PIPE_FORMAT_R32G32B32_FLOAT;
615 fse->shader[0].key.element[0].input_buffer = 0;
616 fse->shader[0].key.element[0].input_offset = 0;
617 fse->shader[0].key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
618 fse->shader[0].key.element[0].output_offset = 0;
619
620 fse->shader[0].key.element[1].input_format = PIPE_FORMAT_R32G32B32_FLOAT;
621 fse->shader[0].key.element[1].input_buffer = 0;
622 fse->shader[0].key.element[1].input_offset = 0;
623 fse->shader[0].key.element[1].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
624 fse->shader[0].key.element[1].output_offset = 16;
625
626 fse->shader[0].key.element[1].input_format = PIPE_FORMAT_R32G32_FLOAT;
627 fse->shader[0].key.element[1].input_buffer = 0;
628 fse->shader[0].key.element[1].input_offset = 0;
629 fse->shader[0].key.element[1].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
630 fse->shader[0].key.element[1].output_offset = 32;
631
632 fse->shader[1].run_linear = fetch_xyz_rgb;
633 fse->shader[1].key.nr_elements = 2;
634 fse->shader[1].key.output_stride = 8 * sizeof(float);
635
636 fse->shader[1].key.element[0].input_format = PIPE_FORMAT_R32G32B32_FLOAT;
637 fse->shader[1].key.element[0].input_buffer = 0;
638 fse->shader[1].key.element[0].input_offset = 0;
639 fse->shader[1].key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
640 fse->shader[1].key.element[0].output_offset = 0;
641
642 fse->shader[1].key.element[1].input_format = PIPE_FORMAT_R32G32B32_FLOAT;
643 fse->shader[1].key.element[1].input_buffer = 0;
644 fse->shader[1].key.element[1].input_offset = 0;
645 fse->shader[1].key.element[1].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
646 fse->shader[1].key.element[1].output_offset = 16;
647
648 fse->shader[2].run_linear = fetch_xyz_rgb_psiz;
649 fse->shader[2].key.nr_elements = 3;
650 fse->shader[2].key.output_stride = 9 * sizeof(float);
651
652 fse->shader[2].key.element[0].input_format = PIPE_FORMAT_R32G32B32_FLOAT;
653 fse->shader[2].key.element[0].input_buffer = 0;
654 fse->shader[2].key.element[0].input_offset = 0;
655 fse->shader[2].key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
656 fse->shader[2].key.element[0].output_offset = 0;
657
658 fse->shader[2].key.element[1].input_format = PIPE_FORMAT_R32G32B32_FLOAT;
659 fse->shader[2].key.element[1].input_buffer = 0;
660 fse->shader[2].key.element[1].input_offset = 0;
661 fse->shader[2].key.element[1].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
662 fse->shader[2].key.element[1].output_offset = 16;
663
664 /* psize is special
665 * -- effectively add it here as another input!?!
666 * -- who knows how to add it as a buffer?
667 */
668 fse->shader[2].key.element[2].input_format = PIPE_FORMAT_R32_FLOAT;
669 fse->shader[2].key.element[2].input_buffer = 0;
670 fse->shader[2].key.element[2].input_offset = 0;
671 fse->shader[2].key.element[2].output_format = PIPE_FORMAT_R32_FLOAT;
672 fse->shader[2].key.element[2].output_offset = 32;
673
674 fse->nr_shaders = 3;
675
676 return &fse->base;
677 }