1 /**************************************************************************
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * Keith Whitwell <keith@tungstengraphics.com>
34 #include "pipe/p_util.h"
35 #include "draw/draw_context.h"
36 #include "draw/draw_private.h"
37 #include "draw/draw_vbuf.h"
38 #include "draw/draw_vertex.h"
39 #include "draw/draw_pt.h"
40 #include "draw/draw_vs.h"
42 #include "translate/translate.h"
44 struct fetch_shade_emit
;
47 struct translate_key key
;
49 void (*run_linear
)( const struct fetch_shade_emit
*fse
,
55 /* Prototype fetch, shade, emit-hw-verts all in one go.
57 struct fetch_shade_emit
{
58 struct draw_pt_front_end base
;
60 struct draw_context
*draw
;
62 struct translate_key key
;
66 const float *constants
;
67 unsigned pitch
[PIPE_MAX_ATTRIBS
];
68 const ubyte
*src
[PIPE_MAX_ATTRIBS
];
71 /* Points to one of the three hardwired example shaders, below:
73 struct fse_shader
*active
;
75 /* Temporary: A list of hard-wired shaders. Of course the plan
76 * would be to generate these for a given (vertex-shader,
77 * translate-key) pair...
79 struct fse_shader shader
[10];
85 /* Not quite passthrough yet -- we're still running the 'shader' here,
86 * inlined into the vertex fetch function.
88 static void fetch_xyz_rgb_st( const struct fetch_shade_emit
*fse
,
95 const float *m
= fse
->constants
;
96 const float m0
= m
[0], m4
= m
[4], m8
= m
[8], m12
= m
[12];
97 const float m1
= m
[1], m5
= m
[5], m9
= m
[9], m13
= m
[13];
98 const float m2
= m
[2], m6
= m
[6], m10
= m
[10], m14
= m
[14];
99 const float m3
= m
[3], m7
= m
[7], m11
= m
[11], m15
= m
[15];
101 const ubyte
*xyz
= fse
->src
[0] + start
* fse
->pitch
[0];
102 const ubyte
*st
= fse
->src
[2] + start
* fse
->pitch
[2];
104 float *out
= (float *)buffer
;
107 assert(fse
->pitch
[1] == 0);
109 /* loop over vertex attributes (vertex shader inputs)
111 for (i
= 0; i
< count
; i
++) {
113 const float *in
= (const float *)xyz
;
114 const float ix
= in
[0], iy
= in
[1], iz
= in
[2];
116 out
[0] = m0
* ix
+ m4
* iy
+ m8
* iz
+ m12
;
117 out
[1] = m1
* ix
+ m5
* iy
+ m9
* iz
+ m13
;
118 out
[2] = m2
* ix
+ m6
* iy
+ m10
* iz
+ m14
;
119 out
[3] = m3
* ix
+ m7
* iy
+ m11
* iz
+ m15
;
120 xyz
+= fse
->pitch
[0];
131 const float *in
= (const float *)st
; st
+= fse
->pitch
[2];
144 static void fetch_xyz_rgb( const struct fetch_shade_emit
*fse
,
151 const float *m
= (const float *)fse
->constants
;
152 const float m0
= m
[0], m4
= m
[4], m8
= m
[8], m12
= m
[12];
153 const float m1
= m
[1], m5
= m
[5], m9
= m
[9], m13
= m
[13];
154 const float m2
= m
[2], m6
= m
[6], m10
= m
[10], m14
= m
[14];
155 const float m3
= m
[3], m7
= m
[7], m11
= m
[11], m15
= m
[15];
157 const ubyte
*xyz
= fse
->src
[0] + start
* fse
->pitch
[0];
158 const ubyte
*rgb
= fse
->src
[1] + start
* fse
->pitch
[1];
160 float *out
= (float *)buffer
;
162 // debug_printf("rgb %f %f %f\n", rgb[0], rgb[1], rgb[2]);
165 for (i
= 0; i
< count
; i
++) {
167 const float *in
= (const float *)xyz
;
168 const float ix
= in
[0], iy
= in
[1], iz
= in
[2];
170 out
[0] = m0
* ix
+ m4
* iy
+ m8
* iz
+ m12
;
171 out
[1] = m1
* ix
+ m5
* iy
+ m9
* iz
+ m13
;
172 out
[2] = m2
* ix
+ m6
* iy
+ m10
* iz
+ m14
;
173 out
[3] = m3
* ix
+ m7
* iy
+ m11
* iz
+ m15
;
174 xyz
+= fse
->pitch
[0];
178 const float *in
= (const float *)rgb
;
183 rgb
+= fse
->pitch
[1];
193 static void fetch_xyz_rgb_psiz( const struct fetch_shade_emit
*fse
,
200 const float *m
= (const float *)fse
->constants
;
201 const float m0
= m
[0], m4
= m
[4], m8
= m
[8], m12
= m
[12];
202 const float m1
= m
[1], m5
= m
[5], m9
= m
[9], m13
= m
[13];
203 const float m2
= m
[2], m6
= m
[6], m10
= m
[10], m14
= m
[14];
204 const float m3
= m
[3], m7
= m
[7], m11
= m
[11], m15
= m
[15];
206 const ubyte
*xyz
= fse
->src
[0] + start
* fse
->pitch
[0];
207 const float *rgb
= (const float *)(fse
->src
[1] + start
* fse
->pitch
[1]);
208 const float psiz
= 1.0;
210 float *out
= (float *)buffer
;
213 assert(fse
->pitch
[1] == 0);
215 for (i
= 0; i
< count
; i
++) {
217 const float *in
= (const float *)xyz
;
218 const float ix
= in
[0], iy
= in
[1], iz
= in
[2];
220 out
[0] = m0
* ix
+ m4
* iy
+ m8
* iz
+ m12
;
221 out
[1] = m1
* ix
+ m5
* iy
+ m9
* iz
+ m13
;
222 out
[2] = m2
* ix
+ m6
* iy
+ m10
* iz
+ m14
;
223 out
[3] = m3
* ix
+ m7
* iy
+ m11
* iz
+ m15
;
224 xyz
+= fse
->pitch
[0];
245 static boolean
set_prim( struct fetch_shade_emit
*fse
,
249 struct draw_context
*draw
= fse
->draw
;
254 case PIPE_PRIM_LINE_LOOP
:
257 draw
->render
->set_primitive( draw
->render
, PIPE_PRIM_LINE_STRIP
);
260 case PIPE_PRIM_TRIANGLE_FAN
:
261 case PIPE_PRIM_POLYGON
:
264 draw
->render
->set_primitive( draw
->render
, prim
);
267 case PIPE_PRIM_QUADS
:
268 case PIPE_PRIM_QUAD_STRIP
:
269 draw
->render
->set_primitive( draw
->render
, PIPE_PRIM_TRIANGLES
);
273 draw
->render
->set_primitive( draw
->render
, prim
);
285 static void fse_prepare( struct draw_pt_front_end
*fe
,
287 struct draw_pt_middle_end
*unused
,
290 struct fetch_shade_emit
*fse
= (struct fetch_shade_emit
*)fe
;
291 struct draw_context
*draw
= fse
->draw
;
292 unsigned num_vs_inputs
= draw
->vertex_shader
->info
.num_inputs
;
293 unsigned num_vs_outputs
= draw
->vertex_shader
->info
.num_outputs
;
294 const struct vertex_info
*vinfo
;
296 boolean need_psize
= 0;
299 if (draw
->pt
.user
.elts
) {
304 if (!set_prim(fse
, prim
, /*count*/1022 )) {
309 /* Must do this after set_primitive() above:
311 vinfo
= draw
->render
->get_vertex_info(draw
->render
);
315 fse
->key
.nr_elements
= MAX2(num_vs_outputs
, /* outputs - translate to hw format */
316 num_vs_inputs
); /* inputs - fetch from api format */
318 fse
->key
.output_stride
= vinfo
->size
* 4;
319 memset(fse
->key
.element
, 0,
320 fse
->key
.nr_elements
* sizeof(fse
->key
.element
[0]));
322 for (i
= 0; i
< num_vs_inputs
; i
++) {
323 const struct pipe_vertex_element
*src
= &draw
->pt
.vertex_element
[i
];
324 fse
->key
.element
[i
].input_format
= src
->src_format
;
326 /* Consider ignoring these at this point, ie make generated
327 * programs independent of this state:
329 fse
->key
.element
[i
].input_buffer
= 0; //src->vertex_buffer_index;
330 fse
->key
.element
[i
].input_offset
= 0; //src->src_offset;
335 unsigned dst_offset
= 0;
337 for (i
= 0; i
< vinfo
->num_attribs
; i
++) {
338 unsigned emit_sz
= 0;
339 unsigned output_format
= PIPE_FORMAT_NONE
;
340 unsigned vs_output
= vinfo
->src_index
[i
];
342 switch (vinfo
->emit
[i
]) {
344 output_format
= PIPE_FORMAT_R32G32B32A32_FLOAT
;
345 emit_sz
= 4 * sizeof(float);
348 output_format
= PIPE_FORMAT_R32G32B32_FLOAT
;
349 emit_sz
= 3 * sizeof(float);
352 output_format
= PIPE_FORMAT_R32G32_FLOAT
;
353 emit_sz
= 2 * sizeof(float);
356 output_format
= PIPE_FORMAT_R32_FLOAT
;
357 emit_sz
= 1 * sizeof(float);
361 output_format
= PIPE_FORMAT_R32_FLOAT
;
362 emit_sz
= 1 * sizeof(float);
363 vs_output
= num_vs_outputs
+ 1;
371 /* The elements in the key correspond to vertex shader output
372 * numbers, not to positions in the hw vertex description --
373 * that's handled by the output_offset field.
375 fse
->key
.element
[vs_output
].output_format
= output_format
;
376 fse
->key
.element
[vs_output
].output_offset
= dst_offset
;
378 dst_offset
+= emit_sz
;
379 assert(fse
->key
.output_stride
>= dst_offset
);
383 /* To make psize work, really need to tell the vertex shader to
384 * copy that value from input->output. For 'translate' this was
385 * implicit for all elements.
389 unsigned input
= num_vs_inputs
+ 1;
390 const struct pipe_vertex_element
*src
= &draw
->pt
.vertex_element
[i
];
391 fse
->key
.element
[i
].input_format
= PIPE_FORMAT_R32_FLOAT
;
392 fse
->key
.element
[i
].input_buffer
= 0; //nr_buffers + 1;
393 fse
->key
.element
[i
].input_offset
= 0;
395 fse
->key
.nr_elements
+= 1;
400 fse
->constants
= draw
->pt
.user
.constants
;
402 /* Would normally look up a vertex shader and peruse its list of
403 * variants somehow. We omitted that step and put all the
404 * hardcoded "shaders" into an array. We're just making the
405 * assumption that this happens to be a matching shader... ie
406 * you're running isosurf, aren't you?
409 for (i
= 0; i
< fse
->nr_shaders
; i
++) {
410 if (translate_key_compare( &fse
->key
, &fse
->shader
[i
].key
) == 0)
411 fse
->active
= &fse
->shader
[i
];
419 /* Now set buffer pointers:
421 for (i
= 0; i
< num_vs_inputs
; i
++) {
422 unsigned buf
= draw
->pt
.vertex_element
[i
].vertex_buffer_index
;
424 fse
->src
[i
] = ((const ubyte
*) draw
->pt
.user
.vbuffer
[buf
] +
425 draw
->pt
.vertex_buffer
[buf
].buffer_offset
+
426 draw
->pt
.vertex_element
[i
].src_offset
);
428 fse
->pitch
[i
] = draw
->pt
.vertex_buffer
[buf
].pitch
;
441 #define INDEX(i) (start + (i))
442 static void fse_render_linear( struct vbuf_render
*render
,
451 case PIPE_PRIM_LINE_LOOP
:
452 tmp
= MALLOC( sizeof(ushort
) * (length
+ 1) );
454 for (i
= 0; i
< length
; i
++)
458 render
->draw( render
,
464 case PIPE_PRIM_QUAD_STRIP
:
465 tmp
= MALLOC( sizeof(ushort
) * (length
/ 2 * 6) );
467 for (j
= i
= 0; i
+ 3 < length
; i
+= 2, j
+= 6) {
468 tmp
[j
+0] = INDEX(i
+0);
469 tmp
[j
+1] = INDEX(i
+1);
470 tmp
[j
+2] = INDEX(i
+3);
472 tmp
[j
+3] = INDEX(i
+2);
473 tmp
[j
+4] = INDEX(i
+0);
474 tmp
[j
+5] = INDEX(i
+3);
478 render
->draw( render
, tmp
, j
);
481 case PIPE_PRIM_QUADS
:
482 tmp
= MALLOC( sizeof(int) * (length
/ 4 * 6) );
484 for (j
= i
= 0; i
+ 3 < length
; i
+= 4, j
+= 6) {
485 tmp
[j
+0] = INDEX(i
+0);
486 tmp
[j
+1] = INDEX(i
+1);
487 tmp
[j
+2] = INDEX(i
+3);
489 tmp
[j
+3] = INDEX(i
+1);
490 tmp
[j
+4] = INDEX(i
+2);
491 tmp
[j
+5] = INDEX(i
+3);
495 render
->draw( render
, tmp
, j
);
499 render
->draw_arrays( render
,
511 static boolean
do_draw( struct fetch_shade_emit
*fse
,
512 unsigned start
, unsigned count
)
514 struct draw_context
*draw
= fse
->draw
;
517 draw
->render
->allocate_vertices( draw
->render
,
518 (ushort
)fse
->key
.output_stride
,
524 /* Single routine to fetch vertices, run shader and emit HW verts.
525 * Clipping and viewport transformation are done on hardware.
527 fse
->active
->run_linear( fse
,
531 /* Draw arrays path to avoid re-emitting index list again and
534 fse_render_linear( draw
->render
,
540 draw
->render
->release_vertices( draw
->render
,
542 fse
->key
.output_stride
,
550 fse_run(struct draw_pt_front_end
*fe
,
551 pt_elt_func elt_func
,
555 struct fetch_shade_emit
*fse
= (struct fetch_shade_emit
*)fe
;
557 unsigned first
, incr
;
558 unsigned start
= elt_func(elt_ptr
, 0);
560 //debug_printf("%s prim %d start %d count %d\n", __FUNCTION__, prim, start, count);
562 draw_pt_split_prim(fse
->prim
, &first
, &incr
);
564 count
-= (count
- first
) % incr
;
566 while (i
+ first
<= count
) {
567 int nr
= MIN2( count
- i
, 1024 );
569 /* snap to prim boundary
571 nr
-= (nr
- first
) % incr
;
573 if (!do_draw( fse
, start
+ i
, nr
)) {
578 /* increment allowing for repeated vertices
580 i
+= nr
- (first
- incr
);
587 static void fse_finish( struct draw_pt_front_end
*frontend
)
593 fse_destroy( struct draw_pt_front_end
*frontend
)
598 struct draw_pt_front_end
*draw_pt_fetch_shade_emit( struct draw_context
*draw
)
600 struct fetch_shade_emit
*fse
= CALLOC_STRUCT(fetch_shade_emit
);
604 fse
->base
.prepare
= fse_prepare
;
605 fse
->base
.run
= fse_run
;
606 fse
->base
.finish
= fse_finish
;
607 fse
->base
.destroy
= fse_destroy
;
610 fse
->shader
[0].run_linear
= fetch_xyz_rgb_st
;
611 fse
->shader
[0].key
.nr_elements
= 3;
612 fse
->shader
[0].key
.output_stride
= 12 * sizeof(float);
614 fse
->shader
[0].key
.element
[0].input_format
= PIPE_FORMAT_R32G32B32_FLOAT
;
615 fse
->shader
[0].key
.element
[0].input_buffer
= 0;
616 fse
->shader
[0].key
.element
[0].input_offset
= 0;
617 fse
->shader
[0].key
.element
[0].output_format
= PIPE_FORMAT_R32G32B32A32_FLOAT
;
618 fse
->shader
[0].key
.element
[0].output_offset
= 0;
620 fse
->shader
[0].key
.element
[1].input_format
= PIPE_FORMAT_R32G32B32_FLOAT
;
621 fse
->shader
[0].key
.element
[1].input_buffer
= 0;
622 fse
->shader
[0].key
.element
[1].input_offset
= 0;
623 fse
->shader
[0].key
.element
[1].output_format
= PIPE_FORMAT_R32G32B32A32_FLOAT
;
624 fse
->shader
[0].key
.element
[1].output_offset
= 16;
626 fse
->shader
[0].key
.element
[1].input_format
= PIPE_FORMAT_R32G32_FLOAT
;
627 fse
->shader
[0].key
.element
[1].input_buffer
= 0;
628 fse
->shader
[0].key
.element
[1].input_offset
= 0;
629 fse
->shader
[0].key
.element
[1].output_format
= PIPE_FORMAT_R32G32B32A32_FLOAT
;
630 fse
->shader
[0].key
.element
[1].output_offset
= 32;
632 fse
->shader
[1].run_linear
= fetch_xyz_rgb
;
633 fse
->shader
[1].key
.nr_elements
= 2;
634 fse
->shader
[1].key
.output_stride
= 8 * sizeof(float);
636 fse
->shader
[1].key
.element
[0].input_format
= PIPE_FORMAT_R32G32B32_FLOAT
;
637 fse
->shader
[1].key
.element
[0].input_buffer
= 0;
638 fse
->shader
[1].key
.element
[0].input_offset
= 0;
639 fse
->shader
[1].key
.element
[0].output_format
= PIPE_FORMAT_R32G32B32A32_FLOAT
;
640 fse
->shader
[1].key
.element
[0].output_offset
= 0;
642 fse
->shader
[1].key
.element
[1].input_format
= PIPE_FORMAT_R32G32B32_FLOAT
;
643 fse
->shader
[1].key
.element
[1].input_buffer
= 0;
644 fse
->shader
[1].key
.element
[1].input_offset
= 0;
645 fse
->shader
[1].key
.element
[1].output_format
= PIPE_FORMAT_R32G32B32A32_FLOAT
;
646 fse
->shader
[1].key
.element
[1].output_offset
= 16;
648 fse
->shader
[2].run_linear
= fetch_xyz_rgb_psiz
;
649 fse
->shader
[2].key
.nr_elements
= 3;
650 fse
->shader
[2].key
.output_stride
= 9 * sizeof(float);
652 fse
->shader
[2].key
.element
[0].input_format
= PIPE_FORMAT_R32G32B32_FLOAT
;
653 fse
->shader
[2].key
.element
[0].input_buffer
= 0;
654 fse
->shader
[2].key
.element
[0].input_offset
= 0;
655 fse
->shader
[2].key
.element
[0].output_format
= PIPE_FORMAT_R32G32B32A32_FLOAT
;
656 fse
->shader
[2].key
.element
[0].output_offset
= 0;
658 fse
->shader
[2].key
.element
[1].input_format
= PIPE_FORMAT_R32G32B32_FLOAT
;
659 fse
->shader
[2].key
.element
[1].input_buffer
= 0;
660 fse
->shader
[2].key
.element
[1].input_offset
= 0;
661 fse
->shader
[2].key
.element
[1].output_format
= PIPE_FORMAT_R32G32B32A32_FLOAT
;
662 fse
->shader
[2].key
.element
[1].output_offset
= 16;
665 * -- effectively add it here as another input!?!
666 * -- who knows how to add it as a buffer?
668 fse
->shader
[2].key
.element
[2].input_format
= PIPE_FORMAT_R32_FLOAT
;
669 fse
->shader
[2].key
.element
[2].input_buffer
= 0;
670 fse
->shader
[2].key
.element
[2].input_offset
= 0;
671 fse
->shader
[2].key
.element
[2].output_format
= PIPE_FORMAT_R32_FLOAT
;
672 fse
->shader
[2].key
.element
[2].output_offset
= 32;