draw: account for separate shader objects in geometry shader code
[mesa.git] / src / gallium / auxiliary / draw / draw_gs.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "draw_gs.h"
29
30 #include "draw_private.h"
31 #include "draw_context.h"
32
33 #include "tgsi/tgsi_parse.h"
34 #include "tgsi/tgsi_exec.h"
35
36 #include "pipe/p_shader_tokens.h"
37
38 #include "util/u_math.h"
39 #include "util/u_memory.h"
40 #include "util/u_prim.h"
41
42 /* fixme: move it from here */
43 #define MAX_PRIMITIVES 64
44
45 boolean
46 draw_gs_init( struct draw_context *draw )
47 {
48 draw->gs.tgsi.machine = tgsi_exec_machine_create();
49 if (!draw->gs.tgsi.machine)
50 return FALSE;
51
52 draw->gs.tgsi.machine->Primitives = align_malloc(
53 MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector), 16);
54 if (!draw->gs.tgsi.machine->Primitives)
55 return FALSE;
56 memset(draw->gs.tgsi.machine->Primitives, 0,
57 MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector));
58
59 return TRUE;
60 }
61
62 void draw_gs_destroy( struct draw_context *draw )
63 {
64 if (!draw->gs.tgsi.machine)
65 return;
66
67 align_free(draw->gs.tgsi.machine->Primitives);
68
69 tgsi_exec_machine_destroy(draw->gs.tgsi.machine);
70 }
71
72 struct draw_geometry_shader *
73 draw_create_geometry_shader(struct draw_context *draw,
74 const struct pipe_shader_state *state)
75 {
76 struct draw_geometry_shader *gs;
77 unsigned i;
78
79 gs = CALLOC_STRUCT(draw_geometry_shader);
80
81 if (!gs)
82 return NULL;
83
84 gs->draw = draw;
85 gs->state = *state;
86 gs->state.tokens = tgsi_dup_tokens(state->tokens);
87 if (!gs->state.tokens) {
88 FREE(gs);
89 return NULL;
90 }
91
92 tgsi_scan_shader(state->tokens, &gs->info);
93
94 /* setup the defaults */
95 gs->input_primitive = PIPE_PRIM_TRIANGLES;
96 gs->output_primitive = PIPE_PRIM_TRIANGLE_STRIP;
97 gs->max_output_vertices = 32;
98
99 for (i = 0; i < gs->info.num_properties; ++i) {
100 if (gs->info.properties[i].name ==
101 TGSI_PROPERTY_GS_INPUT_PRIM)
102 gs->input_primitive = gs->info.properties[i].data[0];
103 else if (gs->info.properties[i].name ==
104 TGSI_PROPERTY_GS_OUTPUT_PRIM)
105 gs->output_primitive = gs->info.properties[i].data[0];
106 else if (gs->info.properties[i].name ==
107 TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES)
108 gs->max_output_vertices = gs->info.properties[i].data[0];
109 }
110
111 gs->machine = draw->gs.tgsi.machine;
112
113 if (gs)
114 {
115 uint i;
116 for (i = 0; i < gs->info.num_outputs; i++) {
117 if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_POSITION &&
118 gs->info.output_semantic_index[i] == 0)
119 gs->position_output = i;
120 }
121 }
122
123 return gs;
124 }
125
126 void draw_bind_geometry_shader(struct draw_context *draw,
127 struct draw_geometry_shader *dgs)
128 {
129 draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE);
130
131 if (dgs) {
132 draw->gs.geometry_shader = dgs;
133 draw->gs.num_gs_outputs = dgs->info.num_outputs;
134 draw->gs.position_output = dgs->position_output;
135 draw_geometry_shader_prepare(dgs, draw);
136 }
137 else {
138 draw->gs.geometry_shader = NULL;
139 draw->gs.num_gs_outputs = 0;
140 }
141 }
142
143 void draw_delete_geometry_shader(struct draw_context *draw,
144 struct draw_geometry_shader *dgs)
145 {
146 FREE(dgs->primitive_lengths);
147 FREE((void*) dgs->state.tokens);
148 FREE(dgs);
149 }
150
151 static INLINE int
152 draw_gs_get_input_index(int semantic, int index,
153 const struct tgsi_shader_info *input_info)
154 {
155 int i;
156 const ubyte *input_semantic_names = input_info->output_semantic_name;
157 const ubyte *input_semantic_indices = input_info->output_semantic_index;
158 for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
159 if (input_semantic_names[i] == semantic &&
160 input_semantic_indices[i] == index)
161 return i;
162 }
163 debug_assert(0);
164 return -1;
165 }
166
167 /*#define DEBUG_OUTPUTS 1*/
168 static INLINE void
169 draw_geometry_fetch_outputs(struct draw_geometry_shader *shader,
170 unsigned num_primitives,
171 float (**p_output)[4])
172 {
173 struct tgsi_exec_machine *machine = shader->machine;
174 unsigned prim_idx, j, slot;
175 float (*output)[4];
176
177 output = *p_output;
178
179 /* Unswizzle all output results.
180 */
181
182 for (prim_idx = 0; prim_idx < num_primitives; ++prim_idx) {
183 unsigned num_verts_per_prim = machine->Primitives[prim_idx];
184 shader->primitive_lengths[prim_idx + shader->emitted_primitives] =
185 machine->Primitives[prim_idx];
186 shader->emitted_vertices += num_verts_per_prim;
187 for (j = 0; j < num_verts_per_prim; j++) {
188 int idx = (prim_idx * num_verts_per_prim + j) *
189 shader->info.num_outputs;
190 #ifdef DEBUG_OUTPUTS
191 debug_printf("%d) Output vert:\n", idx / shader->info.num_outputs);
192 #endif
193 for (slot = 0; slot < shader->info.num_outputs; slot++) {
194 output[slot][0] = machine->Outputs[idx + slot].xyzw[0].f[0];
195 output[slot][1] = machine->Outputs[idx + slot].xyzw[1].f[0];
196 output[slot][2] = machine->Outputs[idx + slot].xyzw[2].f[0];
197 output[slot][3] = machine->Outputs[idx + slot].xyzw[3].f[0];
198 #ifdef DEBUG_OUTPUTS
199 debug_printf("\t%d: %f %f %f %f\n", slot,
200 output[slot][0],
201 output[slot][1],
202 output[slot][2],
203 output[slot][3]);
204 #endif
205 debug_assert(!util_is_inf_or_nan(output[slot][0]));
206 }
207 output = (float (*)[4])((char *)output + shader->vertex_size);
208 }
209 }
210 *p_output = output;
211 shader->emitted_primitives += num_primitives;
212 }
213
214 /*#define DEBUG_INPUTS 1*/
215 static void draw_fetch_gs_input(struct draw_geometry_shader *shader,
216 unsigned *indices,
217 unsigned num_vertices,
218 unsigned prim_idx)
219 {
220 struct tgsi_exec_machine *machine = shader->machine;
221 unsigned slot, vs_slot, i;
222 unsigned input_vertex_stride = shader->input_vertex_stride;
223 const float (*input_ptr)[4];
224
225 input_ptr = shader->input;
226
227 for (i = 0; i < num_vertices; ++i) {
228 const float (*input)[4];
229 #if DEBUG_INPUTS
230 debug_printf("%d) vertex index = %d (prim idx = %d)\n",
231 i, indices[i], prim_idx);
232 #endif
233 input = (const float (*)[4])(
234 (const char *)input_ptr + (indices[i] * input_vertex_stride));
235 for (slot = 0, vs_slot = 0; slot < shader->info.num_inputs; ++slot) {
236 unsigned idx = i * TGSI_EXEC_MAX_INPUT_ATTRIBS + slot;
237 if (shader->info.input_semantic_name[slot] == TGSI_SEMANTIC_PRIMID) {
238 machine->Inputs[idx].xyzw[0].f[prim_idx] =
239 (float)shader->in_prim_idx;
240 machine->Inputs[idx].xyzw[1].f[prim_idx] =
241 (float)shader->in_prim_idx;
242 machine->Inputs[idx].xyzw[2].f[prim_idx] =
243 (float)shader->in_prim_idx;
244 machine->Inputs[idx].xyzw[3].f[prim_idx] =
245 (float)shader->in_prim_idx;
246 } else {
247 vs_slot = draw_gs_get_input_index(
248 shader->info.input_semantic_name[slot],
249 shader->info.input_semantic_index[slot],
250 shader->input_info);
251 #if DEBUG_INPUTS
252 debug_printf("\tSlot = %d, vs_slot = %d, idx = %d:\n",
253 slot, vs_slot, idx);
254 #endif
255 #if 1
256 assert(!util_is_inf_or_nan(input[vs_slot][0]));
257 assert(!util_is_inf_or_nan(input[vs_slot][1]));
258 assert(!util_is_inf_or_nan(input[vs_slot][2]));
259 assert(!util_is_inf_or_nan(input[vs_slot][3]));
260 #endif
261 machine->Inputs[idx].xyzw[0].f[prim_idx] = input[vs_slot][0];
262 machine->Inputs[idx].xyzw[1].f[prim_idx] = input[vs_slot][1];
263 machine->Inputs[idx].xyzw[2].f[prim_idx] = input[vs_slot][2];
264 machine->Inputs[idx].xyzw[3].f[prim_idx] = input[vs_slot][3];
265 #if DEBUG_INPUTS
266 debug_printf("\t\t%f %f %f %f\n",
267 machine->Inputs[idx].xyzw[0].f[prim_idx],
268 machine->Inputs[idx].xyzw[1].f[prim_idx],
269 machine->Inputs[idx].xyzw[2].f[prim_idx],
270 machine->Inputs[idx].xyzw[3].f[prim_idx]);
271 #endif
272 ++vs_slot;
273 }
274 }
275 }
276 }
277
278 static void gs_flush(struct draw_geometry_shader *shader,
279 unsigned input_primitives)
280 {
281 unsigned out_prim_count;
282 struct tgsi_exec_machine *machine = shader->machine;
283
284 debug_assert(input_primitives > 0 &&
285 input_primitives < 4);
286
287 tgsi_set_exec_mask(machine,
288 1,
289 input_primitives > 1,
290 input_primitives > 2,
291 input_primitives > 3);
292
293 /* run interpreter */
294 tgsi_exec_machine_run(machine);
295
296 out_prim_count =
297 machine->Temps[TGSI_EXEC_TEMP_PRIMITIVE_I].xyzw[TGSI_EXEC_TEMP_PRIMITIVE_C].u[0];
298
299 #if 0
300 debug_printf("PRIM emitted prims = %d (verts=%d), cur prim count = %d\n",
301 shader->emitted_primitives, shader->emitted_vertices,
302 out_prim_count);
303 #endif
304 draw_geometry_fetch_outputs(shader, out_prim_count,
305 &shader->tmp_output);
306 }
307
308 static void gs_point(struct draw_geometry_shader *shader,
309 int idx)
310 {
311 unsigned indices[1];
312
313 indices[0] = idx;
314
315 draw_fetch_gs_input(shader, indices, 1, 0);
316 ++shader->in_prim_idx;
317
318 gs_flush(shader, 1);
319 }
320
321 static void gs_line(struct draw_geometry_shader *shader,
322 int i0, int i1)
323 {
324 unsigned indices[2];
325
326 indices[0] = i0;
327 indices[1] = i1;
328
329 draw_fetch_gs_input(shader, indices, 2, 0);
330 ++shader->in_prim_idx;
331
332 gs_flush(shader, 1);
333 }
334
335 static void gs_line_adj(struct draw_geometry_shader *shader,
336 int i0, int i1, int i2, int i3)
337 {
338 unsigned indices[4];
339
340 indices[0] = i0;
341 indices[1] = i1;
342 indices[2] = i2;
343 indices[3] = i3;
344
345 draw_fetch_gs_input(shader, indices, 4, 0);
346 ++shader->in_prim_idx;
347
348 gs_flush(shader, 1);
349 }
350
351 static void gs_tri(struct draw_geometry_shader *shader,
352 int i0, int i1, int i2)
353 {
354 unsigned indices[3];
355
356 indices[0] = i0;
357 indices[1] = i1;
358 indices[2] = i2;
359
360 draw_fetch_gs_input(shader, indices, 3, 0);
361 ++shader->in_prim_idx;
362
363 gs_flush(shader, 1);
364 }
365
366 static void gs_tri_adj(struct draw_geometry_shader *shader,
367 int i0, int i1, int i2,
368 int i3, int i4, int i5)
369 {
370 unsigned indices[6];
371
372 indices[0] = i0;
373 indices[1] = i1;
374 indices[2] = i2;
375 indices[3] = i3;
376 indices[4] = i4;
377 indices[5] = i5;
378
379 draw_fetch_gs_input(shader, indices, 6, 0);
380 ++shader->in_prim_idx;
381
382 gs_flush(shader, 1);
383 }
384
385 #define FUNC gs_run
386 #define GET_ELT(idx) (idx)
387 #include "draw_gs_tmp.h"
388
389
390 #define FUNC gs_run_elts
391 #define LOCAL_VARS const ushort *elts = input_prims->elts;
392 #define GET_ELT(idx) (elts[idx])
393 #include "draw_gs_tmp.h"
394
395
396 /**
397 * Execute geometry shader using TGSI interpreter.
398 */
399 int draw_geometry_shader_run(struct draw_geometry_shader *shader,
400 const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
401 const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS],
402 const struct draw_vertex_info *input_verts,
403 const struct draw_prim_info *input_prim,
404 const struct tgsi_shader_info *input_info,
405 struct draw_vertex_info *output_verts,
406 struct draw_prim_info *output_prims )
407 {
408 const float (*input)[4] = (const float (*)[4])input_verts->verts->data;
409 unsigned input_stride = input_verts->vertex_size;
410 unsigned num_outputs = shader->info.num_outputs;
411 unsigned vertex_size = sizeof(struct vertex_header) + num_outputs * 4 * sizeof(float);
412 struct tgsi_exec_machine *machine = shader->machine;
413 unsigned num_input_verts = input_prim->linear ?
414 input_verts->count :
415 input_prim->count;
416 unsigned num_in_primitives =
417 MAX2(u_gs_prims_for_vertices(input_prim->prim, num_input_verts),
418 u_gs_prims_for_vertices(shader->input_primitive, num_input_verts));
419 unsigned max_out_prims = u_gs_prims_for_vertices(shader->output_primitive,
420 shader->max_output_vertices)
421 * num_in_primitives;
422
423 output_verts->vertex_size = vertex_size;
424 output_verts->stride = output_verts->vertex_size;
425 output_verts->verts =
426 (struct vertex_header *)MALLOC(output_verts->vertex_size *
427 num_in_primitives *
428 shader->max_output_vertices);
429
430
431 #if 0
432 debug_printf("%s count = %d (in prims # = %d)\n",
433 __FUNCTION__, num_input_verts, num_in_primitives);
434 debug_printf("\tlinear = %d, prim_info->count = %d\n",
435 input_prim->linear, input_prim->count);
436 debug_printf("\tprim pipe = %s, shader in = %s, shader out = %s, max out = %d\n",
437 u_prim_name(input_prim->prim),
438 u_prim_name(shader->input_primitive),
439 u_prim_name(shader->output_primitive),
440 shader->max_output_vertices);
441 #endif
442
443 shader->emitted_vertices = 0;
444 shader->emitted_primitives = 0;
445 shader->vertex_size = vertex_size;
446 shader->tmp_output = (float (*)[4])output_verts->verts->data;
447 shader->in_prim_idx = 0;
448 shader->input_vertex_stride = input_stride;
449 shader->input = input;
450 shader->input_info = input_info;
451 FREE(shader->primitive_lengths);
452 shader->primitive_lengths = MALLOC(max_out_prims * sizeof(unsigned));
453
454 tgsi_exec_set_constant_buffers(machine, PIPE_MAX_CONSTANT_BUFFERS,
455 constants, constants_size);
456
457 if (input_prim->linear)
458 gs_run(shader, input_prim, input_verts,
459 output_prims, output_verts);
460 else
461 gs_run_elts(shader, input_prim, input_verts,
462 output_prims, output_verts);
463
464 /* Update prim_info:
465 */
466 output_prims->linear = TRUE;
467 output_prims->elts = NULL;
468 output_prims->start = 0;
469 output_prims->count = shader->emitted_vertices;
470 output_prims->prim = shader->output_primitive;
471 output_prims->flags = 0x0;
472 output_prims->primitive_lengths = shader->primitive_lengths;
473 output_prims->primitive_count = shader->emitted_primitives;
474 output_verts->count = shader->emitted_vertices;
475
476 #if 0
477 debug_printf("GS finished, prims = %d, verts = %d\n",
478 output_prims->primitive_count,
479 output_verts->count);
480 #endif
481
482 return shader->emitted_vertices;
483 }
484
485 void draw_geometry_shader_prepare(struct draw_geometry_shader *shader,
486 struct draw_context *draw)
487 {
488 if (shader && shader->machine->Tokens != shader->state.tokens) {
489 tgsi_exec_machine_bind_shader(shader->machine,
490 shader->state.tokens,
491 draw->gs.tgsi.sampler);
492 }
493 }