Use write posting in the kickoff function too.
[mesa.git] / src / mesa / state_tracker / st_program.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 * Brian Paul
31 */
32
33
34 #include "main/imports.h"
35 #include "main/mtypes.h"
36
37 #include "pipe/p_context.h"
38 #include "pipe/p_defines.h"
39 #include "pipe/draw/draw_context.h"
40 #include "pipe/tgsi/exec/tgsi_core.h"
41
42 #include "st_context.h"
43 #include "st_cache.h"
44 #include "st_atom.h"
45 #include "st_program.h"
46 #include "st_mesa_to_tgsi.h"
47
48
49 #define TGSI_DEBUG 0
50
51
52 /**
53 * Translate a Mesa vertex shader into a TGSI shader.
54 * \param outputMapping to map vertex program output registers to TGSI
55 * output slots
56 * \param tokensOut destination for TGSI tokens
57 * \return pointer to cached pipe_shader object.
58 */
59 const struct cso_vertex_shader *
60 st_translate_vertex_program(struct st_context *st,
61 struct st_vertex_program *stvp,
62 const GLuint outputMapping[],
63 struct tgsi_token *tokensOut,
64 GLuint maxTokens)
65 {
66 GLuint defaultOutputMapping[VERT_RESULT_MAX];
67 struct pipe_shader_state vs;
68 const struct cso_vertex_shader *cso;
69 GLuint attr, i;
70
71 memset(&vs, 0, sizeof(vs));
72
73 /*
74 * Determine number of inputs, the mappings between VERT_ATTRIB_x
75 * and TGSI generic input indexes, plus input attrib semantic info.
76 */
77 for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
78 if (stvp->Base.Base.InputsRead & (1 << attr)) {
79 const GLuint slot = vs.num_inputs;
80
81 vs.num_inputs++;
82
83 stvp->input_to_index[attr] = slot;
84 stvp->index_to_input[slot] = attr;
85
86 switch (attr) {
87 case VERT_ATTRIB_POS:
88 vs.input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
89 vs.input_semantic_index[slot] = 0;
90 break;
91 case VERT_ATTRIB_WEIGHT:
92 /* fall-through */
93 case VERT_ATTRIB_NORMAL:
94 /* just label as a generic */
95 vs.input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
96 vs.input_semantic_index[slot] = 0;
97 break;
98 case VERT_ATTRIB_COLOR0:
99 vs.input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
100 vs.input_semantic_index[slot] = 0;
101 break;
102 case VERT_ATTRIB_COLOR1:
103 vs.input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
104 vs.input_semantic_index[slot] = 1;
105 break;
106 case VERT_ATTRIB_FOG:
107 vs.input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
108 vs.input_semantic_index[slot] = 0;
109 break;
110 case VERT_ATTRIB_TEX0:
111 case VERT_ATTRIB_TEX1:
112 case VERT_ATTRIB_TEX2:
113 case VERT_ATTRIB_TEX3:
114 case VERT_ATTRIB_TEX4:
115 case VERT_ATTRIB_TEX5:
116 case VERT_ATTRIB_TEX6:
117 case VERT_ATTRIB_TEX7:
118 vs.input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
119 vs.input_semantic_index[slot] = attr - VERT_ATTRIB_TEX0;
120 break;
121 case VERT_ATTRIB_GENERIC0:
122 case VERT_ATTRIB_GENERIC1:
123 case VERT_ATTRIB_GENERIC2:
124 case VERT_ATTRIB_GENERIC3:
125 case VERT_ATTRIB_GENERIC4:
126 case VERT_ATTRIB_GENERIC5:
127 case VERT_ATTRIB_GENERIC6:
128 case VERT_ATTRIB_GENERIC7:
129 assert(attr < VERT_ATTRIB_MAX);
130 vs.input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
131 vs.input_semantic_index[slot] = attr - VERT_ATTRIB_GENERIC0;
132 break;
133 default:
134 assert(0);
135 }
136 }
137 }
138
139 /* initialize output semantics to defaults */
140 for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
141 vs.output_semantic_name[i] = TGSI_SEMANTIC_GENERIC;
142 vs.output_semantic_index[i] = 0;
143 }
144
145 /*
146 * Determine number of outputs, the (default) output register
147 * mapping and the semantic information for each output.
148 */
149 for (attr = 0; attr < VERT_RESULT_MAX; attr++) {
150 if (stvp->Base.Base.OutputsWritten & (1 << attr)) {
151 GLuint slot;
152
153 if (outputMapping) {
154 slot = outputMapping[attr];
155 assert(slot != ~0);
156 }
157 else {
158 slot = vs.num_outputs;
159 vs.num_outputs++;
160 defaultOutputMapping[attr] = slot;
161 }
162
163 /*
164 printf("Output %u -> slot %u\n", attr, slot);
165 */
166
167 switch (attr) {
168 case VERT_RESULT_HPOS:
169 vs.output_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
170 vs.output_semantic_index[slot] = 0;
171 break;
172 case VERT_RESULT_COL0:
173 vs.output_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
174 vs.output_semantic_index[slot] = 0;
175 break;
176 case VERT_RESULT_COL1:
177 vs.output_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
178 vs.output_semantic_index[slot] = 1;
179 break;
180 case VERT_RESULT_BFC0:
181 vs.output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR;
182 vs.output_semantic_index[slot] = 0;
183 break;
184 case VERT_RESULT_BFC1:
185 vs.output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR;
186 vs.output_semantic_index[slot] = 1;
187 break;
188 case VERT_RESULT_FOGC:
189 vs.output_semantic_name[slot] = TGSI_SEMANTIC_FOG;
190 vs.output_semantic_index[slot] = 0;
191 break;
192 case VERT_RESULT_PSIZ:
193 vs.output_semantic_name[slot] = TGSI_SEMANTIC_PSIZE;
194 vs.output_semantic_index[slot] = 0;
195 break;
196 case VERT_RESULT_EDGE:
197 assert(0);
198 break;
199 case VERT_RESULT_TEX0:
200 case VERT_RESULT_TEX1:
201 case VERT_RESULT_TEX2:
202 case VERT_RESULT_TEX3:
203 case VERT_RESULT_TEX4:
204 case VERT_RESULT_TEX5:
205 case VERT_RESULT_TEX6:
206 case VERT_RESULT_TEX7:
207 vs.output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
208 vs.output_semantic_index[slot] = attr - VERT_RESULT_TEX0;
209 break;
210 case VERT_RESULT_VAR0:
211 /* fall-through */
212 default:
213 assert(attr - VERT_RESULT_VAR0 < MAX_VARYING);
214 vs.output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
215 vs.output_semantic_index[slot] = attr - VERT_RESULT_VAR0;
216 }
217 }
218 }
219
220
221 if (outputMapping) {
222 /* find max output slot referenced to compute vs.num_outputs */
223 GLuint maxSlot = 0;
224 for (attr = 0; attr < VERT_RESULT_MAX; attr++) {
225 if (outputMapping[attr] != ~0 && outputMapping[attr] > maxSlot)
226 maxSlot = outputMapping[attr];
227 }
228 vs.num_outputs = maxSlot + 1;
229 }
230 else {
231 outputMapping = defaultOutputMapping;
232 }
233
234 /* XXX: fix static allocation of tokens:
235 */
236 tgsi_translate_mesa_program( TGSI_PROCESSOR_VERTEX,
237 &stvp->Base.Base,
238 /* inputs */
239 vs.num_inputs,
240 stvp->input_to_index,
241 vs.input_semantic_name,
242 vs.input_semantic_index,
243 NULL,
244 /* outputs */
245 vs.num_outputs,
246 outputMapping,
247 vs.output_semantic_name,
248 vs.output_semantic_index,
249 /* tokenized result */
250 tokensOut, maxTokens);
251
252 vs.tokens = tokensOut;
253
254 cso = st_cached_vs_state(st, &vs);
255 stvp->vs = cso;
256
257 if (TGSI_DEBUG)
258 tgsi_dump( tokensOut, 0 );
259
260 return cso;
261 }
262
263
264
265 /**
266 * Translate a Mesa fragment shader into a TGSI shader.
267 * \param inputMapping to map fragment program input registers to TGSI
268 * input slots
269 * \param tokensOut destination for TGSI tokens
270 * \return pointer to cached pipe_shader object.
271 */
272 const struct cso_fragment_shader *
273 st_translate_fragment_program(struct st_context *st,
274 struct st_fragment_program *stfp,
275 const GLuint inputMapping[],
276 struct tgsi_token *tokensOut,
277 GLuint maxTokens)
278 {
279 GLuint outputMapping[FRAG_RESULT_MAX];
280 GLuint defaultInputMapping[FRAG_ATTRIB_MAX];
281 struct pipe_shader_state fs;
282 const struct cso_fragment_shader *cso;
283 GLuint interpMode[16]; /* XXX size? */
284 GLuint attr;
285 GLbitfield inputsRead = stfp->Base.Base.InputsRead;
286
287 /* For software rendering, we always need the fragment input position
288 * in order to calculate interpolated values.
289 * For i915, we always want to emit the semantic info for position.
290 */
291 inputsRead |= FRAG_BIT_WPOS;
292
293 memset(&fs, 0, sizeof(fs));
294
295 /*
296 * Convert Mesa program inputs to TGSI input register semantics.
297 */
298 for (attr = 0; attr < FRAG_ATTRIB_MAX; attr++) {
299 if (inputsRead & (1 << attr)) {
300 const GLuint slot = fs.num_inputs;
301
302 fs.num_inputs++;
303
304 defaultInputMapping[attr] = slot;
305
306 switch (attr) {
307 case FRAG_ATTRIB_WPOS:
308 fs.input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
309 fs.input_semantic_index[slot] = 0;
310 interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
311 break;
312 case FRAG_ATTRIB_COL0:
313 fs.input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
314 fs.input_semantic_index[slot] = 0;
315 interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
316 break;
317 case FRAG_ATTRIB_COL1:
318 fs.input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
319 fs.input_semantic_index[slot] = 1;
320 interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
321 break;
322 case FRAG_ATTRIB_FOGC:
323 fs.input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
324 fs.input_semantic_index[slot] = 0;
325 interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
326 break;
327 case FRAG_ATTRIB_TEX0:
328 case FRAG_ATTRIB_TEX1:
329 case FRAG_ATTRIB_TEX2:
330 case FRAG_ATTRIB_TEX3:
331 case FRAG_ATTRIB_TEX4:
332 case FRAG_ATTRIB_TEX5:
333 case FRAG_ATTRIB_TEX6:
334 case FRAG_ATTRIB_TEX7:
335 fs.input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
336 fs.input_semantic_index[slot] = attr - FRAG_ATTRIB_TEX0;
337 interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
338 break;
339 case FRAG_ATTRIB_VAR0:
340 /* fall-through */
341 default:
342 fs.input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
343 fs.input_semantic_index[slot] = attr - FRAG_ATTRIB_VAR0;
344 interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
345 }
346 }
347 }
348
349 /*
350 * Semantics and mapping for outputs
351 */
352 {
353 uint numColors = 0;
354 GLbitfield outputsWritten = stfp->Base.Base.OutputsWritten;
355
356 /* if z is written, emit that first */
357 if (outputsWritten & (1 << FRAG_RESULT_DEPR)) {
358 fs.output_semantic_name[fs.num_outputs] = TGSI_SEMANTIC_POSITION;
359 fs.output_semantic_index[fs.num_outputs] = 0;
360 outputMapping[FRAG_RESULT_DEPR] = fs.num_outputs;
361 fs.num_outputs++;
362 outputsWritten &= ~(1 << FRAG_RESULT_DEPR);
363 }
364
365 /* handle remaning outputs (color) */
366 for (attr = 0; attr < FRAG_RESULT_MAX; attr++) {
367 if (outputsWritten & (1 << attr)) {
368 switch (attr) {
369 case FRAG_RESULT_DEPR:
370 /* handled above */
371 assert(0);
372 break;
373 case FRAG_RESULT_COLR:
374 fs.output_semantic_name[fs.num_outputs] = TGSI_SEMANTIC_COLOR;
375 fs.output_semantic_index[fs.num_outputs] = numColors;
376 outputMapping[attr] = fs.num_outputs;
377 numColors++;
378 break;
379 default:
380 assert(0);
381 }
382 fs.num_outputs++;
383 }
384 }
385 }
386
387 if (!inputMapping)
388 inputMapping = defaultInputMapping;
389
390 /* XXX: fix static allocation of tokens:
391 */
392 tgsi_translate_mesa_program( TGSI_PROCESSOR_FRAGMENT,
393 &stfp->Base.Base,
394 /* inputs */
395 fs.num_inputs,
396 inputMapping,
397 fs.input_semantic_name,
398 fs.input_semantic_index,
399 interpMode,
400 /* outputs */
401 fs.num_outputs,
402 outputMapping,
403 fs.output_semantic_name,
404 fs.output_semantic_index,
405 /* tokenized result */
406 tokensOut, maxTokens);
407
408 fs.tokens = tokensOut;
409
410 cso = st_cached_fs_state(st, &fs);
411 stfp->fs = cso;
412
413 if (TGSI_DEBUG)
414 tgsi_dump( tokensOut, 0/*TGSI_DUMP_VERBOSE*/ );
415
416 return cso;
417 }
418