gallium: add PIPE_SHADER_CAP_SUBROUTINES
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_exec.h
1 /**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 * Copyright 2009-2010 VMware, Inc. All rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 #ifndef TGSI_EXEC_H
30 #define TGSI_EXEC_H
31
32 #include "pipe/p_compiler.h"
33 #include "pipe/p_state.h"
34
35 #if defined __cplusplus
36 extern "C" {
37 #endif
38
39
/* Basic executor dimensions. */
#define NUM_CHANNELS 4   /* R, G, B, A */
#define QUAD_SIZE    4   /* 4 pixels per quad */
42
43
44 /**
45 * Registers may be treated as float, signed int or unsigned int.
46 */
47 union tgsi_exec_channel
48 {
49 float f[QUAD_SIZE];
50 int i[QUAD_SIZE];
51 unsigned u[QUAD_SIZE];
52 };
53
54 /**
55 * A vector[RGBA] of channels[4 pixels]
56 */
57 struct tgsi_exec_vector
58 {
59 union tgsi_exec_channel xyzw[NUM_CHANNELS];
60 };
61
62 /**
63 * For fragment programs, information for computing fragment input
64 * values from plane equation of the triangle/line.
65 */
66 struct tgsi_interp_coef
67 {
68 float a0[NUM_CHANNELS]; /* in an xyzw layout */
69 float dadx[NUM_CHANNELS];
70 float dady[NUM_CHANNELS];
71 };
72
/**
 * Selector passed to tgsi_sampler::get_samples.
 * The enumerator names suggest a choice between LOD bias and explicit LOD;
 * the exact semantics are defined by the sampler implementation.
 */
enum tgsi_sampler_control {
   tgsi_sampler_lod_bias     = 0,
   tgsi_sampler_lod_explicit = 1
};
77
78 /**
79 * Information for sampling textures, which must be implemented
80 * by code outside the TGSI executor.
81 */
82 struct tgsi_sampler
83 {
84 /** Get samples for four fragments in a quad */
85 void (*get_samples)(struct tgsi_sampler *sampler,
86 const float s[QUAD_SIZE],
87 const float t[QUAD_SIZE],
88 const float p[QUAD_SIZE],
89 const float c0[QUAD_SIZE],
90 enum tgsi_sampler_control control,
91 float rgba[NUM_CHANNELS][QUAD_SIZE]);
92 };
93
/* Register file sizes. */
#define TGSI_EXEC_NUM_TEMPS       128
#define TGSI_EXEC_NUM_IMMEDIATES  256
#define TGSI_EXEC_NUM_TEMP_ARRAYS 8

/*
 * Locations of various utility registers (_I = Index, _C = Channel).
 *
 * The utility registers are placed directly after the program temporaries.
 * Layout, relative to TGSI_EXEC_NUM_TEMPS:
 *
 *   slot   channel 0   channel 1   channel 2   channel 3
 *   +0     00000000    7FFFFFFF    80000000    FFFFFFFF
 *   +1     ONE         TWO         128         MINUS_128
 *   +2     KILMASK     OUTPUT      PRIMITIVE   THREE
 *   +3     HALF        exec mask   -           -
 *   +4..7  general purpose buffer (R0, TGSI_EXEC_NUM_TEMP_R registers)
 *   +8     address register(s)
 *   +9     predicate register(s)
 */

/* Slot +0 */
#define TGSI_EXEC_TEMP_00000000_I   (TGSI_EXEC_NUM_TEMPS + 0)
#define TGSI_EXEC_TEMP_00000000_C   0

#define TGSI_EXEC_TEMP_7FFFFFFF_I   (TGSI_EXEC_NUM_TEMPS + 0)
#define TGSI_EXEC_TEMP_7FFFFFFF_C   1

#define TGSI_EXEC_TEMP_80000000_I   (TGSI_EXEC_NUM_TEMPS + 0)
#define TGSI_EXEC_TEMP_80000000_C   2

#define TGSI_EXEC_TEMP_FFFFFFFF_I   (TGSI_EXEC_NUM_TEMPS + 0)
#define TGSI_EXEC_TEMP_FFFFFFFF_C   3

/* Slot +1 */
#define TGSI_EXEC_TEMP_ONE_I        (TGSI_EXEC_NUM_TEMPS + 1)
#define TGSI_EXEC_TEMP_ONE_C        0

#define TGSI_EXEC_TEMP_TWO_I        (TGSI_EXEC_NUM_TEMPS + 1)
#define TGSI_EXEC_TEMP_TWO_C        1

#define TGSI_EXEC_TEMP_128_I        (TGSI_EXEC_NUM_TEMPS + 1)
#define TGSI_EXEC_TEMP_128_C        2

#define TGSI_EXEC_TEMP_MINUS_128_I  (TGSI_EXEC_NUM_TEMPS + 1)
#define TGSI_EXEC_TEMP_MINUS_128_C  3

/* Slot +2 */
#define TGSI_EXEC_TEMP_KILMASK_I    (TGSI_EXEC_NUM_TEMPS + 2)
#define TGSI_EXEC_TEMP_KILMASK_C    0

#define TGSI_EXEC_TEMP_OUTPUT_I     (TGSI_EXEC_NUM_TEMPS + 2)
#define TGSI_EXEC_TEMP_OUTPUT_C     1

#define TGSI_EXEC_TEMP_PRIMITIVE_I  (TGSI_EXEC_NUM_TEMPS + 2)
#define TGSI_EXEC_TEMP_PRIMITIVE_C  2

#define TGSI_EXEC_TEMP_THREE_I      (TGSI_EXEC_NUM_TEMPS + 2)
#define TGSI_EXEC_TEMP_THREE_C      3

/* Slot +3 */
#define TGSI_EXEC_TEMP_HALF_I       (TGSI_EXEC_NUM_TEMPS + 3)
#define TGSI_EXEC_TEMP_HALF_C       0

/* execution mask, each value is either 0 or ~0 */
#define TGSI_EXEC_MASK_I            (TGSI_EXEC_NUM_TEMPS + 3)
#define TGSI_EXEC_MASK_C            1

/* Slots +4..+7: 4 register buffer for various purposes */
#define TGSI_EXEC_TEMP_R0           (TGSI_EXEC_NUM_TEMPS + 4)
#define TGSI_EXEC_NUM_TEMP_R        4

/* Slot +8: address register */
#define TGSI_EXEC_TEMP_ADDR         (TGSI_EXEC_NUM_TEMPS + 8)
#define TGSI_EXEC_NUM_ADDRS         1

/* Slot +9: predicate register */
#define TGSI_EXEC_TEMP_P0           (TGSI_EXEC_NUM_TEMPS + 9)
#define TGSI_EXEC_NUM_PREDS         1

/* Number of slots occupied by the utility registers above (+0 .. +9). */
#define TGSI_EXEC_NUM_TEMP_EXTRAS   10
156
157
158
/* Nesting limits: every kind of control-flow nesting shares one bound. */
#define TGSI_EXEC_MAX_NESTING         32
#define TGSI_EXEC_MAX_COND_NESTING    TGSI_EXEC_MAX_NESTING
#define TGSI_EXEC_MAX_LOOP_NESTING    TGSI_EXEC_MAX_NESTING
#define TGSI_EXEC_MAX_SWITCH_NESTING  TGSI_EXEC_MAX_NESTING
#define TGSI_EXEC_MAX_CALL_NESTING    TGSI_EXEC_MAX_NESTING

/*
 * The maximum number of input attributes per vertex.
 * For 2D input register files this is also the stride between
 * two 1D arrays.
 */
#define TGSI_EXEC_MAX_INPUT_ATTRIBS 17

/* The maximum number of constant vectors per constant buffer. */
#define TGSI_EXEC_MAX_CONST_BUFFER 4096

/* The maximum number of vertices per primitive. */
#define TGSI_MAX_PRIM_VERTICES 6

/* The maximum number of primitives to be generated. */
#define TGSI_MAX_PRIMITIVES 64

/*
 * The maximum total number of vertices, scaled by PIPE_MAX_ATTRIBS:
 * the value is the product
 * (vertices per primitive) * (primitives) * PIPE_MAX_ATTRIBS.
 */
#define TGSI_MAX_TOTAL_VERTICES (TGSI_MAX_PRIM_VERTICES * TGSI_MAX_PRIMITIVES * PIPE_MAX_ATTRIBS)
183
184 /** function call/activation record */
185 struct tgsi_call_record
186 {
187 uint CondStackTop;
188 uint LoopStackTop;
189 uint ContStackTop;
190 int SwitchStackTop;
191 int BreakStackTop;
192 uint ReturnAddr;
193 };
194
195
196 /* Switch-case block state. */
197 struct tgsi_switch_record {
198 uint mask; /**< execution mask */
199 union tgsi_exec_channel selector; /**< a value case statements are compared to */
200 uint defaultMask; /**< non-execute mask for default case */
201 };
202
203
/**
 * Kind of construct a break applies to: a loop or a switch.
 */
enum tgsi_break_type {
   TGSI_EXEC_BREAK_INSIDE_LOOP   = 0,
   TGSI_EXEC_BREAK_INSIDE_SWITCH = 1
};


/* Break stack capacity: the loop and switch nesting limits added together. */
#define TGSI_EXEC_MAX_BREAK_STACK \
   (TGSI_EXEC_MAX_LOOP_NESTING + TGSI_EXEC_MAX_SWITCH_NESTING)
211
212
213 /**
214 * Run-time virtual machine state for executing TGSI shader.
215 */
216 struct tgsi_exec_machine
217 {
218 /* Total = program temporaries + internal temporaries
219 */
220 struct tgsi_exec_vector Temps[TGSI_EXEC_NUM_TEMPS +
221 TGSI_EXEC_NUM_TEMP_EXTRAS];
222 struct tgsi_exec_vector TempArray[TGSI_EXEC_NUM_TEMP_ARRAYS][TGSI_EXEC_NUM_TEMPS];
223
224 float Imms[TGSI_EXEC_NUM_IMMEDIATES][4];
225
226 float ImmArray[TGSI_EXEC_NUM_IMMEDIATES][4];
227
228 struct tgsi_exec_vector Inputs[TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS];
229 struct tgsi_exec_vector Outputs[TGSI_MAX_TOTAL_VERTICES];
230
231 struct tgsi_exec_vector *Addrs;
232 struct tgsi_exec_vector *Predicates;
233
234 struct tgsi_sampler **Samplers;
235
236 unsigned ImmLimit;
237
238 const void *Consts[PIPE_MAX_CONSTANT_BUFFERS];
239 unsigned ConstsSize[PIPE_MAX_CONSTANT_BUFFERS];
240
241 const struct tgsi_token *Tokens; /**< Declarations, instructions */
242 unsigned Processor; /**< TGSI_PROCESSOR_x */
243
244 /* GEOMETRY processor only. */
245 unsigned *Primitives;
246 unsigned NumOutputs;
247 unsigned MaxGeometryShaderOutputs;
248
249 /* FRAGMENT processor only. */
250 const struct tgsi_interp_coef *InterpCoefs;
251 struct tgsi_exec_vector QuadPos;
252 float Face; /**< +1 if front facing, -1 if back facing */
253
254 /* Conditional execution masks */
255 uint CondMask; /**< For IF/ELSE/ENDIF */
256 uint LoopMask; /**< For BGNLOOP/ENDLOOP */
257 uint ContMask; /**< For loop CONT statements */
258 uint FuncMask; /**< For function calls */
259 uint ExecMask; /**< = CondMask & LoopMask */
260
261 /* Current switch-case state. */
262 struct tgsi_switch_record Switch;
263
264 /* Current break type. */
265 enum tgsi_break_type BreakType;
266
267 /** Condition mask stack (for nested conditionals) */
268 uint CondStack[TGSI_EXEC_MAX_COND_NESTING];
269 int CondStackTop;
270
271 /** Loop mask stack (for nested loops) */
272 uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING];
273 int LoopStackTop;
274
275 /** Loop label stack */
276 uint LoopLabelStack[TGSI_EXEC_MAX_LOOP_NESTING];
277 int LoopLabelStackTop;
278
279 /** Loop continue mask stack (see comments in tgsi_exec.c) */
280 uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING];
281 int ContStackTop;
282
283 /** Switch case stack */
284 struct tgsi_switch_record SwitchStack[TGSI_EXEC_MAX_SWITCH_NESTING];
285 int SwitchStackTop;
286
287 enum tgsi_break_type BreakStack[TGSI_EXEC_MAX_BREAK_STACK];
288 int BreakStackTop;
289
290 /** Function execution mask stack (for executing subroutine code) */
291 uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING];
292 int FuncStackTop;
293
294 /** Function call stack for saving/restoring the program counter */
295 struct tgsi_call_record CallStack[TGSI_EXEC_MAX_CALL_NESTING];
296 int CallStackTop;
297
298 struct tgsi_full_instruction *Instructions;
299 uint NumInstructions;
300
301 struct tgsi_full_declaration *Declarations;
302 uint NumDeclarations;
303
304 };
305
306 struct tgsi_exec_machine *
307 tgsi_exec_machine_create( void );
308
309 void
310 tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach);
311
312
313 void
314 tgsi_exec_machine_bind_shader(
315 struct tgsi_exec_machine *mach,
316 const struct tgsi_token *tokens,
317 uint numSamplers,
318 struct tgsi_sampler **samplers);
319
320 uint
321 tgsi_exec_machine_run(
322 struct tgsi_exec_machine *mach );
323
324
325 void
326 tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach);
327
328
329 boolean
330 tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst);
331
332
333 static INLINE void
334 tgsi_set_kill_mask(struct tgsi_exec_machine *mach, unsigned mask)
335 {
336 mach->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0] =
337 mask;
338 }
339
340
341 /** Set execution mask values prior to executing the shader */
342 static INLINE void
343 tgsi_set_exec_mask(struct tgsi_exec_machine *mach,
344 boolean ch0, boolean ch1, boolean ch2, boolean ch3)
345 {
346 int *mask = mach->Temps[TGSI_EXEC_MASK_I].xyzw[TGSI_EXEC_MASK_C].i;
347 mask[0] = ch0 ? ~0 : 0;
348 mask[1] = ch1 ? ~0 : 0;
349 mask[2] = ch2 ? ~0 : 0;
350 mask[3] = ch3 ? ~0 : 0;
351 }
352
353
/*
 * Supply constant buffers to the machine: num_bufs buffer pointers in bufs,
 * with a parallel array of sizes in buf_sizes. The unit of the sizes is not
 * visible in this header -- confirm against the implementation.
 */
void
tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach,
                               unsigned num_bufs,
                               const void **bufs,
                               const unsigned *buf_sizes);
359
360
361 static INLINE int
362 tgsi_exec_get_shader_param(enum pipe_shader_cap param)
363 {
364 switch(param) {
365 case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
366 case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
367 case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
368 case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
369 return INT_MAX;
370 case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
371 return TGSI_EXEC_MAX_NESTING;
372 case PIPE_SHADER_CAP_MAX_INPUTS:
373 return TGSI_EXEC_MAX_INPUT_ATTRIBS;
374 case PIPE_SHADER_CAP_MAX_CONSTS:
375 return TGSI_EXEC_MAX_CONST_BUFFER;
376 case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
377 return PIPE_MAX_CONSTANT_BUFFERS;
378 case PIPE_SHADER_CAP_MAX_TEMPS:
379 return TGSI_EXEC_NUM_TEMPS;
380 case PIPE_SHADER_CAP_MAX_ADDRS:
381 return TGSI_EXEC_NUM_ADDRS;
382 case PIPE_SHADER_CAP_MAX_PREDS:
383 return TGSI_EXEC_NUM_PREDS;
384 case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
385 return 1;
386 case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
387 case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
388 case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
389 case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
390 return 1;
391 case PIPE_SHADER_CAP_SUBROUTINES:
392 return 1;
393 default:
394 return 0;
395 }
396 }
397
398 #if defined __cplusplus
399 } /* extern "C" */
400 #endif
401
402 #endif /* TGSI_EXEC_H */