gallivm: implement switch opcode
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi.h
1 /**************************************************************************
2 *
3 * Copyright 2011-2012 Advanced Micro Devices, Inc.
4 * Copyright 2009 VMware, Inc.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 * @author Tom Stellard <thomas.stellard@amd.com>
35 */
36
37 #ifndef LP_BLD_TGSI_H
38 #define LP_BLD_TGSI_H
39
40 #include "gallivm/lp_bld.h"
41 #include "gallivm/lp_bld_tgsi_action.h"
42 #include "gallivm/lp_bld_limits.h"
43 #include "lp_bld_type.h"
44 #include "pipe/p_compiler.h"
45 #include "pipe/p_state.h"
46 #include "tgsi/tgsi_exec.h"
47 #include "tgsi/tgsi_scan.h"
48 #include "tgsi/tgsi_info.h"
49
/*
 * All-ones integer constant.
 * NOTE(review): presumably a wildcard meaning "every channel" -- confirm at
 * the call sites.  The replacement list is parenthesized so the macro always
 * expands as a single operand.
 */
#define LP_CHAN_ALL (~0)

/*
 * NOTE(review): presumably the initial/maximum size of the instruction list
 * kept in struct lp_build_tgsi_context -- confirm in the implementation.
 */
#define LP_MAX_INSTRUCTIONS 256
53
/* Forward declarations of struct tags referenced by the declarations below. */
struct tgsi_full_declaration;
struct tgsi_full_immediate;
struct tgsi_full_instruction;
struct tgsi_full_src_register;
struct tgsi_opcode_info;
struct tgsi_token;
struct tgsi_shader_info;
struct lp_build_mask_context;
struct gallivm_state;
struct lp_derivatives;
struct lp_build_tgsi_gs_iface;
65
66
/**
 * Texture instruction modifier.
 *
 * Stored in lp_tgsi_texture_info::modifier and passed to
 * lp_build_sampler_aos::emit_fetch_texel.
 */
enum lp_build_tex_modifier {
   LP_BLD_TEX_MODIFIER_NONE = 0,
   LP_BLD_TEX_MODIFIER_PROJECTED,
   LP_BLD_TEX_MODIFIER_LOD_BIAS,
   LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
   LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
   LP_BLD_TEX_MODIFIER_LOD_ZERO
};
75
76
/**
 * Describe a channel of a register.
 *
 * The value can be a:
 * - immediate value (i.e. derived from an IMM register)
 * - CONST[n].x/y/z/w
 * - IN[n].x/y/z/w
 * - undetermined (when .file == TGSI_FILE_NULL)
 *
 * This is one of the analysis results, and is used to describe
 * the output color in terms of inputs.
 */
struct lp_tgsi_channel_info
{
   unsigned file:4; /* TGSI_FILE_* */
   unsigned swizzle:3; /* PIPE_SWIZZLE_x */
   /* Which member is meaningful depends on .file (see member comments). */
   union {
      uint32_t index;
      float value; /* for TGSI_FILE_IMMEDIATE */
   } u;
};
98
99
/**
 * Describe a texture sampler interpolator.
 *
 * The interpolation is described in terms of regular inputs.
 */
struct lp_tgsi_texture_info
{
   /* One channel description per texture coordinate component. */
   struct lp_tgsi_channel_info coord[4];
   unsigned target:8; /* TGSI_TEXTURE_* */
   unsigned sampler_unit:8;  /* Sampler unit */
   unsigned texture_unit:8;  /* Texture unit */
   unsigned modifier:8; /* LP_BLD_TEX_MODIFIER_* */
};
113
114
/**
 * Shader analysis results: the generic tgsi_shader_info plus the
 * texture/output descriptions declared below.
 */
struct lp_tgsi_info
{
   struct tgsi_shader_info base;

   /*
    * Whether any of the texture opcodes access a register file other than
    * TGSI_FILE_INPUT.
    *
    * We could also handle TGSI_FILE_CONST/IMMEDIATE here, but there is little
    * benefit.
    */
   unsigned indirect_textures:1;

   /*
    * Whether any immediate values are outside the range of 0 and 1
    */
   unsigned unclamped_immediates:1;

   /*
    * Texture opcode description. Aimed at detecting and describing direct
    * texture opcodes.
    */
   unsigned num_texs;
   struct lp_tgsi_texture_info tex[PIPE_MAX_SAMPLERS];

   /*
    * Output description. Aimed at detecting and describing simple blit
    * shaders.
    */
   struct lp_tgsi_channel_info output[PIPE_MAX_SHADER_OUTPUTS][4];

   /*
    * Shortcut pointers into the above (for fragment shaders).
    */
   const struct lp_tgsi_channel_info *cbuf[PIPE_MAX_COLOR_BUFS];
};
151
/**
 * Reference to system values.
 *
 * Passed to lp_build_tgsi_soa() and copied into
 * lp_build_tgsi_soa_context::system_values.
 */
struct lp_bld_tgsi_system_values {
   LLVMValueRef instance_id;
   LLVMValueRef vertex_id;
   LLVMValueRef prim_id;
};
160
161
/**
 * Sampler code generation interface.
 *
 * Although texture sampling is a requirement for TGSI translation, it is
 * a very different problem with several different approaches to it.  This
 * structure establishes an interface for texture sampling code generation, so
 * that we can easily use different texture sampling strategies.
 */
struct lp_build_sampler_soa
{
   /* Release the sampler object. */
   void
   (*destroy)( struct lp_build_sampler_soa *sampler );

   /*
    * Emit code for a texel fetch / texture sample; the result is written
    * through the texel pointer.  Arguments marked "optional" may be
    * omitted -- NOTE(review): presumably by passing NULL, confirm with
    * the implementations.
    */
   void
   (*emit_fetch_texel)( const struct lp_build_sampler_soa *sampler,
                        struct gallivm_state *gallivm,
                        struct lp_type type,
                        boolean is_fetch,
                        unsigned texture_index,
                        unsigned sampler_index,
                        const LLVMValueRef *coords,
                        const LLVMValueRef *offsets,
                        const struct lp_derivatives *derivs,
                        LLVMValueRef lod_bias, /* optional */
                        LLVMValueRef explicit_lod, /* optional */
                        LLVMValueRef *texel);

   /*
    * Emit code for a texture size query; the result is written through
    * sizes_out.
    */
   void
   (*emit_size_query)( const struct lp_build_sampler_soa *sampler,
                       struct gallivm_state *gallivm,
                       struct lp_type type,
                       unsigned unit,
                       boolean need_nr_mips,
                       LLVMValueRef explicit_lod, /* optional */
                       LLVMValueRef *sizes_out);
};
198
199
/**
 * Sampler code generation interface used by lp_build_tgsi_aos().
 *
 * Unlike the SoA interface, emit_fetch_texel returns the fetched value
 * directly and receives the derivatives by value rather than by pointer.
 */
struct lp_build_sampler_aos
{
   LLVMValueRef
   (*emit_fetch_texel)( struct lp_build_sampler_aos *sampler,
                        struct lp_build_context *bld,
                        unsigned target, /* TGSI_TEXTURE_* */
                        unsigned unit,
                        LLVMValueRef coords,
                        const struct lp_derivatives derivs,
                        enum lp_build_tex_modifier modifier);
};
211
212
/*
 * NOTE(review): presumably analyses the token stream and fills *info with
 * the results described by struct lp_tgsi_info -- the implementation is not
 * visible from this header.
 */
void
lp_build_tgsi_info(const struct tgsi_token *tokens,
                   struct lp_tgsi_info *info);
216
217
/*
 * TGSI to LLVM IR translation entry point taking SoA-style arguments:
 * inputs and outputs are arrays of four values per register.
 * NOTE(review): which arguments may be NULL (e.g. gs_iface for non-geometry
 * shaders) is not visible here -- confirm with the implementation.
 */
void
lp_build_tgsi_soa(struct gallivm_state *gallivm,
                  const struct tgsi_token *tokens,
                  struct lp_type type,
                  struct lp_build_mask_context *mask,
                  LLVMValueRef consts_ptr,
                  const struct lp_bld_tgsi_system_values *system_values,
                  const LLVMValueRef (*inputs)[4],
                  LLVMValueRef (*outputs)[4],
                  struct lp_build_sampler_soa *sampler,
                  const struct tgsi_shader_info *info,
                  const struct lp_build_tgsi_gs_iface *gs_iface);
230
231
/*
 * TGSI to LLVM IR translation entry point taking AoS-style arguments:
 * one value per input/output register plus a four-entry channel swizzle
 * (see struct lp_build_tgsi_aos_context for the swizzle layout).
 */
void
lp_build_tgsi_aos(struct gallivm_state *gallivm,
                  const struct tgsi_token *tokens,
                  struct lp_type type,
                  const unsigned char swizzles[4],
                  LLVMValueRef consts_ptr,
                  const LLVMValueRef *inputs,
                  LLVMValueRef *outputs,
                  struct lp_build_sampler_aos *sampler,
                  const struct tgsi_shader_info *info);
242
243
/*
 * Records whether a break belongs to a loop or to a switch
 * (see lp_exec_mask::break_type).
 */
enum lp_exec_mask_break_type {
   LP_EXEC_MASK_BREAK_TYPE_LOOP,
   LP_EXEC_MASK_BREAK_TYPE_SWITCH
};
248
249
/*
 * Execution-mask bookkeeping for control flow (conditionals, switches,
 * loops and calls).
 *
 * Each construct keeps its "current" values next to a stack of
 * LP_MAX_TGSI_NESTING saved entries with the same members.
 * NOTE(review): presumably the stacks save/restore the current values when
 * constructs nest -- confirm in the implementation.
 */
struct lp_exec_mask {
   struct lp_build_context *bld;

   boolean has_mask;
   boolean ret_in_main;

   LLVMTypeRef int_vec_type;

   /* Conditionals */
   LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
   int cond_stack_size;
   LLVMValueRef cond_mask;

   /* keep track if break belongs to switch or loop */
   enum lp_exec_mask_break_type break_type_stack[LP_MAX_TGSI_NESTING];
   enum lp_exec_mask_break_type break_type;

   /* Switches: stack entries mirror the five current-switch members below. */
   struct {
      LLVMValueRef switch_val;
      LLVMValueRef switch_mask;
      LLVMValueRef switch_mask_default;
      boolean switch_in_default;
      unsigned switch_pc;
   } switch_stack[LP_MAX_TGSI_NESTING];
   int switch_stack_size;
   LLVMValueRef switch_val;
   LLVMValueRef switch_mask; /* current switch exec mask */
   LLVMValueRef switch_mask_default; /* reverse of switch mask used for default */
   boolean switch_in_default; /* if switch exec is currently in default */
   unsigned switch_pc; /* when used points to default or endswitch-1 */

   /* Loops: stack entries mirror the four current-loop members. */
   LLVMBasicBlockRef loop_block;
   LLVMValueRef cont_mask;
   LLVMValueRef break_mask;
   LLVMValueRef break_var;
   struct {
      LLVMBasicBlockRef loop_block;
      LLVMValueRef cont_mask;
      LLVMValueRef break_mask;
      LLVMValueRef break_var;
   } loop_stack[LP_MAX_TGSI_NESTING];
   int loop_stack_size;

   /* Calls: each stack entry stores a pc together with a ret_mask. */
   LLVMValueRef ret_mask;
   struct {
      int pc;
      LLVMValueRef ret_mask;
   } call_stack[LP_MAX_TGSI_NESTING];
   int call_stack_size;

   LLVMValueRef exec_mask;
   LLVMValueRef loop_limiter;
};
302
/*
 * Instruction array with a capacity (max_instructions) and a fill count
 * (num_instructions).  struct lp_build_tgsi_context declares the same three
 * members directly.
 */
struct lp_build_tgsi_inst_list
{
   struct tgsi_full_instruction *instructions;
   uint max_instructions;
   uint num_instructions;
};
309
/*
 * Declare the tag at file scope BEFORE it appears in a parameter list.
 * A struct tag first mentioned inside a prototype has prototype scope only,
 * which would make the prototypes below refer to a type incompatible with
 * the real struct lp_build_tgsi_context unless some included header had
 * already declared it.
 */
struct lp_build_tgsi_context;


/*
 * NOTE(review): presumably initialises the instruction list stored in
 * bld_base (instructions / max_instructions / num_instructions); the meaning
 * of the return value is not visible from this header.
 */
unsigned lp_bld_tgsi_list_init(struct lp_build_tgsi_context * bld_base);


/*
 * NOTE(review): presumably appends a copy of *inst_to_add to the instruction
 * list in bld_base; the meaning of the return value is not visible from this
 * header.
 */
unsigned lp_bld_tgsi_add_instruction(
   struct lp_build_tgsi_context * bld_base,
   struct tgsi_full_instruction *inst_to_add);
319
320
/*
 * Signature of a source-operand fetch callback; one such callback is stored
 * per register file in lp_build_tgsi_context::emit_fetch_funcs.
 * NOTE(review): the last argument is presumably the swizzle/channel to
 * fetch -- confirm with the implementations.
 */
typedef LLVMValueRef (*lp_build_emit_fetch_fn)(struct lp_build_tgsi_context *,
                                        const struct tgsi_full_src_register *,
                                        enum tgsi_opcode_type,
                                        unsigned);
325
/*
 * Translation context shared by the SoA and AoS back ends.  Both
 * lp_build_tgsi_soa_context and lp_build_tgsi_aos_context embed it as their
 * first member (bld_base).
 */
struct lp_build_tgsi_context
{
   struct lp_build_context base;

   struct lp_build_context uint_bld;
   struct lp_build_context int_bld;

   /** This array stores functions that are used to transform TGSI opcodes to
     * LLVM instructions.
     */
   struct lp_build_tgsi_action op_actions[TGSI_OPCODE_LAST];

   /* TGSI_OPCODE_RSQ is defined as 1 / sqrt( abs(src0.x) ), rsq_action
    * should compute 1 / sqrt (src0.x) */
   struct lp_build_tgsi_action rsq_action;

   struct lp_build_tgsi_action sqrt_action;

   const struct tgsi_shader_info *info;

   /* One fetch callback per TGSI register file. */
   lp_build_emit_fetch_fn emit_fetch_funcs[TGSI_FILE_COUNT];

   LLVMValueRef (*emit_swizzle)(struct lp_build_tgsi_context *,
                         LLVMValueRef, unsigned, unsigned, unsigned, unsigned);

   void (*emit_store)(struct lp_build_tgsi_context *,
                      const struct tgsi_full_instruction *,
                      const struct tgsi_opcode_info *,
                      LLVMValueRef dst[4]);

   void (*emit_declaration)(struct lp_build_tgsi_context *,
                             const struct tgsi_full_declaration *decl);

   void (*emit_immediate)(struct lp_build_tgsi_context *,
                          const struct tgsi_full_immediate *imm);


   /* Allow the user to store data in this structure rather than passing it
    * to every function. */
   void * userdata;

   boolean soa;

   int pc;

   /* Instruction list: array, capacity and number of entries in use. */
   struct tgsi_full_instruction *instructions;
   uint max_instructions;
   uint num_instructions;

   /** This function allows the user to insert some instructions at the
     * beginning of the program.  It is optional and does not need to be
     * implemented.
     */
   void (*emit_prologue)(struct lp_build_tgsi_context*);

   /** This function allows the user to insert some instructions at the end of
     * the program.  This callback is intended to be used for emitting
     * instructions to handle the export for the output registers, but it can
     * be used for any purpose.  Implementing this function is optional, but
     * recommended.
     */
   void (*emit_epilogue)(struct lp_build_tgsi_context*);
};
389
/*
 * Callback interface supplied to lp_build_tgsi_soa() through its gs_iface
 * argument.
 * NOTE(review): presumably implemented by the geometry-shader caller to
 * fetch inputs and to handle vertex/primitive emission -- confirm with the
 * users of this interface.
 */
struct lp_build_tgsi_gs_iface
{
   LLVMValueRef (*fetch_input)(const struct lp_build_tgsi_gs_iface *gs_iface,
                               struct lp_build_tgsi_context * bld_base,
                               boolean is_indirect,
                               LLVMValueRef vertex_index,
                               LLVMValueRef attrib_index,
                               LLVMValueRef swizzle_index);
   void (*emit_vertex)(const struct lp_build_tgsi_gs_iface *gs_iface,
                       struct lp_build_tgsi_context * bld_base,
                       LLVMValueRef (*outputs)[4],
                       LLVMValueRef emitted_vertices_vec);
   void (*end_primitive)(const struct lp_build_tgsi_gs_iface *gs_iface,
                         struct lp_build_tgsi_context * bld_base,
                         LLVMValueRef verts_per_prim_vec,
                         LLVMValueRef emitted_prims_vec);
   void (*gs_epilogue)(const struct lp_build_tgsi_gs_iface *gs_iface,
                       struct lp_build_tgsi_context * bld_base,
                       LLVMValueRef total_emitted_vertices_vec,
                       LLVMValueRef emitted_prims_vec);
};
411
/*
 * SoA translation context.  bld_base must stay the first member:
 * lp_soa_context() converts a struct lp_build_tgsi_context pointer to this
 * type with a plain cast.
 */
struct lp_build_tgsi_soa_context
{
   struct lp_build_tgsi_context bld_base;

   /* Builder for scalar elements of shader's data type (float) */
   struct lp_build_context elem_bld;

   const struct lp_build_tgsi_gs_iface *gs_iface;
   LLVMValueRef emitted_prims_vec_ptr;
   LLVMValueRef total_emitted_vertices_vec_ptr;
   LLVMValueRef emitted_vertices_vec_ptr;
   LLVMValueRef max_output_vertices_vec;

   LLVMValueRef consts_ptr;
   const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS];
   LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS];

   const struct lp_build_sampler_soa *sampler;

   struct tgsi_declaration_sampler_view sv[PIPE_MAX_SHADER_SAMPLER_VIEWS];

   /* Per-register, per-channel values for each register file. */
   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][TGSI_NUM_CHANNELS];
   LLVMValueRef temps[LP_MAX_TGSI_TEMPS][TGSI_NUM_CHANNELS];
   LLVMValueRef addr[LP_MAX_TGSI_ADDRS][TGSI_NUM_CHANNELS];
   LLVMValueRef preds[LP_MAX_TGSI_PREDS][TGSI_NUM_CHANNELS];

   /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
    * set in the indirect_files field.
    * The temps[] array above is unused then.
    */
   LLVMValueRef temps_array;

   /* We allocate/use this array of output if (1 << TGSI_FILE_OUTPUT) is
    * set in the indirect_files field.
    * The outputs[] array above is unused then.
    */
   LLVMValueRef outputs_array;

   /* We allocate/use this array of inputs if (1 << TGSI_FILE_INPUT) is
    * set in the indirect_files field.
    * The inputs[] array above is unused then.
    */
   LLVMValueRef inputs_array;

   struct lp_bld_tgsi_system_values system_values;

   /** bitmask indicating which register files are accessed indirectly */
   unsigned indirect_files;

   struct lp_build_mask_context *mask;
   struct lp_exec_mask exec_mask;

   uint num_immediates;

};
467
/*
 * SoA helpers.  Only the prototypes are visible in this header; the notes
 * below are inferred from names and signatures and should be confirmed
 * against the implementations.
 */

/* NOTE(review): presumably handles one TGSI declaration token. */
void
lp_emit_declaration_soa(
   struct lp_build_tgsi_context *bld,
   const struct tgsi_full_declaration *decl);

/* NOTE(review): presumably handles one TGSI immediate token. */
void lp_emit_immediate_soa(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_immediate *imm);

/* NOTE(review): presumably emits one instruction; return value semantics
 * are not visible here. */
boolean
lp_emit_instruction_soa(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   const struct tgsi_opcode_info *info);


/* NOTE(review): presumably returns a pointer to temporary register
 * `index`, channel `chan`. */
LLVMValueRef
lp_get_temp_ptr_soa(
   struct lp_build_tgsi_soa_context *bld,
   unsigned index,
   unsigned chan);

/* NOTE(review): presumably returns a pointer to output register `index`,
 * channel `chan`. */
LLVMValueRef
lp_get_output_ptr(
   struct lp_build_tgsi_soa_context *bld,
   unsigned index,
   unsigned chan);
495
/*
 * AoS translation context.  bld_base must stay the first member:
 * lp_aos_context() converts a struct lp_build_tgsi_context pointer to this
 * type with a plain cast.
 */
struct lp_build_tgsi_aos_context
{
   struct lp_build_tgsi_context bld_base;

   /* Builder for integer masks and indices */
   /* NOTE(review): bld_base also has a member named int_bld; whether both
    * are needed is not visible from this header. */
   struct lp_build_context int_bld;

   /*
    * AoS swizzle used:
    * - swizzles[0] = red index
    * - swizzles[1] = green index
    * - swizzles[2] = blue index
    * - swizzles[3] = alpha index
    */
   unsigned char swizzles[4];
   unsigned char inv_swizzles[4];

   LLVMValueRef consts_ptr;
   const LLVMValueRef *inputs;
   LLVMValueRef *outputs;

   struct lp_build_sampler_aos *sampler;

   /* One value per register for each register file. */
   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES];
   LLVMValueRef temps[LP_MAX_TGSI_TEMPS];
   LLVMValueRef addr[LP_MAX_TGSI_ADDRS];
   LLVMValueRef preds[LP_MAX_TGSI_PREDS];

   /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
    * set in the indirect_files field.
    * The temps[] array above is unused then.
    */
   LLVMValueRef temps_array;

   /** bitmask indicating which register files are accessed indirectly */
   unsigned indirect_files;

};
534
535 static INLINE struct lp_build_tgsi_soa_context *
536 lp_soa_context(struct lp_build_tgsi_context *bld_base)
537 {
538 return (struct lp_build_tgsi_soa_context *)bld_base;
539 }
540
541 static INLINE struct lp_build_tgsi_aos_context *
542 lp_aos_context(struct lp_build_tgsi_context *bld_base)
543 {
544 return (struct lp_build_tgsi_aos_context *)bld_base;
545 }
546
/*
 * AoS helpers.  Only the prototypes are visible in this header; the notes
 * below are inferred from names and signatures and should be confirmed
 * against the implementations.
 */

/* NOTE(review): presumably handles one TGSI declaration token. */
void
lp_emit_declaration_aos(
   struct lp_build_tgsi_aos_context *bld,
   const struct tgsi_full_declaration *decl);


/* NOTE(review): presumably emits one instruction and may update *pc;
 * return value semantics are not visible here. */
boolean
lp_emit_instruction_aos(
   struct lp_build_tgsi_aos_context *bld,
   const struct tgsi_full_instruction *inst,
   const struct tgsi_opcode_info *info,
   int *pc);

/* NOTE(review): presumably stores `value` to destination operand `index`
 * of `inst`. */
void
lp_emit_store_aos(
   struct lp_build_tgsi_aos_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned index,
   LLVMValueRef value);

566
/*
 * Generic emit/fetch helpers operating on struct lp_build_tgsi_context.
 * Only the prototypes are visible in this header; the notes below are
 * inferred from names and signatures and should be confirmed against the
 * implementations.
 */

/* NOTE(review): presumably fills emit_data with the fetched source
 * operands of the current instruction. */
void lp_build_fetch_args(
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data);

LLVMValueRef
lp_build_tgsi_inst_llvm_aos(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_instruction *inst);

/* Has the (action, bld_base, emit_data) parameter list of an action
 * callback.  NOTE(review): presumably usable as a generic "emit" handler. */
void
lp_build_tgsi_intrinsic(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data);

/* NOTE(review): presumably runs the action registered for tgsi_opcode on
 * emit_data and returns the result. */
LLVMValueRef
lp_build_emit_llvm(
   struct lp_build_tgsi_context *bld_base,
   unsigned tgsi_opcode,
   struct lp_build_emit_data * emit_data);

/* One-, two- and three-argument variants taking the operands directly. */
LLVMValueRef
lp_build_emit_llvm_unary(
   struct lp_build_tgsi_context *bld_base,
   unsigned tgsi_opcode,
   LLVMValueRef arg0);

LLVMValueRef
lp_build_emit_llvm_binary(
   struct lp_build_tgsi_context *bld_base,
   unsigned tgsi_opcode,
   LLVMValueRef arg0,
   LLVMValueRef arg1);

LLVMValueRef
lp_build_emit_llvm_ternary(
   struct lp_build_tgsi_context *bld_base,
   unsigned tgsi_opcode,
   LLVMValueRef arg0,
   LLVMValueRef arg1,
   LLVMValueRef arg2);

boolean
lp_build_tgsi_inst_llvm(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_instruction *inst);

/* NOTE(review): presumably fetches channel chan_index of source operand
 * src_op of inst. */
LLVMValueRef
lp_build_emit_fetch(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_instruction *inst,
   unsigned src_op,
   const unsigned chan_index);


/* NOTE(review): presumably the texture-offset counterpart of
 * lp_build_emit_fetch(). */
LLVMValueRef
lp_build_emit_fetch_texoffset(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_instruction *inst,
   unsigned tex_off_op,
   const unsigned chan_index);

/* NOTE(review): presumably translates the whole token stream; return value
 * semantics are not visible here. */
boolean
lp_build_tgsi_llvm(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_token *tokens);

#endif /* LP_BLD_TGSI_H */
633
634 #endif /* LP_BLD_TGSI_H */