gallivm: pass jit_context pointer through to sampling
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi.h
1 /**************************************************************************
2 *
3 * Copyright 2011-2012 Advanced Micro Devices, Inc.
4 * Copyright 2009 VMware, Inc.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 * @author Tom Stellard <thomas.stellard@amd.com>
35 */
36
37 #ifndef LP_BLD_TGSI_H
38 #define LP_BLD_TGSI_H
39
40 #include "gallivm/lp_bld.h"
41 #include "gallivm/lp_bld_tgsi_action.h"
42 #include "gallivm/lp_bld_limits.h"
43 #include "gallivm/lp_bld_sample.h"
44 #include "lp_bld_type.h"
45 #include "pipe/p_compiler.h"
46 #include "pipe/p_state.h"
47 #include "tgsi/tgsi_exec.h"
48 #include "tgsi/tgsi_scan.h"
49 #include "tgsi/tgsi_info.h"
50
/*
 * All-bits-set value.  Going by its name it stands for "every channel";
 * the users of this macro live outside this header.
 *
 * The replacement list is parenthesized so the macro always expands to a
 * single primary expression, whatever operators surround it at the use site.
 */
#define LP_CHAN_ALL (~0)

/*
 * NOTE(review): presumably the capacity of an instruction list; the code
 * that consumes this constant is not part of this header.
 */
#define LP_MAX_INSTRUCTIONS 256
54
/*
 * Opaque forward declarations.  Only pointers to these types are needed by
 * the declarations that follow, so the full definitions are not required
 * at this point.
 */

/* gallivm side */
struct gallivm_state;
struct lp_build_mask_context;
struct lp_build_tgsi_gs_iface;
struct lp_derivatives;

/* TGSI side */
struct tgsi_full_declaration;
struct tgsi_full_immediate;
struct tgsi_full_instruction;
struct tgsi_full_src_register;
struct tgsi_opcode_info;
struct tgsi_shader_info;
struct tgsi_token;
66
67
/**
 * Modifier attached to a texture operation.
 *
 * Stored in lp_tgsi_texture_info::modifier and passed to the AoS sampler's
 * emit_fetch_texel() callback.  The numeric values are spelled out because
 * they are stored in an 8-bit bitfield.
 */
enum lp_build_tex_modifier {
   LP_BLD_TEX_MODIFIER_NONE           = 0,
   LP_BLD_TEX_MODIFIER_PROJECTED      = 1,
   LP_BLD_TEX_MODIFIER_LOD_BIAS       = 2,
   LP_BLD_TEX_MODIFIER_EXPLICIT_LOD   = 3,
   LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV = 4,
   LP_BLD_TEX_MODIFIER_LOD_ZERO       = 5
};
76
77
/**
 * Describes where the value of one register channel comes from.
 *
 * A channel can be:
 * - an immediate value (i.e. derived from an IMM register)
 * - CONST[n].x/y/z/w
 * - IN[n].x/y/z/w
 * - undetermined (when .file == TGSI_FILE_NULL)
 *
 * This is one of the analysis results; it is used to describe the output
 * color in terms of the shader inputs.
 */
struct lp_tgsi_channel_info
{
   unsigned int file    : 4;   /* TGSI_FILE_* */
   unsigned int swizzle : 3;   /* PIPE_SWIZZLE_x */

   union {
      uint32_t index;
      float    value;          /* for TGSI_FILE_IMMEDIATE */
   } u;
};
99
100
101 /**
102 * Describe a texture sampler interpolator.
103 *
104 * The interpolation is described in terms of regular inputs.
105 */
106 struct lp_tgsi_texture_info
107 {
108 struct lp_tgsi_channel_info coord[4];
109 unsigned target:8; /* TGSI_TEXTURE_* */
110 unsigned sampler_unit:8; /* Sampler unit */
111 unsigned texture_unit:8; /* Texture unit */
112 unsigned modifier:8; /* LP_BLD_TEX_MODIFIER_* */
113 };
114
115
116 struct lp_tgsi_info
117 {
118 struct tgsi_shader_info base;
119
120 /*
121 * Whether any of the texture opcodes access a register file other than
122 * TGSI_FILE_INPUT.
123 *
124 * We could also handle TGSI_FILE_CONST/IMMEDIATE here, but there is little
125 * benefit.
126 */
127 unsigned indirect_textures:1;
128
129 /*
130 * Whether any of the texture (sample) ocpodes use different sampler
131 * and sampler view unit.
132 */
133 unsigned sampler_texture_units_different:1;
134
135 /*
136 * Whether any immediate values are outside the range of 0 and 1
137 */
138 unsigned unclamped_immediates:1;
139
140 /*
141 * Texture opcode description. Aimed at detecting and described direct
142 * texture opcodes.
143 */
144 unsigned num_texs;
145 struct lp_tgsi_texture_info tex[PIPE_MAX_SAMPLERS];
146
147 /*
148 * Output description. Aimed at detecting and describing simple blit
149 * shaders.
150 */
151 struct lp_tgsi_channel_info output[PIPE_MAX_SHADER_OUTPUTS][4];
152
153 /*
154 * Shortcut pointers into the above (for fragment shaders).
155 */
156 const struct lp_tgsi_channel_info *cbuf[PIPE_MAX_COLOR_BUFS];
157 };
158
159 /**
160 * Reference to system values.
161 */
162 struct lp_bld_tgsi_system_values {
163 LLVMValueRef instance_id;
164 LLVMValueRef vertex_id;
165 LLVMValueRef vertex_id_nobase;
166 LLVMValueRef prim_id;
167 LLVMValueRef basevertex;
168 };
169
170
171 /**
172 * Sampler code generation interface.
173 *
174 * Although texture sampling is a requirement for TGSI translation, it is
175 * a very different problem with several different approaches to it. This
176 * structure establishes an interface for texture sampling code generation, so
177 * that we can easily use different texture sampling strategies.
178 */
179 struct lp_build_sampler_soa
180 {
181 void
182 (*destroy)( struct lp_build_sampler_soa *sampler );
183
184 void
185 (*emit_fetch_texel)( const struct lp_build_sampler_soa *sampler,
186 struct gallivm_state *gallivm,
187 struct lp_type type,
188 boolean is_fetch,
189 unsigned texture_index,
190 unsigned sampler_index,
191 LLVMValueRef context_ptr,
192 const LLVMValueRef *coords,
193 const LLVMValueRef *offsets,
194 const struct lp_derivatives *derivs,
195 LLVMValueRef lod_bias, /* optional */
196 LLVMValueRef explicit_lod, /* optional */
197 enum lp_sampler_lod_property,
198 LLVMValueRef *texel);
199
200 void
201 (*emit_size_query)( const struct lp_build_sampler_soa *sampler,
202 struct gallivm_state *gallivm,
203 struct lp_type type,
204 unsigned unit,
205 unsigned target,
206 LLVMValueRef context_ptr,
207 boolean need_nr_mips,
208 enum lp_sampler_lod_property,
209 LLVMValueRef explicit_lod, /* optional */
210 LLVMValueRef *sizes_out);
211 };
212
213
214 struct lp_build_sampler_aos
215 {
216 LLVMValueRef
217 (*emit_fetch_texel)( struct lp_build_sampler_aos *sampler,
218 struct lp_build_context *bld,
219 unsigned target, /* TGSI_TEXTURE_* */
220 unsigned unit,
221 LLVMValueRef coords,
222 const struct lp_derivatives derivs,
223 enum lp_build_tex_modifier modifier);
224 };
225
226
227 void
228 lp_build_tgsi_info(const struct tgsi_token *tokens,
229 struct lp_tgsi_info *info);
230
231
232 void
233 lp_build_tgsi_soa(struct gallivm_state *gallivm,
234 const struct tgsi_token *tokens,
235 struct lp_type type,
236 struct lp_build_mask_context *mask,
237 LLVMValueRef consts_ptr,
238 LLVMValueRef const_sizes_ptr,
239 const struct lp_bld_tgsi_system_values *system_values,
240 const LLVMValueRef (*inputs)[4],
241 LLVMValueRef (*outputs)[4],
242 LLVMValueRef context_ptr,
243 struct lp_build_sampler_soa *sampler,
244 const struct tgsi_shader_info *info,
245 const struct lp_build_tgsi_gs_iface *gs_iface);
246
247
248 void
249 lp_build_tgsi_aos(struct gallivm_state *gallivm,
250 const struct tgsi_token *tokens,
251 struct lp_type type,
252 const unsigned char swizzles[4],
253 LLVMValueRef consts_ptr,
254 const LLVMValueRef *inputs,
255 LLVMValueRef *outputs,
256 struct lp_build_sampler_aos *sampler,
257 const struct tgsi_shader_info *info);
258
259
/**
 * Records whether a break belongs to a loop or to a switch (see the
 * break_type fields of struct function_ctx).
 */
enum lp_exec_mask_break_type {
   LP_EXEC_MASK_BREAK_TYPE_LOOP   = 0,
   LP_EXEC_MASK_BREAK_TYPE_SWITCH = 1
};
264
265
266 struct lp_exec_mask {
267 struct lp_build_context *bld;
268
269 boolean has_mask;
270 boolean ret_in_main;
271
272 LLVMTypeRef int_vec_type;
273
274 LLVMValueRef exec_mask;
275
276 LLVMValueRef ret_mask;
277 LLVMValueRef cond_mask;
278 LLVMValueRef switch_mask; /* current switch exec mask */
279 LLVMValueRef cont_mask;
280 LLVMValueRef break_mask;
281
282 struct function_ctx {
283 int pc;
284 LLVMValueRef ret_mask;
285
286 LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
287 int cond_stack_size;
288
289 /* keep track if break belongs to switch or loop */
290 enum lp_exec_mask_break_type break_type_stack[LP_MAX_TGSI_NESTING];
291 enum lp_exec_mask_break_type break_type;
292
293 struct {
294 LLVMValueRef switch_val;
295 LLVMValueRef switch_mask;
296 LLVMValueRef switch_mask_default;
297 boolean switch_in_default;
298 unsigned switch_pc;
299 } switch_stack[LP_MAX_TGSI_NESTING];
300 int switch_stack_size;
301 LLVMValueRef switch_val;
302 LLVMValueRef switch_mask_default; /* reverse of switch mask used for default */
303 boolean switch_in_default; /* if switch exec is currently in default */
304 unsigned switch_pc; /* when used points to default or endswitch-1 */
305
306 LLVMValueRef loop_limiter;
307 LLVMBasicBlockRef loop_block;
308 LLVMValueRef break_var;
309 struct {
310 LLVMBasicBlockRef loop_block;
311 LLVMValueRef cont_mask;
312 LLVMValueRef break_mask;
313 LLVMValueRef break_var;
314 } loop_stack[LP_MAX_TGSI_NESTING];
315 int loop_stack_size;
316
317 } *function_stack;
318 int function_stack_size;
319 };
320
321 struct lp_build_tgsi_inst_list
322 {
323 struct tgsi_full_instruction *instructions;
324 uint max_instructions;
325 uint num_instructions;
326 };
327
/*
 * Declare the context tag at file scope BEFORE the prototypes below use it.
 *
 * If a struct tag first appears inside a parameter list, it only has
 * function prototype scope: the parameter type would then be distinct from
 * (and incompatible with) the struct lp_build_tgsi_context defined later in
 * this header.  Declaring it first keeps this header correct regardless of
 * what the included headers happen to declare.
 */
struct lp_build_tgsi_context;


/*
 * Instruction list helpers operating on a TGSI build context.
 *
 * NOTE(review): the meaning of the unsigned return values is defined by the
 * implementation, which is not part of this header.
 */
unsigned
lp_bld_tgsi_list_init(struct lp_build_tgsi_context *bld_base);


unsigned
lp_bld_tgsi_add_instruction(struct lp_build_tgsi_context *bld_base,
                            const struct tgsi_full_instruction *inst_to_add);
337
338
339 typedef LLVMValueRef (*lp_build_emit_fetch_fn)(struct lp_build_tgsi_context *,
340 const struct tgsi_full_src_register *,
341 enum tgsi_opcode_type,
342 unsigned);
343
344 struct lp_build_tgsi_context
345 {
346 struct lp_build_context base;
347
348 struct lp_build_context uint_bld;
349 struct lp_build_context int_bld;
350
351 /** This array stores functions that are used to transform TGSI opcodes to
352 * LLVM instructions.
353 */
354 struct lp_build_tgsi_action op_actions[TGSI_OPCODE_LAST];
355
356 /* TGSI_OPCODE_RSQ is defined as 1 / sqrt( abs(src0.x) ), rsq_action
357 * should compute 1 / sqrt (src0.x) */
358 struct lp_build_tgsi_action rsq_action;
359
360 struct lp_build_tgsi_action sqrt_action;
361
362 const struct tgsi_shader_info *info;
363
364 lp_build_emit_fetch_fn emit_fetch_funcs[TGSI_FILE_COUNT];
365
366 LLVMValueRef (*emit_swizzle)(struct lp_build_tgsi_context *,
367 LLVMValueRef, unsigned, unsigned, unsigned, unsigned);
368
369
370 void (*emit_debug)(struct lp_build_tgsi_context *,
371 const struct tgsi_full_instruction *,
372 const struct tgsi_opcode_info *);
373
374 void (*emit_store)(struct lp_build_tgsi_context *,
375 const struct tgsi_full_instruction *,
376 const struct tgsi_opcode_info *,
377 LLVMValueRef dst[4]);
378
379 void (*emit_declaration)(struct lp_build_tgsi_context *,
380 const struct tgsi_full_declaration *decl);
381
382 void (*emit_immediate)(struct lp_build_tgsi_context *,
383 const struct tgsi_full_immediate *imm);
384
385
386 /* Allow the user to store data in this structure rather than passing it
387 * to every function. */
388 void * userdata;
389
390 boolean soa;
391
392 int pc;
393
394 struct tgsi_full_instruction *instructions;
395 uint max_instructions;
396 uint num_instructions;
397
398 /** This function allows the user to insert some instructions at the
399 * beginning of the program. It is optional and does not need to be
400 * implemented.
401 */
402 void (*emit_prologue)(struct lp_build_tgsi_context*);
403
404 /** This function allows the user to insert some instructions at the end of
405 * the program. This callback is intended to be used for emitting
406 * instructions to handle the export for the output registers, but it can
407 * be used for any purpose. Implementing this function is optiona, but
408 * recommended.
409 */
410 void (*emit_epilogue)(struct lp_build_tgsi_context*);
411 };
412
413 struct lp_build_tgsi_gs_iface
414 {
415 LLVMValueRef (*fetch_input)(const struct lp_build_tgsi_gs_iface *gs_iface,
416 struct lp_build_tgsi_context * bld_base,
417 boolean is_vindex_indirect,
418 LLVMValueRef vertex_index,
419 boolean is_aindex_indirect,
420 LLVMValueRef attrib_index,
421 LLVMValueRef swizzle_index);
422 void (*emit_vertex)(const struct lp_build_tgsi_gs_iface *gs_iface,
423 struct lp_build_tgsi_context * bld_base,
424 LLVMValueRef (*outputs)[4],
425 LLVMValueRef emitted_vertices_vec);
426 void (*end_primitive)(const struct lp_build_tgsi_gs_iface *gs_iface,
427 struct lp_build_tgsi_context * bld_base,
428 LLVMValueRef verts_per_prim_vec,
429 LLVMValueRef emitted_prims_vec);
430 void (*gs_epilogue)(const struct lp_build_tgsi_gs_iface *gs_iface,
431 struct lp_build_tgsi_context * bld_base,
432 LLVMValueRef total_emitted_vertices_vec,
433 LLVMValueRef emitted_prims_vec);
434 };
435
436 struct lp_build_tgsi_soa_context
437 {
438 struct lp_build_tgsi_context bld_base;
439
440 /* Builder for scalar elements of shader's data type (float) */
441 struct lp_build_context elem_bld;
442
443 const struct lp_build_tgsi_gs_iface *gs_iface;
444 LLVMValueRef emitted_prims_vec_ptr;
445 LLVMValueRef total_emitted_vertices_vec_ptr;
446 LLVMValueRef emitted_vertices_vec_ptr;
447 LLVMValueRef max_output_vertices_vec;
448
449 LLVMValueRef consts_ptr;
450 LLVMValueRef const_sizes_ptr;
451 LLVMValueRef consts[LP_MAX_TGSI_CONST_BUFFERS];
452 LLVMValueRef consts_sizes[LP_MAX_TGSI_CONST_BUFFERS];
453 const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS];
454 LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS];
455 LLVMValueRef context_ptr;
456
457 const struct lp_build_sampler_soa *sampler;
458
459 struct tgsi_declaration_sampler_view sv[PIPE_MAX_SHADER_SAMPLER_VIEWS];
460
461 LLVMValueRef immediates[LP_MAX_INLINED_IMMEDIATES][TGSI_NUM_CHANNELS];
462 LLVMValueRef temps[LP_MAX_INLINED_TEMPS][TGSI_NUM_CHANNELS];
463 LLVMValueRef addr[LP_MAX_TGSI_ADDRS][TGSI_NUM_CHANNELS];
464 LLVMValueRef preds[LP_MAX_TGSI_PREDS][TGSI_NUM_CHANNELS];
465
466 /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
467 * set in the indirect_files field.
468 * The temps[] array above is unused then.
469 */
470 LLVMValueRef temps_array;
471
472 /* We allocate/use this array of output if (1 << TGSI_FILE_OUTPUT) is
473 * set in the indirect_files field.
474 * The outputs[] array above is unused then.
475 */
476 LLVMValueRef outputs_array;
477
478 /* We allocate/use this array of inputs if (1 << TGSI_FILE_INPUT) is
479 * set in the indirect_files field.
480 * The inputs[] array above is unused then.
481 */
482 LLVMValueRef inputs_array;
483
484 /* We allocate/use this array of temps if (1 << TGSI_FILE_IMMEDIATE) is
485 * set in the indirect_files field.
486 */
487 LLVMValueRef imms_array;
488
489
490 struct lp_bld_tgsi_system_values system_values;
491
492 /** bitmask indicating which register files are accessed indirectly */
493 unsigned indirect_files;
494
495 struct lp_build_mask_context *mask;
496 struct lp_exec_mask exec_mask;
497
498 uint num_immediates;
499 boolean use_immediates_array;
500 };
501
502 void
503 lp_emit_declaration_soa(
504 struct lp_build_tgsi_context *bld,
505 const struct tgsi_full_declaration *decl);
506
507 void lp_emit_immediate_soa(
508 struct lp_build_tgsi_context *bld_base,
509 const struct tgsi_full_immediate *imm);
510
511 boolean
512 lp_emit_instruction_soa(
513 struct lp_build_tgsi_soa_context *bld,
514 const struct tgsi_full_instruction *inst,
515 const struct tgsi_opcode_info *info);
516
517
518 LLVMValueRef
519 lp_get_temp_ptr_soa(
520 struct lp_build_tgsi_soa_context *bld,
521 unsigned index,
522 unsigned chan);
523
524 LLVMValueRef
525 lp_get_output_ptr(
526 struct lp_build_tgsi_soa_context *bld,
527 unsigned index,
528 unsigned chan);
529
530 struct lp_build_tgsi_aos_context
531 {
532 struct lp_build_tgsi_context bld_base;
533
534 /* Builder for integer masks and indices */
535 struct lp_build_context int_bld;
536
537 /*
538 * AoS swizzle used:
539 * - swizzles[0] = red index
540 * - swizzles[1] = green index
541 * - swizzles[2] = blue index
542 * - swizzles[3] = alpha index
543 */
544 unsigned char swizzles[4];
545 unsigned char inv_swizzles[4];
546
547 LLVMValueRef consts_ptr;
548 const LLVMValueRef *inputs;
549 LLVMValueRef *outputs;
550
551 struct lp_build_sampler_aos *sampler;
552
553 struct tgsi_declaration_sampler_view sv[PIPE_MAX_SHADER_SAMPLER_VIEWS];
554
555 LLVMValueRef immediates[LP_MAX_INLINED_IMMEDIATES];
556 LLVMValueRef temps[LP_MAX_INLINED_TEMPS];
557 LLVMValueRef addr[LP_MAX_TGSI_ADDRS];
558 LLVMValueRef preds[LP_MAX_TGSI_PREDS];
559
560 /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
561 * set in the indirect_files field.
562 * The temps[] array above is unused then.
563 */
564 LLVMValueRef temps_array;
565
566 /** bitmask indicating which register files are accessed indirectly */
567 unsigned indirect_files;
568
569 };
570
571 static INLINE struct lp_build_tgsi_soa_context *
572 lp_soa_context(struct lp_build_tgsi_context *bld_base)
573 {
574 return (struct lp_build_tgsi_soa_context *)bld_base;
575 }
576
577 static INLINE struct lp_build_tgsi_aos_context *
578 lp_aos_context(struct lp_build_tgsi_context *bld_base)
579 {
580 return (struct lp_build_tgsi_aos_context *)bld_base;
581 }
582
583 void
584 lp_emit_declaration_aos(
585 struct lp_build_tgsi_aos_context *bld,
586 const struct tgsi_full_declaration *decl);
587
588
589 boolean
590 lp_emit_instruction_aos(
591 struct lp_build_tgsi_aos_context *bld,
592 const struct tgsi_full_instruction *inst,
593 const struct tgsi_opcode_info *info,
594 int *pc);
595
596 void
597 lp_emit_store_aos(
598 struct lp_build_tgsi_aos_context *bld,
599 const struct tgsi_full_instruction *inst,
600 unsigned index,
601 LLVMValueRef value);
602
603 void lp_build_fetch_args(
604 struct lp_build_tgsi_context * bld_base,
605 struct lp_build_emit_data * emit_data);
606
607 LLVMValueRef
608 lp_build_tgsi_inst_llvm_aos(
609 struct lp_build_tgsi_context * bld_base,
610 const struct tgsi_full_instruction *inst);
611
612 void
613 lp_build_tgsi_intrinsic(
614 const struct lp_build_tgsi_action * action,
615 struct lp_build_tgsi_context * bld_base,
616 struct lp_build_emit_data * emit_data);
617
618 LLVMValueRef
619 lp_build_emit_llvm(
620 struct lp_build_tgsi_context *bld_base,
621 unsigned tgsi_opcode,
622 struct lp_build_emit_data * emit_data);
623
624 LLVMValueRef
625 lp_build_emit_llvm_unary(
626 struct lp_build_tgsi_context *bld_base,
627 unsigned tgsi_opcode,
628 LLVMValueRef arg0);
629
630 LLVMValueRef
631 lp_build_emit_llvm_binary(
632 struct lp_build_tgsi_context *bld_base,
633 unsigned tgsi_opcode,
634 LLVMValueRef arg0,
635 LLVMValueRef arg1);
636
637 LLVMValueRef
638 lp_build_emit_llvm_ternary(
639 struct lp_build_tgsi_context *bld_base,
640 unsigned tgsi_opcode,
641 LLVMValueRef arg0,
642 LLVMValueRef arg1,
643 LLVMValueRef arg2);
644
645 boolean
646 lp_build_tgsi_inst_llvm(
647 struct lp_build_tgsi_context * bld_base,
648 const struct tgsi_full_instruction *inst);
649
650 LLVMValueRef
651 lp_build_emit_fetch(
652 struct lp_build_tgsi_context *bld_base,
653 const struct tgsi_full_instruction *inst,
654 unsigned src_op,
655 const unsigned chan_index);
656
657
658 LLVMValueRef
659 lp_build_emit_fetch_texoffset(
660 struct lp_build_tgsi_context *bld_base,
661 const struct tgsi_full_instruction *inst,
662 unsigned tex_off_op,
663 const unsigned chan_index);
664
665 boolean
666 lp_build_tgsi_llvm(
667 struct lp_build_tgsi_context * bld_base,
668 const struct tgsi_token *tokens);
669
670 #endif /* LP_BLD_TGSI_H */