gallivm,llvmpipe: handle TXF (texelFetch) instruction, including offsets
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi.h
1 /**************************************************************************
2 *
3 * Copyright 2011-2012 Advanced Micro Devices, Inc.
4 * Copyright 2009 VMware, Inc.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 * @author Tom Stellard <thomas.stellard@amd.com>
35 */
36
37 #ifndef LP_BLD_TGSI_H
38 #define LP_BLD_TGSI_H
39
40 #include "gallivm/lp_bld.h"
41 #include "gallivm/lp_bld_tgsi_action.h"
42 #include "gallivm/lp_bld_limits.h"
43 #include "lp_bld_type.h"
44 #include "pipe/p_compiler.h"
45 #include "pipe/p_state.h"
46 #include "tgsi/tgsi_exec.h"
47 #include "tgsi/tgsi_scan.h"
48 #include "tgsi/tgsi_info.h"
49
/*
 * Channel selector with every bit set.
 * The replacement list is parenthesized so the macro always expands to a
 * single operand, whatever operators surround the use site.
 * NOTE(review): presumably passed where a channel index is expected to mean
 * "all channels" -- confirm against the callers.
 */
50 #define LP_CHAN_ALL (~0)
51
/*
 * NOTE(review): presumably the capacity used for the instruction list
 * (see max_instructions further down) -- confirm in the implementation.
 */
52 #define LP_MAX_INSTRUCTIONS 256
53
/* Forward declarations of types referenced by the declarations below. */
54 struct tgsi_full_declaration;
55 struct tgsi_full_immediate;
56 struct tgsi_full_instruction;
57 struct tgsi_full_src_register;
58 struct tgsi_opcode_info;
59 struct tgsi_token;
60 struct tgsi_shader_info;
61 struct lp_build_mask_context;
62 struct gallivm_state;
63 struct lp_derivatives;
64
65
/*
 * Texture-fetch modifier selector.  A value of this enum is recorded in
 * lp_tgsi_texture_info::modifier and is the last argument of the AoS
 * sampler's emit_fetch_texel callback.
 * NOTE(review): the meaning of each enumerator (projected coordinates, LOD
 * bias, explicit LOD, explicit derivatives) is inferred from its name only.
 */
66 enum lp_build_tex_modifier {
67 LP_BLD_TEX_MODIFIER_NONE = 0, /* explicitly zero; the others count up from it */
68 LP_BLD_TEX_MODIFIER_PROJECTED,
69 LP_BLD_TEX_MODIFIER_LOD_BIAS,
70 LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
71 LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV
72 };
73
74
75 /**
76 * Describe a channel of a register.
77 *
78 * The value can be a:
79 * - immediate value (i.e. derived from an IMM register)
80 * - CONST[n].x/y/z/w
81 * - IN[n].x/y/z/w
82 * - undetermined (when .file == TGSI_FILE_NULL)
83 *
84 * This is one of the analysis results, and is used to describe
85 * the output color in terms of inputs.
86 */
87 struct lp_tgsi_channel_info
88 {
/* 4-bit field; TGSI_FILE_NULL here marks the channel as undetermined. */
89 unsigned file:4; /* TGSI_FILE_* */
/* 3-bit field. */
90 unsigned swizzle:3; /* PIPE_SWIZZLE_x */
/* Payload; which member is meaningful depends on `file`. */
91 union {
/* NOTE(review): presumably the n of CONST[n] / IN[n] -- confirm. */
92 uint32_t index;
93 float value; /* for TGSI_FILE_IMMEDIATE */
94 } u;
95 };
96
97
98 /**
99 * Describe a texture sampler interpolator.
100 *
101 * The interpolation is described in terms of regular inputs.
102 */
103 struct lp_tgsi_texture_info
104 {
/* One channel description for each of four coordinate slots. */
105 struct lp_tgsi_channel_info coord[4];
/* The three fields below are 8-bit bit-fields. */
106 unsigned target:8; /* TGSI_TEXTURE_* */
107 unsigned unit:8; /* Sampler unit */
108 unsigned modifier:8; /* LP_BLD_TEX_MODIFIER_* */
109 };
110
111
/*
 * Shader analysis results: the generic tgsi_shader_info plus the
 * llvmpipe-specific texture and output descriptions below.
 * This is the structure that lp_build_tgsi_info() takes as its `info` argument.
 */
112 struct lp_tgsi_info
113 {
/* Embedded by value as the first member. */
114 struct tgsi_shader_info base;
115
116 /*
117 * Whether any of the texture opcodes access a register file other than
118 * TGSI_FILE_INPUT.
119 *
120 * We could also handle TGSI_FILE_CONST/IMMEDIATE here, but there is little
121 * benefit.
122 */
123 unsigned indirect_textures:1;
124
125 /*
126 * Whether any immediate values are outside the range of 0 and 1
127 */
128 unsigned unclamped_immediates:1;
129
130 /*
131 * Texture opcode description. Aimed at detecting and describing direct
132 * texture opcodes.
133 */
/* NOTE(review): presumably the number of valid entries in tex[] -- confirm. */
134 unsigned num_texs;
135 struct lp_tgsi_texture_info tex[PIPE_MAX_SAMPLERS];
136
137 /*
138 * Output description. Aimed at detecting and describing simple blit
139 * shaders.
140 */
/* Four channel descriptions per shader output. */
141 struct lp_tgsi_channel_info output[PIPE_MAX_SHADER_OUTPUTS][4];
142
143 /*
144 * Shortcut pointers into the above (for fragment shaders).
145 */
146 const struct lp_tgsi_channel_info *cbuf[PIPE_MAX_COLOR_BUFS];
147 };
148
149 /**
150 * Reference to system values.
151 */
/*
 * lp_build_tgsi_soa() takes a pointer to one of these, and
 * struct lp_build_tgsi_soa_context holds one by value.
 */
152 struct lp_bld_tgsi_system_values {
/* NOTE(review): presumably the LLVM value backing the instance-id system value -- confirm. */
153 LLVMValueRef instance_id;
/* NOTE(review): presumably the LLVM value backing the vertex-id system value -- confirm. */
154 LLVMValueRef vertex_id;
155 };
156
157
158 /**
159 * Sampler code generation interface.
160 *
161 * Although texture sampling is a requirement for TGSI translation, it is
162 * a very different problem with several different approaches to it. This
163 * structure establishes an interface for texture sampling code generation, so
164 * that we can easily use different texture sampling strategies.
165 */
/* Table of callbacks; each one receives the sampler object as its first argument. */
166 struct lp_build_sampler_soa
167 {
/* The only callback that takes a non-const sampler pointer. */
168 void
169 (*destroy)( struct lp_build_sampler_soa *sampler );
170
/*
 * Emit code for one texel fetch; the result is returned through `texel`.
 * NOTE(review): `is_fetch` presumably selects the TXF (texelFetch) path and
 * `offsets` the texel offsets named in the commit title -- confirm in the
 * implementations.
 */
171 void
172 (*emit_fetch_texel)( const struct lp_build_sampler_soa *sampler,
173 struct gallivm_state *gallivm,
174 struct lp_type type,
175 boolean is_fetch,
176 unsigned unit,
177 const LLVMValueRef *coords,
178 const LLVMValueRef *offsets,
179 const struct lp_derivatives *derivs,
180 LLVMValueRef lod_bias, /* optional */
181 LLVMValueRef explicit_lod, /* optional */
182 LLVMValueRef *texel);
183
/* Emit code for a texture size query; the result is returned through `sizes_out`. */
184 void
185 (*emit_size_query)( const struct lp_build_sampler_soa *sampler,
186 struct gallivm_state *gallivm,
187 struct lp_type type,
188 unsigned unit,
189 LLVMValueRef explicit_lod, /* optional */
190 LLVMValueRef *sizes_out);
191 };
192
193
/*
 * AoS counterpart of the sampler interface: a single callback that returns
 * the fetched texel as one LLVM value.
 */
194 struct lp_build_sampler_aos
195 {
196 LLVMValueRef
197 (*emit_fetch_texel)( struct lp_build_sampler_aos *sampler,
198 struct lp_build_context *bld,
199 unsigned target, /* TGSI_TEXTURE_* */
200 unsigned unit,
201 LLVMValueRef coords,
/* Passed by value here; the SoA interface takes a pointer to the same struct. */
202 const struct lp_derivatives derivs,
203 enum lp_build_tex_modifier modifier);
204 };
205
206
/*
 * Shader analysis entry point (declaration only).
 * NOTE(review): presumably scans `tokens` and writes the result into `*info`,
 * the only non-const argument -- confirm in the implementation.
 */
207 void
208 lp_build_tgsi_info(const struct tgsi_token *tokens,
209 struct lp_tgsi_info *info);
210
211
/*
 * SoA translation entry point (declaration only).
 * `inputs` and `outputs` are pointers to arrays of four LLVM values, i.e. one
 * value per channel of each register; `inputs` is read-only, `outputs` is
 * writable.  Texture sampling goes through the `sampler` callback table.
 * NOTE(review): ownership of `sampler` and whether `mask` or `pos` may be
 * NULL cannot be told from this header.
 */
212 void
213 lp_build_tgsi_soa(struct gallivm_state *gallivm,
214 const struct tgsi_token *tokens,
215 struct lp_type type,
216 struct lp_build_mask_context *mask,
217 LLVMValueRef consts_ptr,
218 const struct lp_bld_tgsi_system_values *system_values,
219 const LLVMValueRef *pos,
220 const LLVMValueRef (*inputs)[4],
221 LLVMValueRef (*outputs)[4],
222 struct lp_build_sampler_soa *sampler,
223 const struct tgsi_shader_info *info);
224
225
/*
 * AoS translation entry point (declaration only).
 * Unlike the SoA entry point, `inputs` and `outputs` are flat arrays with one
 * LLVM value per register, and a four-entry `swizzles` table is supplied
 * (struct lp_build_tgsi_aos_context documents the entries as the red, green,
 * blue and alpha indices).
 */
226 void
227 lp_build_tgsi_aos(struct gallivm_state *gallivm,
228 const struct tgsi_token *tokens,
229 struct lp_type type,
230 const unsigned char swizzles[4],
231 LLVMValueRef consts_ptr,
232 const LLVMValueRef *inputs,
233 LLVMValueRef *outputs,
234 struct lp_build_sampler_aos *sampler,
235 const struct tgsi_shader_info *info);
236
237
/*
 * Execution-mask bookkeeping: current condition / loop / return masks plus
 * one fixed-size stack for each, all sized by LP_MAX_TGSI_NESTING.
 * NOTE(review): the exact role of each mask is inferred from its name --
 * confirm against the code that maintains them.
 */
238 struct lp_exec_mask {
239 struct lp_build_context *bld;
240
241 boolean has_mask;
242
243 LLVMTypeRef int_vec_type;
244
/* Condition stack and the current condition mask. */
245 LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
246 int cond_stack_size;
247 LLVMValueRef cond_mask;
248
/* Current loop state; loop_stack entries hold the same four fields. */
249 LLVMBasicBlockRef loop_block;
250 LLVMValueRef cont_mask;
251 LLVMValueRef break_mask;
252 LLVMValueRef break_var;
253 struct {
254 LLVMBasicBlockRef loop_block;
255 LLVMValueRef cont_mask;
256 LLVMValueRef break_mask;
257 LLVMValueRef break_var;
258 } loop_stack[LP_MAX_TGSI_NESTING];
259 int loop_stack_size;
260
/* Current return mask; call_stack entries pair a pc with a return mask. */
261 LLVMValueRef ret_mask;
262 struct {
263 int pc;
264 LLVMValueRef ret_mask;
265 } call_stack[LP_MAX_TGSI_NESTING];
266 int call_stack_size;
267
268 LLVMValueRef exec_mask;
269 LLVMValueRef loop_limiter;
270 };
271
/*
 * Instruction array with a capacity and a count.
 * struct lp_build_tgsi_context declares the same three fields directly.
 * NOTE(review): nothing in this header uses this struct type -- confirm
 * whether it is still needed.
 */
272 struct lp_build_tgsi_inst_list
273 {
274 struct tgsi_full_instruction *instructions;
/* NOTE(review): presumably the allocated capacity of instructions[] -- confirm. */
275 uint max_instructions;
/* NOTE(review): presumably the number of entries currently stored -- confirm. */
276 uint num_instructions;
277 };
278
/*
 * Forward declaration, placed ahead of the prototypes that use the type.
 * If no declaration is already in scope, a struct tag first seen inside a
 * parameter list has prototype scope only and names a different type from
 * the struct defined later in this header.
 */
struct lp_build_tgsi_context;


/*
 * NOTE(review): presumably prepares the instruction list stored in
 * `bld_base` (instructions / max_instructions / num_instructions); the
 * meaning of the unsigned return value is not visible here -- confirm in the
 * implementation.
 */
279 unsigned lp_bld_tgsi_list_init(struct lp_build_tgsi_context * bld_base);
280
281
/*
 * NOTE(review): presumably appends `*inst_to_add` to the instruction list of
 * `bld_base`; the meaning of the unsigned return value is not visible here --
 * confirm in the implementation.
 */
282 unsigned lp_bld_tgsi_add_instruction(
283 struct lp_build_tgsi_context * bld_base,
284 struct tgsi_full_instruction *inst_to_add);
285
286
288
289
/*
 * Signature of a source-register fetch callback.  struct
 * lp_build_tgsi_context keeps one such pointer per register file
 * (emit_fetch_funcs[TGSI_FILE_COUNT]).
 * NOTE(review): the trailing unsigned is presumably the channel/swizzle to
 * fetch -- confirm against the implementations.
 */
290 typedef LLVMValueRef (*lp_build_emit_fetch_fn)(struct lp_build_tgsi_context *,
291 const struct tgsi_full_src_register *,
292 enum tgsi_opcode_type,
293 unsigned);
294
/*
 * Translation state and callback table.  It is embedded as the first member
 * (`bld_base`) of both the SoA and the AoS context structs.
 */
295 struct lp_build_tgsi_context
296 {
297 struct lp_build_context base;
298
/* NOTE(review): presumably builders for unsigned and signed integer vectors -- confirm. */
299 struct lp_build_context uint_bld;
300 struct lp_build_context int_bld;
301
302 /** This array stores functions that are used to transform TGSI opcodes to
303 * LLVM instructions.
304 */
305 struct lp_build_tgsi_action op_actions[TGSI_OPCODE_LAST];
306
307 /* TGSI_OPCODE_RSQ is defined as 1 / sqrt( abs(src0.x) ), rsq_action
308 * should compute 1 / sqrt (src0.x) */
309 struct lp_build_tgsi_action rsq_action;
310
311 const struct tgsi_shader_info *info;
312
/* One fetch callback slot per TGSI register file. */
313 lp_build_emit_fetch_fn emit_fetch_funcs[TGSI_FILE_COUNT];
314
/* NOTE(review): the four unsigned arguments are presumably the x/y/z/w swizzle selectors -- confirm. */
315 LLVMValueRef (*emit_swizzle)(struct lp_build_tgsi_context *,
316 LLVMValueRef, unsigned, unsigned, unsigned, unsigned);
317
/* Receives the instruction, its opcode info and four destination values. */
318 void (*emit_store)(struct lp_build_tgsi_context *,
319 const struct tgsi_full_instruction *,
320 const struct tgsi_opcode_info *,
321 LLVMValueRef dst[4]);
322
323 void (*emit_declaration)(struct lp_build_tgsi_context *,
324 const struct tgsi_full_declaration *decl);
325
326 void (*emit_immediate)(struct lp_build_tgsi_context *,
327 const struct tgsi_full_immediate *imm);
328
329
330 /* Allow the user to store data in this structure rather than passing it
331 * to every function. */
332 void * userdata;
333
/* NOTE(review): presumably TRUE when this context is embedded in an SoA context -- confirm. */
334 boolean soa;
335
/* NOTE(review): presumably the index of the instruction being translated -- confirm. */
336 int pc;
337
/* Same three fields as struct lp_build_tgsi_inst_list. */
338 struct tgsi_full_instruction *instructions;
339 uint max_instructions;
340 uint num_instructions;
341
342 /** This function allows the user to insert some instructions at the
343 * beginning of the program. It is optional and does not need to be
344 * implemented.
345 */
346 void (*emit_prologue)(struct lp_build_tgsi_context*);
347
348 /** This function allows the user to insert some instructions at the end of
349 * the program. This callback is intended to be used for emitting
350 * instructions to handle the export for the output registers, but it can
351 * be used for any purpose. Implementing this function is optional, but
352 * recommended.
353 */
354 void (*emit_epilogue)(struct lp_build_tgsi_context*);
355 };
356
/*
 * SoA translation context.  Several fields have the same names and types as
 * the arguments of lp_build_tgsi_soa() (consts_ptr, pos, inputs, outputs,
 * mask).
 */
357 struct lp_build_tgsi_soa_context
358 {
/* Must stay the first member: lp_soa_context() casts a pointer to it back to this struct. */
359 struct lp_build_tgsi_context bld_base;
360
361 /* Builder for scalar elements of shader's data type (float) */
362 struct lp_build_context elem_bld;
363
364 LLVMValueRef consts_ptr;
365 const LLVMValueRef *pos;
366 const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS];
367 LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS];
368
369 const struct lp_build_sampler_soa *sampler;
370
/* Per-register-file tables: one LLVM value per channel of each register. */
371 LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][TGSI_NUM_CHANNELS];
372 LLVMValueRef temps[LP_MAX_TGSI_TEMPS][TGSI_NUM_CHANNELS];
373 LLVMValueRef addr[LP_MAX_TGSI_ADDRS][TGSI_NUM_CHANNELS];
374 LLVMValueRef preds[LP_MAX_TGSI_PREDS][TGSI_NUM_CHANNELS];
375
376 /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
377 * set in the indirect_files field.
378 * The temps[] array above is unused then.
379 */
380 LLVMValueRef temps_array;
381
382 /* We allocate/use this array of output if (1 << TGSI_FILE_OUTPUT) is
383 * set in the indirect_files field.
384 * The outputs[] array above is unused then.
385 */
386 LLVMValueRef outputs_array;
387
388 /* We allocate/use this array of inputs if (1 << TGSI_FILE_INPUT) is
389 * set in the indirect_files field.
390 * The inputs[] array above is unused then.
391 */
392 LLVMValueRef inputs_array;
393
394 struct lp_bld_tgsi_system_values system_values;
395
396 /** bitmask indicating which register files are accessed indirectly */
397 unsigned indirect_files;
398
399 struct lp_build_mask_context *mask;
400 struct lp_exec_mask exec_mask;
401
/* NOTE(review): presumably the number of entries filled in immediates[] -- confirm. */
402 uint num_immediates;
403
404 };
405
/*
 * SoA helpers (declarations only).
 *
 * lp_emit_declaration_soa() and lp_emit_immediate_soa() take the base context
 * and have the same signatures as the emit_declaration / emit_immediate
 * callback slots of struct lp_build_tgsi_context.
 */
406 void
407 lp_emit_declaration_soa(
408 struct lp_build_tgsi_context *bld,
409 const struct tgsi_full_declaration *decl);
410
411 void lp_emit_immediate_soa(
412 struct lp_build_tgsi_context *bld_base,
413 const struct tgsi_full_immediate *imm);
414
/*
 * Takes the SoA context directly rather than the base context.
 * NOTE(review): the boolean result is presumably a success flag -- confirm.
 */
415 boolean
416 lp_emit_instruction_soa(
417 struct lp_build_tgsi_soa_context *bld,
418 const struct tgsi_full_instruction *inst,
419 const struct tgsi_opcode_info *info);
420
421
/* NOTE(review): presumably returns a pointer to the storage of temporary `index`, channel `chan` -- confirm. */
422 LLVMValueRef
423 lp_get_temp_ptr_soa(
424 struct lp_build_tgsi_soa_context *bld,
425 unsigned index,
426 unsigned chan);
427
/* NOTE(review): presumably returns a pointer to the storage of output `index`, channel `chan` -- confirm. */
428 LLVMValueRef
429 lp_get_output_ptr(
430 struct lp_build_tgsi_soa_context *bld,
431 unsigned index,
432 unsigned chan);
433
/*
 * AoS translation context.  Unlike the SoA context, each register slot holds
 * a single LLVM value instead of one value per channel.
 */
434 struct lp_build_tgsi_aos_context
435 {
/* Must stay the first member: lp_aos_context() casts a pointer to it back to this struct. */
436 struct lp_build_tgsi_context bld_base;
437
438 /* Builder for integer masks and indices */
/* A separate member from bld_base.int_bld, which has the same name. */
439 struct lp_build_context int_bld;
440
441 /*
442 * AoS swizzle used:
443 * - swizzles[0] = red index
444 * - swizzles[1] = green index
445 * - swizzles[2] = blue index
446 * - swizzles[3] = alpha index
447 */
448 unsigned char swizzles[4];
/* NOTE(review): presumably the inverse mapping of swizzles[] -- confirm. */
449 unsigned char inv_swizzles[4];
450
451 LLVMValueRef consts_ptr;
452 const LLVMValueRef *inputs;
453 LLVMValueRef *outputs;
454
455 struct lp_build_sampler_aos *sampler;
456
457 LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES];
458 LLVMValueRef temps[LP_MAX_TGSI_TEMPS];
459 LLVMValueRef addr[LP_MAX_TGSI_ADDRS];
460 LLVMValueRef preds[LP_MAX_TGSI_PREDS];
461
462 /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
463 * set in the indirect_files field.
464 * The temps[] array above is unused then.
465 */
466 LLVMValueRef temps_array;
467
468 /** bitmask indicating which register files are accessed indirectly */
469 unsigned indirect_files;
470
471 };
472
/*
 * Reinterpret a base-context pointer as a pointer to the enclosing SoA
 * context.  This works because `bld_base` is the first member of
 * struct lp_build_tgsi_soa_context.  Nothing is checked here: the caller
 * must pass a context that really is embedded in an SoA context.
 */
473 static INLINE struct lp_build_tgsi_soa_context *
474 lp_soa_context(struct lp_build_tgsi_context *bld_base)
475 {
476 return (struct lp_build_tgsi_soa_context *)bld_base;
477 }
478
/*
 * Reinterpret a base-context pointer as a pointer to the enclosing AoS
 * context.  This works because `bld_base` is the first member of
 * struct lp_build_tgsi_aos_context.  Nothing is checked here: the caller
 * must pass a context that really is embedded in an AoS context.
 */
479 static INLINE struct lp_build_tgsi_aos_context *
480 lp_aos_context(struct lp_build_tgsi_context *bld_base)
481 {
482 return (struct lp_build_tgsi_aos_context *)bld_base;
483 }
484
/*
 * AoS helpers (declarations only).  All three take the AoS context directly,
 * so lp_emit_declaration_aos() does not have the signature of the
 * emit_declaration callback slot, whereas its SoA counterpart does.
 */
485 void
486 lp_emit_declaration_aos(
487 struct lp_build_tgsi_aos_context *bld,
488 const struct tgsi_full_declaration *decl);
489
490
/*
 * Unlike the SoA variant, this takes a writable `pc` pointer.
 * NOTE(review): presumably updated for control-flow instructions, with the
 * boolean result acting as a success flag -- confirm in the implementation.
 */
491 boolean
492 lp_emit_instruction_aos(
493 struct lp_build_tgsi_aos_context *bld,
494 const struct tgsi_full_instruction *inst,
495 const struct tgsi_opcode_info *info,
496 int *pc);
497
/* NOTE(review): `index` is presumably the destination-operand index within `inst` -- confirm. */
498 void
499 lp_emit_store_aos(
500 struct lp_build_tgsi_aos_context *bld,
501 const struct tgsi_full_instruction *inst,
502 unsigned index,
503 LLVMValueRef value);
504
/*
 * Generic emit helpers (declarations only).  struct lp_build_emit_data and
 * struct lp_build_tgsi_action are not declared in this header.
 * NOTE(review): they presumably come from gallivm/lp_bld_tgsi_action.h -- confirm.
 */
505 void lp_build_fetch_args(
506 struct lp_build_tgsi_context * bld_base,
507 struct lp_build_emit_data * emit_data);
508
509 LLVMValueRef
510 lp_build_tgsi_inst_llvm_aos(
511 struct lp_build_tgsi_context * bld_base,
512 const struct tgsi_full_instruction *inst);
513
514 void
515 lp_build_tgsi_intrinsic(
516 const struct lp_build_tgsi_action * action,
517 struct lp_build_tgsi_context * bld_base,
518 struct lp_build_emit_data * emit_data);
519
/*
 * Emit code for `tgsi_opcode` with the arguments carried in `emit_data`.
 * NOTE(review): presumably dispatches through bld_base->op_actions -- confirm.
 */
520 LLVMValueRef
521 lp_build_emit_llvm(
522 struct lp_build_tgsi_context *bld_base,
523 unsigned tgsi_opcode,
524 struct lp_build_emit_data * emit_data);
525
/* The three variants below take one, two or three operand values directly instead of an emit_data. */
526 LLVMValueRef
527 lp_build_emit_llvm_unary(
528 struct lp_build_tgsi_context *bld_base,
529 unsigned tgsi_opcode,
530 LLVMValueRef arg0);
531
532 LLVMValueRef
533 lp_build_emit_llvm_binary(
534 struct lp_build_tgsi_context *bld_base,
535 unsigned tgsi_opcode,
536 LLVMValueRef arg0,
537 LLVMValueRef arg1);
538
539 LLVMValueRef
540 lp_build_emit_llvm_ternary(
541 struct lp_build_tgsi_context *bld_base,
542 unsigned tgsi_opcode,
543 LLVMValueRef arg0,
544 LLVMValueRef arg1,
545 LLVMValueRef arg2);
546
/*
 * Translate a single instruction through the base context.
 * NOTE(review): the boolean result is presumably a success flag -- confirm.
 */
547 boolean
548 lp_build_tgsi_inst_llvm(
549 struct lp_build_tgsi_context * bld_base,
550 const struct tgsi_full_instruction *inst);
551
/*
 * Fetch one channel of source operand `src_op` of `inst`.
 * NOTE(review): presumably routed through bld_base->emit_fetch_funcs[] by
 * register file -- confirm.
 */
552 LLVMValueRef
553 lp_build_emit_fetch(
554 struct lp_build_tgsi_context *bld_base,
555 const struct tgsi_full_instruction *inst,
556 unsigned src_op,
557 const unsigned chan_index);
558
559
/*
 * Same shape as lp_build_emit_fetch(), but the operand is selected by
 * `tex_off_op` instead of `src_op`.
 * NOTE(review): presumably indexes the instruction's texture-offset operands
 * (the "offsets" of the commit title) -- confirm.
 */
560 LLVMValueRef
561 lp_build_emit_fetch_texoffset(
562 struct lp_build_tgsi_context *bld_base,
563 const struct tgsi_full_instruction *inst,
564 unsigned tex_off_op,
565 const unsigned chan_index);
566
/*
 * Translate a whole token stream using the callbacks installed in `bld_base`.
 * NOTE(review): the boolean result is presumably a success flag -- confirm.
 */
567 boolean
568 lp_build_tgsi_llvm(
569 struct lp_build_tgsi_context * bld_base,
570 const struct tgsi_token *tokens);
571
572 #endif /* LP_BLD_TGSI_H */