src/mesa/state_tracker/st_glsl_to_tgsi.cpp
1 /*
2 * Copyright (C) 2005-2007 Brian Paul All Rights Reserved.
3 * Copyright (C) 2008 VMware, Inc. All Rights Reserved.
4 * Copyright © 2010 Intel Corporation
5 * Copyright © 2011 Bryan Cain
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 * DEALINGS IN THE SOFTWARE.
25 */
26
27 /**
28 * \file glsl_to_tgsi.cpp
29 *
30 * Translate GLSL IR to TGSI.
31 */
32
33 #include <stdio.h>
34 #include "main/compiler.h"
35 #include "ir.h"
36 #include "ir_visitor.h"
37 #include "ir_print_visitor.h"
38 #include "ir_expression_flattening.h"
39 #include "glsl_types.h"
40 #include "glsl_parser_extras.h"
41 #include "../glsl/program.h"
42 #include "ir_optimization.h"
43 #include "ast.h"
44
45 #include "main/mtypes.h"
46 #include "main/shaderobj.h"
47 #include "program/hash_table.h"
48
49 extern "C" {
50 #include "main/shaderapi.h"
51 #include "main/uniforms.h"
52 #include "program/prog_instruction.h"
53 #include "program/prog_optimize.h"
54 #include "program/prog_print.h"
55 #include "program/program.h"
56 #include "program/prog_parameter.h"
57 #include "program/sampler.h"
58
59 #include "pipe/p_compiler.h"
60 #include "pipe/p_context.h"
61 #include "pipe/p_screen.h"
62 #include "pipe/p_shader_tokens.h"
63 #include "pipe/p_state.h"
64 #include "util/u_math.h"
65 #include "tgsi/tgsi_ureg.h"
66 #include "tgsi/tgsi_info.h"
67 #include "st_context.h"
68 #include "st_program.h"
69 #include "st_glsl_to_tgsi.h"
70 #include "st_mesa_to_tgsi.h"
71 }
72
73 #define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX
74 #define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) | \
75 (1 << PROGRAM_ENV_PARAM) | \
76 (1 << PROGRAM_STATE_VAR) | \
77 (1 << PROGRAM_NAMED_PARAM) | \
78 (1 << PROGRAM_CONSTANT) | \
79 (1 << PROGRAM_UNIFORM))
80
81 /**
82 * Maximum number of temporary registers.
83 *
84 * It is too big for stack allocated arrays -- it will cause stack overflow on
85 * Windows and likely Mac OS X.
86 */
87 #define MAX_TEMPS 4096
88
89 /* will be 4 for GLSL 4.00 */
90 #define MAX_GLSL_TEXTURE_OFFSET 1
91
92 class st_src_reg;
93 class st_dst_reg;
94
95 static int swizzle_for_size(int size);
96
97 /**
98 * This class corresponds to TGSI's ureg_src register.
99 */
100 class st_src_reg {
101 public:
102 st_src_reg(gl_register_file file, int index, const glsl_type *type)
103 {
104 this->file = file;
105 this->index = index;
106 if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
107 this->swizzle = swizzle_for_size(type->vector_elements);
108 else
109 this->swizzle = SWIZZLE_XYZW;
110 this->negate = 0;
111 this->type = type ? type->base_type : GLSL_TYPE_ERROR;
112 this->reladdr = NULL;
113 }
114
115 st_src_reg(gl_register_file file, int index, int type)
116 {
117 this->type = type;
118 this->file = file;
119 this->index = index;
120 this->swizzle = SWIZZLE_XYZW;
121 this->negate = 0;
122 this->reladdr = NULL;
123 }
124
125 st_src_reg()
126 {
127 this->type = GLSL_TYPE_ERROR;
128 this->file = PROGRAM_UNDEFINED;
129 this->index = 0;
130 this->swizzle = 0;
131 this->negate = 0;
132 this->reladdr = NULL;
133 }
134
135 explicit st_src_reg(st_dst_reg reg);
136
137 gl_register_file file; /**< PROGRAM_* from Mesa */
138 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
139 GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
140 int negate; /**< NEGATE_XYZW mask from mesa */
141 int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
142 /** Register index should be offset by the integer in this reg. */
143 st_src_reg *reladdr;
144 };
145
146 class st_dst_reg {
147 public:
148 st_dst_reg(gl_register_file file, int writemask, int type)
149 {
150 this->file = file;
151 this->index = 0;
152 this->writemask = writemask;
153 this->cond_mask = COND_TR;
154 this->reladdr = NULL;
155 this->type = type;
156 }
157
158 st_dst_reg()
159 {
160 this->type = GLSL_TYPE_ERROR;
161 this->file = PROGRAM_UNDEFINED;
162 this->index = 0;
163 this->writemask = 0;
164 this->cond_mask = COND_TR;
165 this->reladdr = NULL;
166 }
167
168 explicit st_dst_reg(st_src_reg reg);
169
170 gl_register_file file; /**< PROGRAM_* from Mesa */
171 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
172 int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
173 GLuint cond_mask:4;
174 int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
175 /** Register index should be offset by the integer in this reg. */
176 st_src_reg *reladdr;
177 };
178
179 st_src_reg::st_src_reg(st_dst_reg reg)
180 {
181 this->type = reg.type;
182 this->file = reg.file;
183 this->index = reg.index;
184 this->swizzle = SWIZZLE_XYZW;
185 this->negate = 0;
186 this->reladdr = reg.reladdr;
187 }
188
189 st_dst_reg::st_dst_reg(st_src_reg reg)
190 {
191 this->type = reg.type;
192 this->file = reg.file;
193 this->index = reg.index;
194 this->writemask = WRITEMASK_XYZW;
195 this->cond_mask = COND_TR;
196 this->reladdr = reg.reladdr;
197 }
198
199 class glsl_to_tgsi_instruction : public exec_node {
200 public:
201 /* Callers of this ralloc-based new need not call delete. It's
202 * easier to just ralloc_free 'ctx' (or any of its ancestors). */
203 static void* operator new(size_t size, void *ctx)
204 {
205 void *node;
206
207 node = rzalloc_size(ctx, size);
208 assert(node != NULL);
209
210 return node;
211 }
212
213 unsigned op;
214 st_dst_reg dst;
215 st_src_reg src[3];
216 /** Pointer to the ir source this tree came from for debugging */
217 ir_instruction *ir;
218 GLboolean cond_update;
219 bool saturate;
220 int sampler; /**< sampler index */
221 int tex_target; /**< One of TEXTURE_*_INDEX */
222 GLboolean tex_shadow;
223 struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
224 unsigned tex_offset_num_offset;
225 int dead_mask; /**< Used in dead code elimination */
226
227 class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */
228 };
229
230 class variable_storage : public exec_node {
231 public:
232 variable_storage(ir_variable *var, gl_register_file file, int index)
233 : file(file), index(index), var(var)
234 {
235 /* empty */
236 }
237
238 gl_register_file file;
239 int index;
240 ir_variable *var; /* variable that maps to this, if any */
241 };
242
243 class immediate_storage : public exec_node {
244 public:
245 immediate_storage(gl_constant_value *values, int size, int type)
246 {
247 memcpy(this->values, values, size * sizeof(gl_constant_value));
248 this->size = size;
249 this->type = type;
250 }
251
252 gl_constant_value values[4];
253 int size; /**< Number of components (1-4) */
254 int type; /**< GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */
255 };
256
257 class function_entry : public exec_node {
258 public:
259 ir_function_signature *sig;
260
261 /**
262 * identifier of this function signature used by the program.
263 *
264 * At the point that TGSI instructions for function calls are
265 * generated, we don't know the address of the first instruction of
266 * the function body. So we make the BranchTarget that is called a
267 * small integer and rewrite them during set_branchtargets().
268 */
269 int sig_id;
270
271 /**
272 * Pointer to first instruction of the function body.
273 *
274 * Set during function body emits after main() is processed.
275 */
276 glsl_to_tgsi_instruction *bgn_inst;
277
278 /**
279 * Index of the first instruction of the function body in actual TGSI.
280 *
281 * Set after conversion from glsl_to_tgsi_instruction to TGSI.
282 */
283 int inst;
284
285 /** Storage for the return value. */
286 st_src_reg return_reg;
287 };
288
289 class glsl_to_tgsi_visitor : public ir_visitor {
290 public:
291 glsl_to_tgsi_visitor();
292 ~glsl_to_tgsi_visitor();
293
294 function_entry *current_function;
295
296 struct gl_context *ctx;
297 struct gl_program *prog;
298 struct gl_shader_program *shader_program;
299 struct gl_shader_compiler_options *options;
300
301 int next_temp;
302
303 int num_address_regs;
304 int samplers_used;
305 bool indirect_addr_temps;
306 bool indirect_addr_consts;
307 int num_clip_distances;
308
309 int glsl_version;
310 bool native_integers;
311
312 variable_storage *find_variable_storage(ir_variable *var);
313
314 int add_constant(gl_register_file file, gl_constant_value values[4],
315 int size, int datatype, GLuint *swizzle_out);
316
317 function_entry *get_function_signature(ir_function_signature *sig);
318
319 st_src_reg get_temp(const glsl_type *type);
320 void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr);
321
322 st_src_reg st_src_reg_for_float(float val);
323 st_src_reg st_src_reg_for_int(int val);
324 st_src_reg st_src_reg_for_type(int type, int val);
325
326 /**
327 * \name Visit methods
328 *
329 * As typical for the visitor pattern, there must be one \c visit method for
330 * each concrete subclass of \c ir_instruction. Virtual base classes within
331 * the hierarchy should not have \c visit methods.
332 */
333 /*@{*/
334 virtual void visit(ir_variable *);
335 virtual void visit(ir_loop *);
336 virtual void visit(ir_loop_jump *);
337 virtual void visit(ir_function_signature *);
338 virtual void visit(ir_function *);
339 virtual void visit(ir_expression *);
340 virtual void visit(ir_swizzle *);
341 virtual void visit(ir_dereference_variable *);
342 virtual void visit(ir_dereference_array *);
343 virtual void visit(ir_dereference_record *);
344 virtual void visit(ir_assignment *);
345 virtual void visit(ir_constant *);
346 virtual void visit(ir_call *);
347 virtual void visit(ir_return *);
348 virtual void visit(ir_discard *);
349 virtual void visit(ir_texture *);
350 virtual void visit(ir_if *);
351 /*@}*/
352
353 st_src_reg result;
354
355 /** List of variable_storage */
356 exec_list variables;
357
358 /** List of immediate_storage */
359 exec_list immediates;
360 unsigned num_immediates;
361
362 /** List of function_entry */
363 exec_list function_signatures;
364 int next_signature_id;
365
366 /** List of glsl_to_tgsi_instruction */
367 exec_list instructions;
368
369 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op);
370
371 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
372 st_dst_reg dst, st_src_reg src0);
373
374 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
375 st_dst_reg dst, st_src_reg src0, st_src_reg src1);
376
377 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
378 st_dst_reg dst,
379 st_src_reg src0, st_src_reg src1, st_src_reg src2);
380
381 unsigned get_opcode(ir_instruction *ir, unsigned op,
382 st_dst_reg dst,
383 st_src_reg src0, st_src_reg src1);
384
385 /**
386 * Emit the correct dot-product instruction for the type of arguments
387 */
388 glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir,
389 st_dst_reg dst,
390 st_src_reg src0,
391 st_src_reg src1,
392 unsigned elements);
393
394 void emit_scalar(ir_instruction *ir, unsigned op,
395 st_dst_reg dst, st_src_reg src0);
396
397 void emit_scalar(ir_instruction *ir, unsigned op,
398 st_dst_reg dst, st_src_reg src0, st_src_reg src1);
399
400 void try_emit_float_set(ir_instruction *ir, unsigned op, st_dst_reg dst);
401
402 void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);
403
404 void emit_scs(ir_instruction *ir, unsigned op,
405 st_dst_reg dst, const st_src_reg &src);
406
407 bool try_emit_mad(ir_expression *ir,
408 int mul_operand);
409 bool try_emit_mad_for_and_not(ir_expression *ir,
410 int mul_operand);
411 bool try_emit_sat(ir_expression *ir);
412
413 void emit_swz(ir_expression *ir);
414
415 bool process_move_condition(ir_rvalue *ir);
416
417 void simplify_cmp(void);
418
419 void rename_temp_register(int index, int new_index);
420 int get_first_temp_read(int index);
421 int get_first_temp_write(int index);
422 int get_last_temp_read(int index);
423 int get_last_temp_write(int index);
424
425 void copy_propagate(void);
426 void eliminate_dead_code(void);
427 int eliminate_dead_code_advanced(void);
428 void merge_registers(void);
429 void renumber_registers(void);
430
431 void *mem_ctx;
432 };
433
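/* Placeholder source/destination operands for unused instruction slots, plus
 * the single address register used for relative (indirect) addressing.
 */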
434 static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR);
435
436 static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR);
437
438 static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT);
439
440 static void
441 fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3);
442
443 static void
444 fail_link(struct gl_shader_program *prog, const char *fmt, ...)
445 {
446 va_list args;
447 va_start(args, fmt);
448 ralloc_vasprintf_append(&prog->InfoLog, fmt, args);
449 va_end(args);
450
451 prog->LinkStatus = GL_FALSE;
452 }
453
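/* Return a swizzle that reads the first 'size' components in order and
 * replicates the last one into the remaining channels, so values narrower
 * than a vec4 stay well-defined when read as a full register.
 */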
454 static int
455 swizzle_for_size(int size)
456 {
457 int size_swizzles[4] = {
458 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
459 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
460 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
461 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
462 };
463
464 assert((size >= 1) && (size <= 4));
465 return size_swizzles[size - 1];
466 }
467
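/* Thin wrappers over the TGSI opcode info table.  For texture instructions
 * the sampler operand is not counted as a regular source here.
 */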
468 static bool
469 is_tex_instruction(unsigned opcode)
470 {
471 const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
472 return info->is_tex;
473 }
474
475 static unsigned
476 num_inst_dst_regs(unsigned opcode)
477 {
478 const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
479 return info->num_dst;
480 }
481
482 static unsigned
483 num_inst_src_regs(unsigned opcode)
484 {
485 const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
486 return info->is_tex ? info->num_src - 1 : info->num_src;
487 }
488
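/**
 * Core instruction emitter: picks the float/int/uint opcode variant via
 * get_opcode(), lowers relative-addressing operands through the address
 * register (spilling extra reladdr sources to temporaries), records which
 * register files are accessed indirectly, and appends the new instruction
 * to the instruction list.
 */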
489 glsl_to_tgsi_instruction *
490 glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
491 st_dst_reg dst,
492 st_src_reg src0, st_src_reg src1, st_src_reg src2)
493 {
494 glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction();
495 int num_reladdr = 0, i;
496
497 op = get_opcode(ir, op, dst, src0, src1);
498
499 /* If we have to do relative addressing, we want to load the ARL
500 * reg directly for one of the regs, and preload the other reladdr
501 * sources into temps.
502 */
503 num_reladdr += dst.reladdr != NULL;
504 num_reladdr += src0.reladdr != NULL;
505 num_reladdr += src1.reladdr != NULL;
506 num_reladdr += src2.reladdr != NULL;
507
508 reladdr_to_temp(ir, &src2, &num_reladdr);
509 reladdr_to_temp(ir, &src1, &num_reladdr);
510 reladdr_to_temp(ir, &src0, &num_reladdr);
511
512 if (dst.reladdr) {
513 emit_arl(ir, address_reg, *dst.reladdr);
514 num_reladdr--;
515 }
516 assert(num_reladdr == 0);
517
518 inst->op = op;
519 inst->dst = dst;
520 inst->src[0] = src0;
521 inst->src[1] = src1;
522 inst->src[2] = src2;
523 inst->ir = ir;
524 inst->dead_mask = 0;
525
526 inst->function = NULL;
527
528 if (op == TGSI_OPCODE_ARL || op == TGSI_OPCODE_UARL)
529 this->num_address_regs = 1;
530
531 /* Update indirect addressing status used by TGSI */
532 if (dst.reladdr) {
533 switch(dst.file) {
534 case PROGRAM_TEMPORARY:
535 this->indirect_addr_temps = true;
536 break;
537 case PROGRAM_LOCAL_PARAM:
538 case PROGRAM_ENV_PARAM:
539 case PROGRAM_STATE_VAR:
540 case PROGRAM_NAMED_PARAM:
541 case PROGRAM_CONSTANT:
542 case PROGRAM_UNIFORM:
543 this->indirect_addr_consts = true;
544 break;
545 case PROGRAM_IMMEDIATE:
546 assert(!"immediates should not have indirect addressing");
547 break;
548 default:
549 break;
550 }
551 }
552 else {
553 for (i=0; i<3; i++) {
554 if(inst->src[i].reladdr) {
555 switch(inst->src[i].file) {
556 case PROGRAM_TEMPORARY:
557 this->indirect_addr_temps = true;
558 break;
559 case PROGRAM_LOCAL_PARAM:
560 case PROGRAM_ENV_PARAM:
561 case PROGRAM_STATE_VAR:
562 case PROGRAM_NAMED_PARAM:
563 case PROGRAM_CONSTANT:
564 case PROGRAM_UNIFORM:
565 this->indirect_addr_consts = true;
566 break;
567 case PROGRAM_IMMEDIATE:
568 assert(!"immediates should not have indirect addressing");
569 break;
570 default:
571 break;
572 }
573 }
574 }
575 }
576
577 this->instructions.push_tail(inst);
578
579 if (native_integers)
580 try_emit_float_set(ir, op, dst);
581
582 return inst;
583 }
584
585
586 glsl_to_tgsi_instruction *
587 glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
588 st_dst_reg dst, st_src_reg src0, st_src_reg src1)
589 {
590 return emit(ir, op, dst, src0, src1, undef_src);
591 }
592
593 glsl_to_tgsi_instruction *
594 glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
595 st_dst_reg dst, st_src_reg src0)
596 {
597 assert(dst.writemask != 0);
598 return emit(ir, op, dst, src0, undef_src, undef_src);
599 }
600
601 glsl_to_tgsi_instruction *
602 glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op)
603 {
604 return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
605 }
606
607 /**
608 * Emits the code to convert the result of float SET instructions to integers.
609 */
610 void
611 glsl_to_tgsi_visitor::try_emit_float_set(ir_instruction *ir, unsigned op,
612 st_dst_reg dst)
613 {
614 if ((op == TGSI_OPCODE_SEQ ||
615 op == TGSI_OPCODE_SNE ||
616 op == TGSI_OPCODE_SGE ||
617 op == TGSI_OPCODE_SLT))
618 {
619 st_src_reg src = st_src_reg(dst);
620 src.negate = ~src.negate;
621 dst.type = GLSL_TYPE_FLOAT;
622 emit(ir, TGSI_OPCODE_F2I, dst, src);
623 }
624 }
625
626 /**
627 * Determines whether to use an integer, unsigned integer, or float opcode
628 * based on the operands and input opcode, then emits the result.
629 */
630 unsigned
631 glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
632 st_dst_reg dst,
633 st_src_reg src0, st_src_reg src1)
634 {
635 int type = GLSL_TYPE_FLOAT;
636
637 if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
638 type = GLSL_TYPE_FLOAT;
639 else if (native_integers)
640 type = src0.type == GLSL_TYPE_BOOL ? GLSL_TYPE_INT : src0.type;
641
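   /* These macros select the float, signed integer, or unsigned integer
    * variant of an opcode based on the operand type computed above.
    */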
642 #define case4(c, f, i, u) \
643 case TGSI_OPCODE_##c: \
644 if (type == GLSL_TYPE_INT) op = TGSI_OPCODE_##i; \
645 else if (type == GLSL_TYPE_UINT) op = TGSI_OPCODE_##u; \
646 else op = TGSI_OPCODE_##f; \
647 break;
648 #define case3(f, i, u) case4(f, f, i, u)
649 #define case2fi(f, i) case4(f, f, i, i)
650 #define case2iu(i, u) case4(i, LAST, i, u)
651
652 switch(op) {
653 case2fi(ADD, UADD);
654 case2fi(MUL, UMUL);
655 case2fi(MAD, UMAD);
656 case3(DIV, IDIV, UDIV);
657 case3(MAX, IMAX, UMAX);
658 case3(MIN, IMIN, UMIN);
659 case2iu(MOD, UMOD);
660
661 case2fi(SEQ, USEQ);
662 case2fi(SNE, USNE);
663 case3(SGE, ISGE, USGE);
664 case3(SLT, ISLT, USLT);
665
666 case2iu(ISHR, USHR);
667
668 case2fi(SSG, ISSG);
669 case3(ABS, IABS, IABS);
670
671 default: break;
672 }
673
674 assert(op != TGSI_OPCODE_LAST);
675 return op;
676 }
677
678 glsl_to_tgsi_instruction *
679 glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
680 st_dst_reg dst, st_src_reg src0, st_src_reg src1,
681 unsigned elements)
682 {
683 static const unsigned dot_opcodes[] = {
684 TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4
685 };
686
687 return emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
688 }
689
690 /**
691 * Emits TGSI scalar opcodes to produce unique answers across channels.
692 *
693 * Some TGSI opcodes are scalar-only, like ARB_fp/vp. The src X
694 * channel determines the result across all channels. So to do a vec4
695 * of this operation, we want to emit a scalar per source channel used
696 * to produce dest channels.
697 */
698 void
699 glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
700 st_dst_reg dst,
701 st_src_reg orig_src0, st_src_reg orig_src1)
702 {
703 int i, j;
704 int done_mask = ~dst.writemask;
705
706 /* TGSI RCP is a scalar operation splatting results to all channels,
707 * like ARB_fp/vp. So emit as many RCPs as necessary to cover our
708 * dst channels.
709 */
710 for (i = 0; i < 4; i++) {
711 GLuint this_mask = (1 << i);
712 glsl_to_tgsi_instruction *inst;
713 st_src_reg src0 = orig_src0;
714 st_src_reg src1 = orig_src1;
715
716 if (done_mask & this_mask)
717 continue;
718
719 GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
720 GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
721 for (j = i + 1; j < 4; j++) {
722 /* If there is another enabled component in the destination that is
723 * derived from the same inputs, generate its value on this pass as
724 * well.
725 */
726 if (!(done_mask & (1 << j)) &&
727 GET_SWZ(src0.swizzle, j) == src0_swiz &&
728 GET_SWZ(src1.swizzle, j) == src1_swiz) {
729 this_mask |= (1 << j);
730 }
731 }
732 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
733 src0_swiz, src0_swiz);
734 src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
735 src1_swiz, src1_swiz);
736
737 inst = emit(ir, op, dst, src0, src1);
738 inst->dst.writemask = this_mask;
739 done_mask |= this_mask;
740 }
741 }
742
743 void
744 glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
745 st_dst_reg dst, st_src_reg src0)
746 {
747 st_src_reg undef = undef_src;
748
749 undef.swizzle = SWIZZLE_XXXX;
750
751 emit_scalar(ir, op, dst, src0, undef);
752 }
753
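/* Emit the address-register load used for relative addressing: UARL when the
 * index is already an integer type, plain ARL for float indices.
 */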
754 void
755 glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
756 st_dst_reg dst, st_src_reg src0)
757 {
758 int op = TGSI_OPCODE_ARL;
759
760 if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT)
761 op = TGSI_OPCODE_UARL;
762
763 emit(NULL, op, dst, src0);
764 }
765
766 /**
767 * Emit a TGSI_OPCODE_SCS instruction
768 *
769 * The \c SCS opcode functions a bit differently than the other TGSI opcodes.
770 * Instead of splatting its result across all four components of the
771 * destination, it writes one value to the \c x component and another value to
772 * the \c y component.
773 *
774 * \param ir IR instruction being processed
775 * \param op Either \c TGSI_OPCODE_SIN or \c TGSI_OPCODE_COS depending
776 * on which value is desired.
777 * \param dst Destination register
778 * \param src Source register
779 */
780 void
781 glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op,
782 st_dst_reg dst,
783 const st_src_reg &src)
784 {
785 /* Vertex programs cannot use the SCS opcode.
786 */
787 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) {
788 emit_scalar(ir, op, dst, src);
789 return;
790 }
791
792 const unsigned component = (op == TGSI_OPCODE_SIN) ? 0 : 1;
793 const unsigned scs_mask = (1U << component);
794 int done_mask = ~dst.writemask;
795 st_src_reg tmp;
796
797 assert(op == TGSI_OPCODE_SIN || op == TGSI_OPCODE_COS);
798
799 * If there are components in the destination that differ from the component
800 * that will be written by the SCS instruction, we'll need a temporary.
801 */
802 if (scs_mask != unsigned(dst.writemask)) {
803 tmp = get_temp(glsl_type::vec4_type);
804 }
805
806 for (unsigned i = 0; i < 4; i++) {
807 unsigned this_mask = (1U << i);
808 st_src_reg src0 = src;
809
810 if ((done_mask & this_mask) != 0)
811 continue;
812
813 * The source swizzle specifies which component of the source generates
814 * sine / cosine for the current component in the destination. The SCS
815 * instruction requires that this value be swizzled into the X component.
816 * Replace the current swizzle with a swizzle that puts the source in
817 * the X component.
818 */
819 unsigned src0_swiz = GET_SWZ(src.swizzle, i);
820
821 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
822 src0_swiz, src0_swiz);
823 for (unsigned j = i + 1; j < 4; j++) {
824 /* If there is another enabled component in the destination that is
825 * derived from the same inputs, generate its value on this pass as
826 * well.
827 */
828 if (!(done_mask & (1 << j)) &&
829 GET_SWZ(src0.swizzle, j) == src0_swiz) {
830 this_mask |= (1 << j);
831 }
832 }
833
834 if (this_mask != scs_mask) {
835 glsl_to_tgsi_instruction *inst;
836 st_dst_reg tmp_dst = st_dst_reg(tmp);
837
838 /* Emit the SCS instruction.
839 */
840 inst = emit(ir, TGSI_OPCODE_SCS, tmp_dst, src0);
841 inst->dst.writemask = scs_mask;
842
843 /* Move the result of the SCS instruction to the desired location in
844 * the destination.
845 */
846 tmp.swizzle = MAKE_SWIZZLE4(component, component,
847 component, component);
848 inst = emit(ir, TGSI_OPCODE_MOV, dst, tmp);
849 inst->dst.writemask = this_mask;
850 } else {
851 /* Emit the SCS instruction to write directly to the destination.
852 */
853 glsl_to_tgsi_instruction *inst = emit(ir, TGSI_OPCODE_SCS, dst, src0);
854 inst->dst.writemask = scs_mask;
855 }
856
857 done_mask |= this_mask;
858 }
859 }
860
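/**
 * Add a constant either to the program's parameter list (PROGRAM_CONSTANT)
 * or to this visitor's immediate list (PROGRAM_IMMEDIATE), reusing an
 * existing identical immediate when one is found.  Returns the index of the
 * entry in the chosen file.
 */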
861 int
862 glsl_to_tgsi_visitor::add_constant(gl_register_file file,
863 gl_constant_value values[4], int size, int datatype,
864 GLuint *swizzle_out)
865 {
866 if (file == PROGRAM_CONSTANT) {
867 return _mesa_add_typed_unnamed_constant(this->prog->Parameters, values,
868 size, datatype, swizzle_out);
869 } else {
870 int index = 0;
871 immediate_storage *entry;
872 assert(file == PROGRAM_IMMEDIATE);
873
874 /* Search immediate storage to see if we already have an identical
875 * immediate that we can use instead of adding a duplicate entry.
876 */
877 foreach_iter(exec_list_iterator, iter, this->immediates) {
878 entry = (immediate_storage *)iter.get();
879
880 if (entry->size == size &&
881 entry->type == datatype &&
882 !memcmp(entry->values, values, size * sizeof(gl_constant_value))) {
883 return index;
884 }
885 index++;
886 }
887
888 /* Add this immediate to the list. */
889 entry = new(mem_ctx) immediate_storage(values, size, datatype);
890 this->immediates.push_tail(entry);
891 this->num_immediates++;
892 return index;
893 }
894 }
895
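/* Convenience helpers that wrap a single scalar literal in an immediate
 * entry and return a source register referencing it.
 */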
896 st_src_reg
897 glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
898 {
899 st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT);
900 union gl_constant_value uval;
901
902 uval.f = val;
903 src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle);
904
905 return src;
906 }
907
908 st_src_reg
909 glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
910 {
911 st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT);
912 union gl_constant_value uval;
913
914 assert(native_integers);
915
916 uval.i = val;
917 src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle);
918
919 return src;
920 }
921
922 st_src_reg
923 glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val)
924 {
925 if (native_integers)
926 return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) :
927 st_src_reg_for_int(val);
928 else
929 return st_src_reg_for_float(val);
930 }
931
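/**
 * Return the number of vec4 slots a value of the given GLSL type occupies:
 * one per matrix column, the element count times the element size for
 * arrays, the sum of the members for structs, and one slot otherwise.
 */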
932 static int
933 type_size(const struct glsl_type *type)
934 {
935 unsigned int i;
936 int size;
937
938 switch (type->base_type) {
939 case GLSL_TYPE_UINT:
940 case GLSL_TYPE_INT:
941 case GLSL_TYPE_FLOAT:
942 case GLSL_TYPE_BOOL:
943 if (type->is_matrix()) {
944 return type->matrix_columns;
945 } else {
946 /* Regardless of size of vector, it gets a vec4. This is bad
947 * packing for things like floats, but otherwise arrays become a
948 * mess. Hopefully a later pass over the code can pack scalars
949 * down if appropriate.
950 */
951 return 1;
952 }
953 case GLSL_TYPE_ARRAY:
954 assert(type->length > 0);
955 return type_size(type->fields.array) * type->length;
956 case GLSL_TYPE_STRUCT:
957 size = 0;
958 for (i = 0; i < type->length; i++) {
959 size += type_size(type->fields.structure[i].type);
960 }
961 return size;
962 case GLSL_TYPE_SAMPLER:
963 /* Samplers take up one slot in UNIFORMS[], but they're baked in
964 * at link time.
965 */
966 return 1;
967 default:
968 assert(0);
969 return 0;
970 }
971 }
972
973 /**
974 * In the initial pass of codegen, we assign temporary numbers to
975 * intermediate results. (not SSA -- variable assignments will reuse
976 * storage).
977 */
978 st_src_reg
979 glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
980 {
981 st_src_reg src;
982
983 src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
984 src.file = PROGRAM_TEMPORARY;
985 src.index = next_temp;
986 src.reladdr = NULL;
987 next_temp += type_size(type);
988
989 if (type->is_array() || type->is_record()) {
990 src.swizzle = SWIZZLE_NOOP;
991 } else {
992 src.swizzle = swizzle_for_size(type->vector_elements);
993 }
994 src.negate = 0;
995
996 return src;
997 }
998
999 variable_storage *
1000 glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var)
1001 {
1002
1003 variable_storage *entry;
1004
1005 foreach_iter(exec_list_iterator, iter, this->variables) {
1006 entry = (variable_storage *)iter.get();
1007
1008 if (entry->var == var)
1009 return entry;
1010 }
1011
1012 return NULL;
1013 }
1014
1015 void
1016 glsl_to_tgsi_visitor::visit(ir_variable *ir)
1017 {
1018 if (strcmp(ir->name, "gl_FragCoord") == 0) {
1019 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
1020
1021 fp->OriginUpperLeft = ir->origin_upper_left;
1022 fp->PixelCenterInteger = ir->pixel_center_integer;
1023 }
1024
1025 if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
1026 unsigned int i;
1027 const ir_state_slot *const slots = ir->state_slots;
1028 assert(ir->state_slots != NULL);
1029
1030 /* Check if this statevar's setup in the STATE file exactly
1031 * matches how we'll want to reference it as a
1032 * struct/array/whatever. If not, then we need to move it into
1033 * temporary storage and hope that it'll get copy-propagated
1034 * out.
1035 */
1036 for (i = 0; i < ir->num_state_slots; i++) {
1037 if (slots[i].swizzle != SWIZZLE_XYZW) {
1038 break;
1039 }
1040 }
1041
1042 variable_storage *storage;
1043 st_dst_reg dst;
1044 if (i == ir->num_state_slots) {
1045 /* We'll set the index later. */
1046 storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
1047 this->variables.push_tail(storage);
1048
1049 dst = undef_dst;
1050 } else {
1051 /* The variable_storage constructor allocates slots based on the size
1052 * of the type. However, this had better match the number of state
1053 * elements that we're going to copy into the new temporary.
1054 */
1055 assert((int) ir->num_state_slots == type_size(ir->type));
1056
1057 storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY,
1058 this->next_temp);
1059 this->variables.push_tail(storage);
1060 this->next_temp += type_size(ir->type);
1061
1062 dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index,
1063 native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT));
1064 }
1065
1066
1067 for (unsigned int i = 0; i < ir->num_state_slots; i++) {
1068 int index = _mesa_add_state_reference(this->prog->Parameters,
1069 (gl_state_index *)slots[i].tokens);
1070
1071 if (storage->file == PROGRAM_STATE_VAR) {
1072 if (storage->index == -1) {
1073 storage->index = index;
1074 } else {
1075 assert(index == storage->index + (int)i);
1076 }
1077 } else {
1078 st_src_reg src(PROGRAM_STATE_VAR, index,
1079 native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT);
1080 src.swizzle = slots[i].swizzle;
1081 emit(ir, TGSI_OPCODE_MOV, dst, src);
1082 /* even a float takes up a whole vec4 reg in a struct/array. */
1083 dst.index++;
1084 }
1085 }
1086
1087 if (storage->file == PROGRAM_TEMPORARY &&
1088 dst.index != storage->index + (int) ir->num_state_slots) {
1089 fail_link(this->shader_program,
1090 "failed to load builtin uniform `%s' (%d/%d regs loaded)\n",
1091 ir->name, dst.index - storage->index,
1092 type_size(ir->type));
1093 }
1094 }
1095 }
1096
1097 void
1098 glsl_to_tgsi_visitor::visit(ir_loop *ir)
1099 {
1100 ir_dereference_variable *counter = NULL;
1101
1102 if (ir->counter != NULL)
1103 counter = new(ir) ir_dereference_variable(ir->counter);
1104
1105 if (ir->from != NULL) {
1106 assert(ir->counter != NULL);
1107
1108 ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL);
1109
1110 a->accept(this);
1111 delete a;
1112 }
1113
1114 emit(NULL, TGSI_OPCODE_BGNLOOP);
1115
1116 if (ir->to) {
1117 ir_expression *e =
1118 new(ir) ir_expression(ir->cmp, glsl_type::bool_type,
1119 counter, ir->to);
1120 ir_if *if_stmt = new(ir) ir_if(e);
1121
1122 ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break);
1123
1124 if_stmt->then_instructions.push_tail(brk);
1125
1126 if_stmt->accept(this);
1127
1128 delete if_stmt;
1129 delete e;
1130 delete brk;
1131 }
1132
1133 visit_exec_list(&ir->body_instructions, this);
1134
1135 if (ir->increment) {
1136 ir_expression *e =
1137 new(ir) ir_expression(ir_binop_add, counter->type,
1138 counter, ir->increment);
1139
1140 ir_assignment *a = new(ir) ir_assignment(counter, e, NULL);
1141
1142 a->accept(this);
1143 delete a;
1144 delete e;
1145 }
1146
1147 emit(NULL, TGSI_OPCODE_ENDLOOP);
1148 }
1149
1150 void
1151 glsl_to_tgsi_visitor::visit(ir_loop_jump *ir)
1152 {
1153 switch (ir->mode) {
1154 case ir_loop_jump::jump_break:
1155 emit(NULL, TGSI_OPCODE_BRK);
1156 break;
1157 case ir_loop_jump::jump_continue:
1158 emit(NULL, TGSI_OPCODE_CONT);
1159 break;
1160 }
1161 }
1162
1163
1164 void
1165 glsl_to_tgsi_visitor::visit(ir_function_signature *ir)
1166 {
1167 assert(0);
1168 (void)ir;
1169 }
1170
1171 void
1172 glsl_to_tgsi_visitor::visit(ir_function *ir)
1173 {
1174 /* Ignore function bodies other than main() -- we shouldn't see calls to
1175 * them since they should all be inlined before we get to glsl_to_tgsi.
1176 */
1177 if (strcmp(ir->name, "main") == 0) {
1178 const ir_function_signature *sig;
1179 exec_list empty;
1180
1181 sig = ir->matching_signature(&empty);
1182
1183 assert(sig);
1184
1185 foreach_iter(exec_list_iterator, iter, sig->body) {
1186 ir_instruction *ir = (ir_instruction *)iter.get();
1187
1188 ir->accept(this);
1189 }
1190 }
1191 }
1192
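/**
 * Try to fold ADD(MUL(a, b), c) into a single MAD.  \c mul_operand selects
 * which ADD operand is expected to be the multiply; returns false (emitting
 * nothing) if that operand is not a multiply.
 */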
1193 bool
1194 glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
1195 {
1196 int nonmul_operand = 1 - mul_operand;
1197 st_src_reg a, b, c;
1198 st_dst_reg result_dst;
1199
1200 ir_expression *expr = ir->operands[mul_operand]->as_expression();
1201 if (!expr || expr->operation != ir_binop_mul)
1202 return false;
1203
1204 expr->operands[0]->accept(this);
1205 a = this->result;
1206 expr->operands[1]->accept(this);
1207 b = this->result;
1208 ir->operands[nonmul_operand]->accept(this);
1209 c = this->result;
1210
1211 this->result = get_temp(ir->type);
1212 result_dst = st_dst_reg(this->result);
1213 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1214 emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c);
1215
1216 return true;
1217 }
1218
1219 /**
1220 * Emit MAD(a, -b, a) instead of AND(a, NOT(b))
1221 *
1222 * The logic values are 1.0 for true and 0.0 for false. Logical-and is
1223 * implemented using multiplication, and logical-or is implemented using
1224 * addition. Logical-not can be implemented as (true - x), or (1.0 - x).
1225 * As a result, the logical expression (a & !b) can be rewritten as:
1226 *
1227 * - a * !b
1228 * - a * (1 - b)
1229 * - (a * 1) - (a * b)
1230 * - a + -(a * b)
1231 * - a + (a * -b)
1232 *
1233 * This final expression can be implemented as a single MAD(a, -b, a)
1234 * instruction.
1235 */
1236 bool
1237 glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
1238 {
1239 const int other_operand = 1 - try_operand;
1240 st_src_reg a, b;
1241
1242 ir_expression *expr = ir->operands[try_operand]->as_expression();
1243 if (!expr || expr->operation != ir_unop_logic_not)
1244 return false;
1245
1246 ir->operands[other_operand]->accept(this);
1247 a = this->result;
1248 expr->operands[0]->accept(this);
1249 b = this->result;
1250
1251 b.negate = ~b.negate;
1252
1253 this->result = get_temp(ir->type);
1254 emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a);
1255
1256 return true;
1257 }
1258
1259 bool
1260 glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
1261 {
1262 /* Saturates were only introduced to vertex programs in
1263 * NV_vertex_program3, so don't give them to drivers in the VP.
1264 */
1265 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB)
1266 return false;
1267
1268 ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
1269 if (!sat_src)
1270 return false;
1271
1272 sat_src->accept(this);
1273 st_src_reg src = this->result;
1274
1275 /* If we generated an expression instruction into a temporary in
1276 * processing the saturate's operand, apply the saturate to that
1277 * instruction. Otherwise, generate a MOV to do the saturate.
1278 *
1279 * Note that we have to be careful to only do this optimization if
1280 * the instruction in question was what generated src->result. For
1281 * example, ir_dereference_array might generate a MUL instruction
1282 * to create the reladdr, and return us a src reg using that
1283 * reladdr. That MUL result is not the value we're trying to
1284 * saturate.
1285 */
1286 ir_expression *sat_src_expr = sat_src->as_expression();
1287 if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul ||
1288 sat_src_expr->operation == ir_binop_add ||
1289 sat_src_expr->operation == ir_binop_dot)) {
1290 glsl_to_tgsi_instruction *new_inst;
1291 new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
1292 new_inst->saturate = true;
1293 } else {
1294 this->result = get_temp(ir->type);
1295 st_dst_reg result_dst = st_dst_reg(this->result);
1296 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1297 glsl_to_tgsi_instruction *inst;
1298 inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src);
1299 inst->saturate = true;
1300 }
1301
1302 return true;
1303 }
1304
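/**
 * If \c reg uses relative addressing, emit the address-register load for it;
 * when more than one reladdr operand remains to be processed, also copy the
 * value into a temporary so only the last operand relies on the live
 * address register.
 */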
1305 void
1306 glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
1307 st_src_reg *reg, int *num_reladdr)
1308 {
1309 if (!reg->reladdr)
1310 return;
1311
1312 emit_arl(ir, address_reg, *reg->reladdr);
1313
1314 if (*num_reladdr != 1) {
1315 st_src_reg temp = get_temp(glsl_type::vec4_type);
1316
1317 emit(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg);
1318 *reg = temp;
1319 }
1320
1321 (*num_reladdr)--;
1322 }
1323
1324 void
1325 glsl_to_tgsi_visitor::visit(ir_expression *ir)
1326 {
1327 unsigned int operand;
1328 st_src_reg op[Elements(ir->operands)];
1329 st_src_reg result_src;
1330 st_dst_reg result_dst;
1331
1332 /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c)
1333 */
1334 if (ir->operation == ir_binop_add) {
1335 if (try_emit_mad(ir, 1))
1336 return;
1337 if (try_emit_mad(ir, 0))
1338 return;
1339 }
1340
1341 /* Quick peephole: Emit MAD(a, -b, a) instead of AND(a, NOT(b))
1342 */
1343 if (ir->operation == ir_binop_logic_and) {
1344 if (try_emit_mad_for_and_not(ir, 1))
1345 return;
1346 if (try_emit_mad_for_and_not(ir, 0))
1347 return;
1348 }
1349
1350 if (try_emit_sat(ir))
1351 return;
1352
1353 if (ir->operation == ir_quadop_vector)
1354 assert(!"ir_quadop_vector should have been lowered");
1355
1356 for (operand = 0; operand < ir->get_num_operands(); operand++) {
1357 this->result.file = PROGRAM_UNDEFINED;
1358 ir->operands[operand]->accept(this);
1359 if (this->result.file == PROGRAM_UNDEFINED) {
1360 ir_print_visitor v;
1361 printf("Failed to get tree for expression operand:\n");
1362 ir->operands[operand]->accept(&v);
1363 exit(1);
1364 }
1365 op[operand] = this->result;
1366
1367 /* Matrix expression operands should have been broken down to vector
1368 * operations already.
1369 */
1370 assert(!ir->operands[operand]->type->is_matrix());
1371 }
1372
1373 int vector_elements = ir->operands[0]->type->vector_elements;
1374 if (ir->operands[1]) {
1375 vector_elements = MAX2(vector_elements,
1376 ir->operands[1]->type->vector_elements);
1377 }
1378
1379 this->result.file = PROGRAM_UNDEFINED;
1380
1381 /* Storage for our result. Ideally for an assignment we'd be using
1382 * the actual storage for the result here, instead.
1383 */
1384 result_src = get_temp(ir->type);
1385 /* convenience for the emit functions below. */
1386 result_dst = st_dst_reg(result_src);
1387 /* Limit writes to the channels that will be used by result_src later.
1388 * This does limit this temp's use as a temporary for multi-instruction
1389 * sequences.
1390 */
1391 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1392
1393 switch (ir->operation) {
1394 case ir_unop_logic_not:
1395 if (result_dst.type != GLSL_TYPE_FLOAT)
1396 emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
1397 else {
1398 /* Previously 'SEQ dst, src, 0.0' was used for this. However, many
1399 * older GPUs implement SEQ using multiple instructions (i915 uses two
1400 * SGE instructions and a MUL instruction). Since our logic values are
1401 * 0.0 and 1.0, 1-x also implements !x.
1402 */
1403 op[0].negate = ~op[0].negate;
1404 emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0));
1405 }
1406 break;
1407 case ir_unop_neg:
1408 if (result_dst.type == GLSL_TYPE_INT || result_dst.type == GLSL_TYPE_UINT)
1409 emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
1410 else {
1411 op[0].negate = ~op[0].negate;
1412 result_src = op[0];
1413 }
1414 break;
1415 case ir_unop_abs:
1416 emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]);
1417 break;
1418 case ir_unop_sign:
1419 emit(ir, TGSI_OPCODE_SSG, result_dst, op[0]);
1420 break;
1421 case ir_unop_rcp:
1422 emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]);
1423 break;
1424
1425 case ir_unop_exp2:
1426 emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]);
1427 break;
1428 case ir_unop_exp:
1429 case ir_unop_log:
1430 assert(!"not reached: should be handled by ir_explog_to_explog2");
1431 break;
1432 case ir_unop_log2:
1433 emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]);
1434 break;
1435 case ir_unop_sin:
1436 emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
1437 break;
1438 case ir_unop_cos:
1439 emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]);
1440 break;
1441 case ir_unop_sin_reduced:
1442 emit_scs(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
1443 break;
1444 case ir_unop_cos_reduced:
1445 emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]);
1446 break;
1447
1448 case ir_unop_dFdx:
1449 emit(ir, TGSI_OPCODE_DDX, result_dst, op[0]);
1450 break;
1451 case ir_unop_dFdy:
1452 op[0].negate = ~op[0].negate;
1453 emit(ir, TGSI_OPCODE_DDY, result_dst, op[0]);
1454 break;
1455
1456 case ir_unop_noise: {
1457 /* At some point, a motivated person could add a better
1458 * implementation of noise. Currently not even the nvidia
1459 * binary drivers do anything more than this. In any case, the
1460 * place to do this is in the GL state tracker, not the poor
1461 * driver.
1462 */
1463 emit(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5));
1464 break;
1465 }
1466
1467 case ir_binop_add:
1468 emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
1469 break;
1470 case ir_binop_sub:
1471 emit(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]);
1472 break;
1473
1474 case ir_binop_mul:
1475 emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
1476 break;
1477 case ir_binop_div:
1478 if (result_dst.type == GLSL_TYPE_FLOAT)
1479 assert(!"not reached: should be handled by ir_div_to_mul_rcp");
1480 else
1481 emit(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
1482 break;
1483 case ir_binop_mod:
1484 if (result_dst.type == GLSL_TYPE_FLOAT)
1485 assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
1486 else
1487 emit(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]);
1488 break;
1489
1490 case ir_binop_less:
1491 emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]);
1492 break;
1493 case ir_binop_greater:
1494 emit(ir, TGSI_OPCODE_SLT, result_dst, op[1], op[0]);
1495 break;
1496 case ir_binop_lequal:
1497 emit(ir, TGSI_OPCODE_SGE, result_dst, op[1], op[0]);
1498 break;
1499 case ir_binop_gequal:
1500 emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]);
1501 break;
1502 case ir_binop_equal:
1503 emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
1504 break;
1505 case ir_binop_nequal:
1506 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1507 break;
1508 case ir_binop_all_equal:
1509 /* "==" operator producing a scalar boolean. */
1510 if (ir->operands[0]->type->is_vector() ||
1511 ir->operands[1]->type->is_vector()) {
1512 st_src_reg temp = get_temp(native_integers ?
1513 glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
1514 glsl_type::vec4_type);
1515
1516 if (native_integers) {
1517 st_dst_reg temp_dst = st_dst_reg(temp);
1518 st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);
1519
1520 emit(ir, TGSI_OPCODE_SEQ, st_dst_reg(temp), op[0], op[1]);
1521
1522 /* Emit 1-3 AND operations to combine the SEQ results. */
1523 switch (ir->operands[0]->type->vector_elements) {
1524 case 2:
1525 break;
1526 case 3:
1527 temp_dst.writemask = WRITEMASK_Y;
1528 temp1.swizzle = SWIZZLE_YYYY;
1529 temp2.swizzle = SWIZZLE_ZZZZ;
1530 emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
1531 break;
1532 case 4:
1533 temp_dst.writemask = WRITEMASK_X;
1534 temp1.swizzle = SWIZZLE_XXXX;
1535 temp2.swizzle = SWIZZLE_YYYY;
1536 emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
1537 temp_dst.writemask = WRITEMASK_Y;
1538 temp1.swizzle = SWIZZLE_ZZZZ;
1539 temp2.swizzle = SWIZZLE_WWWW;
1540 emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
1541 }
1542
1543 temp1.swizzle = SWIZZLE_XXXX;
1544 temp2.swizzle = SWIZZLE_YYYY;
1545 emit(ir, TGSI_OPCODE_AND, result_dst, temp1, temp2);
1546 } else {
1547 emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
1548
1549 /* After the dot-product, the value will be an integer on the
1550 * range [0,4]. Zero becomes 1.0, and positive values become zero.
1551 */
1552 emit_dp(ir, result_dst, temp, temp, vector_elements);
1553
1554 /* Negating the result of the dot-product gives values on the range
1555 * [-4, 0]. Zero becomes 1.0, and negative values become zero.
1556 * This is achieved using SGE.
1557 */
1558 st_src_reg sge_src = result_src;
1559 sge_src.negate = ~sge_src.negate;
1560 emit(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0));
1561 }
1562 } else {
1563 emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
1564 }
1565 break;
1566 case ir_binop_any_nequal:
1567 /* "!=" operator producing a scalar boolean. */
1568 if (ir->operands[0]->type->is_vector() ||
1569 ir->operands[1]->type->is_vector()) {
1570 st_src_reg temp = get_temp(native_integers ?
1571 glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
1572 glsl_type::vec4_type);
1573 emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
1574
1575 if (native_integers) {
1576 st_dst_reg temp_dst = st_dst_reg(temp);
1577 st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);
1578
1579 /* Emit 1-3 OR operations to combine the SNE results. */
1580 switch (ir->operands[0]->type->vector_elements) {
1581 case 2:
1582 break;
1583 case 3:
1584 temp_dst.writemask = WRITEMASK_Y;
1585 temp1.swizzle = SWIZZLE_YYYY;
1586 temp2.swizzle = SWIZZLE_ZZZZ;
1587 emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
1588 break;
1589 case 4:
1590 temp_dst.writemask = WRITEMASK_X;
1591 temp1.swizzle = SWIZZLE_XXXX;
1592 temp2.swizzle = SWIZZLE_YYYY;
1593 emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
1594 temp_dst.writemask = WRITEMASK_Y;
1595 temp1.swizzle = SWIZZLE_ZZZZ;
1596 temp2.swizzle = SWIZZLE_WWWW;
1597 emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
1598 }
1599
1600 temp1.swizzle = SWIZZLE_XXXX;
1601 temp2.swizzle = SWIZZLE_YYYY;
1602 emit(ir, TGSI_OPCODE_OR, result_dst, temp1, temp2);
1603 } else {
1604 /* After the dot-product, the value will be an integer on the
1605 * range [0,4]. Zero stays zero, and positive values become 1.0.
1606 */
1607 glsl_to_tgsi_instruction *const dp =
1608 emit_dp(ir, result_dst, temp, temp, vector_elements);
1609 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
1610 /* The clamping to [0,1] can be done for free in the fragment
1611 * shader with a saturate.
1612 */
1613 dp->saturate = true;
1614 } else {
1615 /* Negating the result of the dot-product gives values on the range
1616 * [-4, 0]. Zero stays zero, and negative values become 1.0. This is
1617 * achieved using SLT.
1618 */
1619 st_src_reg slt_src = result_src;
1620 slt_src.negate = ~slt_src.negate;
1621 emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
1622 }
1623 }
1624 } else {
1625 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1626 }
1627 break;
1628
1629 case ir_unop_any: {
1630 assert(ir->operands[0]->type->is_vector());
1631
1632 /* After the dot-product, the value will be an integer on the
1633 * range [0,4]. Zero stays zero, and positive values become 1.0.
1634 */
1635 glsl_to_tgsi_instruction *const dp =
1636 emit_dp(ir, result_dst, op[0], op[0],
1637 ir->operands[0]->type->vector_elements);
1638 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
1639 result_dst.type == GLSL_TYPE_FLOAT) {
1640 /* The clamping to [0,1] can be done for free in the fragment
1641 * shader with a saturate.
1642 */
1643 dp->saturate = true;
1644 } else if (result_dst.type == GLSL_TYPE_FLOAT) {
1645 /* Negating the result of the dot-product gives values on the range
1646 * [-4, 0]. Zero stays zero, and negative values become 1.0. This
1647 * is achieved using SLT.
1648 */
1649 st_src_reg slt_src = result_src;
1650 slt_src.negate = ~slt_src.negate;
1651 emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
1652 }
1653 else {
1654 /* Use SNE 0 if integers are being used as boolean values. */
1655 emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
1656 }
1657 break;
1658 }
1659
1660 case ir_binop_logic_xor:
1661 if (native_integers)
1662 emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
1663 else
1664 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1665 break;
1666
1667 case ir_binop_logic_or: {
1668 if (native_integers) {
1669 /* If integers are used as booleans, we can use an actual "or"
1670 * instruction.
1671 */
1672 assert(native_integers);
1673 emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
1674 } else {
1675 /* After the addition, the value will be an integer on the
1676 * range [0,2]. Zero stays zero, and positive values become 1.0.
1677 */
1678 glsl_to_tgsi_instruction *add =
1679 emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
1680 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
1681 /* The clamping to [0,1] can be done for free in the fragment
1682 * shader with a saturate if floats are being used as boolean values.
1683 */
1684 add->saturate = true;
1685 } else {
1686 /* Negating the result of the addition gives values on the range
1687 * [-2, 0]. Zero stays zero, and negative values become 1.0. This
1688 * is achieved using SLT.
1689 */
1690 st_src_reg slt_src = result_src;
1691 slt_src.negate = ~slt_src.negate;
1692 emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
1693 }
1694 }
1695 break;
1696 }
1697
1698 case ir_binop_logic_and:
1699 /* If native integers are disabled, the bool args are stored as float 0.0
1700 * or 1.0, so "mul" gives us "and". If they're enabled, just use the
1701 * actual AND opcode.
1702 */
1703 if (native_integers)
1704 emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
1705 else
1706 emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
1707 break;
1708
1709 case ir_binop_dot:
1710 assert(ir->operands[0]->type->is_vector());
1711 assert(ir->operands[0]->type == ir->operands[1]->type);
1712 emit_dp(ir, result_dst, op[0], op[1],
1713 ir->operands[0]->type->vector_elements);
1714 break;
1715
1716 case ir_unop_sqrt:
1717 /* sqrt(x) = x * rsq(x). */
1718 emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
1719 emit(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]);
1720 /* For incoming channels <= 0, set the result to 0. */
1721 op[0].negate = ~op[0].negate;
1722 emit(ir, TGSI_OPCODE_CMP, result_dst,
1723 op[0], result_src, st_src_reg_for_float(0.0));
1724 break;
1725 case ir_unop_rsq:
1726 emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
1727 break;
1728 case ir_unop_i2f:
1729 if (native_integers) {
1730 emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]);
1731 break;
1732 }
1733 /* fallthrough to next case otherwise */
1734 case ir_unop_b2f:
1735 if (native_integers) {
1736 emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_float(1.0));
1737 break;
1738 }
1739 /* fallthrough to next case otherwise */
1740 case ir_unop_i2u:
1741 case ir_unop_u2i:
1742 /* Converting between signed and unsigned integers is a no-op. */
1743 result_src = op[0];
1744 break;
1745 case ir_unop_b2i:
1746 if (native_integers) {
1747 /* Booleans are stored as integers using ~0 for true and 0 for false.
1748 * GLSL requires that int(bool) return 1 for true and 0 for false.
1749 * This conversion is done with AND, but it could be done with NEG.
1750 */
1751 emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1));
1752 } else {
1753 /* Booleans and integers are both stored as floats when native
1754 * integers are disabled.
1755 */
1756 result_src = op[0];
1757 }
1758 break;
1759 case ir_unop_f2i:
1760 if (native_integers)
1761 emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]);
1762 else
1763 emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
1764 break;
1765 case ir_unop_f2b:
1766 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
1767 break;
1768 case ir_unop_i2b:
1769 if (native_integers)
1770 emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
1771 else
1772 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
1773 break;
1774 case ir_unop_trunc:
1775 emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
1776 break;
1777 case ir_unop_ceil:
1778 emit(ir, TGSI_OPCODE_CEIL, result_dst, op[0]);
1779 break;
1780 case ir_unop_floor:
1781 emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
1782 break;
1783 case ir_unop_round_even:
1784 emit(ir, TGSI_OPCODE_ROUND, result_dst, op[0]);
1785 break;
1786 case ir_unop_fract:
1787 emit(ir, TGSI_OPCODE_FRC, result_dst, op[0]);
1788 break;
1789
1790 case ir_binop_min:
1791 emit(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]);
1792 break;
1793 case ir_binop_max:
1794 emit(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]);
1795 break;
1796 case ir_binop_pow:
1797 emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]);
1798 break;
1799
1800 case ir_unop_bit_not:
1801 if (native_integers) {
1802 emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
1803 break;
1804 }
1805 case ir_unop_u2f:
1806 if (native_integers) {
1807 emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]);
1808 break;
1809 }
1810 case ir_binop_lshift:
1811 if (native_integers) {
1812 emit(ir, TGSI_OPCODE_SHL, result_dst, op[0], op[1]);
1813 break;
1814 }
1815 case ir_binop_rshift:
1816 if (native_integers) {
1817 emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0], op[1]);
1818 break;
1819 }
1820 case ir_binop_bit_and:
1821 if (native_integers) {
1822 emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
1823 break;
1824 }
1825 case ir_binop_bit_xor:
1826 if (native_integers) {
1827 emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
1828 break;
1829 }
1830 case ir_binop_bit_or:
1831 if (native_integers) {
1832 emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
1833 break;
1834 }
1835
1836 assert(!"GLSL 1.30 features unsupported");
1837 break;
1838
1839 case ir_quadop_vector:
1840 /* This operation should have already been handled.
1841 */
1842 assert(!"Should not get here.");
1843 break;
1844 }
1845
1846 this->result = result_src;
1847 }
1848
1849
1850 void
1851 glsl_to_tgsi_visitor::visit(ir_swizzle *ir)
1852 {
1853 st_src_reg src;
1854 int i;
1855 int swizzle[4];
1856
1857 /* Note that this is only swizzles in expressions, not those on the left
1858 * hand side of an assignment, which do write masking. See ir_assignment
1859 * for that.
1860 */
1861
1862 ir->val->accept(this);
1863 src = this->result;
1864 assert(src.file != PROGRAM_UNDEFINED);
1865
1866 for (i = 0; i < 4; i++) {
1867 if (i < ir->type->vector_elements) {
1868 switch (i) {
1869 case 0:
1870 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x);
1871 break;
1872 case 1:
1873 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y);
1874 break;
1875 case 2:
1876 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z);
1877 break;
1878 case 3:
1879 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w);
1880 break;
1881 }
1882 } else {
1883 /* If the type is smaller than a vec4, replicate the last
1884 * channel out.
1885 */
1886 swizzle[i] = swizzle[ir->type->vector_elements - 1];
1887 }
1888 }
1889
1890 src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1891
1892 this->result = src;
1893 }
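/* Worked example of the swizzle composition above (for illustration): if the
 * child expression produced a source whose swizzle is already .wzyx and this
 * node applies .zy, then
 *   swizzle[0] = GET_SWZ(.wzyx, Z) = Y
 *   swizzle[1] = GET_SWZ(.wzyx, Y) = Z
 * and the remaining channels replicate the last one, giving .yzzz.  The two
 * swizzles are folded into one; no extra MOV is emitted here.
 */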
1894
1895 void
1896 glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
1897 {
1898 variable_storage *entry = find_variable_storage(ir->var);
1899 ir_variable *var = ir->var;
1900
1901 if (!entry) {
1902 switch (var->mode) {
1903 case ir_var_uniform:
1904 entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
1905 var->location);
1906 this->variables.push_tail(entry);
1907 break;
1908 case ir_var_in:
1909 case ir_var_inout:
1910 /* The linker assigns locations for varyings and attributes,
1911 * including deprecated builtins (like gl_Color), user-assigned
1912 * generic attributes (glBindAttribLocation), and
1913 * user-defined varyings.
1914 *
1915 * FINISHME: We would hit this path for function arguments. Fix!
1916 */
1917 assert(var->location != -1);
1918 entry = new(mem_ctx) variable_storage(var,
1919 PROGRAM_INPUT,
1920 var->location);
1921 break;
1922 case ir_var_out:
1923 assert(var->location != -1);
1924 entry = new(mem_ctx) variable_storage(var,
1925 PROGRAM_OUTPUT,
1926 var->location + var->index);
1927 break;
1928 case ir_var_system_value:
1929 entry = new(mem_ctx) variable_storage(var,
1930 PROGRAM_SYSTEM_VALUE,
1931 var->location);
1932 break;
1933 case ir_var_auto:
1934 case ir_var_temporary:
1935 entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY,
1936 this->next_temp);
1937 this->variables.push_tail(entry);
1938
1939 next_temp += type_size(var->type);
1940 break;
1941 }
1942
1943 if (!entry) {
1944 printf("Failed to make storage for %s\n", var->name);
1945 exit(1);
1946 }
1947 }
1948
1949 this->result = st_src_reg(entry->file, entry->index, var->type);
1950 if (!native_integers)
1951 this->result.type = GLSL_TYPE_FLOAT;
1952 }
1953
1954 void
1955 glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
1956 {
1957 ir_constant *index;
1958 st_src_reg src;
1959 int element_size = type_size(ir->type);
1960
1961 index = ir->array_index->constant_expression_value();
1962
1963 ir->array->accept(this);
1964 src = this->result;
1965
1966 if (index) {
1967 src.index += index->value.i[0] * element_size;
1968 } else {
1969 /* Variable index array dereference. It eats the "vec4" of the
1970 * base of the array and an index that offsets the TGSI register
1971 * index.
1972 */
1973 ir->array_index->accept(this);
1974
1975 st_src_reg index_reg;
1976
1977 if (element_size == 1) {
1978 index_reg = this->result;
1979 } else {
1980 index_reg = get_temp(native_integers ?
1981 glsl_type::int_type : glsl_type::float_type);
1982
1983 emit(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg),
1984 this->result, st_src_reg_for_type(index_reg.type, element_size));
1985 }
1986
1987 /* If there was already a relative address register involved, add the
1988 * new and the old together to get the new offset.
1989 */
1990 if (src.reladdr != NULL) {
1991 st_src_reg accum_reg = get_temp(native_integers ?
1992 glsl_type::int_type : glsl_type::float_type);
1993
1994 emit(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg),
1995 index_reg, *src.reladdr);
1996
1997 index_reg = accum_reg;
1998 }
1999
2000 src.reladdr = ralloc(mem_ctx, st_src_reg);
2001 memcpy(src.reladdr, &index_reg, sizeof(index_reg));
2002 }
2003
2004 /* If the type is smaller than a vec4, replicate the last channel out. */
2005 if (ir->type->is_scalar() || ir->type->is_vector())
2006 src.swizzle = swizzle_for_size(ir->type->vector_elements);
2007 else
2008 src.swizzle = SWIZZLE_NOOP;
2009
2010 this->result = src;
2011 }
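/* For illustration, a variable index into an array of mat4 (element_size 4),
 * e.g. "m[i]", is lowered roughly as:
 *
 *   MUL TEMP[t].x, <i>, 4      (scale the index by the element size)
 *   (TEMP[t] becomes src.reladdr; src.index stays at the array base)
 *
 * For element_size 1 (an array of floats or vec4s) the visited index is used
 * directly as the relative-address register, with no MUL.
 */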
2012
2013 void
2014 glsl_to_tgsi_visitor::visit(ir_dereference_record *ir)
2015 {
2016 unsigned int i;
2017 const glsl_type *struct_type = ir->record->type;
2018 int offset = 0;
2019
2020 ir->record->accept(this);
2021
2022 for (i = 0; i < struct_type->length; i++) {
2023 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
2024 break;
2025 offset += type_size(struct_type->fields.structure[i].type);
2026 }
2027
2028 /* If the type is smaller than a vec4, replicate the last channel out. */
2029 if (ir->type->is_scalar() || ir->type->is_vector())
2030 this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
2031 else
2032 this->result.swizzle = SWIZZLE_NOOP;
2033
2034 this->result.index += offset;
2035 }
2036
2037 /**
2038 * We want to be careful in assignment setup to hit the actual storage
2039 * instead of potentially using a temporary like we might with the
2040 * ir_dereference handler.
2041 */
2042 static st_dst_reg
2043 get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v)
2044 {
2045 /* The LHS must be a dereference. If the LHS is a variable indexed array
2046 * access of a vector, it must be separated into a series of conditional moves
2047 * before reaching this point (see ir_vec_index_to_cond_assign).
2048 */
2049 assert(ir->as_dereference());
2050 ir_dereference_array *deref_array = ir->as_dereference_array();
2051 if (deref_array) {
2052 assert(!deref_array->array->type->is_vector());
2053 }
2054
2055 /* Use the rvalue deref handler for the most part. We'll ignore
2056 * swizzles in it and write swizzles using writemask, though.
2057 */
2058 ir->accept(v);
2059 return st_dst_reg(v->result);
2060 }
2061
2062 /**
2063 * Process the condition of a conditional assignment
2064 *
2065 * Examines the condition of a conditional assignment to generate the optimal
2066 * first operand of a \c CMP instruction. If the condition is a relational
2067 * operator with 0 (e.g., \c ir_binop_less), the value being compared will be
2068 * used as the source for the \c CMP instruction. Otherwise the comparison
2069 * is processed to a boolean result, and the boolean result is used as the
2070 * operand to the CMP instruction.
2071 */
2072 bool
2073 glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir)
2074 {
2075 ir_rvalue *src_ir = ir;
2076 bool negate = true;
2077 bool switch_order = false;
2078
2079 ir_expression *const expr = ir->as_expression();
2080 if ((expr != NULL) && (expr->get_num_operands() == 2)) {
2081 bool zero_on_left = false;
2082
2083 if (expr->operands[0]->is_zero()) {
2084 src_ir = expr->operands[1];
2085 zero_on_left = true;
2086 } else if (expr->operands[1]->is_zero()) {
2087 src_ir = expr->operands[0];
2088 zero_on_left = false;
2089 }
2090
2091 /* a is - 0 + - 0 +
2092 * (a < 0) T F F ( a < 0) T F F
2093 * (0 < a) F F T (-a < 0) F F T
2094 * (a <= 0) T T F (-a < 0) F F T (swap order of other operands)
2095 * (0 <= a) F T T ( a < 0) T F F (swap order of other operands)
2096 * (a > 0) F F T (-a < 0) F F T
2097 * (0 > a) T F F ( a < 0) T F F
2098 * (a >= 0) F T T ( a < 0) T F F (swap order of other operands)
2099 * (0 >= a) T T F (-a < 0) F F T (swap order of other operands)
2100 *
2101 * Note that exchanging the order of 0 and 'a' in the comparison simply
2102 * means that the value of 'a' should be negated.
2103 */
2104 if (src_ir != ir) {
2105 switch (expr->operation) {
2106 case ir_binop_less:
2107 switch_order = false;
2108 negate = zero_on_left;
2109 break;
2110
2111 case ir_binop_greater:
2112 switch_order = false;
2113 negate = !zero_on_left;
2114 break;
2115
2116 case ir_binop_lequal:
2117 switch_order = true;
2118 negate = !zero_on_left;
2119 break;
2120
2121 case ir_binop_gequal:
2122 switch_order = true;
2123 negate = zero_on_left;
2124 break;
2125
2126 default:
2127 /* This isn't the right kind of comparison after all, so make sure
2128 * the whole condition is visited.
2129 */
2130 src_ir = ir;
2131 break;
2132 }
2133 }
2134 }
2135
2136 src_ir->accept(this);
2137
2138 /* We use the TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
2139 * condition we produced is 0.0 or 1.0. By flipping the sign, we can
2140 * choose which value TGSI_OPCODE_CMP produces without an extra instruction
2141 * computing the condition.
2142 */
2143 if (negate)
2144 this->result.negate = ~this->result.negate;
2145
2146 return switch_order;
2147 }
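/* Example: for the condition (a >= 0.0) this function visits only "a",
 * leaves it un-negated (per the table above) and returns switch_order = true,
 * so the caller's CMP uses "a" directly as the condition operand with the old
 * and new values swapped, instead of first materializing a boolean result.
 */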
2148
2149 void
2150 glsl_to_tgsi_visitor::visit(ir_assignment *ir)
2151 {
2152 st_dst_reg l;
2153 st_src_reg r;
2154 int i;
2155
2156 ir->rhs->accept(this);
2157 r = this->result;
2158
2159 l = get_assignment_lhs(ir->lhs, this);
2160
2161 /* FINISHME: This should really set to the correct maximal writemask for each
2162 * FINISHME: component written (in the loops below). This case can only
2163 * FINISHME: occur for matrices, arrays, and structures.
2164 */
2165 if (ir->write_mask == 0) {
2166 assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
2167 l.writemask = WRITEMASK_XYZW;
2168 } else if (ir->lhs->type->is_scalar() &&
2169 ir->lhs->variable_referenced()->mode == ir_var_out) {
2170 /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
2171 * FINISHME: W component of fragment shader output zero, work correctly.
2172 */
2173 l.writemask = WRITEMASK_XYZW;
2174 } else {
2175 int swizzles[4];
2176 int first_enabled_chan = 0;
2177 int rhs_chan = 0;
2178
2179 l.writemask = ir->write_mask;
2180
2181 for (int i = 0; i < 4; i++) {
2182 if (l.writemask & (1 << i)) {
2183 first_enabled_chan = GET_SWZ(r.swizzle, i);
2184 break;
2185 }
2186 }
2187
2188 /* Swizzle a small RHS vector into the channels being written.
2189 *
2190 * glsl ir treats write_mask as dictating how many channels are
2191 * present on the RHS while TGSI treats write_mask as just
2192 * showing which channels of the vec4 RHS get written.
2193 */
2194 for (int i = 0; i < 4; i++) {
2195 if (l.writemask & (1 << i))
2196 swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++);
2197 else
2198 swizzles[i] = first_enabled_chan;
2199 }
2200 r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1],
2201 swizzles[2], swizzles[3]);
2202 }
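/* Worked example (assuming the RHS is a vec2 temporary carrying the usual
 * .xyyy swizzle): for "v.yz = u;" the writemask is YZ, first_enabled_chan is
 * the Y channel of the RHS swizzle, and the loop produces
 * swizzles = { Y, X, Y, Y }, i.e. r.swizzle = .yxyy.  The instructions emitted
 * below therefore read RHS.x for dst.y and RHS.y for dst.z.
 */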
2203
2204 assert(l.file != PROGRAM_UNDEFINED);
2205 assert(r.file != PROGRAM_UNDEFINED);
2206
2207 if (ir->condition) {
2208 const bool switch_order = this->process_move_condition(ir->condition);
2209 st_src_reg condition = this->result;
2210
2211 for (i = 0; i < type_size(ir->lhs->type); i++) {
2212 st_src_reg l_src = st_src_reg(l);
2213 st_src_reg condition_temp = condition;
2214 l_src.swizzle = swizzle_for_size(ir->lhs->type->vector_elements);
2215
2216 if (native_integers) {
2217 /* This is necessary because TGSI's CMP instruction expects the
2218 * condition to be a float, and we store booleans as integers.
2219 * If TGSI had a UCMP instruction or similar, this extra
2220 * instruction would not be necessary.
2221 */
2222 condition_temp = get_temp(glsl_type::vec4_type);
2223 condition.negate = 0;
2224 emit(ir, TGSI_OPCODE_I2F, st_dst_reg(condition_temp), condition);
2225 condition_temp.swizzle = condition.swizzle;
2226 }
2227
2228 if (switch_order) {
2229 emit(ir, TGSI_OPCODE_CMP, l, condition_temp, l_src, r);
2230 } else {
2231 emit(ir, TGSI_OPCODE_CMP, l, condition_temp, r, l_src);
2232 }
2233
2234 l.index++;
2235 r.index++;
2236 }
2237 } else if (ir->rhs->as_expression() &&
2238 this->instructions.get_tail() &&
2239 ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir &&
2240 type_size(ir->lhs->type) == 1 &&
2241 l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst.writemask) {
2242 /* To avoid emitting an extra MOV when assigning an expression to a
2243 * variable, emit the last instruction of the expression again, but
2244 * replace the destination register with the target of the assignment.
2245 * Dead code elimination will remove the original instruction.
2246 */
2247 glsl_to_tgsi_instruction *inst, *new_inst;
2248 inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
2249 new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]);
2250 new_inst->saturate = inst->saturate;
2251 inst->dead_mask = inst->dst.writemask;
2252 } else {
2253 for (i = 0; i < type_size(ir->lhs->type); i++) {
2254 emit(ir, TGSI_OPCODE_MOV, l, r);
2255 l.index++;
2256 r.index++;
2257 }
2258 }
2259 }
2260
2261
2262 void
2263 glsl_to_tgsi_visitor::visit(ir_constant *ir)
2264 {
2265 st_src_reg src;
2266 GLfloat stack_vals[4] = { 0 };
2267 gl_constant_value *values = (gl_constant_value *) stack_vals;
2268 GLenum gl_type = GL_NONE;
2269 unsigned int i;
2270 static int in_array = 0;
2271 gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE;
2272
2273 /* Unfortunately, 4 floats is all we can get into
2274 * _mesa_add_typed_unnamed_constant. So, make a temp to store an
2275 * aggregate constant and move each constant value into it. If we
2276 * get lucky, copy propagation will eliminate the extra moves.
2277 */
2278 if (ir->type->base_type == GLSL_TYPE_STRUCT) {
2279 st_src_reg temp_base = get_temp(ir->type);
2280 st_dst_reg temp = st_dst_reg(temp_base);
2281
2282 foreach_iter(exec_list_iterator, iter, ir->components) {
2283 ir_constant *field_value = (ir_constant *)iter.get();
2284 int size = type_size(field_value->type);
2285
2286 assert(size > 0);
2287
2288 field_value->accept(this);
2289 src = this->result;
2290
2291 for (i = 0; i < (unsigned int)size; i++) {
2292 emit(ir, TGSI_OPCODE_MOV, temp, src);
2293
2294 src.index++;
2295 temp.index++;
2296 }
2297 }
2298 this->result = temp_base;
2299 return;
2300 }
2301
2302 if (ir->type->is_array()) {
2303 st_src_reg temp_base = get_temp(ir->type);
2304 st_dst_reg temp = st_dst_reg(temp_base);
2305 int size = type_size(ir->type->fields.array);
2306
2307 assert(size > 0);
2308 in_array++;
2309
2310 for (i = 0; i < ir->type->length; i++) {
2311 ir->array_elements[i]->accept(this);
2312 src = this->result;
2313 for (int j = 0; j < size; j++) {
2314 emit(ir, TGSI_OPCODE_MOV, temp, src);
2315
2316 src.index++;
2317 temp.index++;
2318 }
2319 }
2320 this->result = temp_base;
2321 in_array--;
2322 return;
2323 }
2324
2325 if (ir->type->is_matrix()) {
2326 st_src_reg mat = get_temp(ir->type);
2327 st_dst_reg mat_column = st_dst_reg(mat);
2328
2329 for (i = 0; i < ir->type->matrix_columns; i++) {
2330 assert(ir->type->base_type == GLSL_TYPE_FLOAT);
2331 values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements];
2332
2333 src = st_src_reg(file, -1, ir->type->base_type);
2334 src.index = add_constant(file,
2335 values,
2336 ir->type->vector_elements,
2337 GL_FLOAT,
2338 &src.swizzle);
2339 emit(ir, TGSI_OPCODE_MOV, mat_column, src);
2340
2341 mat_column.index++;
2342 }
2343
2344 this->result = mat;
2345 return;
2346 }
2347
2348 switch (ir->type->base_type) {
2349 case GLSL_TYPE_FLOAT:
2350 gl_type = GL_FLOAT;
2351 for (i = 0; i < ir->type->vector_elements; i++) {
2352 values[i].f = ir->value.f[i];
2353 }
2354 break;
2355 case GLSL_TYPE_UINT:
2356 gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT;
2357 for (i = 0; i < ir->type->vector_elements; i++) {
2358 if (native_integers)
2359 values[i].u = ir->value.u[i];
2360 else
2361 values[i].f = ir->value.u[i];
2362 }
2363 break;
2364 case GLSL_TYPE_INT:
2365 gl_type = native_integers ? GL_INT : GL_FLOAT;
2366 for (i = 0; i < ir->type->vector_elements; i++) {
2367 if (native_integers)
2368 values[i].i = ir->value.i[i];
2369 else
2370 values[i].f = ir->value.i[i];
2371 }
2372 break;
2373 case GLSL_TYPE_BOOL:
2374 gl_type = native_integers ? GL_BOOL : GL_FLOAT;
2375 for (i = 0; i < ir->type->vector_elements; i++) {
2376 if (native_integers)
2377 values[i].u = ir->value.b[i] ? ~0 : 0;
2378 else
2379 values[i].f = ir->value.b[i];
2380 }
2381 break;
2382 default:
2383 assert(!"Non-float/uint/int/bool constant");
2384 }
2385
2386 this->result = st_src_reg(file, -1, ir->type);
2387 this->result.index = add_constant(file,
2388 values,
2389 ir->type->vector_elements,
2390 gl_type,
2391 &this->result.swizzle);
2392 }
2393
2394 function_entry *
2395 glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig)
2396 {
2397 function_entry *entry;
2398
2399 foreach_iter(exec_list_iterator, iter, this->function_signatures) {
2400 entry = (function_entry *)iter.get();
2401
2402 if (entry->sig == sig)
2403 return entry;
2404 }
2405
2406 entry = ralloc(mem_ctx, function_entry);
2407 entry->sig = sig;
2408 entry->sig_id = this->next_signature_id++;
2409 entry->bgn_inst = NULL;
2410
2411 /* Allocate storage for all the parameters. */
2412 foreach_iter(exec_list_iterator, iter, sig->parameters) {
2413 ir_variable *param = (ir_variable *)iter.get();
2414 variable_storage *storage;
2415
2416 storage = find_variable_storage(param);
2417 assert(!storage);
2418
2419 storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY,
2420 this->next_temp);
2421 this->variables.push_tail(storage);
2422
2423 this->next_temp += type_size(param->type);
2424 }
2425
2426 if (!sig->return_type->is_void()) {
2427 entry->return_reg = get_temp(sig->return_type);
2428 } else {
2429 entry->return_reg = undef_src;
2430 }
2431
2432 this->function_signatures.push_tail(entry);
2433 return entry;
2434 }
2435
2436 void
2437 glsl_to_tgsi_visitor::visit(ir_call *ir)
2438 {
2439 glsl_to_tgsi_instruction *call_inst;
2440 ir_function_signature *sig = ir->callee;
2441 function_entry *entry = get_function_signature(sig);
2442 int i;
2443
2444 /* Process in parameters. */
2445 exec_list_iterator sig_iter = sig->parameters.iterator();
2446 foreach_iter(exec_list_iterator, iter, *ir) {
2447 ir_rvalue *param_rval = (ir_rvalue *)iter.get();
2448 ir_variable *param = (ir_variable *)sig_iter.get();
2449
2450 if (param->mode == ir_var_in ||
2451 param->mode == ir_var_inout) {
2452 variable_storage *storage = find_variable_storage(param);
2453 assert(storage);
2454
2455 param_rval->accept(this);
2456 st_src_reg r = this->result;
2457
2458 st_dst_reg l;
2459 l.file = storage->file;
2460 l.index = storage->index;
2461 l.reladdr = NULL;
2462 l.writemask = WRITEMASK_XYZW;
2463 l.cond_mask = COND_TR;
2464
2465 for (i = 0; i < type_size(param->type); i++) {
2466 emit(ir, TGSI_OPCODE_MOV, l, r);
2467 l.index++;
2468 r.index++;
2469 }
2470 }
2471
2472 sig_iter.next();
2473 }
2474 assert(!sig_iter.has_next());
2475
2476 /* Emit call instruction */
2477 call_inst = emit(ir, TGSI_OPCODE_CAL);
2478 call_inst->function = entry;
2479
2480 /* Process out parameters. */
2481 sig_iter = sig->parameters.iterator();
2482 foreach_iter(exec_list_iterator, iter, *ir) {
2483 ir_rvalue *param_rval = (ir_rvalue *)iter.get();
2484 ir_variable *param = (ir_variable *)sig_iter.get();
2485
2486 if (param->mode == ir_var_out ||
2487 param->mode == ir_var_inout) {
2488 variable_storage *storage = find_variable_storage(param);
2489 assert(storage);
2490
2491 st_src_reg r;
2492 r.file = storage->file;
2493 r.index = storage->index;
2494 r.reladdr = NULL;
2495 r.swizzle = SWIZZLE_NOOP;
2496 r.negate = 0;
2497
2498 param_rval->accept(this);
2499 st_dst_reg l = st_dst_reg(this->result);
2500
2501 for (i = 0; i < type_size(param->type); i++) {
2502 emit(ir, TGSI_OPCODE_MOV, l, r);
2503 l.index++;
2504 r.index++;
2505 }
2506 }
2507
2508 sig_iter.next();
2509 }
2510 assert(!sig_iter.has_next());
2511
2512 /* Process return value. */
2513 this->result = entry->return_reg;
2514 }
2515
2516 void
2517 glsl_to_tgsi_visitor::visit(ir_texture *ir)
2518 {
2519 st_src_reg result_src, coord, lod_info, projector, dx, dy, offset;
2520 st_dst_reg result_dst, coord_dst;
2521 glsl_to_tgsi_instruction *inst = NULL;
2522 unsigned opcode = TGSI_OPCODE_NOP;
2523
2524 if (ir->coordinate) {
2525 ir->coordinate->accept(this);
2526
2527 /* Put our coords in a temp. We'll need to modify them for shadow,
2528 * projection, or LOD, so the only case we'd use it as-is is if
2529 * we're doing plain old texturing. The optimization passes on
2530 * glsl_to_tgsi_visitor should handle cleaning up our mess in that case.
2531 */
2532 coord = get_temp(glsl_type::vec4_type);
2533 coord_dst = st_dst_reg(coord);
2534 emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
2535 }
2536
2537 if (ir->projector) {
2538 ir->projector->accept(this);
2539 projector = this->result;
2540 }
2541
2542 /* Storage for our result. Ideally for an assignment we'd be using
2543 * the actual storage for the result here, instead.
2544 */
2545 result_src = get_temp(glsl_type::vec4_type);
2546 result_dst = st_dst_reg(result_src);
2547
2548 switch (ir->op) {
2549 case ir_tex:
2550 opcode = TGSI_OPCODE_TEX;
2551 break;
2552 case ir_txb:
2553 opcode = TGSI_OPCODE_TXB;
2554 ir->lod_info.bias->accept(this);
2555 lod_info = this->result;
2556 break;
2557 case ir_txl:
2558 opcode = TGSI_OPCODE_TXL;
2559 ir->lod_info.lod->accept(this);
2560 lod_info = this->result;
2561 break;
2562 case ir_txd:
2563 opcode = TGSI_OPCODE_TXD;
2564 ir->lod_info.grad.dPdx->accept(this);
2565 dx = this->result;
2566 ir->lod_info.grad.dPdy->accept(this);
2567 dy = this->result;
2568 break;
2569 case ir_txs:
2570 opcode = TGSI_OPCODE_TXQ;
2571 ir->lod_info.lod->accept(this);
2572 lod_info = this->result;
2573 break;
2574 case ir_txf:
2575 opcode = TGSI_OPCODE_TXF;
2576 ir->lod_info.lod->accept(this);
2577 lod_info = this->result;
2578 if (ir->offset) {
2579 ir->offset->accept(this);
2580 offset = this->result;
2581 }
2582 break;
2583 }
2584
2585 const glsl_type *sampler_type = ir->sampler->type;
2586
2587 if (ir->projector) {
2588 if (opcode == TGSI_OPCODE_TEX) {
2589 /* Slot the projector in as the last component of the coord. */
2590 coord_dst.writemask = WRITEMASK_W;
2591 emit(ir, TGSI_OPCODE_MOV, coord_dst, projector);
2592 coord_dst.writemask = WRITEMASK_XYZW;
2593 opcode = TGSI_OPCODE_TXP;
2594 } else {
2595 st_src_reg coord_w = coord;
2596 coord_w.swizzle = SWIZZLE_WWWW;
2597
2598 /* For the other TEX opcodes there's no projective version
2599 * since the last slot is taken up by LOD info. Do the
2600 * projective divide now.
2601 */
2602 coord_dst.writemask = WRITEMASK_W;
2603 emit(ir, TGSI_OPCODE_RCP, coord_dst, projector);
2604
2605 /* In the case where we have to project the coordinates "by hand,"
2606 * the shadow comparator value must also be projected.
2607 */
2608 st_src_reg tmp_src = coord;
2609 if (ir->shadow_comparitor) {
2610 /* Slot the shadow value in as the second to last component of the
2611 * coord.
2612 */
2613 ir->shadow_comparitor->accept(this);
2614
2615 tmp_src = get_temp(glsl_type::vec4_type);
2616 st_dst_reg tmp_dst = st_dst_reg(tmp_src);
2617
2618 /* Projective division not allowed for array samplers. */
2619 assert(!sampler_type->sampler_array);
2620
2621 tmp_dst.writemask = WRITEMASK_Z;
2622 emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result);
2623
2624 tmp_dst.writemask = WRITEMASK_XY;
2625 emit(ir, TGSI_OPCODE_MOV, tmp_dst, coord);
2626 }
2627
2628 coord_dst.writemask = WRITEMASK_XYZ;
2629 emit(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w);
2630
2631 coord_dst.writemask = WRITEMASK_XYZW;
2632 coord.swizzle = SWIZZLE_XYZW;
2633 }
2634 }
2635
2636 /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow
2637 * comparator was put in the correct place (and projected) by the code,
2638 * above, that handles by-hand projection.
2639 */
2640 if (ir->shadow_comparitor && (!ir->projector || opcode == TGSI_OPCODE_TXP)) {
2641 /* Slot the shadow value in as the second to last component of the
2642 * coord.
2643 */
2644 ir->shadow_comparitor->accept(this);
2645
2646 /* XXX This will need to be updated for cubemap array samplers. */
2647 if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D &&
2648 sampler_type->sampler_array) ||
2649 sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) {
2650 coord_dst.writemask = WRITEMASK_W;
2651 } else {
2652 coord_dst.writemask = WRITEMASK_Z;
2653 }
2654
2655 emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
2656 coord_dst.writemask = WRITEMASK_XYZW;
2657 }
2658
2659 if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB ||
2660 opcode == TGSI_OPCODE_TXF) {
2661 /* TGSI stores LOD or LOD bias in the last channel of the coords. */
2662 coord_dst.writemask = WRITEMASK_W;
2663 emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info);
2664 coord_dst.writemask = WRITEMASK_XYZW;
2665 }
2666
2667 if (opcode == TGSI_OPCODE_TXD)
2668 inst = emit(ir, opcode, result_dst, coord, dx, dy);
2669 else if (opcode == TGSI_OPCODE_TXQ)
2670 inst = emit(ir, opcode, result_dst, lod_info);
2671 else if (opcode == TGSI_OPCODE_TXF) {
2672 inst = emit(ir, opcode, result_dst, coord);
2673 } else
2674 inst = emit(ir, opcode, result_dst, coord);
2675
2676 if (ir->shadow_comparitor)
2677 inst->tex_shadow = GL_TRUE;
2678
2679 inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler,
2680 this->shader_program,
2681 this->prog);
2682
2683 if (ir->offset) {
2684 inst->tex_offset_num_offset = 1;
2685 inst->tex_offsets[0].Index = offset.index;
2686 inst->tex_offsets[0].File = offset.file;
2687 inst->tex_offsets[0].SwizzleX = GET_SWZ(offset.swizzle, 0);
2688 inst->tex_offsets[0].SwizzleY = GET_SWZ(offset.swizzle, 1);
2689 inst->tex_offsets[0].SwizzleZ = GET_SWZ(offset.swizzle, 2);
2690 }
2691
2692 switch (sampler_type->sampler_dimensionality) {
2693 case GLSL_SAMPLER_DIM_1D:
2694 inst->tex_target = (sampler_type->sampler_array)
2695 ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
2696 break;
2697 case GLSL_SAMPLER_DIM_2D:
2698 inst->tex_target = (sampler_type->sampler_array)
2699 ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
2700 break;
2701 case GLSL_SAMPLER_DIM_3D:
2702 inst->tex_target = TEXTURE_3D_INDEX;
2703 break;
2704 case GLSL_SAMPLER_DIM_CUBE:
2705 inst->tex_target = TEXTURE_CUBE_INDEX;
2706 break;
2707 case GLSL_SAMPLER_DIM_RECT:
2708 inst->tex_target = TEXTURE_RECT_INDEX;
2709 break;
2710 case GLSL_SAMPLER_DIM_BUF:
2711 assert(!"FINISHME: Implement ARB_texture_buffer_object");
2712 break;
2713 case GLSL_SAMPLER_DIM_EXTERNAL:
2714 inst->tex_target = TEXTURE_EXTERNAL_INDEX;
2715 break;
2716 default:
2717 assert(!"Should not get here.");
2718 }
2719
2720 this->result = result_src;
2721 }
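/* For illustration, a shadow2D(sampler, P) lookup comes out of this visitor
 * roughly as the following sequence (register numbers are made up):
 *
 *   MOV TEMP[1], <coordinate>     (coordinate copied into a temp)
 *   MOV TEMP[1].z, <comparator>   (shadow reference slotted into .z)
 *   TEX TEMP[2], TEMP[1], SAMP[0] (inst->tex_shadow set, 2D target)
 *
 * For 2D-array and cube shadow samplers the comparator goes into .w instead,
 * and for TXB/TXL/TXF the LOD (or bias) value is packed into .w as well.
 */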
2722
2723 void
2724 glsl_to_tgsi_visitor::visit(ir_return *ir)
2725 {
2726 if (ir->get_value()) {
2727 st_dst_reg l;
2728 int i;
2729
2730 assert(current_function);
2731
2732 ir->get_value()->accept(this);
2733 st_src_reg r = this->result;
2734
2735 l = st_dst_reg(current_function->return_reg);
2736
2737 for (i = 0; i < type_size(current_function->sig->return_type); i++) {
2738 emit(ir, TGSI_OPCODE_MOV, l, r);
2739 l.index++;
2740 r.index++;
2741 }
2742 }
2743
2744 emit(ir, TGSI_OPCODE_RET);
2745 }
2746
2747 void
2748 glsl_to_tgsi_visitor::visit(ir_discard *ir)
2749 {
2750 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
2751
2752 if (ir->condition) {
2753 ir->condition->accept(this);
2754 this->result.negate = ~this->result.negate;
2755 emit(ir, TGSI_OPCODE_KIL, undef_dst, this->result);
2756 } else {
2757 emit(ir, TGSI_OPCODE_KILP);
2758 }
2759
2760 fp->UsesKill = GL_TRUE;
2761 }
2762
2763 void
2764 glsl_to_tgsi_visitor::visit(ir_if *ir)
2765 {
2766 glsl_to_tgsi_instruction *cond_inst, *if_inst;
2767 glsl_to_tgsi_instruction *prev_inst;
2768
2769 prev_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
2770
2771 ir->condition->accept(this);
2772 assert(this->result.file != PROGRAM_UNDEFINED);
2773
2774 if (this->options->EmitCondCodes) {
2775 cond_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
2776
2777 /* See if we actually generated any instruction for generating
2778 * the condition. If not, then cook up a move to a temp so we
2779 * have something to set cond_update on.
2780 */
2781 if (cond_inst == prev_inst) {
2782 st_src_reg temp = get_temp(glsl_type::bool_type);
2783 cond_inst = emit(ir->condition, TGSI_OPCODE_MOV, st_dst_reg(temp), result);
2784 }
2785 cond_inst->cond_update = GL_TRUE;
2786
2787 if_inst = emit(ir->condition, TGSI_OPCODE_IF);
2788 if_inst->dst.cond_mask = COND_NE;
2789 } else {
2790 if_inst = emit(ir->condition, TGSI_OPCODE_IF, undef_dst, this->result);
2791 }
2792
2793 this->instructions.push_tail(if_inst);
2794
2795 visit_exec_list(&ir->then_instructions, this);
2796
2797 if (!ir->else_instructions.is_empty()) {
2798 emit(ir->condition, TGSI_OPCODE_ELSE);
2799 visit_exec_list(&ir->else_instructions, this);
2800 }
2801
2802 if_inst = emit(ir->condition, TGSI_OPCODE_ENDIF);
2803 }
2804
2805 glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
2806 {
2807 result.file = PROGRAM_UNDEFINED;
2808 next_temp = 1;
2809 next_signature_id = 1;
2810 num_immediates = 0;
2811 current_function = NULL;
2812 num_address_regs = 0;
2813 indirect_addr_temps = false;
2814 indirect_addr_consts = false;
2815 mem_ctx = ralloc_context(NULL);
2816 ctx = NULL;
2817 prog = NULL;
2818 shader_program = NULL;
2819 options = NULL;
2820 }
2821
2822 glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor()
2823 {
2824 ralloc_free(mem_ctx);
2825 }
2826
2827 extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v)
2828 {
2829 delete v;
2830 }
2831
2832
2833 /**
2834 * Count resources used by the given gpu program (number of texture
2835 * samplers, etc).
2836 */
2837 static void
2838 count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
2839 {
2840 v->samplers_used = 0;
2841
2842 foreach_iter(exec_list_iterator, iter, v->instructions) {
2843 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
2844
2845 if (is_tex_instruction(inst->op)) {
2846 v->samplers_used |= 1 << inst->sampler;
2847
2848 if (inst->tex_shadow) {
2849 prog->ShadowSamplers |= 1 << inst->sampler;
2850 }
2851 }
2852 }
2853
2854 prog->SamplersUsed = v->samplers_used;
2855
2856 if (v->shader_program != NULL)
2857 _mesa_update_shader_textures_used(v->shader_program, prog);
2858 }
2859
2860 static void
2861 set_uniform_initializer(struct gl_context *ctx, void *mem_ctx,
2862 struct gl_shader_program *shader_program,
2863 const char *name, const glsl_type *type,
2864 ir_constant *val)
2865 {
2866 if (type->is_record()) {
2867 ir_constant *field_constant;
2868
2869 field_constant = (ir_constant *)val->components.get_head();
2870
2871 for (unsigned int i = 0; i < type->length; i++) {
2872 const glsl_type *field_type = type->fields.structure[i].type;
2873 const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name,
2874 type->fields.structure[i].name);
2875 set_uniform_initializer(ctx, mem_ctx, shader_program, field_name,
2876 field_type, field_constant);
2877 field_constant = (ir_constant *)field_constant->next;
2878 }
2879 return;
2880 }
2881
2882 int loc = _mesa_get_uniform_location(ctx, shader_program, name);
2883
2884 if (loc == -1) {
2885 fail_link(shader_program,
2886 "Couldn't find uniform for initializer %s\n", name);
2887 return;
2888 }
2889
2890 for (unsigned int i = 0; i < (type->is_array() ? type->length : 1); i++) {
2891 ir_constant *element;
2892 const glsl_type *element_type;
2893 if (type->is_array()) {
2894 element = val->array_elements[i];
2895 element_type = type->fields.array;
2896 } else {
2897 element = val;
2898 element_type = type;
2899 }
2900
2901 void *values;
2902
2903 if (element_type->base_type == GLSL_TYPE_BOOL) {
2904 int *conv = ralloc_array(mem_ctx, int, element_type->components());
2905 for (unsigned int j = 0; j < element_type->components(); j++) {
2906 conv[j] = element->value.b[j];
2907 }
2908 values = (void *)conv;
2909 element_type = glsl_type::get_instance(GLSL_TYPE_INT,
2910 element_type->vector_elements,
2911 1);
2912 } else {
2913 values = &element->value;
2914 }
2915
2916 if (element_type->is_matrix()) {
2917 _mesa_uniform_matrix(ctx, shader_program,
2918 element_type->matrix_columns,
2919 element_type->vector_elements,
2920 loc, 1, GL_FALSE, (GLfloat *)values);
2921 } else {
2922 _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns,
2923 values, element_type->gl_type);
2924 }
2925
2926 loc++;
2927 }
2928 }
2929
2930 /**
2931 * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
2932 * are read from the given src in this instruction
2933 */
2934 static int
2935 get_src_arg_mask(st_dst_reg dst, st_src_reg src)
2936 {
2937 int read_mask = 0, comp;
2938
2939 /* Now, given the src swizzle and the written channels, find which
2940 * components are actually read
2941 */
2942 for (comp = 0; comp < 4; ++comp) {
2943 const unsigned coord = GET_SWZ(src.swizzle, comp);
2944 ASSERT(coord < 4);
2945 if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W)
2946 read_mask |= 1 << coord;
2947 }
2948
2949 return read_mask;
2950 }
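/* Example: with dst.writemask = XY and src.swizzle = .wzyx, components 0 and 1
 * are written and they read the W and Z channels of src respectively, so the
 * returned mask is WRITEMASK_ZW.  Components 2 and 3 contribute nothing
 * because the instruction does not write them.
 */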
2951
2952 /**
2953 * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP
2954 * instruction is the first instruction to write to register T0. There are
2955 * several lowering passes done in GLSL IR (e.g. branches and
2956 * relative addressing) that create a large number of conditional assignments
2957 * that glsl_to_tgsi converts to CMP instructions like the one mentioned above.
2958 *
2959 * Here is why this conversion is safe:
2960 * CMP T0, T1 T2 T0 can be expanded to:
2961 * if (T1 < 0.0)
2962 * MOV T0, T2;
2963 * else
2964 * MOV T0, T0;
2965 *
2966 * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same
2967 * as the original program. If (T1 < 0.0) evaluates to false, executing
2968 * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized.
2969 * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2
2970 * because any instruction that was going to read from T0 after this was going
2971 * to read a garbage value anyway.
2972 */
2973 void
2974 glsl_to_tgsi_visitor::simplify_cmp(void)
2975 {
2976 unsigned *tempWrites;
2977 unsigned outputWrites[MAX_PROGRAM_OUTPUTS];
2978
2979 tempWrites = new unsigned[MAX_TEMPS];
2980 if (!tempWrites) {
2981 return;
2982 }
2983 memset(tempWrites, 0, sizeof(unsigned) * MAX_TEMPS);
2984 memset(outputWrites, 0, sizeof(outputWrites));
2985
2986 foreach_iter(exec_list_iterator, iter, this->instructions) {
2987 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
2988 unsigned prevWriteMask = 0;
2989
2990 /* Give up if we encounter relative addressing or flow control. */
2991 if (inst->dst.reladdr ||
2992 tgsi_get_opcode_info(inst->op)->is_branch ||
2993 inst->op == TGSI_OPCODE_BGNSUB ||
2994 inst->op == TGSI_OPCODE_CONT ||
2995 inst->op == TGSI_OPCODE_END ||
2996 inst->op == TGSI_OPCODE_ENDSUB ||
2997 inst->op == TGSI_OPCODE_RET) {
2998 break;
2999 }
3000
3001 if (inst->dst.file == PROGRAM_OUTPUT) {
3002 assert(inst->dst.index < MAX_PROGRAM_OUTPUTS);
3003 prevWriteMask = outputWrites[inst->dst.index];
3004 outputWrites[inst->dst.index] |= inst->dst.writemask;
3005 } else if (inst->dst.file == PROGRAM_TEMPORARY) {
3006 assert(inst->dst.index < MAX_TEMPS);
3007 prevWriteMask = tempWrites[inst->dst.index];
3008 tempWrites[inst->dst.index] |= inst->dst.writemask;
3009 }
3010
3011 /* For a CMP to be considered a conditional write, the destination
3012 * register and source register two must be the same. */
3013 if (inst->op == TGSI_OPCODE_CMP
3014 && !(inst->dst.writemask & prevWriteMask)
3015 && inst->src[2].file == inst->dst.file
3016 && inst->src[2].index == inst->dst.index
3017 && inst->dst.writemask == get_src_arg_mask(inst->dst, inst->src[2])) {
3018
3019 inst->op = TGSI_OPCODE_MOV;
3020 inst->src[0] = inst->src[1];
3021 }
3022 }
3023
3024 delete [] tempWrites;
3025 }
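/* Before/after sketch of what this pass does, for a TEMP[0] that has not been
 * written earlier in the program:
 *
 *   before: CMP TEMP[0].xyzw, TEMP[1], TEMP[2], TEMP[0]
 *   after:  MOV TEMP[0].xyzw, TEMP[2]
 *
 * The writemask must match exactly the channels of src[2] that are read
 * (see get_src_arg_mask); otherwise the CMP is left alone.
 */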
3026
3027 /* Replaces all references to a temporary register index with another index. */
3028 void
3029 glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index)
3030 {
3031 foreach_iter(exec_list_iterator, iter, this->instructions) {
3032 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3033 unsigned j;
3034
3035 for (j=0; j < num_inst_src_regs(inst->op); j++) {
3036 if (inst->src[j].file == PROGRAM_TEMPORARY &&
3037 inst->src[j].index == index) {
3038 inst->src[j].index = new_index;
3039 }
3040 }
3041
3042 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
3043 inst->dst.index = new_index;
3044 }
3045 }
3046 }
3047
3048 int
3049 glsl_to_tgsi_visitor::get_first_temp_read(int index)
3050 {
3051 int depth = 0; /* loop depth */
3052 int loop_start = -1; /* index of the first active BGNLOOP (if any) */
3053 unsigned i = 0, j;
3054
3055 foreach_iter(exec_list_iterator, iter, this->instructions) {
3056 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3057
3058 for (j=0; j < num_inst_src_regs(inst->op); j++) {
3059 if (inst->src[j].file == PROGRAM_TEMPORARY &&
3060 inst->src[j].index == index) {
3061 return (depth == 0) ? i : loop_start;
3062 }
3063 }
3064
3065 if (inst->op == TGSI_OPCODE_BGNLOOP) {
3066 if (depth++ == 0)
3067 loop_start = i;
3068 } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
3069 if (--depth == 0)
3070 loop_start = -1;
3071 }
3072 assert(depth >= 0);
3073
3074 i++;
3075 }
3076
3077 return -1;
3078 }
3079
3080 int
3081 glsl_to_tgsi_visitor::get_first_temp_write(int index)
3082 {
3083 int depth = 0; /* loop depth */
3084 int loop_start = -1; /* index of the first active BGNLOOP (if any) */
3085 int i = 0;
3086
3087 foreach_iter(exec_list_iterator, iter, this->instructions) {
3088 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3089
3090 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
3091 return (depth == 0) ? i : loop_start;
3092 }
3093
3094 if (inst->op == TGSI_OPCODE_BGNLOOP) {
3095 if (depth++ == 0)
3096 loop_start = i;
3097 } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
3098 if (--depth == 0)
3099 loop_start = -1;
3100 }
3101 assert(depth >= 0);
3102
3103 i++;
3104 }
3105
3106 return -1;
3107 }
3108
3109 int
3110 glsl_to_tgsi_visitor::get_last_temp_read(int index)
3111 {
3112 int depth = 0; /* loop depth */
3113 int last = -1; /* index of last instruction that reads the temporary */
3114 unsigned i = 0, j;
3115
3116 foreach_iter(exec_list_iterator, iter, this->instructions) {
3117 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3118
3119 for (j=0; j < num_inst_src_regs(inst->op); j++) {
3120 if (inst->src[j].file == PROGRAM_TEMPORARY &&
3121 inst->src[j].index == index) {
3122 last = (depth == 0) ? i : -2;
3123 }
3124 }
3125
3126 if (inst->op == TGSI_OPCODE_BGNLOOP)
3127 depth++;
3128 else if (inst->op == TGSI_OPCODE_ENDLOOP)
3129 if (--depth == 0 && last == -2)
3130 last = i;
3131 assert(depth >= 0);
3132
3133 i++;
3134 }
3135
3136 assert(last >= -1);
3137 return last;
3138 }
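/* Note on the -2 sentinel above: a read that happens inside a loop is recorded
 * as -2, and when the outermost ENDLOOP is reached the read is credited to the
 * ENDLOOP's instruction index.  The whole loop therefore counts as the last
 * read, so later passes never treat a temporary as dead while a loop that
 * reads it could still iterate.  get_last_temp_write() uses the same trick.
 */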
3139
3140 int
3141 glsl_to_tgsi_visitor::get_last_temp_write(int index)
3142 {
3143 int depth = 0; /* loop depth */
3144 int last = -1; /* index of last instruction that writes to the temporary */
3145 int i = 0;
3146
3147 foreach_iter(exec_list_iterator, iter, this->instructions) {
3148 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3149
3150 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index)
3151 last = (depth == 0) ? i : -2;
3152
3153 if (inst->op == TGSI_OPCODE_BGNLOOP)
3154 depth++;
3155 else if (inst->op == TGSI_OPCODE_ENDLOOP)
3156 if (--depth == 0 && last == -2)
3157 last = i;
3158 assert(depth >= 0);
3159
3160 i++;
3161 }
3162
3163 assert(last >= -1);
3164 return last;
3165 }
3166
3167 /*
3168 * On a basic block basis, tracks available PROGRAM_TEMPORARY register
3169 * channels for copy propagation and updates following instructions to
3170 * use the original versions.
3171 *
3172 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
3173 * will occur. As an example, a TXP production before this pass:
3174 *
3175 * 0: MOV TEMP[1], INPUT[4].xyyy;
3176 * 1: MOV TEMP[1].w, INPUT[4].wwww;
3177 * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
3178 *
3179 * and after:
3180 *
3181 * 0: MOV TEMP[1], INPUT[4].xyyy;
3182 * 1: MOV TEMP[1].w, INPUT[4].wwww;
3183 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
3184 *
3185 * which allows for dead code elimination on TEMP[1]'s writes.
3186 */
3187 void
3188 glsl_to_tgsi_visitor::copy_propagate(void)
3189 {
3190 glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
3191 glsl_to_tgsi_instruction *,
3192 this->next_temp * 4);
3193 int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
3194 int level = 0;
3195
3196 foreach_iter(exec_list_iterator, iter, this->instructions) {
3197 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3198
3199 assert(inst->dst.file != PROGRAM_TEMPORARY
3200 || inst->dst.index < this->next_temp);
3201
3202 /* First, do any copy propagation possible into the src regs. */
3203 for (int r = 0; r < 3; r++) {
3204 glsl_to_tgsi_instruction *first = NULL;
3205 bool good = true;
3206 int acp_base = inst->src[r].index * 4;
3207
3208 if (inst->src[r].file != PROGRAM_TEMPORARY ||
3209 inst->src[r].reladdr)
3210 continue;
3211
3212 /* See if we can find entries in the ACP consisting of MOVs
3213 * from the same src register for all the swizzled channels
3214 * of this src register reference.
3215 */
3216 for (int i = 0; i < 4; i++) {
3217 int src_chan = GET_SWZ(inst->src[r].swizzle, i);
3218 glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan];
3219
3220 if (!copy_chan) {
3221 good = false;
3222 break;
3223 }
3224
3225 assert(acp_level[acp_base + src_chan] <= level);
3226
3227 if (!first) {
3228 first = copy_chan;
3229 } else {
3230 if (first->src[0].file != copy_chan->src[0].file ||
3231 first->src[0].index != copy_chan->src[0].index) {
3232 good = false;
3233 break;
3234 }
3235 }
3236 }
3237
3238 if (good) {
3239 /* We've now validated that we can copy-propagate to
3240 * replace this src register reference. Do it.
3241 */
3242 inst->src[r].file = first->src[0].file;
3243 inst->src[r].index = first->src[0].index;
3244
3245 int swizzle = 0;
3246 for (int i = 0; i < 4; i++) {
3247 int src_chan = GET_SWZ(inst->src[r].swizzle, i);
3248 glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan];
3249 swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) <<
3250 (3 * i));
3251 }
3252 inst->src[r].swizzle = swizzle;
3253 }
3254 }
3255
3256 switch (inst->op) {
3257 case TGSI_OPCODE_BGNLOOP:
3258 case TGSI_OPCODE_ENDLOOP:
3259 /* End of a basic block, clear the ACP entirely. */
3260 memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
3261 break;
3262
3263 case TGSI_OPCODE_IF:
3264 ++level;
3265 break;
3266
3267 case TGSI_OPCODE_ENDIF:
3268 case TGSI_OPCODE_ELSE:
3269 /* Clear all channels written inside the block from the ACP, but
3270 * leave those that were not touched.
3271 */
3272 for (int r = 0; r < this->next_temp; r++) {
3273 for (int c = 0; c < 4; c++) {
3274 if (!acp[4 * r + c])
3275 continue;
3276
3277 if (acp_level[4 * r + c] >= level)
3278 acp[4 * r + c] = NULL;
3279 }
3280 }
3281 if (inst->op == TGSI_OPCODE_ENDIF)
3282 --level;
3283 break;
3284
3285 default:
3286 /* Continuing the block, clear any written channels from
3287 * the ACP.
3288 */
3289 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) {
3290 /* Any temporary might be written, so no copy propagation
3291 * across this instruction.
3292 */
3293 memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
3294 } else if (inst->dst.file == PROGRAM_OUTPUT &&
3295 inst->dst.reladdr) {
3296 /* Any output might be written, so no copy propagation
3297 * from outputs across this instruction.
3298 */
3299 for (int r = 0; r < this->next_temp; r++) {
3300 for (int c = 0; c < 4; c++) {
3301 if (!acp[4 * r + c])
3302 continue;
3303
3304 if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
3305 acp[4 * r + c] = NULL;
3306 }
3307 }
3308 } else if (inst->dst.file == PROGRAM_TEMPORARY ||
3309 inst->dst.file == PROGRAM_OUTPUT) {
3310 /* Clear where it's used as dst. */
3311 if (inst->dst.file == PROGRAM_TEMPORARY) {
3312 for (int c = 0; c < 4; c++) {
3313 if (inst->dst.writemask & (1 << c)) {
3314 acp[4 * inst->dst.index + c] = NULL;
3315 }
3316 }
3317 }
3318
3319 /* Clear where it's used as src. */
3320 for (int r = 0; r < this->next_temp; r++) {
3321 for (int c = 0; c < 4; c++) {
3322 if (!acp[4 * r + c])
3323 continue;
3324
3325 int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);
3326
3327 if (acp[4 * r + c]->src[0].file == inst->dst.file &&
3328 acp[4 * r + c]->src[0].index == inst->dst.index &&
3329 inst->dst.writemask & (1 << src_chan))
3330 {
3331 acp[4 * r + c] = NULL;
3332 }
3333 }
3334 }
3335 }
3336 break;
3337 }
3338
3339 /* If this is a copy, add it to the ACP. */
3340 if (inst->op == TGSI_OPCODE_MOV &&
3341 inst->dst.file == PROGRAM_TEMPORARY &&
3342 !inst->dst.reladdr &&
3343 !inst->saturate &&
3344 !inst->src[0].reladdr &&
3345 !inst->src[0].negate) {
3346 for (int i = 0; i < 4; i++) {
3347 if (inst->dst.writemask & (1 << i)) {
3348 acp[4 * inst->dst.index + i] = inst;
3349 acp_level[4 * inst->dst.index + i] = level;
3350 }
3351 }
3352 }
3353 }
3354
3355 ralloc_free(acp_level);
3356 ralloc_free(acp);
3357 }
3358
3359 /*
3360 * Tracks available PROGRAM_TEMPORARY registers for dead code elimination.
3361 *
3362 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
3363 * will occur. As an example, a TXP production after copy propagation but
3364 * before this pass:
3365 *
3366 * 0: MOV TEMP[1], INPUT[4].xyyy;
3367 * 1: MOV TEMP[1].w, INPUT[4].wwww;
3368 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
3369 *
3370 * and after this pass:
3371 *
3372 * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
3373 *
3374 * FIXME: assumes that all functions are inlined (no support for BGNSUB/ENDSUB)
3375 * FIXME: doesn't eliminate all dead code inside of loops; it steps around them
3376 */
3377 void
3378 glsl_to_tgsi_visitor::eliminate_dead_code(void)
3379 {
3380 int i;
3381
3382 for (i=0; i < this->next_temp; i++) {
3383 int last_read = get_last_temp_read(i);
3384 int j = 0;
3385
3386 foreach_iter(exec_list_iterator, iter, this->instructions) {
3387 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3388
3389 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == i &&
3390 j > last_read)
3391 {
3392 iter.remove();
3393 delete inst;
3394 }
3395
3396 j++;
3397 }
3398 }
3399 }
3400
3401 /*
3402 * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead
3403 * code elimination. This is less primitive than eliminate_dead_code(), as it
3404 * is per-channel and can detect consecutive writes without a read between them
3405 * as dead code. However, there is some dead code that can be eliminated by
3406 * eliminate_dead_code() but not this function - for example, this function
3407 * cannot eliminate an instruction writing to a register that is never read and
3408 * is the only instruction writing to that register.
3409 *
3410 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
3411 * will occur.
3412 */
3413 int
3414 glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
3415 {
3416 glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx,
3417 glsl_to_tgsi_instruction *,
3418 this->next_temp * 4);
3419 int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
3420 int level = 0;
3421 int removed = 0;
3422
3423 foreach_iter(exec_list_iterator, iter, this->instructions) {
3424 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3425
3426 assert(inst->dst.file != PROGRAM_TEMPORARY
3427 || inst->dst.index < this->next_temp);
3428
3429 switch (inst->op) {
3430 case TGSI_OPCODE_BGNLOOP:
3431 case TGSI_OPCODE_ENDLOOP:
3432 case TGSI_OPCODE_CONT:
3433 case TGSI_OPCODE_BRK:
3434 /* End of a basic block, clear the write array entirely.
3435 *
3436 * This keeps us from killing dead code when the writes are
3437 * on either side of a loop, even when the register isn't touched
3438 * inside the loop. However, glsl_to_tgsi_visitor doesn't seem to emit
3439 * dead code of this type, so it shouldn't make a difference as long as
3440 * the dead code elimination pass in the GLSL compiler does its job.
3441 */
3442 memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
3443 break;
3444
3445 case TGSI_OPCODE_ENDIF:
3446 case TGSI_OPCODE_ELSE:
3447 /* Promote the recorded level of all channels written inside the
3448 * preceding if or else block to the level above the if/else block.
3449 */
3450 for (int r = 0; r < this->next_temp; r++) {
3451 for (int c = 0; c < 4; c++) {
3452 if (!writes[4 * r + c])
3453 continue;
3454
3455 if (write_level[4 * r + c] == level)
3456 write_level[4 * r + c] = level-1;
3457 }
3458 }
3459
3460 if (inst->op == TGSI_OPCODE_ENDIF)
3461 --level;
3462
3463 break;
3464
3465 case TGSI_OPCODE_IF:
3466 ++level;
3467 /* fallthrough to default case to mark the condition as read */
3468
3469 default:
3470 /* Continuing the block, clear any channels from the write array that
3471 * are read by this instruction.
3472 */
3473 for (unsigned i = 0; i < Elements(inst->src); i++) {
3474 if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){
3475 /* Any temporary might be read, so no dead code elimination
3476 * across this instruction.
3477 */
3478 memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
3479 } else if (inst->src[i].file == PROGRAM_TEMPORARY) {
3480 /* Clear where it's used as src. */
3481 int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0);
3482 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1);
3483 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2);
3484 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3);
3485
3486 for (int c = 0; c < 4; c++) {
3487 if (src_chans & (1 << c)) {
3488 writes[4 * inst->src[i].index + c] = NULL;
3489 }
3490 }
3491 }
3492 }
3493 break;
3494 }
3495
3496 /* If this instruction writes to a temporary, add it to the write array.
3497 * If there is already an instruction in the write array for one or more
3498 * of the channels, flag that channel write as dead.
3499 */
3500 if (inst->dst.file == PROGRAM_TEMPORARY &&
3501 !inst->dst.reladdr &&
3502 !inst->saturate) {
3503 for (int c = 0; c < 4; c++) {
3504 if (inst->dst.writemask & (1 << c)) {
3505 if (writes[4 * inst->dst.index + c]) {
3506 if (write_level[4 * inst->dst.index + c] < level)
3507 continue;
3508 else
3509 writes[4 * inst->dst.index + c]->dead_mask |= (1 << c);
3510 }
3511 writes[4 * inst->dst.index + c] = inst;
3512 write_level[4 * inst->dst.index + c] = level;
3513 }
3514 }
3515 }
3516 }
3517
3518 /* Anything still in the write array at this point is dead code. */
3519 for (int r = 0; r < this->next_temp; r++) {
3520 for (int c = 0; c < 4; c++) {
3521 glsl_to_tgsi_instruction *inst = writes[4 * r + c];
3522 if (inst)
3523 inst->dead_mask |= (1 << c);
3524 }
3525 }
3526
3527 /* Now actually remove the instructions that are completely dead and update
3528 * the writemask of other instructions with dead channels.
3529 */
3530 foreach_iter(exec_list_iterator, iter, this->instructions) {
3531 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3532
3533 if (!inst->dead_mask || !inst->dst.writemask)
3534 continue;
3535 else if ((inst->dst.writemask & ~inst->dead_mask) == 0) {
3536 iter.remove();
3537 delete inst;
3538 removed++;
3539 } else
3540 inst->dst.writemask &= ~(inst->dead_mask);
3541 }
3542
3543 ralloc_free(write_level);
3544 ralloc_free(writes);
3545
3546 return removed;
3547 }
3548
3549 /* Merges temporary registers together where possible to reduce the number of
3550 * registers needed to run a program.
3551 *
3552 * Produces optimal code only after copy propagation and dead code elimination
3553 * have been run. */
3554 void
3555 glsl_to_tgsi_visitor::merge_registers(void)
3556 {
3557 int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp);
3558 int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp);
3559 int i, j;
3560
3561 /* Read the indices of the last read and first write to each temp register
3562 * into an array so that we don't have to traverse the instruction list as
3563 * much. */
3564 for (i=0; i < this->next_temp; i++) {
3565 last_reads[i] = get_last_temp_read(i);
3566 first_writes[i] = get_first_temp_write(i);
3567 }
3568
3569 /* Start looking for registers with non-overlapping usages that can be
3570 * merged together. */
3571 for (i=0; i < this->next_temp; i++) {
3572 /* Don't touch unused registers. */
3573 if (last_reads[i] < 0 || first_writes[i] < 0) continue;
3574
3575 for (j=0; j < this->next_temp; j++) {
3576 /* Don't touch unused registers. */
3577 if (last_reads[j] < 0 || first_writes[j] < 0) continue;
3578
3579 /* We can merge the two registers if the first write to j is after or
3580 * in the same instruction as the last read from i. Note that the
3581 * register at index i will always be used earlier or at the same time
3582 * as the register at index j. */
3583 if (first_writes[i] <= first_writes[j] &&
3584 last_reads[i] <= first_writes[j])
3585 {
3586 rename_temp_register(j, i); /* Replace all references to j with i.*/
3587
3588 /* Update the first_writes and last_reads arrays with the new
3589 * values for the merged register index, and mark the newly unused
3590 * register index as such. */
3591 last_reads[i] = last_reads[j];
3592 first_writes[j] = -1;
3593 last_reads[j] = -1;
3594 }
3595 }
3596 }
3597
3598 ralloc_free(last_reads);
3599 ralloc_free(first_writes);
3600 }
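/* Small example: if TEMP[2] is last read at instruction 5 and TEMP[7] is first
 * written at instruction 6 (with TEMP[2] written no later than TEMP[7]), their
 * live ranges do not overlap, so every reference to TEMP[7] is renamed to
 * TEMP[2] and TEMP[7]'s slot is marked unused for renumber_registers() to
 * reclaim.
 */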
3601
3602 /* Reassign indices to temporary registers by reusing unused indices created
3603 * by optimization passes. */
3604 void
3605 glsl_to_tgsi_visitor::renumber_registers(void)
3606 {
3607 int i = 0;
3608 int new_index = 0;
3609
3610 for (i=0; i < this->next_temp; i++) {
3611 if (get_first_temp_read(i) < 0) continue;
3612 if (i != new_index)
3613 rename_temp_register(i, new_index);
3614 new_index++;
3615 }
3616
3617 this->next_temp = new_index;
3618 }
3619
3620 /**
3621 * Returns a fragment program which implements the current pixel transfer ops.
3622 * Based on get_pixel_transfer_program in st_atom_pixeltransfer.c.
3623 */
3624 extern "C" void
3625 get_pixel_transfer_visitor(struct st_fragment_program *fp,
3626 glsl_to_tgsi_visitor *original,
3627 int scale_and_bias, int pixel_maps)
3628 {
3629 glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
3630 struct st_context *st = st_context(original->ctx);
3631 struct gl_program *prog = &fp->Base.Base;
3632 struct gl_program_parameter_list *params = _mesa_new_parameter_list();
3633 st_src_reg coord, src0;
3634 st_dst_reg dst0;
3635 glsl_to_tgsi_instruction *inst;
3636
3637 /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
3638 v->ctx = original->ctx;
3639 v->prog = prog;
3640 v->shader_program = NULL;
3641 v->glsl_version = original->glsl_version;
3642 v->native_integers = original->native_integers;
3643 v->options = original->options;
3644 v->next_temp = original->next_temp;
3645 v->num_address_regs = original->num_address_regs;
3646 v->samplers_used = prog->SamplersUsed = original->samplers_used;
3647 v->indirect_addr_temps = original->indirect_addr_temps;
3648 v->indirect_addr_consts = original->indirect_addr_consts;
3649 memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
3650 v->num_immediates = original->num_immediates;
3651
3652 /*
3653 * Get initial pixel color from the texture.
3654 * TEX colorTemp, fragment.texcoord[0], texture[0], 2D;
3655 */
3656 coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
3657 src0 = v->get_temp(glsl_type::vec4_type);
3658 dst0 = st_dst_reg(src0);
3659 inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
3660 inst->sampler = 0;
3661 inst->tex_target = TEXTURE_2D_INDEX;
3662
3663 prog->InputsRead |= FRAG_BIT_TEX0;
3664 prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */
3665 v->samplers_used |= (1 << 0);
3666
3667 if (scale_and_bias) {
3668 static const gl_state_index scale_state[STATE_LENGTH] =
3669 { STATE_INTERNAL, STATE_PT_SCALE,
3670 (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
3671 static const gl_state_index bias_state[STATE_LENGTH] =
3672 { STATE_INTERNAL, STATE_PT_BIAS,
3673 (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
3674 GLint scale_p, bias_p;
3675 st_src_reg scale, bias;
3676
3677 scale_p = _mesa_add_state_reference(params, scale_state);
3678 bias_p = _mesa_add_state_reference(params, bias_state);
3679
3680 /* MAD colorTemp, colorTemp, scale, bias; */
3681 scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT);
3682 bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT);
3683 inst = v->emit(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias);
3684 }
3685
3686 if (pixel_maps) {
3687 st_src_reg temp = v->get_temp(glsl_type::vec4_type);
3688 st_dst_reg temp_dst = st_dst_reg(temp);
3689
3690 assert(st->pixel_xfer.pixelmap_texture);
3691
3692 /* With a little effort, we can do four pixel map look-ups with
3693 * two TEX instructions:
3694 */
3695
3696 /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */
3697 temp_dst.writemask = WRITEMASK_XY; /* write R,G */
3698 inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
3699 inst->sampler = 1;
3700 inst->tex_target = TEXTURE_2D_INDEX;
3701
3702 /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */
3703 src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
3704 temp_dst.writemask = WRITEMASK_ZW; /* write B,A */
3705 inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
3706 inst->sampler = 1;
3707 inst->tex_target = TEXTURE_2D_INDEX;
3708
3709 prog->SamplersUsed |= (1 << 1); /* mark sampler 1 as used */
3710 v->samplers_used |= (1 << 1);
3711
3712 /* MOV colorTemp, temp; */
3713 inst = v->emit(NULL, TGSI_OPCODE_MOV, dst0, temp);
3714 }
3715
3716 /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
3717 * new visitor. */
3718 foreach_iter(exec_list_iterator, iter, original->instructions) {
3719 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3720 glsl_to_tgsi_instruction *newinst;
3721 st_src_reg src_regs[3];
3722
3723 if (inst->dst.file == PROGRAM_OUTPUT)
3724 prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index);
3725
3726 for (int i=0; i<3; i++) {
3727 src_regs[i] = inst->src[i];
3728 if (src_regs[i].file == PROGRAM_INPUT &&
3729 src_regs[i].index == FRAG_ATTRIB_COL0)
3730 {
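/* Redirect reads of the fragment color input to the temporary that
* holds the texture result computed above. */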
3731 src_regs[i].file = PROGRAM_TEMPORARY;
3732 src_regs[i].index = src0.index;
3733 }
3734 else if (src_regs[i].file == PROGRAM_INPUT)
3735 prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index);
3736 }
3737
3738 newinst = v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
3739 newinst->tex_target = inst->tex_target;
3740 }
3741
3742 /* Make modifications to fragment program info. */
3743 prog->Parameters = _mesa_combine_parameter_lists(params,
3744 original->prog->Parameters);
3745 _mesa_free_parameter_list(params);
3746 count_resources(v, prog);
3747 fp->glsl_to_tgsi = v;
3748 }
3749
3750 /**
3751 * Make fragment program for glBitmap:
3752 * Sample the texture and kill the fragment if the bit is 0.
3753 * This program will be combined with the user's fragment program.
3754 *
3755 * Based on make_bitmap_fragment_program in st_cb_bitmap.c.
3756 */
3757 extern "C" void
3758 get_bitmap_visitor(struct st_fragment_program *fp,
3759 glsl_to_tgsi_visitor *original, int samplerIndex)
3760 {
3761 glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
3762 struct st_context *st = st_context(original->ctx);
3763 struct gl_program *prog = &fp->Base.Base;
3764 st_src_reg coord, src0;
3765 st_dst_reg dst0;
3766 glsl_to_tgsi_instruction *inst;
3767
3768 /* Copy attributes from the original shader's glsl_to_tgsi_visitor. */
3769 v->ctx = original->ctx;
3770 v->prog = prog;
3771 v->shader_program = NULL;
3772 v->glsl_version = original->glsl_version;
3773 v->native_integers = original->native_integers;
3774 v->options = original->options;
3775 v->next_temp = original->next_temp;
3776 v->num_address_regs = original->num_address_regs;
3777 v->samplers_used = prog->SamplersUsed = original->samplers_used;
3778 v->indirect_addr_temps = original->indirect_addr_temps;
3779 v->indirect_addr_consts = original->indirect_addr_consts;
3780 memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
3781 v->num_immediates = original->num_immediates;
3782
3783 /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
3784 coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
3785 src0 = v->get_temp(glsl_type::vec4_type);
3786 dst0 = st_dst_reg(src0);
3787 inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
3788 inst->sampler = samplerIndex;
3789 inst->tex_target = TEXTURE_2D_INDEX;
3790
3791 prog->InputsRead |= FRAG_BIT_TEX0;
3792 prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */
3793 v->samplers_used |= (1 << samplerIndex);
3794
3795 /* KIL if -tmp0 < 0 # texel=0 -> keep / texel!=0 -> discard */
3796 src0.negate = NEGATE_XYZW;
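/* An L8 texture samples as (L,L,L,1); swizzle to .xxxx so the constant
* 1.0 alpha doesn't make the negated KIL condition always true. */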
3797 if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM)
3798 src0.swizzle = SWIZZLE_XXXX;
3799 inst = v->emit(NULL, TGSI_OPCODE_KIL, undef_dst, src0);
3800
3801 /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
3802 * new visitor. */
3803 foreach_iter(exec_list_iterator, iter, original->instructions) {
3804 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3805 glsl_to_tgsi_instruction *newinst;
3806 st_src_reg src_regs[3];
3807
3808 if (inst->dst.file == PROGRAM_OUTPUT)
3809 prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index);
3810
3811 for (int i=0; i<3; i++) {
3812 src_regs[i] = inst->src[i];
3813 if (src_regs[i].file == PROGRAM_INPUT)
3814 prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index);
3815 }
3816
3817 newinst = v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
3818 newinst->tex_target = inst->tex_target;
3819 }
3820
3821 /* Make modifications to fragment program info. */
3822 prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters);
3823 count_resources(v, prog);
3824 fp->glsl_to_tgsi = v;
3825 }
3826
3827 /* ------------------------- TGSI conversion stuff -------------------------- */
3828 struct label {
3829 unsigned branch_target;
3830 unsigned token;
3831 };
3832
3833 /**
3834 * Intermediate state used during shader translation.
3835 */
3836 struct st_translate {
3837 struct ureg_program *ureg;
3838
3839 struct ureg_dst temps[MAX_TEMPS];
3840 struct ureg_src *constants;
3841 struct ureg_src *immediates;
3842 struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
3843 struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
3844 struct ureg_dst address[1];
3845 struct ureg_src samplers[PIPE_MAX_SAMPLERS];
3846 struct ureg_src systemValues[SYSTEM_VALUE_MAX];
3847
3848 const GLuint *inputMapping;
3849 const GLuint *outputMapping;
3850
3851 /* For every instruction that contains a label (e.g. CALL), keep
3852 * details so that we can go back afterwards and emit the correct
3853 * tgsi instruction number for each label.
3854 */
3855 struct label *labels;
3856 unsigned labels_size;
3857 unsigned labels_count;
3858
3859 /* Keep a record of the tgsi instruction number that each mesa
3860 * instruction starts at, will be used to fix up labels after
3861 * translation.
3862 */
3863 unsigned *insn;
3864 unsigned insn_size;
3865 unsigned insn_count;
3866
3867 unsigned procType; /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */
3868
3869 boolean error;
3870 };
3871
3872 /** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */
3873 static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
3874 TGSI_SEMANTIC_FACE,
3875 TGSI_SEMANTIC_VERTEXID,
3876 TGSI_SEMANTIC_INSTANCEID
3877 };
3878
3879 /**
3880 * Make note of a branch to a label in the TGSI code.
3881 * After we've emitted all instructions, we'll go over the list
3882 * of labels built here and patch the TGSI code with the actual
3883 * location of each label.
3884 */
3885 static unsigned *get_label(struct st_translate *t, unsigned branch_target)
3886 {
3887 unsigned i;
3888
3889 if (t->labels_count + 1 >= t->labels_size) {
3890 t->labels_size = 1 << (util_logbase2(t->labels_size) + 1);
3891 t->labels = (struct label *)realloc(t->labels,
3892 t->labels_size * sizeof(struct label));
3893 if (t->labels == NULL) {
3894 static unsigned dummy;
3895 t->error = TRUE;
3896 return &dummy;
3897 }
3898 }
3899
3900 i = t->labels_count++;
3901 t->labels[i].branch_target = branch_target;
3902 return &t->labels[i].token;
3903 }
3904
3905 /**
3906 * Called prior to emitting the TGSI code for each instruction.
3907 * Allocate additional space for instructions if needed.
3908 * Update the insn[] array so the next glsl_to_tgsi_instruction points to
3909 * the next TGSI instruction.
3910 */
3911 static void set_insn_start(struct st_translate *t, unsigned start)
3912 {
3913 if (t->insn_count + 1 >= t->insn_size) {
3914 t->insn_size = 1 << (util_logbase2(t->insn_size) + 1);
3915 t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof(t->insn[0]));
3916 if (t->insn == NULL) {
3917 t->error = TRUE;
3918 return;
3919 }
3920 }
3921
3922 t->insn[t->insn_count++] = start;
3923 }
3924
3925 /**
3926 * Map a glsl_to_tgsi constant/immediate to a TGSI immediate.
3927 */
3928 static struct ureg_src
3929 emit_immediate(struct st_translate *t,
3930 gl_constant_value values[4],
3931 int type, int size)
3932 {
3933 struct ureg_program *ureg = t->ureg;
3934
3935 switch(type)
3936 {
3937 case GL_FLOAT:
3938 return ureg_DECL_immediate(ureg, &values[0].f, size);
3939 case GL_INT:
3940 return ureg_DECL_immediate_int(ureg, &values[0].i, size);
3941 case GL_UNSIGNED_INT:
3942 case GL_BOOL:
3943 return ureg_DECL_immediate_uint(ureg, &values[0].u, size);
3944 default:
3945 assert(!"should not get here - type must be float, int, uint, or bool");
3946 return ureg_src_undef();
3947 }
3948 }
3949
3950 /**
3951 * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register.
3952 */
3953 static struct ureg_dst
3954 dst_register(struct st_translate *t,
3955 gl_register_file file,
3956 GLuint index)
3957 {
3958 switch(file) {
3959 case PROGRAM_UNDEFINED:
3960 return ureg_dst_undef();
3961
3962 case PROGRAM_TEMPORARY:
3963 if (ureg_dst_is_undef(t->temps[index]))
3964 t->temps[index] = ureg_DECL_local_temporary(t->ureg);
3965
3966 return t->temps[index];
3967
3968 case PROGRAM_OUTPUT:
3969 if (t->procType == TGSI_PROCESSOR_VERTEX)
3970 assert(index < VERT_RESULT_MAX);
3971 else if (t->procType == TGSI_PROCESSOR_FRAGMENT)
3972 assert(index < FRAG_RESULT_MAX);
3973 else
3974 assert(index < GEOM_RESULT_MAX);
3975
3976 assert(t->outputMapping[index] < Elements(t->outputs));
3977
3978 return t->outputs[t->outputMapping[index]];
3979
3980 case PROGRAM_ADDRESS:
3981 return t->address[index];
3982
3983 default:
3984 assert(!"unknown dst register file");
3985 return ureg_dst_undef();
3986 }
3987 }
3988
3989 /**
3990 * Map a glsl_to_tgsi src register to a TGSI ureg_src register.
3991 */
3992 static struct ureg_src
3993 src_register(struct st_translate *t,
3994 gl_register_file file,
3995 GLuint index)
3996 {
3997 switch(file) {
3998 case PROGRAM_UNDEFINED:
3999 return ureg_src_undef();
4000
4001 case PROGRAM_TEMPORARY:
4002 assert(index >= 0);
4003 assert(index < Elements(t->temps));
4004 if (ureg_dst_is_undef(t->temps[index]))
4005 t->temps[index] = ureg_DECL_local_temporary(t->ureg);
4006 return ureg_src(t->temps[index]);
4007
4008 case PROGRAM_NAMED_PARAM:
4009 case PROGRAM_ENV_PARAM:
4010 case PROGRAM_LOCAL_PARAM:
4011 case PROGRAM_UNIFORM:
4012 assert(index >= 0);
4013 return t->constants[index];
4014 case PROGRAM_STATE_VAR:
4015 case PROGRAM_CONSTANT: /* ie, immediate */
4016 if (index < 0)
4017 return ureg_DECL_constant(t->ureg, 0);
4018 else
4019 return t->constants[index];
4020
4021 case PROGRAM_IMMEDIATE:
4022 return t->immediates[index];
4023
4024 case PROGRAM_INPUT:
4025 assert(t->inputMapping[index] < Elements(t->inputs));
4026 return t->inputs[t->inputMapping[index]];
4027
4028 case PROGRAM_OUTPUT:
4029 assert(t->outputMapping[index] < Elements(t->outputs));
4030 return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */
4031
4032 case PROGRAM_ADDRESS:
4033 return ureg_src(t->address[index]);
4034
4035 case PROGRAM_SYSTEM_VALUE:
4036 assert(index < Elements(t->systemValues));
4037 return t->systemValues[index];
4038
4039 default:
4040 assert(!"unknown src register file");
4041 return ureg_src_undef();
4042 }
4043 }
4044
4045 /**
4046 * Create a TGSI ureg_dst register from an st_dst_reg.
4047 */
4048 static struct ureg_dst
4049 translate_dst(struct st_translate *t,
4050 const st_dst_reg *dst_reg,
4051 bool saturate, bool clamp_color)
4052 {
4053 struct ureg_dst dst = dst_register(t,
4054 dst_reg->file,
4055 dst_reg->index);
4056
4057 dst = ureg_writemask(dst, dst_reg->writemask);
4058
4059 if (saturate)
4060 dst = ureg_saturate(dst);
4061 else if (clamp_color && dst_reg->file == PROGRAM_OUTPUT) {
4062 /* Clamp colors for ARB_color_buffer_float. */
4063 switch (t->procType) {
4064 case TGSI_PROCESSOR_VERTEX:
4065 /* XXX if the geometry shader is present, this must be done there
4066 * instead of here. */
4067 if (dst_reg->index == VERT_RESULT_COL0 ||
4068 dst_reg->index == VERT_RESULT_COL1 ||
4069 dst_reg->index == VERT_RESULT_BFC0 ||
4070 dst_reg->index == VERT_RESULT_BFC1) {
4071 dst = ureg_saturate(dst);
4072 }
4073 break;
4074
4075 case TGSI_PROCESSOR_FRAGMENT:
4076 if (dst_reg->index >= FRAG_RESULT_COLOR) {
4077 dst = ureg_saturate(dst);
4078 }
4079 break;
4080 }
4081 }
4082
4083 if (dst_reg->reladdr != NULL)
4084 dst = ureg_dst_indirect(dst, ureg_src(t->address[0]));
4085
4086 return dst;
4087 }
4088
4089 /**
4090 * Create a TGSI ureg_src register from an st_src_reg.
4091 */
4092 static struct ureg_src
4093 translate_src(struct st_translate *t, const st_src_reg *src_reg)
4094 {
4095 struct ureg_src src = src_register(t, src_reg->file, src_reg->index);
4096
4097 src = ureg_swizzle(src,
4098 GET_SWZ(src_reg->swizzle, 0) & 0x3,
4099 GET_SWZ(src_reg->swizzle, 1) & 0x3,
4100 GET_SWZ(src_reg->swizzle, 2) & 0x3,
4101 GET_SWZ(src_reg->swizzle, 3) & 0x3);
4102
4103 if ((src_reg->negate & 0xf) == NEGATE_XYZW)
4104 src = ureg_negate(src);
4105
4106 if (src_reg->reladdr != NULL) {
4107 /* Normally ureg_src_indirect() would be used here, but a stupid compiler
4108 * bug in g++ makes ureg_src_indirect (an inline C function) erroneously
4109 * set the bit for src.Negate. So we have to do the operation manually
4110 * here to work around the compiler's problems. */
4111 /*src = ureg_src_indirect(src, ureg_src(t->address[0]));*/
4112 struct ureg_src addr = ureg_src(t->address[0]);
4113 src.Indirect = 1;
4114 src.IndirectFile = addr.File;
4115 src.IndirectIndex = addr.Index;
4116 src.IndirectSwizzle = addr.SwizzleX;
4117
4118 if (src_reg->file != PROGRAM_INPUT &&
4119 src_reg->file != PROGRAM_OUTPUT) {
4120 /* If src_reg->index was negative, it was set to zero in
4121 * src_register(). Reassign it now. But don't do this
4122 * for input/output regs since they get remapped while
4123 * const buffers don't.
4124 */
4125 src.Index = src_reg->index;
4126 }
4127 }
4128
4129 return src;
4130 }
4131
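/**
* Convert a glsl_to_tgsi texture offset (expected in the immediate file)
* to the TGSI tgsi_texture_offset representation.
*/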
4132 static struct tgsi_texture_offset
4133 translate_tex_offset(struct st_translate *t,
4134 const struct tgsi_texture_offset *in_offset)
4135 {
4136 struct tgsi_texture_offset offset;
4137
4138 assert(in_offset->File == PROGRAM_IMMEDIATE);
4139
4140 offset.File = TGSI_FILE_IMMEDIATE;
4141 offset.Index = in_offset->Index;
4142 offset.SwizzleX = in_offset->SwizzleX;
4143 offset.SwizzleY = in_offset->SwizzleY;
4144 offset.SwizzleZ = in_offset->SwizzleZ;
4145
4146 return offset;
4147 }
4148
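/**
* Emit the TGSI code for one glsl_to_tgsi_instruction. Label-bearing
* flow-control opcodes and texture opcodes (which need the sampler source
* and any texel offsets appended) get special handling; everything else is
* emitted directly with ureg_insn().
*/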
4149 static void
4150 compile_tgsi_instruction(struct st_translate *t,
4151 const glsl_to_tgsi_instruction *inst,
4152 bool clamp_dst_color_output)
4153 {
4154 struct ureg_program *ureg = t->ureg;
4155 GLuint i;
4156 struct ureg_dst dst[1];
4157 struct ureg_src src[4];
4158 struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET];
4159
4160 unsigned num_dst;
4161 unsigned num_src;
4162
4163 num_dst = num_inst_dst_regs(inst->op);
4164 num_src = num_inst_src_regs(inst->op);
4165
4166 if (num_dst)
4167 dst[0] = translate_dst(t,
4168 &inst->dst,
4169 inst->saturate,
4170 clamp_dst_color_output);
4171
4172 for (i = 0; i < num_src; i++)
4173 src[i] = translate_src(t, &inst->src[i]);
4174
4175 switch(inst->op) {
4176 case TGSI_OPCODE_BGNLOOP:
4177 case TGSI_OPCODE_CAL:
4178 case TGSI_OPCODE_ELSE:
4179 case TGSI_OPCODE_ENDLOOP:
4180 case TGSI_OPCODE_IF:
4181 assert(num_dst == 0);
4182 ureg_label_insn(ureg,
4183 inst->op,
4184 src, num_src,
4185 get_label(t,
4186 inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0));
4187 return;
4188
4189 case TGSI_OPCODE_TEX:
4190 case TGSI_OPCODE_TXB:
4191 case TGSI_OPCODE_TXD:
4192 case TGSI_OPCODE_TXL:
4193 case TGSI_OPCODE_TXP:
4194 case TGSI_OPCODE_TXQ:
4195 case TGSI_OPCODE_TXF:
4196 src[num_src++] = t->samplers[inst->sampler];
4197 for (i = 0; i < inst->tex_offset_num_offset; i++) {
4198 texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i]);
4199 }
4200 ureg_tex_insn(ureg,
4201 inst->op,
4202 dst, num_dst,
4203 st_translate_texture_target(inst->tex_target, inst->tex_shadow),
4204 texoffsets, inst->tex_offset_num_offset,
4205 src, num_src);
4206 return;
4207
4208 case TGSI_OPCODE_SCS:
4209 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY);
4210 ureg_insn(ureg, inst->op, dst, num_dst, src, num_src);
4211 break;
4212
4213 default:
4214 ureg_insn(ureg,
4215 inst->op,
4216 dst, num_dst,
4217 src, num_src);
4218 break;
4219 }
4220 }
4221
4222 /**
4223 * Emit the TGSI instructions for inverting and adjusting WPOS.
4224 * This code is unavoidable because it also depends on whether
4225 * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM).
4226 */
4227 static void
4228 emit_wpos_adjustment( struct st_translate *t,
4229 const struct gl_program *program,
4230 boolean invert,
4231 GLfloat adjX, GLfloat adjY[2])
4232 {
4233 struct ureg_program *ureg = t->ureg;
4234
4235 /* Fragment program uses fragment position input.
4236 * Need to replace instances of INPUT[WPOS] with temp T
4237 * where T = INPUT[WPOS] but with y inverted.
4238 */
4239 static const gl_state_index wposTransformState[STATE_LENGTH]
4240 = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM,
4241 (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 };
4242
4243 /* XXX: note we are modifying the incoming shader here! Need to
4244 * do this before emitting the constant decls below, or this
4245 * will be missed:
4246 */
4247 unsigned wposTransConst = _mesa_add_state_reference(program->Parameters,
4248 wposTransformState);
4249
4250 struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst );
4251 struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg );
4252 struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
4253
4254 /* First, apply the coordinate shift: */
4255 if (adjX || adjY[0] || adjY[1]) {
4256 if (adjY[0] != adjY[1]) {
4257 /* Adjust the y coordinate by adjY[1] or adjY[0] respectively
4258 * depending on whether inversion is actually going to be applied
4259 * or not, which is determined by testing against the inversion
4260 * state variable used below, which will be either +1 or -1.
4261 */
4262 struct ureg_dst adj_temp = ureg_DECL_local_temporary(ureg);
4263
4264 ureg_CMP(ureg, adj_temp,
4265 ureg_scalar(wpostrans, invert ? 2 : 0),
4266 ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f),
4267 ureg_imm4f(ureg, adjX, adjY[1], 0.0f, 0.0f));
4268 ureg_ADD(ureg, wpos_temp, wpos_input, ureg_src(adj_temp));
4269 } else {
4270 ureg_ADD(ureg, wpos_temp, wpos_input,
4271 ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f));
4272 }
4273 wpos_input = ureg_src(wpos_temp);
4274 } else {
4275 /* MOV wpos_temp, input[wpos]
4276 */
4277 ureg_MOV( ureg, wpos_temp, wpos_input );
4278 }
4279
4280 /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be
4281 * inversion/identity, or the other way around if we're drawing to an FBO.
4282 */
4283 if (invert) {
4284 /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
4285 */
4286 ureg_MAD( ureg,
4287 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
4288 wpos_input,
4289 ureg_scalar(wpostrans, 0),
4290 ureg_scalar(wpostrans, 1));
4291 } else {
4292 /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
4293 */
4294 ureg_MAD( ureg,
4295 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
4296 wpos_input,
4297 ureg_scalar(wpostrans, 2),
4298 ureg_scalar(wpostrans, 3));
4299 }
4300
4301 /* Use wpos_temp as position input from here on:
4302 */
4303 t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
4304 }
4305
4306
4307 /**
4308 * Emit fragment position/coordinate code.
4309 */
4310 static void
4311 emit_wpos(struct st_context *st,
4312 struct st_translate *t,
4313 const struct gl_program *program,
4314 struct ureg_program *ureg)
4315 {
4316 const struct gl_fragment_program *fp =
4317 (const struct gl_fragment_program *) program;
4318 struct pipe_screen *pscreen = st->pipe->screen;
4319 GLfloat adjX = 0.0f;
4320 GLfloat adjY[2] = { 0.0f, 0.0f };
4321 boolean invert = FALSE;
4322
4323 /* Query the pixel center conventions supported by the pipe driver and set
4324 * adjX, adjY to help out if it cannot handle the requested one internally.
4325 *
4326 * The bias of the y-coordinate depends on whether y-inversion takes place
4327 * (adjY[1]) or not (adjY[0]), which is in turn dependent on whether we are
4328 * drawing to an FBO (causes additional inversion), and whether the the pipe
4329 * driver origin and the requested origin differ (the latter condition is
4330 * stored in the 'invert' variable).
4331 *
4332 * For height = 100 (i = integer, h = half-integer, l = lower, u = upper):
4333 *
4334 * center shift only:
4335 * i -> h: +0.5
4336 * h -> i: -0.5
4337 *
4338 * inversion only:
4339 * l,i -> u,i: ( 0.0 + 1.0) * -1 + 100 = 99
4340 * l,h -> u,h: ( 0.5 + 0.0) * -1 + 100 = 99.5
4341 * u,i -> l,i: (99.0 + 1.0) * -1 + 100 = 0
4342 * u,h -> l,h: (99.5 + 0.0) * -1 + 100 = 0.5
4343 *
4344 * inversion and center shift:
4345 * l,i -> u,h: ( 0.0 + 0.5) * -1 + 100 = 99.5
4346 * l,h -> u,i: ( 0.5 + 0.5) * -1 + 100 = 99
4347 * u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5
4348 * u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0
4349 */
4350 if (fp->OriginUpperLeft) {
4351 /* Fragment shader wants origin in upper-left */
4352 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
4353 /* the driver supports upper-left origin */
4354 }
4355 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) {
4356 /* the driver supports lower-left origin, need to invert Y */
4357 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
4358 invert = TRUE;
4359 }
4360 else
4361 assert(0);
4362 }
4363 else {
4364 /* Fragment shader wants origin in lower-left */
4365 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT))
4366 /* the driver supports lower-left origin */
4367 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
4368 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT))
4369 /* the driver supports upper-left origin, need to invert Y */
4370 invert = TRUE;
4371 else
4372 assert(0);
4373 }
4374
4375 if (fp->PixelCenterInteger) {
4376 /* Fragment shader wants pixel center integer */
4377 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
4378 /* the driver supports pixel center integer */
4379 adjY[1] = 1.0f;
4380 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
4381 }
4382 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
4383 /* the driver supports pixel center half integer, need to bias X,Y */
4384 adjX = -0.5f;
4385 adjY[0] = -0.5f;
4386 adjY[1] = 0.5f;
4387 }
4388 else
4389 assert(0);
4390 }
4391 else {
4392 /* Fragment shader wants pixel center half integer */
4393 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
4394 /* the driver supports pixel center half integer */
4395 }
4396 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
4397 /* the driver supports pixel center integer, need to bias X,Y */
4398 adjX = adjY[0] = adjY[1] = 0.5f;
4399 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
4400 }
4401 else
4402 assert(0);
4403 }
4404
4405 /* we invert after adjustment so that we avoid the MOV to temporary,
4406 * and reuse the adjustment ADD instead */
4407 emit_wpos_adjustment(t, program, invert, adjX, adjY);
4408 }
4409
4410 /**
4411 * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back.
4412 * TGSI uses +1 for front, -1 for back.
4413 * This function converts the TGSI value to the GL value. Simply clamping/
4414 * saturating the value to [0,1] does the job.
4415 */
4416 static void
4417 emit_face_var(struct st_translate *t)
4418 {
4419 struct ureg_program *ureg = t->ureg;
4420 struct ureg_dst face_temp = ureg_DECL_temporary(ureg);
4421 struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]];
4422
4423 /* MOV_SAT face_temp, input[face] */
4424 face_temp = ureg_saturate(face_temp);
4425 ureg_MOV(ureg, face_temp, face_input);
4426
4427 /* Use face_temp as face input from here on: */
4428 t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp);
4429 }
4430
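/**
* Copy the vertex edge flag input to the edge flag output, emitted when
* passthrough_edgeflags is set.
*/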
4431 static void
4432 emit_edgeflags(struct st_translate *t)
4433 {
4434 struct ureg_program *ureg = t->ureg;
4435 struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]];
4436 struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]];
4437
4438 ureg_MOV(ureg, edge_dst, edge_src);
4439 }
4440
4441 /**
4442 * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format.
4443 * \param program the program to translate
4444 * \param numInputs number of input registers used
4445 * \param inputMapping maps Mesa fragment program inputs to TGSI generic
4446 * input indexes
4447 * \param inputSemanticName the TGSI_SEMANTIC flag for each input
4448 * \param inputSemanticIndex the semantic index (ex: which texcoord) for
4449 * each input
4450 * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
4451 * \param numOutputs number of output registers used
4452 * \param outputMapping maps Mesa fragment program outputs to TGSI
4453 * generic outputs
4454 * \param outputSemanticName the TGSI_SEMANTIC flag for each output
4455 * \param outputSemanticIndex the semantic index (ex: which texcoord) for
4456 * each output
4457 *
4458 * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY
4459 */
4460 extern "C" enum pipe_error
4461 st_translate_program(
4462 struct gl_context *ctx,
4463 uint procType,
4464 struct ureg_program *ureg,
4465 glsl_to_tgsi_visitor *program,
4466 const struct gl_program *proginfo,
4467 GLuint numInputs,
4468 const GLuint inputMapping[],
4469 const ubyte inputSemanticName[],
4470 const ubyte inputSemanticIndex[],
4471 const GLuint interpMode[],
4472 GLuint numOutputs,
4473 const GLuint outputMapping[],
4474 const ubyte outputSemanticName[],
4475 const ubyte outputSemanticIndex[],
4476 boolean passthrough_edgeflags,
4477 boolean clamp_color)
4478 {
4479 struct st_translate *t;
4480 unsigned i;
4481 enum pipe_error ret = PIPE_OK;
4482
4483 assert(numInputs <= Elements(t->inputs));
4484 assert(numOutputs <= Elements(t->outputs));
4485
4486 t = CALLOC_STRUCT(st_translate);
4487 if (!t) {
4488 ret = PIPE_ERROR_OUT_OF_MEMORY;
4489 goto out;
4490 }
4491
4492 memset(t, 0, sizeof *t);
4493
4494 t->procType = procType;
4495 t->inputMapping = inputMapping;
4496 t->outputMapping = outputMapping;
4497 t->ureg = ureg;
4498
4499 if (program->shader_program) {
4500 for (i = 0; i < program->shader_program->NumUserUniformStorage; i++) {
4501 struct gl_uniform_storage *const storage =
4502 &program->shader_program->UniformStorage[i];
4503
4504 _mesa_uniform_detach_all_driver_storage(storage);
4505 }
4506 }
4507
4508 /*
4509 * Declare input attributes.
4510 */
4511 if (procType == TGSI_PROCESSOR_FRAGMENT) {
4512 for (i = 0; i < numInputs; i++) {
4513 t->inputs[i] = ureg_DECL_fs_input(ureg,
4514 inputSemanticName[i],
4515 inputSemanticIndex[i],
4516 interpMode[i]);
4517 }
4518
4519 if (proginfo->InputsRead & FRAG_BIT_WPOS) {
4520 /* Must do this after setting up t->inputs, and before
4521 * emitting constant references, below:
4522 */
4523 emit_wpos(st_context(ctx), t, proginfo, ureg);
4524 }
4525
4526 if (proginfo->InputsRead & FRAG_BIT_FACE)
4527 emit_face_var(t);
4528
4529 /*
4530 * Declare output attributes.
4531 */
4532 for (i = 0; i < numOutputs; i++) {
4533 switch (outputSemanticName[i]) {
4534 case TGSI_SEMANTIC_POSITION:
4535 t->outputs[i] = ureg_DECL_output(ureg,
4536 TGSI_SEMANTIC_POSITION, /* Z/Depth */
4537 outputSemanticIndex[i]);
4538 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z);
4539 break;
4540 case TGSI_SEMANTIC_STENCIL:
4541 t->outputs[i] = ureg_DECL_output(ureg,
4542 TGSI_SEMANTIC_STENCIL, /* Stencil */
4543 outputSemanticIndex[i]);
4544 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y);
4545 break;
4546 case TGSI_SEMANTIC_COLOR:
4547 t->outputs[i] = ureg_DECL_output(ureg,
4548 TGSI_SEMANTIC_COLOR,
4549 outputSemanticIndex[i]);
4550 break;
4551 default:
4552 assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR");
4553 ret = PIPE_ERROR_BAD_INPUT;
4554 goto out;
4555 }
4556 }
4557 }
4558 else if (procType == TGSI_PROCESSOR_GEOMETRY) {
4559 for (i = 0; i < numInputs; i++) {
4560 t->inputs[i] = ureg_DECL_gs_input(ureg,
4561 i,
4562 inputSemanticName[i],
4563 inputSemanticIndex[i]);
4564 }
4565
4566 for (i = 0; i < numOutputs; i++) {
4567 t->outputs[i] = ureg_DECL_output(ureg,
4568 outputSemanticName[i],
4569 outputSemanticIndex[i]);
4570 }
4571 }
4572 else {
4573 assert(procType == TGSI_PROCESSOR_VERTEX);
4574
4575 for (i = 0; i < numInputs; i++) {
4576 t->inputs[i] = ureg_DECL_vs_input(ureg, i);
4577 }
4578
4579 for (i = 0; i < numOutputs; i++) {
4580 if (outputSemanticName[i] == TGSI_SEMANTIC_CLIPDIST) {
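/* gl_ClipDistance is packed into vec4s, so derive the writemask for this
* output from the declared array length; e.g. 6 clip distances yield
* mask 0xf for CLIPDIST index 0 and 0x3 for index 1. */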
4581 int mask = ((1 << (program->num_clip_distances - 4*outputSemanticIndex[i])) - 1) & TGSI_WRITEMASK_XYZW;
4582 t->outputs[i] = ureg_DECL_output_masked(ureg,
4583 outputSemanticName[i],
4584 outputSemanticIndex[i],
4585 mask);
4586 } else {
4587 t->outputs[i] = ureg_DECL_output(ureg,
4588 outputSemanticName[i],
4589 outputSemanticIndex[i]);
4590 }
4591 }
4592 if (passthrough_edgeflags)
4593 emit_edgeflags(t);
4594 }
4595
4596 /* Declare address register.
4597 */
4598 if (program->num_address_regs > 0) {
4599 assert(program->num_address_regs == 1);
4600 t->address[0] = ureg_DECL_address(ureg);
4601 }
4602
4603 /* Declare system-value input registers (front-face, vertex id, instance id).
4604 */
4605 {
4606 GLbitfield sysInputs = proginfo->SystemValuesRead;
4607 unsigned numSys = 0;
4608 for (i = 0; sysInputs; i++) {
4609 if (sysInputs & (1 << i)) {
4610 unsigned semName = mesa_sysval_to_semantic[i];
4611 t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0);
4612 numSys++;
4613 sysInputs &= ~(1 << i);
4614 }
4615 }
4616 }
4617
4618 if (program->indirect_addr_temps) {
4619 /* If temps are accessed with indirect addressing, declare temporaries
4620 * in sequential order. Else, we declare them on demand elsewhere.
4621 * (Note: the number of temporaries is equal to program->next_temp)
4622 */
4623 for (i = 0; i < (unsigned)program->next_temp; i++) {
4624 /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */
4625 t->temps[i] = ureg_DECL_local_temporary(t->ureg);
4626 }
4627 }
4628
4629 /* Emit constants and uniforms. TGSI uses a single index space for these,
4630 * so we put all the translated regs in t->constants.
4631 */
4632 if (proginfo->Parameters) {
4633 t->constants = (struct ureg_src *)CALLOC(proginfo->Parameters->NumParameters * sizeof(t->constants[0]));
4634 if (t->constants == NULL) {
4635 ret = PIPE_ERROR_OUT_OF_MEMORY;
4636 goto out;
4637 }
4638
4639 for (i = 0; i < proginfo->Parameters->NumParameters; i++) {
4640 switch (proginfo->Parameters->Parameters[i].Type) {
4641 case PROGRAM_ENV_PARAM:
4642 case PROGRAM_LOCAL_PARAM:
4643 case PROGRAM_STATE_VAR:
4644 case PROGRAM_NAMED_PARAM:
4645 case PROGRAM_UNIFORM:
4646 t->constants[i] = ureg_DECL_constant(ureg, i);
4647 break;
4648
4649 /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect
4650 * addressing of the const buffer.
4651 * FIXME: Be smarter and recognize param arrays:
4652 * indirect addressing is only valid within the referenced
4653 * array.
4654 */
4655 case PROGRAM_CONSTANT:
4656 if (program->indirect_addr_consts)
4657 t->constants[i] = ureg_DECL_constant(ureg, i);
4658 else
4659 t->constants[i] = emit_immediate(t,
4660 proginfo->Parameters->ParameterValues[i],
4661 proginfo->Parameters->Parameters[i].DataType,
4662 4);
4663 break;
4664 default:
4665 break;
4666 }
4667 }
4668 }
4669
4670 /* Emit immediate values.
4671 */
4672 t->immediates = (struct ureg_src *)CALLOC(program->num_immediates * sizeof(struct ureg_src));
4673 if (t->immediates == NULL) {
4674 ret = PIPE_ERROR_OUT_OF_MEMORY;
4675 goto out;
4676 }
4677 i = 0;
4678 foreach_iter(exec_list_iterator, iter, program->immediates) {
4679 immediate_storage *imm = (immediate_storage *)iter.get();
4680 assert(i < program->num_immediates);
4681 t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size);
4682 }
4683 assert(i == program->num_immediates);
4684
4685 /* texture samplers */
4686 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
4687 if (program->samplers_used & (1 << i)) {
4688 t->samplers[i] = ureg_DECL_sampler(ureg, i);
4689 }
4690 }
4691
4692 /* Emit each instruction in turn:
4693 */
4694 foreach_iter(exec_list_iterator, iter, program->instructions) {
4695 set_insn_start(t, ureg_get_instruction_number(ureg));
4696 compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get(),
4697 clamp_color);
4698 }
4699
4700 /* Fix up all emitted labels:
4701 */
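/* Patch each label token with the TGSI instruction number recorded in
* insn[] for its branch target. */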
4702 for (i = 0; i < t->labels_count; i++) {
4703 ureg_fixup_label(ureg, t->labels[i].token,
4704 t->insn[t->labels[i].branch_target]);
4705 }
4706
4707 if (program->shader_program) {
4708 /* This has to be done last. Any operation that can cause
4709 * prog->ParameterValues to get reallocated (e.g., anything that adds a
4710 * program constant) has to happen before creating this linkage.
4711 */
4712 for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
4713 if (program->shader_program->_LinkedShaders[i] == NULL)
4714 continue;
4715
4716 _mesa_associate_uniform_storage(ctx, program->shader_program,
4717 program->shader_program->_LinkedShaders[i]->Program->Parameters);
4718 }
4719 }
4720
4721 out:
4722 if (t) {
4723 FREE(t->insn);
4724 FREE(t->labels);
4725 FREE(t->constants);
4726 FREE(t->immediates);
4727
4728 if (t->error) {
4729 debug_printf("%s: translate error flag set\n", __FUNCTION__);
4730 }
4731
4732 FREE(t);
4733 }
4734
4735 return ret;
4736 }
4737 /* ----------------------------- End TGSI code ------------------------------ */
4738
4739 /**
4740 * Convert a shader's GLSL IR into a Mesa gl_program, but without
4741 * generating Mesa IR.
4742 */
4743 static struct gl_program *
4744 get_mesa_program(struct gl_context *ctx,
4745 struct gl_shader_program *shader_program,
4746 struct gl_shader *shader,
4747 int num_clip_distances)
4748 {
4749 glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor();
4750 struct gl_program *prog;
4751 GLenum target;
4752 const char *target_string;
4753 bool progress;
4754 struct gl_shader_compiler_options *options =
4755 &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)];
4756
4757 switch (shader->Type) {
4758 case GL_VERTEX_SHADER:
4759 target = GL_VERTEX_PROGRAM_ARB;
4760 target_string = "vertex";
4761 break;
4762 case GL_FRAGMENT_SHADER:
4763 target = GL_FRAGMENT_PROGRAM_ARB;
4764 target_string = "fragment";
4765 break;
4766 case GL_GEOMETRY_SHADER:
4767 target = GL_GEOMETRY_PROGRAM_NV;
4768 target_string = "geometry";
4769 break;
4770 default:
4771 assert(!"should not be reached");
4772 return NULL;
4773 }
4774
4775 validate_ir_tree(shader->ir);
4776
4777 prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
4778 if (!prog)
4779 return NULL;
4780 prog->Parameters = _mesa_new_parameter_list();
4781 v->ctx = ctx;
4782 v->prog = prog;
4783 v->shader_program = shader_program;
4784 v->options = options;
4785 v->glsl_version = ctx->Const.GLSLVersion;
4786 v->native_integers = ctx->Const.NativeIntegers;
4787 v->num_clip_distances = num_clip_distances;
4788
4789 _mesa_generate_parameters_list_for_uniforms(shader_program, shader,
4790 prog->Parameters);
4791
4792 /* Remove reads from output registers. */
4793 lower_output_reads(shader->ir);
4794
4795 /* Emit intermediate IR for main(). */
4796 visit_exec_list(shader->ir, v);
4797
4798 /* Now emit bodies for any functions that were used. */
4799 do {
4800 progress = GL_FALSE;
4801
4802 foreach_iter(exec_list_iterator, iter, v->function_signatures) {
4803 function_entry *entry = (function_entry *)iter.get();
4804
4805 if (!entry->bgn_inst) {
4806 v->current_function = entry;
4807
4808 entry->bgn_inst = v->emit(NULL, TGSI_OPCODE_BGNSUB);
4809 entry->bgn_inst->function = entry;
4810
4811 visit_exec_list(&entry->sig->body, v);
4812
4813 glsl_to_tgsi_instruction *last;
4814 last = (glsl_to_tgsi_instruction *)v->instructions.get_tail();
4815 if (last->op != TGSI_OPCODE_RET)
4816 v->emit(NULL, TGSI_OPCODE_RET);
4817
4818 glsl_to_tgsi_instruction *end;
4819 end = v->emit(NULL, TGSI_OPCODE_ENDSUB);
4820 end->function = entry;
4821
4822 progress = GL_TRUE;
4823 }
4824 }
4825 } while (progress);
4826
4827 #if 0
4828 /* Print out some information (for debugging purposes) used by the
4829 * optimization passes. */
4830 for (i=0; i < v->next_temp; i++) {
4831 int fr = v->get_first_temp_read(i);
4832 int fw = v->get_first_temp_write(i);
4833 int lr = v->get_last_temp_read(i);
4834 int lw = v->get_last_temp_write(i);
4835
4836 printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw);
4837 assert(fw <= fr);
4838 }
4839 #endif
4840
4841 /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
4842 v->simplify_cmp();
4843 v->copy_propagate();
4844 while (v->eliminate_dead_code_advanced());
4845
4846 /* FIXME: These passes to optimize temporary registers don't work when there
4847 * is indirect addressing of the temporary register space. We need proper
4848 * array support so that we don't have to give up these passes in every
4849 * shader that uses arrays.
4850 */
4851 if (!v->indirect_addr_temps) {
4852 v->eliminate_dead_code();
4853 v->merge_registers();
4854 v->renumber_registers();
4855 }
4856
4857 /* Write the END instruction. */
4858 v->emit(NULL, TGSI_OPCODE_END);
4859
4860 if (ctx->Shader.Flags & GLSL_DUMP) {
4861 printf("\n");
4862 printf("GLSL IR for linked %s program %d:\n", target_string,
4863 shader_program->Name);
4864 _mesa_print_ir(shader->ir, NULL);
4865 printf("\n");
4866 printf("\n");
4867 fflush(stdout);
4868 }
4869
4870 prog->Instructions = NULL;
4871 prog->NumInstructions = 0;
4872
4873 do_set_program_inouts(shader->ir, prog, shader->Type == GL_FRAGMENT_SHADER);
4874 count_resources(v, prog);
4875
4876 _mesa_reference_program(ctx, &shader->Program, prog);
4877
4878 /* This has to be done last. Any operation that can cause
4879 * prog->ParameterValues to get reallocated (e.g., anything that adds a
4880 * program constant) has to happen before creating this linkage.
4881 */
4882 _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters);
4883 if (!shader_program->LinkStatus) {
4884 return NULL;
4885 }
4886
4887 struct st_vertex_program *stvp;
4888 struct st_fragment_program *stfp;
4889 struct st_geometry_program *stgp;
4890
4891 switch (shader->Type) {
4892 case GL_VERTEX_SHADER:
4893 stvp = (struct st_vertex_program *)prog;
4894 stvp->glsl_to_tgsi = v;
4895 break;
4896 case GL_FRAGMENT_SHADER:
4897 stfp = (struct st_fragment_program *)prog;
4898 stfp->glsl_to_tgsi = v;
4899 break;
4900 case GL_GEOMETRY_SHADER:
4901 stgp = (struct st_geometry_program *)prog;
4902 stgp->glsl_to_tgsi = v;
4903 break;
4904 default:
4905 assert(!"should not be reached");
4906 return NULL;
4907 }
4908
4909 return prog;
4910 }
4911
4912 /**
4913 * Searches through the IR for a declaration of gl_ClipDistance and returns the
4914 * declared size of the gl_ClipDistance array. Returns 0 if gl_ClipDistance is
4915 * not declared in the IR.
4916 */
4917 int get_clip_distance_size(exec_list *ir)
4918 {
4919 foreach_iter (exec_list_iterator, iter, *ir) {
4920 ir_instruction *inst = (ir_instruction *)iter.get();
4921 ir_variable *var = inst->as_variable();
4922 if (var == NULL) continue;
4923 if (!strcmp(var->name, "gl_ClipDistance")) {
4924 return var->type->length;
4925 }
4926 }
4927
4928 return 0;
4929 }
4930
4931 extern "C" {
4932
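/**
* Allocate and initialize a new gl_shader of the given type.
*/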
4933 struct gl_shader *
4934 st_new_shader(struct gl_context *ctx, GLuint name, GLuint type)
4935 {
4936 struct gl_shader *shader;
4937 assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER ||
4938 type == GL_GEOMETRY_SHADER_ARB);
4939 shader = rzalloc(NULL, struct gl_shader);
4940 if (shader) {
4941 shader->Type = type;
4942 shader->Name = name;
4943 _mesa_init_shader(ctx, shader);
4944 }
4945 return shader;
4946 }
4947
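/**
* Allocate and initialize a new gl_shader_program.
*/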
4948 struct gl_shader_program *
4949 st_new_shader_program(struct gl_context *ctx, GLuint name)
4950 {
4951 struct gl_shader_program *shProg;
4952 shProg = rzalloc(NULL, struct gl_shader_program);
4953 if (shProg) {
4954 shProg->Name = name;
4955 _mesa_init_shader_program(ctx, shProg);
4956 }
4957 return shProg;
4958 }
4959
4960 /**
4961 * Link a shader.
4962 * Called via ctx->Driver.LinkShader()
4963 * This actually involves converting GLSL IR into an intermediate TGSI-like IR
4964 * with code lowering and other optimizations.
4965 */
4966 GLboolean
4967 st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
4968 {
4969 int num_clip_distances[MESA_SHADER_TYPES];
4970 assert(prog->LinkStatus);
4971
4972 for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
4973 if (prog->_LinkedShaders[i] == NULL)
4974 continue;
4975
4976 bool progress;
4977 exec_list *ir = prog->_LinkedShaders[i]->ir;
4978 const struct gl_shader_compiler_options *options =
4979 &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)];
4980
4981 /* We have to determine the length of the gl_ClipDistance array before
4982 * the array is lowered to two vec4s by lower_clip_distance().
4983 */
4984 num_clip_distances[i] = get_clip_distance_size(ir);
4985
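/* Lower IR constructs the backend can't handle and run the common
* optimizations, repeating until no pass makes further progress. */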
4986 do {
4987 unsigned what_to_lower = MOD_TO_FRACT | DIV_TO_MUL_RCP |
4988 EXP_TO_EXP2 | LOG_TO_LOG2;
4989 if (options->EmitNoPow)
4990 what_to_lower |= POW_TO_EXP2;
4991 if (!ctx->Const.NativeIntegers)
4992 what_to_lower |= INT_DIV_TO_MUL_RCP;
4993
4994 progress = false;
4995
4996 /* Lowering */
4997 do_mat_op_to_vec(ir);
4998 lower_instructions(ir, what_to_lower);
4999
5000 progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
5001
5002 progress = do_common_optimization(ir, true, true,
5003 options->MaxUnrollIterations)
5004 || progress;
5005
5006 progress = lower_quadop_vector(ir, false) || progress;
5007 progress = lower_clip_distance(ir) || progress;
5008
5009 if (options->MaxIfDepth == 0)
5010 progress = lower_discard(ir) || progress;
5011
5012 progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress;
5013
5014 if (options->EmitNoNoise)
5015 progress = lower_noise(ir) || progress;
5016
5017 /* If there are forms of indirect addressing that the driver
5018 * cannot handle, perform the lowering pass.
5019 */
5020 if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput
5021 || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform)
5022 progress =
5023 lower_variable_index_to_cond_assign(ir,
5024 options->EmitNoIndirectInput,
5025 options->EmitNoIndirectOutput,
5026 options->EmitNoIndirectTemp,
5027 options->EmitNoIndirectUniform)
5028 || progress;
5029
5030 progress = do_vec_index_to_cond_assign(ir) || progress;
5031 } while (progress);
5032
5033 validate_ir_tree(ir);
5034 }
5035
5036 for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
5037 struct gl_program *linked_prog;
5038
5039 if (prog->_LinkedShaders[i] == NULL)
5040 continue;
5041
5042 linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i],
5043 num_clip_distances[i]);
5044
5045 if (linked_prog) {
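/* Program targets indexed like _LinkedShaders[]: vertex, fragment,
* geometry. */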
5046 static const GLenum targets[] = {
5047 GL_VERTEX_PROGRAM_ARB,
5048 GL_FRAGMENT_PROGRAM_ARB,
5049 GL_GEOMETRY_PROGRAM_NV
5050 };
5051
5052 _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
5053 linked_prog);
5054 if (!ctx->Driver.ProgramStringNotify(ctx, targets[i], linked_prog)) {
5055 _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
5056 NULL);
5057 _mesa_reference_program(ctx, &linked_prog, NULL);
5058 return GL_FALSE;
5059 }
5060 }
5061
5062 _mesa_reference_program(ctx, &linked_prog, NULL);
5063 }
5064
5065 return GL_TRUE;
5066 }
5067
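/**
* Translate the linked program's transform feedback info into a
* pipe_stream_output_info, remapping output registers through
* outputMapping.
*/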
5068 void
5069 st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi,
5070 const GLuint outputMapping[],
5071 struct pipe_stream_output_info *so)
5072 {
5073 unsigned i;
5074 struct gl_transform_feedback_info *info =
5075 &glsl_to_tgsi->shader_program->LinkedTransformFeedback;
5076
5077 for (i = 0; i < info->NumOutputs; i++) {
5078 so->output[i].register_index =
5079 outputMapping[info->Outputs[i].OutputRegister];
5080 so->output[i].start_component = info->Outputs[i].ComponentOffset;
5081 so->output[i].num_components = info->Outputs[i].NumComponents;
5082 so->output[i].output_buffer = info->Outputs[i].OutputBuffer;
5083 so->output[i].dst_offset = info->Outputs[i].DstOffset;
5084 }
5085
5086 for (i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
5087 so->stride[i] = info->BufferStride[i];
5088 }
5089 so->num_outputs = info->NumOutputs;
5090 }
5091
5092 } /* extern "C" */