ir_to_mesa: Fix matrix * scalar multiplication.
[mesa.git] / src / mesa / shader / ir_to_mesa.cpp
1 /*
2 * Copyright (C) 2005-2007 Brian Paul All Rights Reserved.
3 * Copyright (C) 2008 VMware, Inc. All Rights Reserved.
4 * Copyright © 2010 Intel Corporation
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 * DEALINGS IN THE SOFTWARE.
24 */
25
26 /**
27 * \file ir_to_mesa.cpp
28 *
29 * Translates the IR to ARB_fragment_program text if possible,
30 * printing the result
31 */
32
33 #include <stdio.h>
34 #include "ir.h"
35 #include "ir_visitor.h"
36 #include "ir_print_visitor.h"
37 #include "ir_expression_flattening.h"
38 #include "glsl_types.h"
39 #include "glsl_parser_extras.h"
40 #include "../glsl/program.h"
41 #include "ir_optimization.h"
42 #include "ast.h"
43
44 extern "C" {
45 #include "main/mtypes.h"
46 #include "shader/prog_instruction.h"
47 #include "shader/prog_print.h"
48 #include "shader/program.h"
49 #include "shader/prog_uniform.h"
50 #include "shader/prog_parameter.h"
51 #include "shader/shader_api.h"
52 }
53
54 /**
55 * This struct is a corresponding struct to Mesa prog_src_register, with
56 * wider fields.
57 */
58 typedef struct ir_to_mesa_src_reg {
59 int file; /**< PROGRAM_* from Mesa */
60 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
61 GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
62 int negate; /**< NEGATE_XYZW mask from mesa */
63 bool reladdr; /**< Register index should be offset by address reg. */
64 } ir_to_mesa_src_reg;
65
/** Destination operand of an ir_to_mesa_instruction. */
struct ir_to_mesa_dst_reg {
   /** Register file: one of Mesa's PROGRAM_* values. */
   int file;
   /** Temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
   int index;
   /** Bitfield of WRITEMASK_[XYZW] selecting the channels written. */
   int writemask;
};
71
72 extern ir_to_mesa_src_reg ir_to_mesa_undef;
73
74 class ir_to_mesa_instruction : public exec_node {
75 public:
76 enum prog_opcode op;
77 ir_to_mesa_dst_reg dst_reg;
78 ir_to_mesa_src_reg src_reg[3];
79 /** Pointer to the ir source this tree came from for debugging */
80 ir_instruction *ir;
81 };
82
83 class temp_entry : public exec_node {
84 public:
85 temp_entry(ir_variable *var, int file, int index)
86 : file(file), index(index), var(var)
87 {
88 /* empty */
89 }
90
91 int file;
92 int index;
93 ir_variable *var; /* variable that maps to this, if any */
94 };
95
96 class ir_to_mesa_visitor : public ir_visitor {
97 public:
98 ir_to_mesa_visitor();
99
100 GLcontext *ctx;
101 struct gl_program *prog;
102
103 int next_temp;
104
105 temp_entry *find_variable_storage(ir_variable *var);
106
107 ir_to_mesa_src_reg get_temp(const glsl_type *type);
108
109 struct ir_to_mesa_src_reg src_reg_for_float(float val);
110
111 /**
112 * \name Visit methods
113 *
114 * As typical for the visitor pattern, there must be one \c visit method for
115 * each concrete subclass of \c ir_instruction. Virtual base classes within
116 * the hierarchy should not have \c visit methods.
117 */
118 /*@{*/
119 virtual void visit(ir_variable *);
120 virtual void visit(ir_loop *);
121 virtual void visit(ir_loop_jump *);
122 virtual void visit(ir_function_signature *);
123 virtual void visit(ir_function *);
124 virtual void visit(ir_expression *);
125 virtual void visit(ir_swizzle *);
126 virtual void visit(ir_dereference_variable *);
127 virtual void visit(ir_dereference_array *);
128 virtual void visit(ir_dereference_record *);
129 virtual void visit(ir_assignment *);
130 virtual void visit(ir_constant *);
131 virtual void visit(ir_call *);
132 virtual void visit(ir_return *);
133 virtual void visit(ir_texture *);
134 virtual void visit(ir_if *);
135 /*@}*/
136
137 struct ir_to_mesa_src_reg result;
138
139 /** List of temp_entry */
140 exec_list variable_storage;
141
142 /** List of ir_to_mesa_instruction */
143 exec_list instructions;
144
145 ir_to_mesa_instruction *ir_to_mesa_emit_op1(ir_instruction *ir,
146 enum prog_opcode op,
147 ir_to_mesa_dst_reg dst,
148 ir_to_mesa_src_reg src0);
149
150 ir_to_mesa_instruction *ir_to_mesa_emit_op2(ir_instruction *ir,
151 enum prog_opcode op,
152 ir_to_mesa_dst_reg dst,
153 ir_to_mesa_src_reg src0,
154 ir_to_mesa_src_reg src1);
155
156 ir_to_mesa_instruction *ir_to_mesa_emit_op3(ir_instruction *ir,
157 enum prog_opcode op,
158 ir_to_mesa_dst_reg dst,
159 ir_to_mesa_src_reg src0,
160 ir_to_mesa_src_reg src1,
161 ir_to_mesa_src_reg src2);
162
163 void ir_to_mesa_emit_scalar_op1(ir_instruction *ir,
164 enum prog_opcode op,
165 ir_to_mesa_dst_reg dst,
166 ir_to_mesa_src_reg src0);
167
168 void *mem_ctx;
169 };
170
171 ir_to_mesa_src_reg ir_to_mesa_undef = {
172 PROGRAM_UNDEFINED, 0, SWIZZLE_NOOP, NEGATE_NONE, false,
173 };
174
175 ir_to_mesa_dst_reg ir_to_mesa_undef_dst = {
176 PROGRAM_UNDEFINED, 0, SWIZZLE_NOOP
177 };
178
179 ir_to_mesa_dst_reg ir_to_mesa_address_reg = {
180 PROGRAM_ADDRESS, 0, WRITEMASK_X
181 };
182
183 static int swizzle_for_size(int size)
184 {
185 int size_swizzles[4] = {
186 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
187 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
188 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
189 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
190 };
191
192 return size_swizzles[size - 1];
193 }
194
195 /* This list should match up with builtin_variables.h */
196 static const struct {
197 const char *name;
198 int file;
199 int index;
200 } builtin_var_to_mesa_reg[] = {
201 /* core_vs */
202 {"gl_Position", PROGRAM_OUTPUT, VERT_RESULT_HPOS},
203 {"gl_PointSize", PROGRAM_OUTPUT, VERT_RESULT_PSIZ},
204
205 /* core_fs */
206 {"gl_FragCoord", PROGRAM_INPUT, FRAG_ATTRIB_WPOS},
207 {"gl_FrontFacing", PROGRAM_INPUT, FRAG_ATTRIB_FACE},
208 {"gl_FragColor", PROGRAM_OUTPUT, FRAG_ATTRIB_COL0},
209 {"gl_FragDepth", PROGRAM_UNDEFINED, FRAG_ATTRIB_WPOS}, /* FINISHME: WPOS.z */
210
211 /* 110_deprecated_fs */
212 {"gl_Color", PROGRAM_INPUT, FRAG_ATTRIB_COL0},
213 {"gl_SecondaryColor", PROGRAM_INPUT, FRAG_ATTRIB_COL1},
214 {"gl_FogFragCoord", PROGRAM_INPUT, FRAG_ATTRIB_FOGC},
215 {"gl_TexCoord", PROGRAM_INPUT, FRAG_ATTRIB_TEX0}, /* array */
216
217 /* 110_deprecated_vs */
218 {"gl_Vertex", PROGRAM_INPUT, VERT_ATTRIB_POS},
219 {"gl_Normal", PROGRAM_INPUT, VERT_ATTRIB_NORMAL},
220 {"gl_Color", PROGRAM_INPUT, VERT_ATTRIB_COLOR0},
221 {"gl_SecondaryColor", PROGRAM_INPUT, VERT_ATTRIB_COLOR1},
222 {"gl_MultiTexCoord0", PROGRAM_INPUT, VERT_ATTRIB_TEX0},
223 {"gl_MultiTexCoord1", PROGRAM_INPUT, VERT_ATTRIB_TEX1},
224 {"gl_MultiTexCoord2", PROGRAM_INPUT, VERT_ATTRIB_TEX2},
225 {"gl_MultiTexCoord3", PROGRAM_INPUT, VERT_ATTRIB_TEX3},
226 {"gl_MultiTexCoord4", PROGRAM_INPUT, VERT_ATTRIB_TEX4},
227 {"gl_MultiTexCoord5", PROGRAM_INPUT, VERT_ATTRIB_TEX5},
228 {"gl_MultiTexCoord6", PROGRAM_INPUT, VERT_ATTRIB_TEX6},
229 {"gl_MultiTexCoord7", PROGRAM_INPUT, VERT_ATTRIB_TEX7},
230 {"gl_TexCoord", PROGRAM_OUTPUT, VERT_RESULT_TEX0}, /* array */
231 {"gl_FogCoord", PROGRAM_INPUT, VERT_RESULT_FOGC},
232 /*{"gl_ClipVertex", PROGRAM_OUTPUT, VERT_ATTRIB_FOGC},*/ /* FINISHME */
233 {"gl_FrontColor", PROGRAM_OUTPUT, VERT_RESULT_COL0},
234 {"gl_BackColor", PROGRAM_OUTPUT, VERT_RESULT_BFC0},
235 {"gl_FrontSecondaryColor", PROGRAM_OUTPUT, VERT_RESULT_COL1},
236 {"gl_BackSecondaryColor", PROGRAM_OUTPUT, VERT_RESULT_BFC1},
237 {"gl_FogFragCoord", PROGRAM_OUTPUT, VERT_RESULT_FOGC},
238
239 /* 130_vs */
240 /*{"gl_VertexID", PROGRAM_INPUT, VERT_ATTRIB_FOGC},*/ /* FINISHME */
241
242 {"gl_FragData", PROGRAM_OUTPUT, FRAG_RESULT_DATA0}, /* array */
243 };
244
245 ir_to_mesa_instruction *
246 ir_to_mesa_visitor::ir_to_mesa_emit_op3(ir_instruction *ir,
247 enum prog_opcode op,
248 ir_to_mesa_dst_reg dst,
249 ir_to_mesa_src_reg src0,
250 ir_to_mesa_src_reg src1,
251 ir_to_mesa_src_reg src2)
252 {
253 ir_to_mesa_instruction *inst = new(mem_ctx) ir_to_mesa_instruction();
254
255 inst->op = op;
256 inst->dst_reg = dst;
257 inst->src_reg[0] = src0;
258 inst->src_reg[1] = src1;
259 inst->src_reg[2] = src2;
260 inst->ir = ir;
261
262 this->instructions.push_tail(inst);
263
264 return inst;
265 }
266
267
268 ir_to_mesa_instruction *
269 ir_to_mesa_visitor::ir_to_mesa_emit_op2(ir_instruction *ir,
270 enum prog_opcode op,
271 ir_to_mesa_dst_reg dst,
272 ir_to_mesa_src_reg src0,
273 ir_to_mesa_src_reg src1)
274 {
275 return ir_to_mesa_emit_op3(ir, op, dst, src0, src1, ir_to_mesa_undef);
276 }
277
278 ir_to_mesa_instruction *
279 ir_to_mesa_visitor::ir_to_mesa_emit_op1(ir_instruction *ir,
280 enum prog_opcode op,
281 ir_to_mesa_dst_reg dst,
282 ir_to_mesa_src_reg src0)
283 {
284 return ir_to_mesa_emit_op3(ir, op, dst,
285 src0, ir_to_mesa_undef, ir_to_mesa_undef);
286 }
287
288 inline ir_to_mesa_dst_reg
289 ir_to_mesa_dst_reg_from_src(ir_to_mesa_src_reg reg)
290 {
291 ir_to_mesa_dst_reg dst_reg;
292
293 dst_reg.file = reg.file;
294 dst_reg.index = reg.index;
295 dst_reg.writemask = WRITEMASK_XYZW;
296
297 return dst_reg;
298 }
299
300 /**
301 * Emits Mesa scalar opcodes to produce unique answers across channels.
302 *
303 * Some Mesa opcodes are scalar-only, like ARB_fp/vp. The src X
304 * channel determines the result across all channels. So to do a vec4
305 * of this operation, we want to emit a scalar per source channel used
306 * to produce dest channels.
307 */
308 void
309 ir_to_mesa_visitor::ir_to_mesa_emit_scalar_op1(ir_instruction *ir,
310 enum prog_opcode op,
311 ir_to_mesa_dst_reg dst,
312 ir_to_mesa_src_reg src0)
313 {
314 int i, j;
315 int done_mask = ~dst.writemask;
316
317 /* Mesa RCP is a scalar operation splatting results to all channels,
318 * like ARB_fp/vp. So emit as many RCPs as necessary to cover our
319 * dst channels.
320 */
321 for (i = 0; i < 4; i++) {
322 GLuint this_mask = (1 << i);
323 ir_to_mesa_instruction *inst;
324 ir_to_mesa_src_reg src = src0;
325
326 if (done_mask & this_mask)
327 continue;
328
329 GLuint src_swiz = GET_SWZ(src.swizzle, i);
330 for (j = i + 1; j < 4; j++) {
331 if (!(done_mask & (1 << j)) && GET_SWZ(src.swizzle, j) == src_swiz) {
332 this_mask |= (1 << j);
333 }
334 }
335 src.swizzle = MAKE_SWIZZLE4(src_swiz, src_swiz,
336 src_swiz, src_swiz);
337
338 inst = ir_to_mesa_emit_op1(ir, op,
339 dst,
340 src);
341 inst->dst_reg.writemask = this_mask;
342 done_mask |= this_mask;
343 }
344 }
345
346 struct ir_to_mesa_src_reg
347 ir_to_mesa_visitor::src_reg_for_float(float val)
348 {
349 ir_to_mesa_src_reg src_reg;
350
351 src_reg.file = PROGRAM_CONSTANT;
352 src_reg.index = _mesa_add_unnamed_constant(this->prog->Parameters,
353 &val, 1, &src_reg.swizzle);
354
355 return src_reg;
356 }
357
358 /**
359 * In the initial pass of codegen, we assign temporary numbers to
360 * intermediate results. (not SSA -- variable assignments will reuse
361 * storage). Actual register allocation for the Mesa VM occurs in a
362 * pass over the Mesa IR later.
363 */
364 ir_to_mesa_src_reg
365 ir_to_mesa_visitor::get_temp(const glsl_type *type)
366 {
367 ir_to_mesa_src_reg src_reg;
368 int swizzle[4];
369 int i;
370
371 assert(!type->is_array());
372
373 src_reg.file = PROGRAM_TEMPORARY;
374 src_reg.index = type->matrix_columns;
375 src_reg.reladdr = false;
376
377 for (i = 0; i < type->vector_elements; i++)
378 swizzle[i] = i;
379 for (; i < 4; i++)
380 swizzle[i] = type->vector_elements - 1;
381 src_reg.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1],
382 swizzle[2], swizzle[3]);
383
384 return src_reg;
385 }
386
387 static int
388 type_size(const struct glsl_type *type)
389 {
390 unsigned int i;
391 int size;
392
393 switch (type->base_type) {
394 case GLSL_TYPE_UINT:
395 case GLSL_TYPE_INT:
396 case GLSL_TYPE_FLOAT:
397 case GLSL_TYPE_BOOL:
398 if (type->is_matrix()) {
399 return 4; /* FINISHME: Not all matrices are 4x4. */
400 } else {
401 /* Regardless of size of vector, it gets a vec4. This is bad
402 * packing for things like floats, but otherwise arrays become a
403 * mess. Hopefully a later pass over the code can pack scalars
404 * down if appropriate.
405 */
406 return 1;
407 }
408 case GLSL_TYPE_ARRAY:
409 return type_size(type->fields.array) * type->length;
410 case GLSL_TYPE_STRUCT:
411 size = 0;
412 for (i = 0; i < type->length; i++) {
413 size += type_size(type->fields.structure[i].type);
414 }
415 return size;
416 default:
417 assert(0);
418 }
419 }
420
421 temp_entry *
422 ir_to_mesa_visitor::find_variable_storage(ir_variable *var)
423 {
424
425 temp_entry *entry;
426
427 foreach_iter(exec_list_iterator, iter, this->variable_storage) {
428 entry = (temp_entry *)iter.get();
429
430 if (entry->var == var)
431 return entry;
432 }
433
434 return NULL;
435 }
436
437 void
438 ir_to_mesa_visitor::visit(ir_variable *ir)
439 {
440 (void)ir;
441 }
442
443 void
444 ir_to_mesa_visitor::visit(ir_loop *ir)
445 {
446 assert(!ir->from);
447 assert(!ir->to);
448 assert(!ir->increment);
449 assert(!ir->counter);
450
451 ir_to_mesa_emit_op1(NULL, OPCODE_BGNLOOP,
452 ir_to_mesa_undef_dst, ir_to_mesa_undef);
453
454 visit_exec_list(&ir->body_instructions, this);
455
456 ir_to_mesa_emit_op1(NULL, OPCODE_ENDLOOP,
457 ir_to_mesa_undef_dst, ir_to_mesa_undef);
458 }
459
460 void
461 ir_to_mesa_visitor::visit(ir_loop_jump *ir)
462 {
463 switch (ir->mode) {
464 case ir_loop_jump::jump_break:
465 ir_to_mesa_emit_op1(NULL, OPCODE_BRK,
466 ir_to_mesa_undef_dst, ir_to_mesa_undef);
467 break;
468 case ir_loop_jump::jump_continue:
469 ir_to_mesa_emit_op1(NULL, OPCODE_CONT,
470 ir_to_mesa_undef_dst, ir_to_mesa_undef);
471 break;
472 }
473 }
474
475
476 void
477 ir_to_mesa_visitor::visit(ir_function_signature *ir)
478 {
479 assert(0);
480 (void)ir;
481 }
482
483 void
484 ir_to_mesa_visitor::visit(ir_function *ir)
485 {
486 /* Ignore function bodies other than main() -- we shouldn't see calls to
487 * them since they should all be inlined before we get to ir_to_mesa.
488 */
489 if (strcmp(ir->name, "main") == 0) {
490 const ir_function_signature *sig;
491 exec_list empty;
492
493 sig = ir->matching_signature(&empty);
494
495 assert(sig);
496
497 foreach_iter(exec_list_iterator, iter, sig->body) {
498 ir_instruction *ir = (ir_instruction *)iter.get();
499
500 ir->accept(this);
501 }
502 }
503 }
504
505 void
506 ir_to_mesa_visitor::visit(ir_expression *ir)
507 {
508 unsigned int operand;
509 struct ir_to_mesa_src_reg op[2];
510 struct ir_to_mesa_src_reg result_src;
511 struct ir_to_mesa_dst_reg result_dst;
512 const glsl_type *vec4_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 4, 1);
513 const glsl_type *vec3_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 3, 1);
514 const glsl_type *vec2_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 2, 1);
515
516 for (operand = 0; operand < ir->get_num_operands(); operand++) {
517 this->result.file = PROGRAM_UNDEFINED;
518 ir->operands[operand]->accept(this);
519 if (this->result.file == PROGRAM_UNDEFINED) {
520 ir_print_visitor v;
521 printf("Failed to get tree for expression operand:\n");
522 ir->operands[operand]->accept(&v);
523 exit(1);
524 }
525 op[operand] = this->result;
526
527 /* Only expression implemented for matrices yet */
528 assert(!ir->operands[operand]->type->is_matrix() ||
529 ir->operation == ir_binop_mul);
530 }
531
532 this->result.file = PROGRAM_UNDEFINED;
533
534 /* Storage for our result. Ideally for an assignment we'd be using
535 * the actual storage for the result here, instead.
536 */
537 result_src = get_temp(ir->type);
538 /* convenience for the emit functions below. */
539 result_dst = ir_to_mesa_dst_reg_from_src(result_src);
540 /* Limit writes to the channels that will be used by result_src later.
541 * This does limit this temp's use as a temporary for multi-instruction
542 * sequences.
543 */
544 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
545
546 switch (ir->operation) {
547 case ir_unop_logic_not:
548 ir_to_mesa_emit_op2(ir, OPCODE_SEQ, result_dst,
549 op[0], src_reg_for_float(0.0));
550 break;
551 case ir_unop_neg:
552 op[0].negate = ~op[0].negate;
553 result_src = op[0];
554 break;
555 case ir_unop_exp:
556 ir_to_mesa_emit_scalar_op1(ir, OPCODE_EXP, result_dst, op[0]);
557 break;
558 case ir_unop_exp2:
559 ir_to_mesa_emit_scalar_op1(ir, OPCODE_EX2, result_dst, op[0]);
560 break;
561 case ir_unop_log:
562 ir_to_mesa_emit_scalar_op1(ir, OPCODE_LOG, result_dst, op[0]);
563 break;
564 case ir_unop_log2:
565 ir_to_mesa_emit_scalar_op1(ir, OPCODE_LG2, result_dst, op[0]);
566 break;
567 case ir_unop_sin:
568 ir_to_mesa_emit_scalar_op1(ir, OPCODE_SIN, result_dst, op[0]);
569 break;
570 case ir_unop_cos:
571 ir_to_mesa_emit_scalar_op1(ir, OPCODE_COS, result_dst, op[0]);
572 break;
573 case ir_binop_add:
574 ir_to_mesa_emit_op2(ir, OPCODE_ADD, result_dst, op[0], op[1]);
575 break;
576 case ir_binop_sub:
577 ir_to_mesa_emit_op2(ir, OPCODE_SUB, result_dst, op[0], op[1]);
578 break;
579 case ir_binop_mul:
580 if (ir->operands[0]->type->is_matrix() &&
581 !ir->operands[1]->type->is_matrix()) {
582 if (ir->operands[1]->type->is_scalar()) {
583 ir_to_mesa_dst_reg dst_column = result_dst;
584 ir_to_mesa_src_reg src_column = op[0];
585 for (int i = 0; i < ir->operands[0]->type->matrix_columns; i++) {
586 ir_to_mesa_emit_op2(ir, OPCODE_MUL,
587 dst_column, src_column, op[1]);
588 dst_column.index++;
589 src_column.index++;
590 }
591 } else {
592 ir_to_mesa_src_reg src_column = op[0];
593 ir_to_mesa_src_reg src_chan = op[1];
594 assert(!ir->operands[1]->type->is_matrix() ||
595 !"FINISHME: matrix * matrix");
596 for (int i = 0; i < ir->operands[0]->type->matrix_columns; i++) {
597 src_chan.swizzle = MAKE_SWIZZLE4(i, i, i, i);
598 if (i == 0) {
599 ir_to_mesa_emit_op2(ir, OPCODE_MUL,
600 result_dst, src_column, src_chan);
601 } else {
602 ir_to_mesa_emit_op3(ir, OPCODE_MAD,
603 result_dst, src_column, src_chan,
604 result_src);
605 }
606 src_column.index++;
607 }
608 }
609 } else {
610 assert(!ir->operands[0]->type->is_matrix());
611 assert(!ir->operands[1]->type->is_matrix());
612 ir_to_mesa_emit_op2(ir, OPCODE_MUL, result_dst, op[0], op[1]);
613 }
614 break;
615 case ir_binop_div:
616 ir_to_mesa_emit_scalar_op1(ir, OPCODE_RCP, result_dst, op[1]);
617 ir_to_mesa_emit_op2(ir, OPCODE_MUL, result_dst, op[0], result_src);
618 break;
619
620 case ir_binop_less:
621 ir_to_mesa_emit_op2(ir, OPCODE_SLT, result_dst, op[0], op[1]);
622 break;
623 case ir_binop_greater:
624 ir_to_mesa_emit_op2(ir, OPCODE_SGT, result_dst, op[0], op[1]);
625 break;
626 case ir_binop_lequal:
627 ir_to_mesa_emit_op2(ir, OPCODE_SLE, result_dst, op[0], op[1]);
628 break;
629 case ir_binop_gequal:
630 ir_to_mesa_emit_op2(ir, OPCODE_SGE, result_dst, op[0], op[1]);
631 break;
632 case ir_binop_equal:
633 ir_to_mesa_emit_op2(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
634 break;
635 case ir_binop_logic_xor:
636 case ir_binop_nequal:
637 ir_to_mesa_emit_op2(ir, OPCODE_SNE, result_dst, op[0], op[1]);
638 break;
639
640 case ir_binop_logic_or:
641 /* This could be a saturated add and skip the SNE. */
642 ir_to_mesa_emit_op2(ir, OPCODE_ADD,
643 result_dst,
644 op[0], op[1]);
645
646 ir_to_mesa_emit_op2(ir, OPCODE_SNE,
647 result_dst,
648 result_src, src_reg_for_float(0.0));
649 break;
650
651 case ir_binop_logic_and:
652 /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
653 ir_to_mesa_emit_op2(ir, OPCODE_MUL,
654 result_dst,
655 op[0], op[1]);
656 break;
657
658 case ir_binop_dot:
659 if (ir->operands[0]->type == vec4_type) {
660 assert(ir->operands[1]->type == vec4_type);
661 ir_to_mesa_emit_op2(ir, OPCODE_DP4,
662 result_dst,
663 op[0], op[1]);
664 } else if (ir->operands[0]->type == vec3_type) {
665 assert(ir->operands[1]->type == vec3_type);
666 ir_to_mesa_emit_op2(ir, OPCODE_DP3,
667 result_dst,
668 op[0], op[1]);
669 } else if (ir->operands[0]->type == vec2_type) {
670 assert(ir->operands[1]->type == vec2_type);
671 ir_to_mesa_emit_op2(ir, OPCODE_DP2,
672 result_dst,
673 op[0], op[1]);
674 }
675 break;
676 case ir_unop_sqrt:
677 ir_to_mesa_emit_scalar_op1(ir, OPCODE_RSQ, result_dst, op[0]);
678 ir_to_mesa_emit_op1(ir, OPCODE_RCP, result_dst, result_src);
679 break;
680 case ir_unop_rsq:
681 ir_to_mesa_emit_scalar_op1(ir, OPCODE_RSQ, result_dst, op[0]);
682 break;
683 case ir_unop_i2f:
684 /* Mesa IR lacks types, ints are stored as truncated floats. */
685 result_src = op[0];
686 break;
687 case ir_unop_f2i:
688 ir_to_mesa_emit_op1(ir, OPCODE_TRUNC, result_dst, op[0]);
689 break;
690 case ir_unop_f2b:
691 ir_to_mesa_emit_op2(ir, OPCODE_SNE, result_dst,
692 result_src, src_reg_for_float(0.0));
693 break;
694 case ir_unop_trunc:
695 ir_to_mesa_emit_op1(ir, OPCODE_TRUNC, result_dst, op[0]);
696 break;
697 case ir_unop_ceil:
698 op[0].negate = ~op[0].negate;
699 ir_to_mesa_emit_op1(ir, OPCODE_FLR, result_dst, op[0]);
700 result_src.negate = ~result_src.negate;
701 break;
702 case ir_unop_floor:
703 ir_to_mesa_emit_op1(ir, OPCODE_FLR, result_dst, op[0]);
704 break;
705 case ir_binop_min:
706 ir_to_mesa_emit_op2(ir, OPCODE_MIN, result_dst, op[0], op[1]);
707 break;
708 case ir_binop_max:
709 ir_to_mesa_emit_op2(ir, OPCODE_MAX, result_dst, op[0], op[1]);
710 break;
711 default:
712 ir_print_visitor v;
713 printf("Failed to get tree for expression:\n");
714 ir->accept(&v);
715 exit(1);
716 break;
717 }
718
719 this->result = result_src;
720 }
721
722
723 void
724 ir_to_mesa_visitor::visit(ir_swizzle *ir)
725 {
726 ir_to_mesa_src_reg src_reg;
727 int i;
728 int swizzle[4];
729
730 /* Note that this is only swizzles in expressions, not those on the left
731 * hand side of an assignment, which do write masking. See ir_assignment
732 * for that.
733 */
734
735 ir->val->accept(this);
736 src_reg = this->result;
737 assert(src_reg.file != PROGRAM_UNDEFINED);
738
739 for (i = 0; i < 4; i++) {
740 if (i < ir->type->vector_elements) {
741 switch (i) {
742 case 0:
743 swizzle[i] = ir->mask.x;
744 break;
745 case 1:
746 swizzle[i] = ir->mask.y;
747 break;
748 case 2:
749 swizzle[i] = ir->mask.z;
750 break;
751 case 3:
752 swizzle[i] = ir->mask.w;
753 break;
754 }
755 } else {
756 /* If the type is smaller than a vec4, replicate the last
757 * channel out.
758 */
759 swizzle[i] = ir->type->vector_elements - 1;
760 }
761 }
762
763 src_reg.swizzle = MAKE_SWIZZLE4(swizzle[0],
764 swizzle[1],
765 swizzle[2],
766 swizzle[3]);
767
768 this->result = src_reg;
769 }
770
771 static temp_entry *
772 get_builtin_matrix_ref(void *mem_ctx, struct gl_program *prog, ir_variable *var)
773 {
774 /*
775 * NOTE: The ARB_vertex_program extension specified that matrices get
776 * loaded in registers in row-major order. With GLSL, we want column-
777 * major order. So, we need to transpose all matrices here...
778 */
779 static const struct {
780 const char *name;
781 int matrix;
782 int modifier;
783 } matrices[] = {
784 { "gl_ModelViewMatrix", STATE_MODELVIEW_MATRIX, STATE_MATRIX_TRANSPOSE },
785 { "gl_ModelViewMatrixInverse", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVTRANS },
786 { "gl_ModelViewMatrixTranspose", STATE_MODELVIEW_MATRIX, 0 },
787 { "gl_ModelViewMatrixInverseTranspose", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVERSE },
788
789 { "gl_ProjectionMatrix", STATE_PROJECTION_MATRIX, STATE_MATRIX_TRANSPOSE },
790 { "gl_ProjectionMatrixInverse", STATE_PROJECTION_MATRIX, STATE_MATRIX_INVTRANS },
791 { "gl_ProjectionMatrixTranspose", STATE_PROJECTION_MATRIX, 0 },
792 { "gl_ProjectionMatrixInverseTranspose", STATE_PROJECTION_MATRIX, STATE_MATRIX_INVERSE },
793
794 { "gl_ModelViewProjectionMatrix", STATE_MVP_MATRIX, STATE_MATRIX_TRANSPOSE },
795 { "gl_ModelViewProjectionMatrixInverse", STATE_MVP_MATRIX, STATE_MATRIX_INVTRANS },
796 { "gl_ModelViewProjectionMatrixTranspose", STATE_MVP_MATRIX, 0 },
797 { "gl_ModelViewProjectionMatrixInverseTranspose", STATE_MVP_MATRIX, STATE_MATRIX_INVERSE },
798
799 { "gl_TextureMatrix", STATE_TEXTURE_MATRIX, STATE_MATRIX_TRANSPOSE },
800 { "gl_TextureMatrixInverse", STATE_TEXTURE_MATRIX, STATE_MATRIX_INVTRANS },
801 { "gl_TextureMatrixTranspose", STATE_TEXTURE_MATRIX, 0 },
802 { "gl_TextureMatrixInverseTranspose", STATE_TEXTURE_MATRIX, STATE_MATRIX_INVERSE },
803
804 { "gl_NormalMatrix", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVERSE },
805
806 };
807 unsigned int i;
808 temp_entry *entry;
809
810 /* C++ gets angry when we try to use an int as a gl_state_index, so we use
811 * ints for gl_state_index. Make sure they're compatible.
812 */
813 assert(sizeof(gl_state_index) == sizeof(int));
814
815 for (i = 0; i < Elements(matrices); i++) {
816 if (strcmp(var->name, matrices[i].name) == 0) {
817 int j;
818 int last_pos = -1, base_pos = -1;
819 int tokens[STATE_LENGTH];
820
821 tokens[0] = matrices[i].matrix;
822 tokens[1] = 0; /* array index! */
823 tokens[4] = matrices[i].modifier;
824
825 /* Add a ref for each column. It looks like the reason we do
826 * it this way is that _mesa_add_state_reference doesn't work
827 * for things that aren't vec4s, so the tokens[2]/tokens[3]
828 * range has to be equal.
829 */
830 for (j = 0; j < 4; j++) {
831 tokens[2] = j;
832 tokens[3] = j;
833 int pos = _mesa_add_state_reference(prog->Parameters,
834 (gl_state_index *)tokens);
835 assert(last_pos == -1 || last_pos == base_pos + j);
836 if (base_pos == -1)
837 base_pos = pos;
838 }
839
840 entry = new(mem_ctx) temp_entry(var,
841 PROGRAM_STATE_VAR,
842 base_pos);
843
844 return entry;
845 }
846 }
847
848 return NULL;
849 }
850
851 void
852 ir_to_mesa_visitor::visit(ir_dereference_variable *ir)
853 {
854 ir_to_mesa_src_reg src_reg;
855 temp_entry *entry = find_variable_storage(ir->var);
856 unsigned int i, loc;
857 bool var_in;
858
859 if (!entry) {
860 switch (ir->var->mode) {
861 case ir_var_uniform:
862 entry = get_builtin_matrix_ref(this->mem_ctx, this->prog, ir->var);
863 if (entry)
864 break;
865
866 /* FINISHME: Fix up uniform name for arrays and things */
867 assert(ir->var->type->gl_type != 0 &&
868 ir->var->type->gl_type != GL_INVALID_ENUM);
869 loc = _mesa_add_uniform(this->prog->Parameters,
870 ir->var->name,
871 type_size(ir->var->type) * 4,
872 ir->var->type->gl_type,
873 NULL);
874 /* Always mark the uniform used at this point. If it isn't
875 * used, dead code elimination should have nuked the decl already.
876 */
877 this->prog->Parameters->Parameters[loc].Used = GL_TRUE;
878
879 entry = new(mem_ctx) temp_entry(ir->var, PROGRAM_UNIFORM, loc);
880 this->variable_storage.push_tail(entry);
881 break;
882 case ir_var_in:
883 case ir_var_out:
884 case ir_var_inout:
885 var_in = (ir->var->mode == ir_var_in ||
886 ir->var->mode == ir_var_inout);
887
888 for (i = 0; i < ARRAY_SIZE(builtin_var_to_mesa_reg); i++) {
889 bool in = builtin_var_to_mesa_reg[i].file == PROGRAM_INPUT;
890
891 if (strcmp(ir->var->name, builtin_var_to_mesa_reg[i].name) == 0 &&
892 !(var_in ^ in))
893 break;
894 }
895 if (i == ARRAY_SIZE(builtin_var_to_mesa_reg)) {
896 printf("Failed to find builtin for %s variable %s\n",
897 var_in ? "in" : "out",
898 ir->var->name);
899 abort();
900 }
901 entry = new(mem_ctx) temp_entry(ir->var,
902 builtin_var_to_mesa_reg[i].file,
903 builtin_var_to_mesa_reg[i].index);
904 break;
905 case ir_var_auto:
906 entry = new(mem_ctx) temp_entry(ir->var, PROGRAM_TEMPORARY,
907 this->next_temp);
908 this->variable_storage.push_tail(entry);
909
910 next_temp += type_size(ir->var->type);
911 break;
912 }
913
914 if (!entry) {
915 printf("Failed to make storage for %s\n", ir->var->name);
916 exit(1);
917 }
918 }
919
920 src_reg.file = entry->file;
921 src_reg.index = entry->index;
922 /* If the type is smaller than a vec4, replicate the last channel out. */
923 src_reg.swizzle = swizzle_for_size(ir->var->type->vector_elements);
924 src_reg.reladdr = false;
925 src_reg.negate = 0;
926
927 this->result = src_reg;
928 }
929
930 void
931 ir_to_mesa_visitor::visit(ir_dereference_array *ir)
932 {
933 ir_constant *index;
934 ir_to_mesa_src_reg src_reg;
935
936 index = ir->array_index->constant_expression_value();
937
938 /* By the time we make it to this stage, matrices should be broken down
939 * to vectors.
940 */
941 assert(!ir->type->is_matrix());
942
943 ir->array->accept(this);
944 src_reg = this->result;
945
946 if (src_reg.file == PROGRAM_INPUT ||
947 src_reg.file == PROGRAM_OUTPUT) {
948 assert(index); /* FINISHME: Handle variable indexing of builtins. */
949
950 src_reg.index += index->value.i[0];
951 } else {
952 if (index) {
953 src_reg.index += index->value.i[0];
954 } else {
955 ir_to_mesa_src_reg array_base = this->result;
956 /* Variable index array dereference. It eats the "vec4" of the
957 * base of the array and an index that offsets the Mesa register
958 * index.
959 */
960 ir->array_index->accept(this);
961
962 /* FINISHME: This doesn't work when we're trying to do the LHS
963 * of an assignment.
964 */
965 src_reg.reladdr = true;
966 ir_to_mesa_emit_op1(ir, OPCODE_ARL, ir_to_mesa_address_reg,
967 this->result);
968
969 this->result = get_temp(ir->type);
970 ir_to_mesa_emit_op1(ir, OPCODE_MOV,
971 ir_to_mesa_dst_reg_from_src(this->result),
972 src_reg);
973 }
974 }
975
976 /* If the type is smaller than a vec4, replicate the last channel out. */
977 src_reg.swizzle = swizzle_for_size(ir->type->vector_elements);
978
979 this->result = src_reg;
980 }
981
982 void
983 ir_to_mesa_visitor::visit(ir_dereference_record *ir)
984 {
985 unsigned int i;
986 const glsl_type *struct_type = ir->record->type;
987 int offset = 0;
988
989 ir->record->accept(this);
990
991 for (i = 0; i < struct_type->length; i++) {
992 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
993 break;
994 offset += type_size(struct_type->fields.structure[i].type);
995 }
996 this->result.index += offset;
997 }
998
999 /**
1000 * We want to be careful in assignment setup to hit the actual storage
1001 * instead of potentially using a temporary like we might with the
1002 * ir_dereference handler.
1003 *
1004 * Thanks to ir_swizzle_swizzle, and ir_vec_index_to_swizzle, we
1005 * should only see potentially one variable array index of a vector,
1006 * and one swizzle, before getting to actual vec4 storage. So handle
1007 * those, then go use ir_dereference to handle the rest.
1008 */
1009 static struct ir_to_mesa_dst_reg
1010 get_assignment_lhs(ir_instruction *ir, ir_to_mesa_visitor *v)
1011 {
1012 struct ir_to_mesa_dst_reg dst_reg;
1013 ir_dereference *deref;
1014 ir_swizzle *swiz;
1015
1016 /* Use the rvalue deref handler for the most part. We'll ignore
1017 * swizzles in it and write swizzles using writemask, though.
1018 */
1019 ir->accept(v);
1020 dst_reg = ir_to_mesa_dst_reg_from_src(v->result);
1021
1022 if ((deref = ir->as_dereference())) {
1023 ir_dereference_array *deref_array = ir->as_dereference_array();
1024 assert(!deref_array || deref_array->array->type->is_array());
1025
1026 ir->accept(v);
1027 } else if ((swiz = ir->as_swizzle())) {
1028 dst_reg.writemask = 0;
1029 if (swiz->mask.num_components >= 1)
1030 dst_reg.writemask |= (1 << swiz->mask.x);
1031 if (swiz->mask.num_components >= 2)
1032 dst_reg.writemask |= (1 << swiz->mask.y);
1033 if (swiz->mask.num_components >= 3)
1034 dst_reg.writemask |= (1 << swiz->mask.z);
1035 if (swiz->mask.num_components >= 4)
1036 dst_reg.writemask |= (1 << swiz->mask.w);
1037 }
1038
1039 return dst_reg;
1040 }
1041
1042 void
1043 ir_to_mesa_visitor::visit(ir_assignment *ir)
1044 {
1045 struct ir_to_mesa_dst_reg l;
1046 struct ir_to_mesa_src_reg r;
1047
1048 assert(!ir->lhs->type->is_matrix());
1049 assert(!ir->lhs->type->is_array());
1050 assert(ir->lhs->type->base_type != GLSL_TYPE_STRUCT);
1051
1052 l = get_assignment_lhs(ir->lhs, this);
1053
1054 ir->rhs->accept(this);
1055 r = this->result;
1056 assert(l.file != PROGRAM_UNDEFINED);
1057 assert(r.file != PROGRAM_UNDEFINED);
1058
1059 if (ir->condition) {
1060 ir_constant *condition_constant;
1061
1062 condition_constant = ir->condition->constant_expression_value();
1063
1064 assert(condition_constant && condition_constant->value.b[0]);
1065 }
1066
1067 ir_to_mesa_emit_op1(ir, OPCODE_MOV, l, r);
1068 }
1069
1070
1071 void
1072 ir_to_mesa_visitor::visit(ir_constant *ir)
1073 {
1074 ir_to_mesa_src_reg src_reg;
1075 GLfloat stack_vals[4];
1076 GLfloat *values = stack_vals;
1077 unsigned int i;
1078
1079 if (ir->type->is_matrix() || ir->type->is_array()) {
1080 assert(!"FINISHME: array/matrix constants");
1081 }
1082
1083 src_reg.file = PROGRAM_CONSTANT;
1084 switch (ir->type->base_type) {
1085 case GLSL_TYPE_FLOAT:
1086 values = &ir->value.f[0];
1087 break;
1088 case GLSL_TYPE_UINT:
1089 for (i = 0; i < ir->type->vector_elements; i++) {
1090 values[i] = ir->value.u[i];
1091 }
1092 break;
1093 case GLSL_TYPE_INT:
1094 for (i = 0; i < ir->type->vector_elements; i++) {
1095 values[i] = ir->value.i[i];
1096 }
1097 break;
1098 case GLSL_TYPE_BOOL:
1099 for (i = 0; i < ir->type->vector_elements; i++) {
1100 values[i] = ir->value.b[i];
1101 }
1102 break;
1103 default:
1104 assert(!"Non-float/uint/int/bool constant");
1105 }
1106
1107 src_reg.index = _mesa_add_unnamed_constant(this->prog->Parameters,
1108 values, ir->type->vector_elements,
1109 &src_reg.swizzle);
1110 src_reg.reladdr = false;
1111 src_reg.negate = 0;
1112
1113 this->result = src_reg;
1114 }
1115
1116
1117 void
1118 ir_to_mesa_visitor::visit(ir_call *ir)
1119 {
1120 printf("Can't support call to %s\n", ir->callee_name());
1121 exit(1);
1122 }
1123
1124
1125 void
1126 ir_to_mesa_visitor::visit(ir_texture *ir)
1127 {
1128 assert(0);
1129
1130 ir->coordinate->accept(this);
1131 }
1132
1133 void
1134 ir_to_mesa_visitor::visit(ir_return *ir)
1135 {
1136 assert(0);
1137
1138 ir->get_value()->accept(this);
1139 }
1140
1141
1142 void
1143 ir_to_mesa_visitor::visit(ir_if *ir)
1144 {
1145 ir_to_mesa_instruction *if_inst, *else_inst = NULL;
1146
1147 ir->condition->accept(this);
1148 assert(this->result.file != PROGRAM_UNDEFINED);
1149
1150 if_inst = ir_to_mesa_emit_op1(ir->condition,
1151 OPCODE_IF, ir_to_mesa_undef_dst,
1152 this->result);
1153
1154 this->instructions.push_tail(if_inst);
1155
1156 visit_exec_list(&ir->then_instructions, this);
1157
1158 if (!ir->else_instructions.is_empty()) {
1159 else_inst = ir_to_mesa_emit_op1(ir->condition, OPCODE_ELSE,
1160 ir_to_mesa_undef_dst,
1161 ir_to_mesa_undef);
1162 visit_exec_list(&ir->then_instructions, this);
1163 }
1164
1165 if_inst = ir_to_mesa_emit_op1(ir->condition, OPCODE_ENDIF,
1166 ir_to_mesa_undef_dst, ir_to_mesa_undef);
1167 }
1168
1169 ir_to_mesa_visitor::ir_to_mesa_visitor()
1170 {
1171 result.file = PROGRAM_UNDEFINED;
1172 next_temp = 1;
1173 }
1174
1175 static struct prog_src_register
1176 mesa_src_reg_from_ir_src_reg(ir_to_mesa_src_reg reg)
1177 {
1178 struct prog_src_register mesa_reg;
1179
1180 mesa_reg.File = reg.file;
1181 assert(reg.index < (1 << INST_INDEX_BITS) - 1);
1182 mesa_reg.Index = reg.index;
1183 mesa_reg.Swizzle = reg.swizzle;
1184 mesa_reg.RelAddr = reg.reladdr;
1185
1186 return mesa_reg;
1187 }
1188
1189 static void
1190 set_branchtargets(struct prog_instruction *mesa_instructions,
1191 int num_instructions)
1192 {
1193 int if_count = 0, loop_count;
1194 int *if_stack, *loop_stack;
1195 int if_stack_pos = 0, loop_stack_pos = 0;
1196 int i, j;
1197
1198 for (i = 0; i < num_instructions; i++) {
1199 switch (mesa_instructions[i].Opcode) {
1200 case OPCODE_IF:
1201 if_count++;
1202 break;
1203 case OPCODE_BGNLOOP:
1204 loop_count++;
1205 break;
1206 case OPCODE_BRK:
1207 case OPCODE_CONT:
1208 mesa_instructions[i].BranchTarget = -1;
1209 break;
1210 default:
1211 break;
1212 }
1213 }
1214
1215 if_stack = (int *)calloc(if_count, sizeof(*if_stack));
1216 loop_stack = (int *)calloc(loop_count, sizeof(*loop_stack));
1217
1218 for (i = 0; i < num_instructions; i++) {
1219 switch (mesa_instructions[i].Opcode) {
1220 case OPCODE_IF:
1221 if_stack[if_stack_pos] = i;
1222 if_stack_pos++;
1223 break;
1224 case OPCODE_ELSE:
1225 mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
1226 if_stack[if_stack_pos - 1] = i;
1227 break;
1228 case OPCODE_ENDIF:
1229 mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
1230 if_stack_pos--;
1231 break;
1232 case OPCODE_BGNLOOP:
1233 loop_stack[loop_stack_pos] = i;
1234 loop_stack_pos++;
1235 break;
1236 case OPCODE_ENDLOOP:
1237 loop_stack_pos--;
1238 /* Rewrite any breaks/conts at this nesting level (haven't
1239 * already had a BranchTarget assigned) to point to the end
1240 * of the loop.
1241 */
1242 for (j = loop_stack[loop_stack_pos]; j < i; j++) {
1243 if (mesa_instructions[j].Opcode == OPCODE_BRK ||
1244 mesa_instructions[j].Opcode == OPCODE_CONT) {
1245 if (mesa_instructions[j].BranchTarget == -1) {
1246 mesa_instructions[j].BranchTarget = i;
1247 }
1248 }
1249 }
1250 /* The loop ends point at each other. */
1251 mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos];
1252 mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i;
1253 default:
1254 break;
1255 }
1256 }
1257
1258 free(if_stack);
1259 }
1260
1261 static void
1262 print_program(struct prog_instruction *mesa_instructions,
1263 ir_instruction **mesa_instruction_annotation,
1264 int num_instructions)
1265 {
1266 ir_instruction *last_ir = NULL;
1267 int i;
1268
1269 for (i = 0; i < num_instructions; i++) {
1270 struct prog_instruction *mesa_inst = mesa_instructions + i;
1271 ir_instruction *ir = mesa_instruction_annotation[i];
1272
1273 if (last_ir != ir && ir) {
1274 ir_print_visitor print;
1275 ir->accept(&print);
1276 printf("\n");
1277 last_ir = ir;
1278 }
1279
1280 _mesa_print_instruction(mesa_inst);
1281 }
1282 }
1283
1284 static void
1285 count_resources(struct gl_program *prog)
1286 {
1287 prog->InputsRead = 0;
1288 prog->OutputsWritten = 0;
1289 unsigned int i;
1290
1291 for (i = 0; i < prog->NumInstructions; i++) {
1292 struct prog_instruction *inst = &prog->Instructions[i];
1293 unsigned int reg;
1294
1295 switch (inst->DstReg.File) {
1296 case PROGRAM_OUTPUT:
1297 prog->OutputsWritten |= BITFIELD64_BIT(inst->DstReg.Index);
1298 break;
1299 case PROGRAM_INPUT:
1300 prog->InputsRead |= BITFIELD64_BIT(inst->DstReg.Index);
1301 break;
1302 default:
1303 break;
1304 }
1305
1306 for (reg = 0; reg < _mesa_num_inst_src_regs(inst->Opcode); reg++) {
1307 switch (inst->SrcReg[reg].File) {
1308 case PROGRAM_OUTPUT:
1309 prog->OutputsWritten |= BITFIELD64_BIT(inst->SrcReg[reg].Index);
1310 break;
1311 case PROGRAM_INPUT:
1312 prog->InputsRead |= BITFIELD64_BIT(inst->SrcReg[reg].Index);
1313 break;
1314 default:
1315 break;
1316 }
1317 }
1318 }
1319 }
1320
1321 /* Each stage has some uniforms in its Parameters list. The Uniforms
1322 * list for the linked shader program has a pointer to these uniforms
1323 * in each of the stage's Parameters list, so that their values can be
1324 * updated when a uniform is set.
1325 */
1326 static void
1327 link_uniforms_to_shared_uniform_list(struct gl_uniform_list *uniforms,
1328 struct gl_program *prog)
1329 {
1330 unsigned int i;
1331
1332 for (i = 0; i < prog->Parameters->NumParameters; i++) {
1333 const struct gl_program_parameter *p = prog->Parameters->Parameters + i;
1334
1335 if (p->Type == PROGRAM_UNIFORM || p->Type == PROGRAM_SAMPLER) {
1336 struct gl_uniform *uniform =
1337 _mesa_append_uniform(uniforms, p->Name, prog->Target, i);
1338 if (uniform)
1339 uniform->Initialized = p->Initialized;
1340 }
1341 }
1342 }
1343
1344 struct gl_program *
1345 get_mesa_program(GLcontext *ctx, void *mem_ctx, struct glsl_shader *shader)
1346 {
1347 ir_to_mesa_visitor v;
1348 struct prog_instruction *mesa_instructions, *mesa_inst;
1349 ir_instruction **mesa_instruction_annotation;
1350 int i;
1351 exec_list *instructions = &shader->ir;
1352 struct gl_program *prog;
1353 GLenum target;
1354
1355 switch (shader->Type) {
1356 case GL_VERTEX_SHADER: target = GL_VERTEX_PROGRAM_ARB; break;
1357 case GL_FRAGMENT_SHADER: target = GL_FRAGMENT_PROGRAM_ARB; break;
1358 default: assert(!"should not be reached"); break;
1359 }
1360
1361 prog = ctx->Driver.NewProgram(ctx, target, 1);
1362 if (!prog)
1363 return NULL;
1364 prog->Parameters = _mesa_new_parameter_list();
1365 prog->Varying = _mesa_new_parameter_list();
1366 prog->Attributes = _mesa_new_parameter_list();
1367 v.ctx = ctx;
1368 v.prog = prog;
1369
1370 v.mem_ctx = talloc_new(NULL);
1371 visit_exec_list(instructions, &v);
1372 v.ir_to_mesa_emit_op1(NULL, OPCODE_END,
1373 ir_to_mesa_undef_dst, ir_to_mesa_undef);
1374
1375 prog->NumTemporaries = v.next_temp;
1376
1377 int num_instructions = 0;
1378 foreach_iter(exec_list_iterator, iter, v.instructions) {
1379 num_instructions++;
1380 }
1381
1382 mesa_instructions =
1383 (struct prog_instruction *)calloc(num_instructions,
1384 sizeof(*mesa_instructions));
1385 mesa_instruction_annotation = talloc_array(mem_ctx, ir_instruction *,
1386 num_instructions);
1387
1388 mesa_inst = mesa_instructions;
1389 i = 0;
1390 foreach_iter(exec_list_iterator, iter, v.instructions) {
1391 ir_to_mesa_instruction *inst = (ir_to_mesa_instruction *)iter.get();
1392
1393 mesa_inst->Opcode = inst->op;
1394 mesa_inst->DstReg.File = inst->dst_reg.file;
1395 mesa_inst->DstReg.Index = inst->dst_reg.index;
1396 mesa_inst->DstReg.CondMask = COND_TR;
1397 mesa_inst->DstReg.WriteMask = inst->dst_reg.writemask;
1398 mesa_inst->SrcReg[0] = mesa_src_reg_from_ir_src_reg(inst->src_reg[0]);
1399 mesa_inst->SrcReg[1] = mesa_src_reg_from_ir_src_reg(inst->src_reg[1]);
1400 mesa_inst->SrcReg[2] = mesa_src_reg_from_ir_src_reg(inst->src_reg[2]);
1401 mesa_instruction_annotation[i] = inst->ir;
1402
1403 mesa_inst++;
1404 i++;
1405 }
1406
1407 set_branchtargets(mesa_instructions, num_instructions);
1408 if (0) {
1409 print_program(mesa_instructions, mesa_instruction_annotation,
1410 num_instructions);
1411 }
1412
1413 prog->Instructions = mesa_instructions;
1414 prog->NumInstructions = num_instructions;
1415
1416 _mesa_reference_program(ctx, &shader->mesa_shader->Program, prog);
1417
1418 return prog;
1419 }
1420
1421 /* Takes a Mesa gl shader structure and compiles it, returning our Mesa-like
1422 * structure with the IR and such attached.
1423 */
1424 static struct glsl_shader *
1425 _mesa_get_glsl_shader(GLcontext *ctx, void *mem_ctx, struct gl_shader *sh)
1426 {
1427 struct glsl_shader *shader = talloc_zero(mem_ctx, struct glsl_shader);
1428 struct _mesa_glsl_parse_state *state;
1429
1430 shader->Type = sh->Type;
1431 shader->Name = sh->Name;
1432 shader->RefCount = 1;
1433 shader->Source = sh->Source;
1434 shader->SourceLen = strlen(sh->Source);
1435 shader->mesa_shader = sh;
1436
1437 state = talloc_zero(shader, struct _mesa_glsl_parse_state);
1438 switch (shader->Type) {
1439 case GL_VERTEX_SHADER: state->target = vertex_shader; break;
1440 case GL_FRAGMENT_SHADER: state->target = fragment_shader; break;
1441 case GL_GEOMETRY_SHADER: state->target = geometry_shader; break;
1442 }
1443
1444 state->scanner = NULL;
1445 state->translation_unit.make_empty();
1446 state->symbols = new(mem_ctx) glsl_symbol_table;
1447 state->info_log = talloc_strdup(shader, "");
1448 state->error = false;
1449 state->temp_index = 0;
1450 state->loop_or_switch_nesting = NULL;
1451 state->ARB_texture_rectangle_enable = true;
1452
1453 _mesa_glsl_lexer_ctor(state, shader->Source);
1454 _mesa_glsl_parse(state);
1455 _mesa_glsl_lexer_dtor(state);
1456
1457 shader->ir.make_empty();
1458 if (!state->error && !state->translation_unit.is_empty())
1459 _mesa_ast_to_hir(&shader->ir, state);
1460
1461 /* Optimization passes */
1462 if (!state->error && !shader->ir.is_empty()) {
1463 bool progress;
1464 do {
1465 progress = false;
1466
1467 progress = do_function_inlining(&shader->ir) || progress;
1468 progress = do_if_simplification(&shader->ir) || progress;
1469 progress = do_copy_propagation(&shader->ir) || progress;
1470 progress = do_dead_code_local(&shader->ir) || progress;
1471 progress = do_dead_code_unlinked(state, &shader->ir) || progress;
1472 progress = do_constant_variable_unlinked(&shader->ir) || progress;
1473 progress = do_constant_folding(&shader->ir) || progress;
1474 progress = do_vec_index_to_swizzle(&shader->ir) || progress;
1475 progress = do_swizzle_swizzle(&shader->ir) || progress;
1476 } while (progress);
1477 }
1478
1479 shader->symbols = state->symbols;
1480
1481 shader->CompileStatus = !state->error;
1482 shader->InfoLog = state->info_log;
1483
1484 talloc_free(state);
1485
1486 return shader;
1487 }
1488
1489 extern "C" {
1490
1491 void
1492 _mesa_glsl_compile_shader(GLcontext *ctx, struct gl_shader *sh)
1493 {
1494 struct glsl_shader *shader;
1495 TALLOC_CTX *mem_ctx = talloc_new(NULL);
1496
1497 shader = _mesa_get_glsl_shader(ctx, mem_ctx, sh);
1498
1499 sh->CompileStatus = shader->CompileStatus;
1500 sh->InfoLog = strdup(shader->InfoLog);
1501 talloc_free(mem_ctx);
1502 }
1503
1504 void
1505 _mesa_glsl_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
1506 {
1507 struct glsl_program *whole_program;
1508 unsigned int i;
1509
1510 _mesa_clear_shader_program_data(ctx, prog);
1511
1512 whole_program = talloc_zero(NULL, struct glsl_program);
1513 whole_program->LinkStatus = GL_TRUE;
1514 whole_program->NumShaders = prog->NumShaders;
1515 whole_program->Shaders = talloc_array(whole_program, struct glsl_shader *,
1516 prog->NumShaders);
1517
1518 for (i = 0; i < prog->NumShaders; i++) {
1519 whole_program->Shaders[i] = _mesa_get_glsl_shader(ctx, whole_program,
1520 prog->Shaders[i]);
1521 if (!whole_program->Shaders[i]->CompileStatus) {
1522 whole_program->InfoLog =
1523 talloc_asprintf_append(whole_program->InfoLog,
1524 "linking with uncompiled shader");
1525 whole_program->LinkStatus = GL_FALSE;
1526 }
1527 }
1528
1529 prog->Uniforms = _mesa_new_uniform_list();
1530 prog->Varying = _mesa_new_parameter_list();
1531 _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL);
1532 _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL);
1533
1534 if (whole_program->LinkStatus)
1535 link_shaders(whole_program);
1536
1537 prog->LinkStatus = whole_program->LinkStatus;
1538
1539 /* FINISHME: This should use the linker-generated code */
1540 if (prog->LinkStatus) {
1541 for (i = 0; i < prog->NumShaders; i++) {
1542 struct gl_program *linked_prog;
1543
1544 linked_prog = get_mesa_program(ctx, whole_program,
1545 whole_program->Shaders[i]);
1546 count_resources(linked_prog);
1547
1548 link_uniforms_to_shared_uniform_list(prog->Uniforms, linked_prog);
1549
1550 switch (whole_program->Shaders[i]->Type) {
1551 case GL_VERTEX_SHADER:
1552 _mesa_reference_vertprog(ctx, &prog->VertexProgram,
1553 (struct gl_vertex_program *)linked_prog);
1554 break;
1555 case GL_FRAGMENT_SHADER:
1556 _mesa_reference_fragprog(ctx, &prog->FragmentProgram,
1557 (struct gl_fragment_program *)linked_prog);
1558 break;
1559 }
1560 }
1561 }
1562
1563 talloc_free(whole_program);
1564 }
1565
1566 } /* extern "C" */