X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fstate_tracker%2Fst_glsl_to_tgsi.cpp;h=f7d79e9f50cd6b320d01d4f85b49a25e21359a17;hb=a43f68810a347f3e952a0bc401be6edb91e1baea;hp=438f21483c749128c46c1609e1927ee1581dc035;hpb=bf1cee9f24022e3da96d84fdc6baaa050d3eadf1;p=mesa.git diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 438f21483c7..f7d79e9f50c 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -70,6 +70,7 @@ extern "C" { #include "st_mesa_to_tgsi.h" } +#define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX #define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) | \ (1 << PROGRAM_ENV_PARAM) | \ (1 << PROGRAM_STATE_VAR) | \ @@ -77,6 +78,8 @@ extern "C" { (1 << PROGRAM_CONSTANT) | \ (1 << PROGRAM_UNIFORM)) +#define MAX_TEMPS 4096 + class st_src_reg; class st_dst_reg; @@ -171,7 +174,7 @@ st_src_reg::st_src_reg(st_dst_reg reg) this->index = reg.index; this->swizzle = SWIZZLE_XYZW; this->negate = 0; - this->reladdr = NULL; + this->reladdr = reg.reladdr; } st_dst_reg::st_dst_reg(st_src_reg reg) @@ -208,6 +211,7 @@ public: int sampler; /**< sampler index */ int tex_target; /**< One of TEXTURE_*_INDEX */ GLboolean tex_shadow; + int dead_mask; /**< Used in dead code elimination */ class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */ }; @@ -225,6 +229,20 @@ public: ir_variable *var; /* variable that maps to this, if any */ }; +class immediate_storage : public exec_node { +public: + immediate_storage(gl_constant_value *values, int size, int type) + { + memcpy(this->values, values, size * sizeof(gl_constant_value)); + this->size = size; + this->type = type; + } + + gl_constant_value values[4]; + int size; /**< Number of components (1-4) */ + int type; /**< GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */ +}; + class function_entry : public exec_node { public: ir_function_signature *sig; @@ -232,7 +250,7 @@ public: /** * identifier of this function signature used by the program. * - * At the point that Mesa instructions for function calls are + * At the point that TGSI instructions for function calls are * generated, we don't know the address of the first instruction of * the function body. So we make the BranchTarget that is called a * small integer and rewrite them during set_branchtargets(). @@ -247,10 +265,9 @@ public: glsl_to_tgsi_instruction *bgn_inst; /** - * Index of the first instruction of the function body in actual - * Mesa IR. + * Index of the first instruction of the function body in actual TGSI. * - * Set after convertion from glsl_to_tgsi_instruction to prog_instruction. + * Set after conversion from glsl_to_tgsi_instruction to TGSI. */ int inst; @@ -278,9 +295,13 @@ public: bool indirect_addr_consts; int glsl_version; + bool native_integers; variable_storage *find_variable_storage(ir_variable *var); + int add_constant(gl_register_file file, gl_constant_value values[4], + int size, int datatype, GLuint *swizzle_out); + function_entry *get_function_signature(ir_function_signature *sig); st_src_reg get_temp(const glsl_type *type); @@ -322,6 +343,10 @@ public: /** List of variable_storage */ exec_list variables; + /** List of immediate_storage */ + exec_list immediates; + int num_immediates; + /** List of function_entry */ exec_list function_signatures; int next_signature_id; @@ -348,11 +373,11 @@ public: /** * Emit the correct dot-product instruction for the type of arguments */ - void emit_dp(ir_instruction *ir, - st_dst_reg dst, - st_src_reg src0, - st_src_reg src1, - unsigned elements); + glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir, + st_dst_reg dst, + st_src_reg src0, + st_src_reg src1, + unsigned elements); void emit_scalar(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0); @@ -374,6 +399,7 @@ public: bool process_move_condition(ir_rvalue *ir); void remove_output_reads(gl_register_file type); + void simplify_cmp(void); void rename_temp_register(int index, int new_index); int get_first_temp_read(int index); @@ -383,13 +409,14 @@ public: void copy_propagate(void); void eliminate_dead_code(void); + int eliminate_dead_code_advanced(void); void merge_registers(void); void renumber_registers(void); void *mem_ctx; }; -static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, NULL); +static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR); static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR); @@ -479,6 +506,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, inst->src[1] = src1; inst->src[2] = src2; inst->ir = ir; + inst->dead_mask = 0; inst->function = NULL; @@ -499,6 +527,9 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, case PROGRAM_UNIFORM: this->indirect_addr_consts = true; break; + case PROGRAM_IMMEDIATE: + assert(!"immediates should not have indirect addressing"); + break; default: break; } @@ -518,6 +549,9 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, case PROGRAM_UNIFORM: this->indirect_addr_consts = true; break; + case PROGRAM_IMMEDIATE: + assert(!"immediates should not have indirect addressing"); + break; default: break; } @@ -567,7 +601,7 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT) type = GLSL_TYPE_FLOAT; - else if (glsl_version >= 130) + else if (native_integers) type = src0.type; #define case4(c, f, i, u) \ @@ -608,7 +642,7 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, return op; } -void +glsl_to_tgsi_instruction * glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir, st_dst_reg dst, st_src_reg src0, st_src_reg src1, unsigned elements) @@ -617,7 +651,7 @@ glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir, TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4 }; - emit(ir, dot_opcodes[elements - 2], dst, src0, src1); + return emit(ir, dot_opcodes[elements - 2], dst, src0, src1); } /** @@ -691,13 +725,13 @@ glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir, st_src_reg tmp = get_temp(glsl_type::float_type); if (src0.type == GLSL_TYPE_INT) - emit(ir, TGSI_OPCODE_I2F, st_dst_reg(tmp), src0); + emit(NULL, TGSI_OPCODE_I2F, st_dst_reg(tmp), src0); else if (src0.type == GLSL_TYPE_UINT) - emit(ir, TGSI_OPCODE_U2F, st_dst_reg(tmp), src0); + emit(NULL, TGSI_OPCODE_U2F, st_dst_reg(tmp), src0); else tmp = src0; - emit(ir, TGSI_OPCODE_ARL, dst, tmp); + emit(NULL, TGSI_OPCODE_ARL, dst, tmp); } /** @@ -795,15 +829,49 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op, } } +int +glsl_to_tgsi_visitor::add_constant(gl_register_file file, + gl_constant_value values[4], int size, int datatype, + GLuint *swizzle_out) +{ + if (file == PROGRAM_CONSTANT) { + return _mesa_add_typed_unnamed_constant(this->prog->Parameters, values, + size, datatype, swizzle_out); + } else { + int index = 0; + immediate_storage *entry; + assert(file == PROGRAM_IMMEDIATE); + + /* Search immediate storage to see if we already have an identical + * immediate that we can use instead of adding a duplicate entry. + */ + foreach_iter(exec_list_iterator, iter, this->immediates) { + entry = (immediate_storage *)iter.get(); + + if (entry->size == size && + entry->type == datatype && + !memcmp(entry->values, values, size * sizeof(gl_constant_value))) { + return index; + } + index++; + } + + /* Add this immediate to the list. */ + entry = new(mem_ctx) immediate_storage(values, size, datatype); + this->immediates.push_tail(entry); + this->num_immediates++; + return index; + } +} + struct st_src_reg glsl_to_tgsi_visitor::st_src_reg_for_float(float val) { - st_src_reg src(PROGRAM_CONSTANT, -1, GLSL_TYPE_FLOAT); + st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT); union gl_constant_value uval; uval.f = val; - src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters, - &uval, 1, GL_FLOAT, &src.swizzle); + src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle); return src; } @@ -811,14 +879,13 @@ glsl_to_tgsi_visitor::st_src_reg_for_float(float val) struct st_src_reg glsl_to_tgsi_visitor::st_src_reg_for_int(int val) { - st_src_reg src(PROGRAM_CONSTANT, -1, GLSL_TYPE_INT); + st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT); union gl_constant_value uval; - assert(glsl_version >= 130); + assert(native_integers); uval.i = val; - src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters, - &uval, 1, GL_INT, &src.swizzle); + src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle); return src; } @@ -826,7 +893,7 @@ glsl_to_tgsi_visitor::st_src_reg_for_int(int val) struct st_src_reg glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val) { - if (glsl_version >= 130) + if (native_integers) return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) : st_src_reg_for_int(val); else @@ -883,10 +950,8 @@ st_src_reg glsl_to_tgsi_visitor::get_temp(const glsl_type *type) { st_src_reg src; - int swizzle[4]; - int i; - src.type = glsl_version >= 130 ? type->base_type : GLSL_TYPE_FLOAT; + src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT; src.file = PROGRAM_TEMPORARY; src.index = next_temp; src.reladdr = NULL; @@ -895,12 +960,7 @@ glsl_to_tgsi_visitor::get_temp(const glsl_type *type) if (type->is_array() || type->is_record()) { src.swizzle = SWIZZLE_NOOP; } else { - for (i = 0; i < type->vector_elements; i++) - swizzle[i] = i; - for (; i < 4; i++) - swizzle[i] = type->vector_elements - 1; - src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], - swizzle[2], swizzle[3]); + src.swizzle = swizzle_for_size(type->vector_elements); } src.negate = 0; @@ -994,7 +1054,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) this->next_temp += type_size(ir->type); dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index, - glsl_version >= 130 ? ir->type->base_type : GLSL_TYPE_FLOAT)); + native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT)); } @@ -1010,7 +1070,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) } } else { st_src_reg src(PROGRAM_STATE_VAR, index, - glsl_version >= 130 ? ir->type->base_type : GLSL_TYPE_FLOAT); + native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT); src.swizzle = slots[i].swizzle; emit(ir, TGSI_OPCODE_MOV, dst, src); /* even a float takes up a whole vec4 reg in a struct/array. */ @@ -1129,6 +1189,7 @@ glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand) { int nonmul_operand = 1 - mul_operand; st_src_reg a, b, c; + st_dst_reg result_dst; ir_expression *expr = ir->operands[mul_operand]->as_expression(); if (!expr || expr->operation != ir_binop_mul) @@ -1142,7 +1203,9 @@ glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand) c = this->result; this->result = get_temp(ir->type); - emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, c); + result_dst = st_dst_reg(this->result); + result_dst.writemask = (1 << ir->type->vector_elements) - 1; + emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c); return true; } @@ -1163,10 +1226,32 @@ glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir) sat_src->accept(this); st_src_reg src = this->result; - this->result = get_temp(ir->type); - glsl_to_tgsi_instruction *inst; - inst = emit(ir, TGSI_OPCODE_MOV, st_dst_reg(this->result), src); - inst->saturate = true; + /* If we generated an expression instruction into a temporary in + * processing the saturate's operand, apply the saturate to that + * instruction. Otherwise, generate a MOV to do the saturate. + * + * Note that we have to be careful to only do this optimization if + * the instruction in question was what generated src->result. For + * example, ir_dereference_array might generate a MUL instruction + * to create the reladdr, and return us a src reg using that + * reladdr. That MUL result is not the value we're trying to + * saturate. + */ + ir_expression *sat_src_expr = sat_src->as_expression(); + if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul || + sat_src_expr->operation == ir_binop_add || + sat_src_expr->operation == ir_binop_dot)) { + glsl_to_tgsi_instruction *new_inst; + new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); + new_inst->saturate = true; + } else { + this->result = get_temp(ir->type); + st_dst_reg result_dst = st_dst_reg(this->result); + result_dst.writemask = (1 << ir->type->vector_elements) - 1; + glsl_to_tgsi_instruction *inst; + inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src); + inst->saturate = true; + } return true; } @@ -1251,7 +1336,17 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) switch (ir->operation) { case ir_unop_logic_not: - emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_type(result_dst.type, 0)); + if (result_dst.type != GLSL_TYPE_FLOAT) + emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_type(result_dst.type, 0)); + else { + /* Previously 'SEQ dst, src, 0.0' was used for this. However, many + * older GPUs implement SEQ using multiple instructions (i915 uses two + * SGE instructions and a MUL instruction). Since our logic values are + * 0.0 and 1.0, 1-x also implements !x. + */ + op[0].negate = ~op[0].negate; + emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0)); + } break; case ir_unop_neg: assert(result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_INT); @@ -1360,8 +1455,8 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) /* "==" operator producing a scalar boolean. */ if (ir->operands[0]->type->is_vector() || ir->operands[1]->type->is_vector()) { - st_src_reg temp = get_temp(glsl_version >= 130 ? - glsl_type::get_vec4_type(ir->operands[0]->type->base_type) : + st_src_reg temp = get_temp(native_integers ? + glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) : glsl_type::vec4_type); assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); @@ -1375,8 +1470,8 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) /* "!=" operator producing a scalar boolean. */ if (ir->operands[0]->type->is_vector() || ir->operands[1]->type->is_vector()) { - st_src_reg temp = get_temp(glsl_version >= 130 ? - glsl_type::get_vec4_type(ir->operands[0]->type->base_type) : + st_src_reg temp = get_temp(native_integers ? + glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) : glsl_type::vec4_type); assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); @@ -1387,22 +1482,69 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) } break; - case ir_unop_any: + case ir_unop_any: { assert(ir->operands[0]->type->is_vector()); - emit_dp(ir, result_dst, op[0], op[0], - ir->operands[0]->type->vector_elements); - emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); + + /* After the dot-product, the value will be an integer on the + * range [0,4]. Zero stays zero, and positive values become 1.0. + */ + glsl_to_tgsi_instruction *const dp = + emit_dp(ir, result_dst, op[0], op[0], + ir->operands[0]->type->vector_elements); + if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB && + result_dst.type == GLSL_TYPE_FLOAT) { + /* The clamping to [0,1] can be done for free in the fragment + * shader with a saturate. + */ + dp->saturate = true; + } else if (result_dst.type == GLSL_TYPE_FLOAT) { + /* Negating the result of the dot-product gives values on the range + * [-4, 0]. Zero stays zero, and negative values become 1.0. This + * is achieved using SLT. + */ + st_src_reg slt_src = result_src; + slt_src.negate = ~slt_src.negate; + emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); + } + else { + /* Use SNE 0 if integers are being used as boolean values. */ + emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0)); + } break; + } case ir_binop_logic_xor: emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); break; - case ir_binop_logic_or: - /* This could be a saturated add and skip the SNE. */ - emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); - emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); + case ir_binop_logic_or: { + /* After the addition, the value will be an integer on the + * range [0,2]. Zero stays zero, and positive values become 1.0. + */ + glsl_to_tgsi_instruction *add = + emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); + if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB && + result_dst.type == GLSL_TYPE_FLOAT) { + /* The clamping to [0,1] can be done for free in the fragment + * shader with a saturate if floats are being used as boolean values. + */ + add->saturate = true; + } else if (result_dst.type == GLSL_TYPE_FLOAT) { + /* Negating the result of the addition gives values on the range + * [-2, 0]. Zero stays zero, and negative values become 1.0. This + * is achieved using SLT. + */ + st_src_reg slt_src = result_src; + slt_src.negate = ~slt_src.negate; + emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); + } else { + /* Use an SNE on the result of the addition. Zero stays zero, + * 1 stays 1, and 2 becomes 1. + */ + emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0)); + } break; + } case ir_binop_logic_and: /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */ @@ -1430,16 +1572,19 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) break; case ir_unop_i2f: case ir_unop_b2f: - if (glsl_version >= 130) { + if (native_integers) { emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]); break; } + case ir_unop_i2u: + case ir_unop_u2i: + /* Converting between signed and unsigned integers is a no-op. */ case ir_unop_b2i: /* Booleans are stored as integers (or floats in GLSL 1.20 and lower). */ result_src = op[0]; break; case ir_unop_f2i: - if (glsl_version >= 130) + if (native_integers) emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]); else emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); @@ -1480,7 +1625,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) break; } case ir_unop_u2f: - if (glsl_version >= 130) { + if (native_integers) { emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]); break; } @@ -1632,7 +1777,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) } this->result = st_src_reg(entry->file, entry->index, var->type); - if (glsl_version <= 120) + if (!native_integers) this->result.type = GLSL_TYPE_FLOAT; } @@ -1651,9 +1796,8 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) if (index) { src.index += index->value.i[0] * element_size; } else { - st_src_reg array_base = this->result; /* Variable index array dereference. It eats the "vec4" of the - * base of the array and an index that offsets the Mesa register + * base of the array and an index that offsets the TGSI register * index. */ ir->array_index->accept(this); @@ -1669,6 +1813,18 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) this->result, st_src_reg_for_float(element_size)); } + /* If there was already a relative address register involved, add the + * new and the old together to get the new offset. + */ + if (src.reladdr != NULL) { + st_src_reg accum_reg = get_temp(glsl_type::float_type); + + emit(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg), + index_reg, *src.reladdr); + + index_reg = accum_reg; + } + src.reladdr = ralloc(mem_ctx, st_src_reg); memcpy(src.reladdr, &index_reg, sizeof(index_reg)); } @@ -1837,7 +1993,8 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) if (ir->write_mask == 0) { assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); l.writemask = WRITEMASK_XYZW; - } else if (ir->lhs->type->is_scalar()) { + } else if (ir->lhs->type->is_scalar() && + ir->lhs->variable_referenced()->mode == ir_var_out) { /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the * FINISHME: W component of fragment shader output zero, work correctly. */ @@ -1847,7 +2004,6 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) int first_enabled_chan = 0; int rhs_chan = 0; - assert(ir->lhs->type->is_vector()); l.writemask = ir->write_mask; for (int i = 0; i < 4; i++) { @@ -1860,7 +2016,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) /* Swizzle a small RHS vector into the channels being written. * * glsl ir treats write_mask as dictating how many channels are - * present on the RHS while Mesa IR treats write_mask as just + * present on the RHS while TGSI treats write_mask as just * showing which channels of the vec4 RHS get written. */ for (int i = 0; i < 4; i++) { @@ -1881,15 +2037,32 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) st_src_reg condition = this->result; for (i = 0; i < type_size(ir->lhs->type); i++) { + st_src_reg l_src = st_src_reg(l); + l_src.swizzle = swizzle_for_size(ir->lhs->type->vector_elements); + if (switch_order) { - emit(ir, TGSI_OPCODE_CMP, l, condition, st_src_reg(l), r); + emit(ir, TGSI_OPCODE_CMP, l, condition, l_src, r); } else { - emit(ir, TGSI_OPCODE_CMP, l, condition, r, st_src_reg(l)); + emit(ir, TGSI_OPCODE_CMP, l, condition, r, l_src); } l.index++; r.index++; } + } else if (ir->rhs->as_expression() && + this->instructions.get_tail() && + ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir && + type_size(ir->lhs->type) == 1 && + l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst.writemask) { + /* To avoid emitting an extra MOV when assigning an expression to a + * variable, emit the last instruction of the expression again, but + * replace the destination register with the target of the assignment. + * Dead code elimination will remove the original instruction. + */ + glsl_to_tgsi_instruction *inst, *new_inst; + inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); + new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]); + new_inst->saturate = inst->saturate; } else { for (i = 0; i < type_size(ir->lhs->type); i++) { emit(ir, TGSI_OPCODE_MOV, l, r); @@ -1908,9 +2081,11 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) gl_constant_value *values = (gl_constant_value *) stack_vals; GLenum gl_type = GL_NONE; unsigned int i; + static int in_array = 0; + gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE; /* Unfortunately, 4 floats is all we can get into - * _mesa_add_unnamed_constant. So, make a temp to store an + * _mesa_add_typed_unnamed_constant. So, make a temp to store an * aggregate constant and move each constant value into it. If we * get lucky, copy propagation will eliminate the extra moves. */ @@ -1944,6 +2119,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) int size = type_size(ir->type->fields.array); assert(size > 0); + in_array++; for (i = 0; i < ir->type->length; i++) { ir->array_elements[i]->accept(this); @@ -1956,6 +2132,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) } } this->result = temp_base; + in_array--; return; } @@ -1967,12 +2144,12 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) assert(ir->type->base_type == GLSL_TYPE_FLOAT); values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements]; - src = st_src_reg(PROGRAM_CONSTANT, -1, ir->type->base_type); - src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters, - values, - ir->type->vector_elements, - GL_FLOAT, - &src.swizzle); + src = st_src_reg(file, -1, ir->type->base_type); + src.index = add_constant(file, + values, + ir->type->vector_elements, + GL_FLOAT, + &src.swizzle); emit(ir, TGSI_OPCODE_MOV, mat_column, src); mat_column.index++; @@ -1982,7 +2159,6 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) return; } - src.file = PROGRAM_CONSTANT; switch (ir->type->base_type) { case GLSL_TYPE_FLOAT: gl_type = GL_FLOAT; @@ -1991,27 +2167,27 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) } break; case GLSL_TYPE_UINT: - gl_type = glsl_version >= 130 ? GL_UNSIGNED_INT : GL_FLOAT; + gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT; for (i = 0; i < ir->type->vector_elements; i++) { - if (glsl_version >= 130) + if (native_integers) values[i].u = ir->value.u[i]; else values[i].f = ir->value.u[i]; } break; case GLSL_TYPE_INT: - gl_type = glsl_version >= 130 ? GL_INT : GL_FLOAT; + gl_type = native_integers ? GL_INT : GL_FLOAT; for (i = 0; i < ir->type->vector_elements; i++) { - if (glsl_version >= 130) + if (native_integers) values[i].i = ir->value.i[i]; else values[i].f = ir->value.i[i]; } break; case GLSL_TYPE_BOOL: - gl_type = glsl_version >= 130 ? GL_BOOL : GL_FLOAT; + gl_type = native_integers ? GL_BOOL : GL_FLOAT; for (i = 0; i < ir->type->vector_elements; i++) { - if (glsl_version >= 130) + if (native_integers) values[i].b = ir->value.b[i]; else values[i].f = ir->value.b[i]; @@ -2021,10 +2197,12 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) assert(!"Non-float/uint/int/bool constant"); } - this->result = st_src_reg(PROGRAM_CONSTANT, -1, ir->type); - this->result.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters, - values, ir->type->vector_elements, gl_type, - &this->result.swizzle); + this->result = st_src_reg(file, -1, ir->type); + this->result.index = add_constant(file, + values, + ir->type->vector_elements, + gl_type, + &this->result.swizzle); } function_entry * @@ -2161,8 +2339,8 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) /* Put our coords in a temp. We'll need to modify them for shadow, * projection, or LOD, so the only case we'd use it as is is if - * we're doing plain old texturing. Mesa IR optimization should - * handle cleaning up our mess in that case. + * we're doing plain old texturing. The optimization passes on + * glsl_to_tgsi_visitor should handle cleaning up our mess in that case. */ coord = get_temp(glsl_type::vec4_type); coord_dst = st_dst_reg(coord); @@ -2357,7 +2535,7 @@ glsl_to_tgsi_visitor::visit(ir_discard *ir) void glsl_to_tgsi_visitor::visit(ir_if *ir) { - glsl_to_tgsi_instruction *cond_inst, *if_inst, *else_inst = NULL; + glsl_to_tgsi_instruction *cond_inst, *if_inst; glsl_to_tgsi_instruction *prev_inst; prev_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); @@ -2389,7 +2567,7 @@ glsl_to_tgsi_visitor::visit(ir_if *ir) visit_exec_list(&ir->then_instructions, this); if (!ir->else_instructions.is_empty()) { - else_inst = emit(ir->condition, TGSI_OPCODE_ELSE); + emit(ir->condition, TGSI_OPCODE_ELSE); visit_exec_list(&ir->else_instructions, this); } @@ -2401,6 +2579,7 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() result.file = PROGRAM_UNDEFINED; next_temp = 1; next_signature_id = 1; + num_immediates = 0; current_function = NULL; num_address_regs = 0; indirect_addr_temps = false; @@ -2728,11 +2907,11 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) GLint outputMap[VERT_RESULT_MAX]; GLint outputTypes[VERT_RESULT_MAX]; GLuint numVaryingReads = 0; - GLboolean usedTemps[MAX_PROGRAM_TEMPS]; + GLboolean usedTemps[MAX_TEMPS]; GLuint firstTemp = 0; _mesa_find_used_registers(prog, PROGRAM_TEMPORARY, - usedTemps, MAX_PROGRAM_TEMPS); + usedTemps, MAX_TEMPS); assert(type == PROGRAM_VARYING || type == PROGRAM_OUTPUT); assert(prog->Target == GL_VERTEX_PROGRAM_ARB || type != PROGRAM_VARYING); @@ -2752,7 +2931,7 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) if (outputMap[var] == -1) { numVaryingReads++; outputMap[var] = _mesa_find_free_register(usedTemps, - MAX_PROGRAM_TEMPS, + MAX_TEMPS, firstTemp); outputTypes[var] = inst->src[j].type; firstTemp = outputMap[var] + 1; @@ -2788,6 +2967,97 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) } } +/** + * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which + * are read from the given src in this instruction + */ +static int +get_src_arg_mask(st_dst_reg dst, st_src_reg src) +{ + int read_mask = 0, comp; + + /* Now, given the src swizzle and the written channels, find which + * components are actually read + */ + for (comp = 0; comp < 4; ++comp) { + const unsigned coord = GET_SWZ(src.swizzle, comp); + ASSERT(coord < 4); + if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W) + read_mask |= 1 << coord; + } + + return read_mask; +} + +/** + * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP + * instruction is the first instruction to write to register T0. There are + * several lowering passes done in GLSL IR (e.g. branches and + * relative addressing) that create a large number of conditional assignments + * that ir_to_mesa converts to CMP instructions like the one mentioned above. + * + * Here is why this conversion is safe: + * CMP T0, T1 T2 T0 can be expanded to: + * if (T1 < 0.0) + * MOV T0, T2; + * else + * MOV T0, T0; + * + * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same + * as the original program. If (T1 < 0.0) evaluates to false, executing + * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized. + * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2 + * because any instruction that was going to read from T0 after this was going + * to read a garbage value anyway. + */ +void +glsl_to_tgsi_visitor::simplify_cmp(void) +{ + unsigned tempWrites[MAX_TEMPS]; + unsigned outputWrites[MAX_PROGRAM_OUTPUTS]; + + memset(tempWrites, 0, sizeof(tempWrites)); + memset(outputWrites, 0, sizeof(outputWrites)); + + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + unsigned prevWriteMask = 0; + + /* Give up if we encounter relative addressing or flow control. */ + if (inst->dst.reladdr || + tgsi_get_opcode_info(inst->op)->is_branch || + inst->op == TGSI_OPCODE_BGNSUB || + inst->op == TGSI_OPCODE_CONT || + inst->op == TGSI_OPCODE_END || + inst->op == TGSI_OPCODE_ENDSUB || + inst->op == TGSI_OPCODE_RET) { + return; + } + + if (inst->dst.file == PROGRAM_OUTPUT) { + assert(inst->dst.index < MAX_PROGRAM_OUTPUTS); + prevWriteMask = outputWrites[inst->dst.index]; + outputWrites[inst->dst.index] |= inst->dst.writemask; + } else if (inst->dst.file == PROGRAM_TEMPORARY) { + assert(inst->dst.index < MAX_TEMPS); + prevWriteMask = tempWrites[inst->dst.index]; + tempWrites[inst->dst.index] |= inst->dst.writemask; + } + + /* For a CMP to be considered a conditional write, the destination + * register and source register two must be the same. */ + if (inst->op == TGSI_OPCODE_CMP + && !(inst->dst.writemask & prevWriteMask) + && inst->src[2].file == inst->dst.file + && inst->src[2].index == inst->dst.index + && inst->dst.writemask == get_src_arg_mask(inst->dst, inst->src[2])) { + + inst->op = TGSI_OPCODE_MOV; + inst->src[0] = inst->src[1]; + } + } +} + /* Replaces all references to a temporary register index with another index. */ void glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index) @@ -3162,6 +3432,151 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void) } } +/* + * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead + * code elimination. This is less primitive than eliminate_dead_code(), as it + * is per-channel and can detect consecutive writes without a read between them + * as dead code. However, there is some dead code that can be eliminated by + * eliminate_dead_code() but not this function - for example, this function + * cannot eliminate an instruction writing to a register that is never read and + * is the only instruction writing to that register. + * + * The glsl_to_tgsi_visitor lazily produces code assuming that this pass + * will occur. + */ +int +glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) +{ + glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx, + glsl_to_tgsi_instruction *, + this->next_temp * 4); + int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); + int level = 0; + int removed = 0; + + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + + assert(inst->dst.file != PROGRAM_TEMPORARY + || inst->dst.index < this->next_temp); + + switch (inst->op) { + case TGSI_OPCODE_BGNLOOP: + case TGSI_OPCODE_ENDLOOP: + /* End of a basic block, clear the write array entirely. + * FIXME: This keeps us from killing dead code when the writes are + * on either side of a loop, even when the register isn't touched + * inside the loop. + */ + memset(writes, 0, sizeof(*writes) * this->next_temp * 4); + break; + + case TGSI_OPCODE_ENDIF: + --level; + break; + + case TGSI_OPCODE_ELSE: + /* Clear all channels written inside the preceding if block from the + * write array, but leave those that were not touched. + * + * FIXME: This destroys opportunities to remove dead code inside of + * IF blocks that are followed by an ELSE block. + */ + for (int r = 0; r < this->next_temp; r++) { + for (int c = 0; c < 4; c++) { + if (!writes[4 * r + c]) + continue; + + if (write_level[4 * r + c] >= level) + writes[4 * r + c] = NULL; + } + } + break; + + case TGSI_OPCODE_IF: + ++level; + /* fallthrough to default case to mark the condition as read */ + + default: + /* Continuing the block, clear any channels from the write array that + * are read by this instruction. + */ + for (unsigned i = 0; i < Elements(inst->src); i++) { + if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){ + /* Any temporary might be read, so no dead code elimination + * across this instruction. + */ + memset(writes, 0, sizeof(*writes) * this->next_temp * 4); + } else if (inst->src[i].file == PROGRAM_TEMPORARY) { + /* Clear where it's used as src. */ + int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0); + src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1); + src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2); + src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3); + + for (int c = 0; c < 4; c++) { + if (src_chans & (1 << c)) { + writes[4 * inst->src[i].index + c] = NULL; + } + } + } + } + break; + } + + /* If this instruction writes to a temporary, add it to the write array. + * If there is already an instruction in the write array for one or more + * of the channels, flag that channel write as dead. + */ + if (inst->dst.file == PROGRAM_TEMPORARY && + !inst->dst.reladdr && + !inst->saturate) { + for (int c = 0; c < 4; c++) { + if (inst->dst.writemask & (1 << c)) { + if (writes[4 * inst->dst.index + c]) { + if (write_level[4 * inst->dst.index + c] < level) + continue; + else + writes[4 * inst->dst.index + c]->dead_mask |= (1 << c); + } + writes[4 * inst->dst.index + c] = inst; + write_level[4 * inst->dst.index + c] = level; + } + } + } + } + + /* Anything still in the write array at this point is dead code. */ + for (int r = 0; r < this->next_temp; r++) { + for (int c = 0; c < 4; c++) { + glsl_to_tgsi_instruction *inst = writes[4 * r + c]; + if (inst) + inst->dead_mask |= (1 << c); + } + } + + /* Now actually remove the instructions that are completely dead and update + * the writemask of other instructions with dead channels. + */ + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + + if (!inst->dead_mask || !inst->dst.writemask) + continue; + else if (inst->dead_mask == inst->dst.writemask) { + iter.remove(); + delete inst; + removed++; + } else + inst->dst.writemask &= ~(inst->dead_mask); + } + + ralloc_free(write_level); + ralloc_free(writes); + + return removed; +} + /* Merges temporary registers together where possible to reduce the number of * registers needed to run a program. * @@ -3233,6 +3648,209 @@ glsl_to_tgsi_visitor::renumber_registers(void) this->next_temp = new_index; } +/** + * Returns a fragment program which implements the current pixel transfer ops. + * Based on get_pixel_transfer_program in st_atom_pixeltransfer.c. + */ +extern "C" void +get_pixel_transfer_visitor(struct st_fragment_program *fp, + glsl_to_tgsi_visitor *original, + int scale_and_bias, int pixel_maps) +{ + glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor(); + struct st_context *st = st_context(original->ctx); + struct gl_program *prog = &fp->Base.Base; + struct gl_program_parameter_list *params = _mesa_new_parameter_list(); + st_src_reg coord, src0; + st_dst_reg dst0; + glsl_to_tgsi_instruction *inst; + + /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */ + v->ctx = original->ctx; + v->prog = prog; + v->glsl_version = original->glsl_version; + v->native_integers = original->native_integers; + v->options = original->options; + v->next_temp = original->next_temp; + v->num_address_regs = original->num_address_regs; + v->samplers_used = prog->SamplersUsed = original->samplers_used; + v->indirect_addr_temps = original->indirect_addr_temps; + v->indirect_addr_consts = original->indirect_addr_consts; + memcpy(&v->immediates, &original->immediates, sizeof(v->immediates)); + + /* + * Get initial pixel color from the texture. + * TEX colorTemp, fragment.texcoord[0], texture[0], 2D; + */ + coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type); + src0 = v->get_temp(glsl_type::vec4_type); + dst0 = st_dst_reg(src0); + inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord); + inst->sampler = 0; + inst->tex_target = TEXTURE_2D_INDEX; + + prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0); + prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */ + v->samplers_used |= (1 << 0); + + if (scale_and_bias) { + static const gl_state_index scale_state[STATE_LENGTH] = + { STATE_INTERNAL, STATE_PT_SCALE, + (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 }; + static const gl_state_index bias_state[STATE_LENGTH] = + { STATE_INTERNAL, STATE_PT_BIAS, + (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 }; + GLint scale_p, bias_p; + st_src_reg scale, bias; + + scale_p = _mesa_add_state_reference(params, scale_state); + bias_p = _mesa_add_state_reference(params, bias_state); + + /* MAD colorTemp, colorTemp, scale, bias; */ + scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT); + bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT); + inst = v->emit(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias); + } + + if (pixel_maps) { + st_src_reg temp = v->get_temp(glsl_type::vec4_type); + st_dst_reg temp_dst = st_dst_reg(temp); + + assert(st->pixel_xfer.pixelmap_texture); + + /* With a little effort, we can do four pixel map look-ups with + * two TEX instructions: + */ + + /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */ + temp_dst.writemask = WRITEMASK_XY; /* write R,G */ + inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0); + inst->sampler = 1; + inst->tex_target = TEXTURE_2D_INDEX; + + /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */ + src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W); + temp_dst.writemask = WRITEMASK_ZW; /* write B,A */ + inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0); + inst->sampler = 1; + inst->tex_target = TEXTURE_2D_INDEX; + + prog->SamplersUsed |= (1 << 1); /* mark sampler 1 as used */ + v->samplers_used |= (1 << 1); + + /* MOV colorTemp, temp; */ + inst = v->emit(NULL, TGSI_OPCODE_MOV, dst0, temp); + } + + /* Now copy the instructions from the original glsl_to_tgsi_visitor into the + * new visitor. */ + foreach_iter(exec_list_iterator, iter, original->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + st_src_reg src_regs[3]; + + if (inst->dst.file == PROGRAM_OUTPUT) + prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index); + + for (int i=0; i<3; i++) { + src_regs[i] = inst->src[i]; + if (src_regs[i].file == PROGRAM_INPUT && + src_regs[i].index == FRAG_ATTRIB_COL0) + { + src_regs[i].file = PROGRAM_TEMPORARY; + src_regs[i].index = src0.index; + } + else if (src_regs[i].file == PROGRAM_INPUT) + prog->InputsRead |= (1 << src_regs[i].index); + } + + v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); + } + + /* Make modifications to fragment program info. */ + prog->Parameters = _mesa_combine_parameter_lists(params, + original->prog->Parameters); + prog->Attributes = _mesa_clone_parameter_list(original->prog->Attributes); + prog->Varying = _mesa_clone_parameter_list(original->prog->Varying); + _mesa_free_parameter_list(params); + count_resources(v, prog); + fp->glsl_to_tgsi = v; +} + +/** + * Make fragment program for glBitmap: + * Sample the texture and kill the fragment if the bit is 0. + * This program will be combined with the user's fragment program. + * + * Based on make_bitmap_fragment_program in st_cb_bitmap.c. + */ +extern "C" void +get_bitmap_visitor(struct st_fragment_program *fp, + glsl_to_tgsi_visitor *original, int samplerIndex) +{ + glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor(); + struct st_context *st = st_context(original->ctx); + struct gl_program *prog = &fp->Base.Base; + st_src_reg coord, src0; + st_dst_reg dst0; + glsl_to_tgsi_instruction *inst; + + /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */ + v->ctx = original->ctx; + v->prog = prog; + v->glsl_version = original->glsl_version; + v->native_integers = original->native_integers; + v->options = original->options; + v->next_temp = original->next_temp; + v->num_address_regs = original->num_address_regs; + v->samplers_used = prog->SamplersUsed = original->samplers_used; + v->indirect_addr_temps = original->indirect_addr_temps; + v->indirect_addr_consts = original->indirect_addr_consts; + memcpy(&v->immediates, &original->immediates, sizeof(v->immediates)); + + /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */ + coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type); + src0 = v->get_temp(glsl_type::vec4_type); + dst0 = st_dst_reg(src0); + inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord); + inst->sampler = samplerIndex; + inst->tex_target = TEXTURE_2D_INDEX; + + prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0); + prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */ + v->samplers_used |= (1 << samplerIndex); + + /* KIL if -tmp0 < 0 # texel=0 -> keep / texel=0 -> discard */ + src0.negate = NEGATE_XYZW; + if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM) + src0.swizzle = SWIZZLE_XXXX; + inst = v->emit(NULL, TGSI_OPCODE_KIL, undef_dst, src0); + + /* Now copy the instructions from the original glsl_to_tgsi_visitor into the + * new visitor. */ + foreach_iter(exec_list_iterator, iter, original->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + st_src_reg src_regs[3]; + + if (inst->dst.file == PROGRAM_OUTPUT) + prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index); + + for (int i=0; i<3; i++) { + src_regs[i] = inst->src[i]; + if (src_regs[i].file == PROGRAM_INPUT) + prog->InputsRead |= (1 << src_regs[i].index); + } + + v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); + } + + /* Make modifications to fragment program info. */ + prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters); + prog->Attributes = _mesa_clone_parameter_list(original->prog->Attributes); + prog->Varying = _mesa_clone_parameter_list(original->prog->Varying); + count_resources(v, prog); + fp->glsl_to_tgsi = v; +} + /* ------------------------- TGSI conversion stuff -------------------------- */ struct label { unsigned branch_target; @@ -3245,8 +3863,9 @@ struct label { struct st_translate { struct ureg_program *ureg; - struct ureg_dst temps[MAX_PROGRAM_TEMPS]; + struct ureg_dst temps[MAX_TEMPS]; struct ureg_src *constants; + struct ureg_src *immediates; struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS]; struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; struct ureg_dst address[1]; @@ -3295,15 +3914,14 @@ static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = { * of labels built here and patch the TGSI code with the actual * location of each label. */ -static unsigned *get_label( struct st_translate *t, - unsigned branch_target ) +static unsigned *get_label(struct st_translate *t, unsigned branch_target) { unsigned i; if (t->labels_count + 1 >= t->labels_size) { t->labels_size = 1 << (util_logbase2(t->labels_size) + 1); t->labels = (struct label *)realloc(t->labels, - t->labels_size * sizeof t->labels[0]); + t->labels_size * sizeof(struct label)); if (t->labels == NULL) { static unsigned dummy; t->error = TRUE; @@ -3317,17 +3935,16 @@ static unsigned *get_label( struct st_translate *t, } /** - * Called prior to emitting the TGSI code for each Mesa instruction. + * Called prior to emitting the TGSI code for each instruction. * Allocate additional space for instructions if needed. - * Update the insn[] array so the next Mesa instruction points to + * Update the insn[] array so the next glsl_to_tgsi_instruction points to * the next TGSI instruction. */ -static void set_insn_start( struct st_translate *t, - unsigned start ) +static void set_insn_start(struct st_translate *t, unsigned start) { if (t->insn_count + 1 >= t->insn_size) { t->insn_size = 1 << (util_logbase2(t->insn_size) + 1); - t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof t->insn[0]); + t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof(t->insn[0])); if (t->insn == NULL) { t->error = TRUE; return; @@ -3338,20 +3955,45 @@ static void set_insn_start( struct st_translate *t, } /** - * Map a Mesa dst register to a TGSI ureg_dst register. + * Map a glsl_to_tgsi constant/immediate to a TGSI immediate. + */ +static struct ureg_src +emit_immediate(struct st_translate *t, + gl_constant_value values[4], + int type, int size) +{ + struct ureg_program *ureg = t->ureg; + + switch(type) + { + case GL_FLOAT: + return ureg_DECL_immediate(ureg, &values[0].f, size); + case GL_INT: + return ureg_DECL_immediate_int(ureg, &values[0].i, size); + case GL_UNSIGNED_INT: + case GL_BOOL: + return ureg_DECL_immediate_uint(ureg, &values[0].u, size); + default: + assert(!"should not get here - type must be float, int, uint, or bool"); + return ureg_src_undef(); + } +} + +/** + * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register. */ static struct ureg_dst -dst_register( struct st_translate *t, - gl_register_file file, - GLuint index ) +dst_register(struct st_translate *t, + gl_register_file file, + GLuint index) { - switch( file ) { + switch(file) { case PROGRAM_UNDEFINED: return ureg_dst_undef(); case PROGRAM_TEMPORARY: if (ureg_dst_is_undef(t->temps[index])) - t->temps[index] = ureg_DECL_temporary( t->ureg ); + t->temps[index] = ureg_DECL_temporary(t->ureg); return t->temps[index]; @@ -3374,20 +4016,20 @@ dst_register( struct st_translate *t, return t->address[index]; default: - debug_assert( 0 ); + assert(!"unknown dst register file"); return ureg_dst_undef(); } } /** - * Map a Mesa src register to a TGSI ureg_src register. + * Map a glsl_to_tgsi src register to a TGSI ureg_src register. */ static struct ureg_src -src_register( struct st_translate *t, - gl_register_file file, - GLuint index ) +src_register(struct st_translate *t, + gl_register_file file, + GLuint index) { - switch( file ) { + switch(file) { case PROGRAM_UNDEFINED: return ureg_src_undef(); @@ -3395,7 +4037,7 @@ src_register( struct st_translate *t, assert(index >= 0); assert(index < Elements(t->temps)); if (ureg_dst_is_undef(t->temps[index])) - t->temps[index] = ureg_DECL_temporary( t->ureg ); + t->temps[index] = ureg_DECL_temporary(t->ureg); return ureg_src(t->temps[index]); case PROGRAM_NAMED_PARAM: @@ -3407,10 +4049,13 @@ src_register( struct st_translate *t, case PROGRAM_STATE_VAR: case PROGRAM_CONSTANT: /* ie, immediate */ if (index < 0) - return ureg_DECL_constant( t->ureg, 0 ); + return ureg_DECL_constant(t->ureg, 0); else return t->constants[index]; + case PROGRAM_IMMEDIATE: + return t->immediates[index]; + case PROGRAM_INPUT: assert(t->inputMapping[index] < Elements(t->inputs)); return t->inputs[t->inputMapping[index]]; @@ -3427,7 +4072,7 @@ src_register( struct st_translate *t, return t->systemValues[index]; default: - debug_assert( 0 ); + assert(!"unknown src register file"); return ureg_src_undef(); } } @@ -3436,22 +4081,21 @@ src_register( struct st_translate *t, * Create a TGSI ureg_dst register from an st_dst_reg. */ static struct ureg_dst -translate_dst( struct st_translate *t, - const st_dst_reg *dst_reg, - boolean saturate ) +translate_dst(struct st_translate *t, + const st_dst_reg *dst_reg, + bool saturate) { - struct ureg_dst dst = dst_register( t, - dst_reg->file, - dst_reg->index ); + struct ureg_dst dst = dst_register(t, + dst_reg->file, + dst_reg->index); - dst = ureg_writemask( dst, - dst_reg->writemask ); + dst = ureg_writemask(dst, dst_reg->writemask); if (saturate) - dst = ureg_saturate( dst ); + dst = ureg_saturate(dst); if (dst_reg->reladdr != NULL) - dst = ureg_dst_indirect( dst, ureg_src(t->address[0]) ); + dst = ureg_dst_indirect(dst, ureg_src(t->address[0])); return dst; } @@ -3460,16 +4104,15 @@ translate_dst( struct st_translate *t, * Create a TGSI ureg_src register from an st_src_reg. */ static struct ureg_src -translate_src( struct st_translate *t, - const st_src_reg *src_reg ) +translate_src(struct st_translate *t, const st_src_reg *src_reg) { - struct ureg_src src = src_register( t, src_reg->file, src_reg->index ); + struct ureg_src src = src_register(t, src_reg->file, src_reg->index); - src = ureg_swizzle( src, - GET_SWZ( src_reg->swizzle, 0 ) & 0x3, - GET_SWZ( src_reg->swizzle, 1 ) & 0x3, - GET_SWZ( src_reg->swizzle, 2 ) & 0x3, - GET_SWZ( src_reg->swizzle, 3 ) & 0x3); + src = ureg_swizzle(src, + GET_SWZ(src_reg->swizzle, 0) & 0x3, + GET_SWZ(src_reg->swizzle, 1) & 0x3, + GET_SWZ(src_reg->swizzle, 2) & 0x3, + GET_SWZ(src_reg->swizzle, 3) & 0x3); if ((src_reg->negate & 0xf) == NEGATE_XYZW) src = ureg_negate(src); @@ -3501,8 +4144,8 @@ translate_src( struct st_translate *t, } static void -compile_tgsi_instruction(struct st_translate *t, - const struct glsl_to_tgsi_instruction *inst) +compile_tgsi_instruction(struct st_translate *t, + const struct glsl_to_tgsi_instruction *inst) { struct ureg_program *ureg = t->ureg; GLuint i; @@ -3511,29 +4154,29 @@ compile_tgsi_instruction(struct st_translate *t, unsigned num_dst; unsigned num_src; - num_dst = num_inst_dst_regs( inst->op ); - num_src = num_inst_src_regs( inst->op ); + num_dst = num_inst_dst_regs(inst->op); + num_src = num_inst_src_regs(inst->op); if (num_dst) - dst[0] = translate_dst( t, - &inst->dst, - inst->saturate); + dst[0] = translate_dst(t, + &inst->dst, + inst->saturate); for (i = 0; i < num_src; i++) - src[i] = translate_src( t, &inst->src[i] ); + src[i] = translate_src(t, &inst->src[i]); - switch( inst->op ) { + switch(inst->op) { case TGSI_OPCODE_BGNLOOP: case TGSI_OPCODE_CAL: case TGSI_OPCODE_ELSE: case TGSI_OPCODE_ENDLOOP: case TGSI_OPCODE_IF: - debug_assert(num_dst == 0); - ureg_label_insn( ureg, - inst->op, - src, num_src, - get_label( t, - inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0 )); + assert(num_dst == 0); + ureg_label_insn(ureg, + inst->op, + src, num_src, + get_label(t, + inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0)); return; case TGSI_OPCODE_TEX: @@ -3542,35 +4185,23 @@ compile_tgsi_instruction(struct st_translate *t, case TGSI_OPCODE_TXL: case TGSI_OPCODE_TXP: src[num_src++] = t->samplers[inst->sampler]; - ureg_tex_insn( ureg, - inst->op, - dst, num_dst, - translate_texture_target( inst->tex_target, - inst->tex_shadow ), - src, num_src ); + ureg_tex_insn(ureg, + inst->op, + dst, num_dst, + translate_texture_target(inst->tex_target, inst->tex_shadow), + src, num_src); return; case TGSI_OPCODE_SCS: - dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY ); - ureg_insn( ureg, - inst->op, - dst, num_dst, - src, num_src ); - break; - - case TGSI_OPCODE_XPD: - dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ ); - ureg_insn( ureg, - inst->op, - dst, num_dst, - src, num_src ); + dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY); + ureg_insn(ureg, inst->op, dst, num_dst, src, num_src); break; default: - ureg_insn( ureg, - inst->op, - dst, num_dst, - src, num_src ); + ureg_insn(ureg, + inst->op, + dst, num_dst, + src, num_src); break; } } @@ -3580,9 +4211,9 @@ compile_tgsi_instruction(struct st_translate *t, * Basically, add (adjX, adjY) to the fragment position. */ static void -emit_adjusted_wpos( struct st_translate *t, - const struct gl_program *program, - GLfloat adjX, GLfloat adjY) +emit_adjusted_wpos(struct st_translate *t, + const struct gl_program *program, + float adjX, float adjY) { struct ureg_program *ureg = t->ureg; struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg); @@ -3604,9 +4235,9 @@ emit_adjusted_wpos( struct st_translate *t, * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM). */ static void -emit_wpos_inversion( struct st_translate *t, - const struct gl_program *program, - boolean invert) +emit_wpos_inversion(struct st_translate *t, + const struct gl_program *program, + bool invert) { struct ureg_program *ureg = t->ureg; @@ -3625,7 +4256,7 @@ emit_wpos_inversion( struct st_translate *t, unsigned wposTransConst = _mesa_add_state_reference(program->Parameters, wposTransformState); - struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst ); + struct ureg_src wpostrans = ureg_DECL_constant(ureg, wposTransConst); struct ureg_dst wpos_temp; struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; @@ -3634,26 +4265,26 @@ emit_wpos_inversion( struct st_translate *t, if (wpos_input.File == TGSI_FILE_TEMPORARY) wpos_temp = ureg_dst(wpos_input); else { - wpos_temp = ureg_DECL_temporary( ureg ); - ureg_MOV( ureg, wpos_temp, wpos_input ); + wpos_temp = ureg_DECL_temporary(ureg); + ureg_MOV(ureg, wpos_temp, wpos_input); } if (invert) { /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy */ - ureg_MAD( ureg, - ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), - wpos_input, - ureg_scalar(wpostrans, 0), - ureg_scalar(wpostrans, 1)); + ureg_MAD(ureg, + ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y), + wpos_input, + ureg_scalar(wpostrans, 0), + ureg_scalar(wpostrans, 1)); } else { /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww */ - ureg_MAD( ureg, - ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), - wpos_input, - ureg_scalar(wpostrans, 2), - ureg_scalar(wpostrans, 3)); + ureg_MAD(ureg, + ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y), + wpos_input, + ureg_scalar(wpostrans, 2), + ureg_scalar(wpostrans, 3)); } /* Use wpos_temp as position input from here on: @@ -3797,7 +4428,7 @@ st_translate_program( const GLuint outputMapping[], const ubyte outputSemanticName[], const ubyte outputSemanticIndex[], - boolean passthrough_edgeflags ) + boolean passthrough_edgeflags) { struct st_translate translate, *t; unsigned i; @@ -3843,27 +4474,24 @@ st_translate_program( for (i = 0; i < numOutputs; i++) { switch (outputSemanticName[i]) { case TGSI_SEMANTIC_POSITION: - t->outputs[i] = ureg_DECL_output( ureg, - TGSI_SEMANTIC_POSITION, /* Z / Depth */ - outputSemanticIndex[i] ); - - t->outputs[i] = ureg_writemask( t->outputs[i], - TGSI_WRITEMASK_Z ); + t->outputs[i] = ureg_DECL_output(ureg, + TGSI_SEMANTIC_POSITION, /* Z/Depth */ + outputSemanticIndex[i]); + t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z); break; case TGSI_SEMANTIC_STENCIL: - t->outputs[i] = ureg_DECL_output( ureg, - TGSI_SEMANTIC_STENCIL, /* Stencil */ - outputSemanticIndex[i] ); - t->outputs[i] = ureg_writemask( t->outputs[i], - TGSI_WRITEMASK_Y ); + t->outputs[i] = ureg_DECL_output(ureg, + TGSI_SEMANTIC_STENCIL, /* Stencil */ + outputSemanticIndex[i]); + t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y); break; case TGSI_SEMANTIC_COLOR: - t->outputs[i] = ureg_DECL_output( ureg, - TGSI_SEMANTIC_COLOR, - outputSemanticIndex[i] ); + t->outputs[i] = ureg_DECL_output(ureg, + TGSI_SEMANTIC_COLOR, + outputSemanticIndex[i]); break; default: - debug_assert(0); + assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR"); return PIPE_ERROR_BAD_INPUT; } } @@ -3877,9 +4505,9 @@ st_translate_program( } for (i = 0; i < numOutputs; i++) { - t->outputs[i] = ureg_DECL_output( ureg, - outputSemanticName[i], - outputSemanticIndex[i] ); + t->outputs[i] = ureg_DECL_output(ureg, + outputSemanticName[i], + outputSemanticIndex[i]); } } else { @@ -3890,9 +4518,9 @@ st_translate_program( } for (i = 0; i < numOutputs; i++) { - t->outputs[i] = ureg_DECL_output( ureg, - outputSemanticName[i], - outputSemanticIndex[i] ); + t->outputs[i] = ureg_DECL_output(ureg, + outputSemanticName[i], + outputSemanticIndex[i]); if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && proginfo->Id) { /* Writing to the point size result register requires special * handling to implement clamping. @@ -3906,8 +4534,8 @@ st_translate_program( unsigned pointSizeClampConst = _mesa_add_state_reference(proginfo->Parameters, pointSizeClampState); - struct ureg_dst psizregtemp = ureg_DECL_temporary( ureg ); - t->pointSizeConst = ureg_DECL_constant( ureg, pointSizeClampConst ); + struct ureg_dst psizregtemp = ureg_DECL_temporary(ureg); + t->pointSizeConst = ureg_DECL_constant(ureg, pointSizeClampConst); t->pointSizeResult = t->outputs[i]; t->pointSizeOutIndex = i; t->outputs[i] = psizregtemp; @@ -3920,8 +4548,8 @@ st_translate_program( /* Declare address register. */ if (program->num_address_regs > 0) { - debug_assert( program->num_address_regs == 1 ); - t->address[0] = ureg_DECL_address( ureg ); + assert(program->num_address_regs == 1); + t->address[0] = ureg_DECL_address(ureg); } /* Declare misc input registers @@ -3946,16 +4574,15 @@ st_translate_program( */ for (i = 0; i < (unsigned)program->next_temp; i++) { /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */ - t->temps[i] = ureg_DECL_temporary( t->ureg ); + t->temps[i] = ureg_DECL_temporary(t->ureg); } } - /* Emit constants and immediates. Mesa uses a single index space - * for these, so we put all the translated regs in t->constants. - * XXX: this entire if block depends on proginfo->Parameters from Mesa IR + /* Emit constants and uniforms. TGSI uses a single index space for these, + * so we put all the translated regs in t->constants. */ if (proginfo->Parameters) { - t->constants = (struct ureg_src *)CALLOC( proginfo->Parameters->NumParameters * sizeof t->constants[0] ); + t->constants = (struct ureg_src *)CALLOC(proginfo->Parameters->NumParameters * sizeof(t->constants[0])); if (t->constants == NULL) { ret = PIPE_ERROR_OUT_OF_MEMORY; goto out; @@ -3968,65 +4595,55 @@ st_translate_program( case PROGRAM_STATE_VAR: case PROGRAM_NAMED_PARAM: case PROGRAM_UNIFORM: - t->constants[i] = ureg_DECL_constant( ureg, i ); + t->constants[i] = ureg_DECL_constant(ureg, i); break; - /* Emit immediates only when there's no indirect addressing of - * the const buffer. - * FIXME: Be smarter and recognize param arrays: - * indirect addressing is only valid within the referenced - * array. - */ + /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect + * addressing of the const buffer. + * FIXME: Be smarter and recognize param arrays: + * indirect addressing is only valid within the referenced + * array. + */ case PROGRAM_CONSTANT: if (program->indirect_addr_consts) - t->constants[i] = ureg_DECL_constant( ureg, i ); + t->constants[i] = ureg_DECL_constant(ureg, i); else - switch(proginfo->Parameters->Parameters[i].DataType) - { - case GL_FLOAT: - case GL_FLOAT_VEC2: - case GL_FLOAT_VEC3: - case GL_FLOAT_VEC4: - t->constants[i] = ureg_DECL_immediate(ureg, (float *)proginfo->Parameters->ParameterValues[i], 4); - break; - case GL_INT: - case GL_INT_VEC2: - case GL_INT_VEC3: - case GL_INT_VEC4: - t->constants[i] = ureg_DECL_immediate_int(ureg, (int *)proginfo->Parameters->ParameterValues[i], 4); - break; - case GL_UNSIGNED_INT: - case GL_UNSIGNED_INT_VEC2: - case GL_UNSIGNED_INT_VEC3: - case GL_UNSIGNED_INT_VEC4: - case GL_BOOL: - case GL_BOOL_VEC2: - case GL_BOOL_VEC3: - case GL_BOOL_VEC4: - t->constants[i] = ureg_DECL_immediate_uint(ureg, (unsigned *)proginfo->Parameters->ParameterValues[i], 4); - break; - default: - assert(!"should not get here"); - } + t->constants[i] = emit_immediate(t, + proginfo->Parameters->ParameterValues[i], + proginfo->Parameters->Parameters[i].DataType, + 4); break; default: break; } } } + + /* Emit immediate values. + */ + t->immediates = (struct ureg_src *)CALLOC(program->num_immediates * sizeof(struct ureg_src)); + if (t->immediates == NULL) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto out; + } + i = 0; + foreach_iter(exec_list_iterator, iter, program->immediates) { + immediate_storage *imm = (immediate_storage *)iter.get(); + t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size); + } /* texture samplers */ for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { if (program->samplers_used & (1 << i)) { - t->samplers[i] = ureg_DECL_sampler( ureg, i ); + t->samplers[i] = ureg_DECL_sampler(ureg, i); } } /* Emit each instruction in turn: */ foreach_iter(exec_list_iterator, iter, program->instructions) { - set_insn_start( t, ureg_get_instruction_number( ureg )); - compile_tgsi_instruction( t, (glsl_to_tgsi_instruction *)iter.get() ); + set_insn_start(t, ureg_get_instruction_number(ureg)); + compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get()); if (t->prevInstWrotePointSize && proginfo->Id) { /* The previous instruction wrote to the (fake) vertex point size @@ -4036,14 +4653,14 @@ st_translate_program( * Note that we can't do this easily at the end of program due to * possible early return. */ - set_insn_start( t, ureg_get_instruction_number( ureg )); - ureg_MAX( t->ureg, - ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X), - ureg_src(t->outputs[t->pointSizeOutIndex]), - ureg_swizzle(t->pointSizeConst, 1,1,1,1)); - ureg_MIN( t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X), - ureg_src(t->outputs[t->pointSizeOutIndex]), - ureg_swizzle(t->pointSizeConst, 2,2,2,2)); + set_insn_start(t, ureg_get_instruction_number(ureg)); + ureg_MAX(t->ureg, + ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X), + ureg_src(t->outputs[t->pointSizeOutIndex]), + ureg_swizzle(t->pointSizeConst, 1,1,1,1)); + ureg_MIN(t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X), + ureg_src(t->outputs[t->pointSizeOutIndex]), + ureg_swizzle(t->pointSizeConst, 2,2,2,2)); } t->prevInstWrotePointSize = GL_FALSE; } @@ -4051,15 +4668,15 @@ st_translate_program( /* Fix up all emitted labels: */ for (i = 0; i < t->labels_count; i++) { - ureg_fixup_label( ureg, - t->labels[i].token, - t->insn[t->labels[i].branch_target] ); + ureg_fixup_label(ureg, t->labels[i].token, + t->insn[t->labels[i].branch_target]); } out: FREE(t->insn); FREE(t->labels); FREE(t->constants); + FREE(t->immediates); if (t->error) { debug_printf("%s: translate error flag set\n", __FUNCTION__); @@ -4082,7 +4699,7 @@ get_mesa_program(struct gl_context *ctx, struct gl_program *prog; GLenum target; const char *target_string; - GLboolean progress; + bool progress; struct gl_shader_compiler_options *options = &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)]; @@ -4117,6 +4734,7 @@ get_mesa_program(struct gl_context *ctx, v->shader_program = shader_program; v->options = options; v->glsl_version = ctx->Const.GLSLVersion; + v->native_integers = ctx->Const.NativeIntegers; add_uniforms_to_parameters_list(shader_program, shader, prog); @@ -4170,17 +4788,20 @@ get_mesa_program(struct gl_context *ctx, v->remove_output_reads(PROGRAM_OUTPUT); if (target == GL_VERTEX_PROGRAM_ARB) v->remove_output_reads(PROGRAM_VARYING); + + /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */ + v->simplify_cmp(); + v->copy_propagate(); + while (v->eliminate_dead_code_advanced()); - /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. - * FIXME: These passes to optimize temporary registers don't work when there + /* FIXME: These passes to optimize temporary registers don't work when there * is indirect addressing of the temporary register space. We need proper * array support so that we don't have to give up these passes in every * shader that uses arrays. */ if (!v->indirect_addr_temps) { - v->copy_propagate(); - v->merge_registers(); v->eliminate_dead_code(); + v->merge_registers(); v->renumber_registers(); } @@ -4293,7 +4914,7 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress; - progress = lower_quadop_vector(ir, true) || progress; + progress = lower_quadop_vector(ir, false) || progress; if (options->EmitNoIfs) { progress = lower_discard(ir) || progress;