X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fstate_tracker%2Fst_glsl_to_tgsi.cpp;h=60a4e2831a415e4337b4359d95c77ece5d82cbfe;hb=bb4c5d72d7c7cb1d9e7016e2c07c36875f30011a;hp=3a69a439822cc2b815ea7ef1e7c96165fe344e8b;hpb=f751730ad003bb19ce85bc4d0abddaf40edde6c1;p=mesa.git diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 3a69a439822..60a4e2831a4 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -42,17 +42,17 @@ #include "ir_optimization.h" #include "ast.h" -extern "C" { #include "main/mtypes.h" -#include "main/shaderapi.h" #include "main/shaderobj.h" -#include "main/uniforms.h" #include "program/hash_table.h" + +extern "C" { +#include "main/shaderapi.h" +#include "main/uniforms.h" #include "program/prog_instruction.h" #include "program/prog_optimize.h" #include "program/prog_print.h" #include "program/program.h" -#include "program/prog_uniform.h" #include "program/prog_parameter.h" #include "program/sampler.h" @@ -78,8 +78,17 @@ extern "C" { (1 << PROGRAM_CONSTANT) | \ (1 << PROGRAM_UNIFORM)) +/** + * Maximum number of temporary registers. + * + * It is too big for stack allocated arrays -- it will cause stack overflow on + * Windows and likely Mac OS X. + */ #define MAX_TEMPS 4096 +/* will be 4 for GLSL 4.00 */ +#define MAX_GLSL_TEXTURE_OFFSET 1 + class st_src_reg; class st_dst_reg; @@ -174,7 +183,7 @@ st_src_reg::st_src_reg(st_dst_reg reg) this->index = reg.index; this->swizzle = SWIZZLE_XYZW; this->negate = 0; - this->reladdr = NULL; + this->reladdr = reg.reladdr; } st_dst_reg::st_dst_reg(st_src_reg reg) @@ -211,6 +220,8 @@ public: int sampler; /**< sampler index */ int tex_target; /**< One of TEXTURE_*_INDEX */ GLboolean tex_shadow; + struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET]; + unsigned tex_offset_num_offset; int dead_mask; /**< Used in dead code elimination */ class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */ @@ -229,6 +240,20 @@ public: ir_variable *var; /* variable that maps to this, if any */ }; +class immediate_storage : public exec_node { +public: + immediate_storage(gl_constant_value *values, int size, int type) + { + memcpy(this->values, values, size * sizeof(gl_constant_value)); + this->size = size; + this->type = type; + } + + gl_constant_value values[4]; + int size; /**< Number of components (1-4) */ + int type; /**< GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */ +}; + class function_entry : public exec_node { public: ir_function_signature *sig; @@ -272,7 +297,6 @@ public: struct gl_program *prog; struct gl_shader_program *shader_program; struct gl_shader_compiler_options *options; - struct gl_program_parameter_list *immediates; int next_temp; @@ -280,11 +304,16 @@ public: int samplers_used; bool indirect_addr_temps; bool indirect_addr_consts; + int num_clip_distances; int glsl_version; + bool native_integers; variable_storage *find_variable_storage(ir_variable *var); + int add_constant(gl_register_file file, gl_constant_value values[4], + int size, int datatype, GLuint *swizzle_out); + function_entry *get_function_signature(ir_function_signature *sig); st_src_reg get_temp(const glsl_type *type); @@ -326,6 +355,10 @@ public: /** List of variable_storage */ exec_list variables; + /** List of immediate_storage */ + exec_list immediates; + unsigned num_immediates; + /** List of function_entry */ exec_list function_signatures; int next_signature_id; @@ -352,11 +385,11 @@ public: /** * Emit the correct dot-product instruction for the type of arguments */ - void emit_dp(ir_instruction *ir, - st_dst_reg dst, - st_src_reg src0, - st_src_reg src1, - unsigned elements); + glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir, + st_dst_reg dst, + st_src_reg src0, + st_src_reg src1, + unsigned elements); void emit_scalar(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0); @@ -364,20 +397,23 @@ public: void emit_scalar(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0, st_src_reg src1); + void try_emit_float_set(ir_instruction *ir, unsigned op, st_dst_reg dst); + void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0); void emit_scs(ir_instruction *ir, unsigned op, st_dst_reg dst, const st_src_reg &src); - GLboolean try_emit_mad(ir_expression *ir, - int mul_operand); - GLboolean try_emit_sat(ir_expression *ir); + bool try_emit_mad(ir_expression *ir, + int mul_operand); + bool try_emit_mad_for_and_not(ir_expression *ir, + int mul_operand); + bool try_emit_sat(ir_expression *ir); void emit_swz(ir_expression *ir); bool process_move_condition(ir_rvalue *ir); - void remove_output_reads(gl_register_file type); void simplify_cmp(void); void rename_temp_register(int index, int new_index); @@ -489,7 +525,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, inst->function = NULL; - if (op == TGSI_OPCODE_ARL) + if (op == TGSI_OPCODE_ARL || op == TGSI_OPCODE_UARL) this->num_address_regs = 1; /* Update indirect addressing status used by TGSI */ @@ -539,7 +575,10 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, } this->instructions.push_tail(inst); - + + if (native_integers) + try_emit_float_set(ir, op, dst); + return inst; } @@ -565,11 +604,28 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op) return emit(ir, op, undef_dst, undef_src, undef_src, undef_src); } + /** + * Emits the code to convert the result of float SET instructions to integers. + */ +void +glsl_to_tgsi_visitor::try_emit_float_set(ir_instruction *ir, unsigned op, + st_dst_reg dst) +{ + if ((op == TGSI_OPCODE_SEQ || + op == TGSI_OPCODE_SNE || + op == TGSI_OPCODE_SGE || + op == TGSI_OPCODE_SLT)) + { + st_src_reg src = st_src_reg(dst); + src.negate = ~src.negate; + dst.type = GLSL_TYPE_FLOAT; + emit(ir, TGSI_OPCODE_F2I, dst, src); + } +} + /** * Determines whether to use an integer, unsigned integer, or float opcode * based on the operands and input opcode, then emits the result. - * - * TODO: type checking for remaining TGSI opcodes */ unsigned glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, @@ -580,8 +636,8 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT) type = GLSL_TYPE_FLOAT; - else if (glsl_version >= 130) - type = src0.type; + else if (native_integers) + type = src0.type == GLSL_TYPE_BOOL ? GLSL_TYPE_INT : src0.type; #define case4(c, f, i, u) \ case TGSI_OPCODE_##c: \ @@ -607,12 +663,10 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, case3(SGE, ISGE, USGE); case3(SLT, ISLT, USLT); - case2iu(SHL, SHL); case2iu(ISHR, USHR); - case2iu(NOT, NOT); - case2iu(AND, AND); - case2iu(OR, OR); - case2iu(XOR, XOR); + + case2fi(SSG, ISSG); + case3(ABS, IABS, IABS); default: break; } @@ -621,7 +675,7 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, return op; } -void +glsl_to_tgsi_instruction * glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir, st_dst_reg dst, st_src_reg src0, st_src_reg src1, unsigned elements) @@ -630,7 +684,7 @@ glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir, TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4 }; - emit(ir, dot_opcodes[elements - 2], dst, src0, src1); + return emit(ir, dot_opcodes[elements - 2], dst, src0, src1); } /** @@ -701,16 +755,12 @@ void glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0) { - st_src_reg tmp = get_temp(glsl_type::float_type); + int op = TGSI_OPCODE_ARL; - if (src0.type == GLSL_TYPE_INT) - emit(NULL, TGSI_OPCODE_I2F, st_dst_reg(tmp), src0); - else if (src0.type == GLSL_TYPE_UINT) - emit(NULL, TGSI_OPCODE_U2F, st_dst_reg(tmp), src0); - else - tmp = src0; - - emit(NULL, TGSI_OPCODE_ARL, dst, tmp); + if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT) + op = TGSI_OPCODE_UARL; + + emit(NULL, op, dst, src0); } /** @@ -808,38 +858,71 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op, } } -struct st_src_reg +int +glsl_to_tgsi_visitor::add_constant(gl_register_file file, + gl_constant_value values[4], int size, int datatype, + GLuint *swizzle_out) +{ + if (file == PROGRAM_CONSTANT) { + return _mesa_add_typed_unnamed_constant(this->prog->Parameters, values, + size, datatype, swizzle_out); + } else { + int index = 0; + immediate_storage *entry; + assert(file == PROGRAM_IMMEDIATE); + + /* Search immediate storage to see if we already have an identical + * immediate that we can use instead of adding a duplicate entry. + */ + foreach_iter(exec_list_iterator, iter, this->immediates) { + entry = (immediate_storage *)iter.get(); + + if (entry->size == size && + entry->type == datatype && + !memcmp(entry->values, values, size * sizeof(gl_constant_value))) { + return index; + } + index++; + } + + /* Add this immediate to the list. */ + entry = new(mem_ctx) immediate_storage(values, size, datatype); + this->immediates.push_tail(entry); + this->num_immediates++; + return index; + } +} + +st_src_reg glsl_to_tgsi_visitor::st_src_reg_for_float(float val) { st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT); union gl_constant_value uval; uval.f = val; - src.index = _mesa_add_typed_unnamed_constant(this->immediates, &uval, 1, - GL_FLOAT, &src.swizzle); + src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle); return src; } -struct st_src_reg +st_src_reg glsl_to_tgsi_visitor::st_src_reg_for_int(int val) { st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT); union gl_constant_value uval; - assert(glsl_version >= 130); + assert(native_integers); uval.i = val; - src.index = _mesa_add_typed_unnamed_constant(this->immediates, &uval, 1, - GL_INT, &src.swizzle); + src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle); return src; } -struct st_src_reg +st_src_reg glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val) { - if (glsl_version >= 130) + if (native_integers) return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) : st_src_reg_for_int(val); else @@ -896,10 +979,8 @@ st_src_reg glsl_to_tgsi_visitor::get_temp(const glsl_type *type) { st_src_reg src; - int swizzle[4]; - int i; - src.type = glsl_version >= 130 ? type->base_type : GLSL_TYPE_FLOAT; + src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT; src.file = PROGRAM_TEMPORARY; src.index = next_temp; src.reladdr = NULL; @@ -908,12 +989,7 @@ glsl_to_tgsi_visitor::get_temp(const glsl_type *type) if (type->is_array() || type->is_record()) { src.swizzle = SWIZZLE_NOOP; } else { - for (i = 0; i < type->vector_elements; i++) - swizzle[i] = i; - for (; i < 4; i++) - swizzle[i] = type->vector_elements - 1; - src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], - swizzle[2], swizzle[3]); + src.swizzle = swizzle_for_size(type->vector_elements); } src.negate = 0; @@ -944,29 +1020,6 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) fp->OriginUpperLeft = ir->origin_upper_left; fp->PixelCenterInteger = ir->pixel_center_integer; - - } else if (strcmp(ir->name, "gl_FragDepth") == 0) { - struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; - switch (ir->depth_layout) { - case ir_depth_layout_none: - fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_NONE; - break; - case ir_depth_layout_any: - fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_ANY; - break; - case ir_depth_layout_greater: - fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_GREATER; - break; - case ir_depth_layout_less: - fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_LESS; - break; - case ir_depth_layout_unchanged: - fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_UNCHANGED; - break; - default: - assert(0); - break; - } } if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) { @@ -986,7 +1039,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) } } - struct variable_storage *storage; + variable_storage *storage; st_dst_reg dst; if (i == ir->num_state_slots) { /* We'll set the index later. */ @@ -1007,7 +1060,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) this->next_temp += type_size(ir->type); dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index, - glsl_version >= 130 ? ir->type->base_type : GLSL_TYPE_FLOAT)); + native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT)); } @@ -1023,7 +1076,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) } } else { st_src_reg src(PROGRAM_STATE_VAR, index, - glsl_version >= 130 ? ir->type->base_type : GLSL_TYPE_FLOAT); + native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT); src.swizzle = slots[i].swizzle; emit(ir, TGSI_OPCODE_MOV, dst, src); /* even a float takes up a whole vec4 reg in a struct/array. */ @@ -1137,7 +1190,7 @@ glsl_to_tgsi_visitor::visit(ir_function *ir) } } -GLboolean +bool glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand) { int nonmul_operand = 1 - mul_operand; @@ -1163,7 +1216,47 @@ glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand) return true; } -GLboolean +/** + * Emit MAD(a, -b, a) instead of AND(a, NOT(b)) + * + * The logic values are 1.0 for true and 0.0 for false. Logical-and is + * implemented using multiplication, and logical-or is implemented using + * addition. Logical-not can be implemented as (true - x), or (1.0 - x). + * As result, the logical expression (a & !b) can be rewritten as: + * + * - a * !b + * - a * (1 - b) + * - (a * 1) - (a * b) + * - a + -(a * b) + * - a + (a * -b) + * + * This final expression can be implemented as a single MAD(a, -b, a) + * instruction. + */ +bool +glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand) +{ + const int other_operand = 1 - try_operand; + st_src_reg a, b; + + ir_expression *expr = ir->operands[try_operand]->as_expression(); + if (!expr || expr->operation != ir_unop_logic_not) + return false; + + ir->operands[other_operand]->accept(this); + a = this->result; + expr->operands[0]->accept(this); + b = this->result; + + b.negate = ~b.negate; + + this->result = get_temp(ir->type); + emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a); + + return true; +} + +bool glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir) { /* Saturates were only introduced to vertex programs in @@ -1179,12 +1272,32 @@ glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir) sat_src->accept(this); st_src_reg src = this->result; - this->result = get_temp(ir->type); - st_dst_reg result_dst = st_dst_reg(this->result); - result_dst.writemask = (1 << ir->type->vector_elements) - 1; - glsl_to_tgsi_instruction *inst; - inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src); - inst->saturate = true; + /* If we generated an expression instruction into a temporary in + * processing the saturate's operand, apply the saturate to that + * instruction. Otherwise, generate a MOV to do the saturate. + * + * Note that we have to be careful to only do this optimization if + * the instruction in question was what generated src->result. For + * example, ir_dereference_array might generate a MUL instruction + * to create the reladdr, and return us a src reg using that + * reladdr. That MUL result is not the value we're trying to + * saturate. + */ + ir_expression *sat_src_expr = sat_src->as_expression(); + if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul || + sat_src_expr->operation == ir_binop_add || + sat_src_expr->operation == ir_binop_dot)) { + glsl_to_tgsi_instruction *new_inst; + new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); + new_inst->saturate = true; + } else { + this->result = get_temp(ir->type); + st_dst_reg result_dst = st_dst_reg(this->result); + result_dst.writemask = (1 << ir->type->vector_elements) - 1; + glsl_to_tgsi_instruction *inst; + inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src); + inst->saturate = true; + } return true; } @@ -1224,6 +1337,16 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) if (try_emit_mad(ir, 0)) return; } + + /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b)) + */ + if (ir->operation == ir_binop_logic_and) { + if (try_emit_mad_for_and_not(ir, 1)) + return; + if (try_emit_mad_for_and_not(ir, 0)) + return; + } + if (try_emit_sat(ir)) return; @@ -1269,11 +1392,20 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) switch (ir->operation) { case ir_unop_logic_not: - emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_type(result_dst.type, 0)); + if (result_dst.type != GLSL_TYPE_FLOAT) + emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]); + else { + /* Previously 'SEQ dst, src, 0.0' was used for this. However, many + * older GPUs implement SEQ using multiple instructions (i915 uses two + * SGE instructions and a MUL instruction). Since our logic values are + * 0.0 and 1.0, 1-x also implements !x. + */ + op[0].negate = ~op[0].negate; + emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0)); + } break; case ir_unop_neg: - assert(result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_INT); - if (result_dst.type == GLSL_TYPE_INT) + if (result_dst.type == GLSL_TYPE_INT || result_dst.type == GLSL_TYPE_UINT) emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]); else { op[0].negate = ~op[0].negate; @@ -1281,7 +1413,6 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) } break; case ir_unop_abs: - assert(result_dst.type == GLSL_TYPE_FLOAT); emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]); break; case ir_unop_sign: @@ -1360,10 +1491,10 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]); break; case ir_binop_greater: - emit(ir, TGSI_OPCODE_SGT, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_SLT, result_dst, op[1], op[0]); break; case ir_binop_lequal: - emit(ir, TGSI_OPCODE_SLE, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_SGE, result_dst, op[1], op[0]); break; case ir_binop_gequal: emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]); @@ -1378,13 +1509,56 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) /* "==" operator producing a scalar boolean. */ if (ir->operands[0]->type->is_vector() || ir->operands[1]->type->is_vector()) { - st_src_reg temp = get_temp(glsl_version >= 130 ? + st_src_reg temp = get_temp(native_integers ? glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) : glsl_type::vec4_type); - assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); - emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); - emit_dp(ir, result_dst, temp, temp, vector_elements); - emit(ir, TGSI_OPCODE_SEQ, result_dst, result_src, st_src_reg_for_float(0.0)); + + if (native_integers) { + st_dst_reg temp_dst = st_dst_reg(temp); + st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp); + + emit(ir, TGSI_OPCODE_SEQ, st_dst_reg(temp), op[0], op[1]); + + /* Emit 1-3 AND operations to combine the SEQ results. */ + switch (ir->operands[0]->type->vector_elements) { + case 2: + break; + case 3: + temp_dst.writemask = WRITEMASK_Y; + temp1.swizzle = SWIZZLE_YYYY; + temp2.swizzle = SWIZZLE_ZZZZ; + emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); + break; + case 4: + temp_dst.writemask = WRITEMASK_X; + temp1.swizzle = SWIZZLE_XXXX; + temp2.swizzle = SWIZZLE_YYYY; + emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); + temp_dst.writemask = WRITEMASK_Y; + temp1.swizzle = SWIZZLE_ZZZZ; + temp2.swizzle = SWIZZLE_WWWW; + emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); + } + + temp1.swizzle = SWIZZLE_XXXX; + temp2.swizzle = SWIZZLE_YYYY; + emit(ir, TGSI_OPCODE_AND, result_dst, temp1, temp2); + } else { + emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); + + /* After the dot-product, the value will be an integer on the + * range [0,4]. Zero becomes 1.0, and positive values become zero. + */ + emit_dp(ir, result_dst, temp, temp, vector_elements); + + /* Negating the result of the dot-product gives values on the range + * [-4, 0]. Zero becomes 1.0, and negative values become zero. + * This is achieved using SGE. + */ + st_src_reg sge_src = result_src; + sge_src.negate = ~sge_src.negate; + emit(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0)); + } } else { emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); } @@ -1393,38 +1567,143 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) /* "!=" operator producing a scalar boolean. */ if (ir->operands[0]->type->is_vector() || ir->operands[1]->type->is_vector()) { - st_src_reg temp = get_temp(glsl_version >= 130 ? + st_src_reg temp = get_temp(native_integers ? glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) : glsl_type::vec4_type); - assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); - emit_dp(ir, result_dst, temp, temp, vector_elements); - emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); + + if (native_integers) { + st_dst_reg temp_dst = st_dst_reg(temp); + st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp); + + /* Emit 1-3 OR operations to combine the SNE results. */ + switch (ir->operands[0]->type->vector_elements) { + case 2: + break; + case 3: + temp_dst.writemask = WRITEMASK_Y; + temp1.swizzle = SWIZZLE_YYYY; + temp2.swizzle = SWIZZLE_ZZZZ; + emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); + break; + case 4: + temp_dst.writemask = WRITEMASK_X; + temp1.swizzle = SWIZZLE_XXXX; + temp2.swizzle = SWIZZLE_YYYY; + emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); + temp_dst.writemask = WRITEMASK_Y; + temp1.swizzle = SWIZZLE_ZZZZ; + temp2.swizzle = SWIZZLE_WWWW; + emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); + } + + temp1.swizzle = SWIZZLE_XXXX; + temp2.swizzle = SWIZZLE_YYYY; + emit(ir, TGSI_OPCODE_OR, result_dst, temp1, temp2); + } else { + /* After the dot-product, the value will be an integer on the + * range [0,4]. Zero stays zero, and positive values become 1.0. + */ + glsl_to_tgsi_instruction *const dp = + emit_dp(ir, result_dst, temp, temp, vector_elements); + if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { + /* The clamping to [0,1] can be done for free in the fragment + * shader with a saturate. + */ + dp->saturate = true; + } else { + /* Negating the result of the dot-product gives values on the range + * [-4, 0]. Zero stays zero, and negative values become 1.0. This + * achieved using SLT. + */ + st_src_reg slt_src = result_src; + slt_src.negate = ~slt_src.negate; + emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); + } + } } else { emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); } break; - case ir_unop_any: + case ir_unop_any: { assert(ir->operands[0]->type->is_vector()); - emit_dp(ir, result_dst, op[0], op[0], - ir->operands[0]->type->vector_elements); - emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); + + /* After the dot-product, the value will be an integer on the + * range [0,4]. Zero stays zero, and positive values become 1.0. + */ + glsl_to_tgsi_instruction *const dp = + emit_dp(ir, result_dst, op[0], op[0], + ir->operands[0]->type->vector_elements); + if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB && + result_dst.type == GLSL_TYPE_FLOAT) { + /* The clamping to [0,1] can be done for free in the fragment + * shader with a saturate. + */ + dp->saturate = true; + } else if (result_dst.type == GLSL_TYPE_FLOAT) { + /* Negating the result of the dot-product gives values on the range + * [-4, 0]. Zero stays zero, and negative values become 1.0. This + * is achieved using SLT. + */ + st_src_reg slt_src = result_src; + slt_src.negate = ~slt_src.negate; + emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); + } + else { + /* Use SNE 0 if integers are being used as boolean values. */ + emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0)); + } break; + } case ir_binop_logic_xor: - emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); + if (native_integers) + emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]); + else + emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); break; - case ir_binop_logic_or: - /* This could be a saturated add and skip the SNE. */ - emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); - emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); + case ir_binop_logic_or: { + if (native_integers) { + /* If integers are used as booleans, we can use an actual "or" + * instruction. + */ + assert(native_integers); + emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]); + } else { + /* After the addition, the value will be an integer on the + * range [0,2]. Zero stays zero, and positive values become 1.0. + */ + glsl_to_tgsi_instruction *add = + emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); + if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { + /* The clamping to [0,1] can be done for free in the fragment + * shader with a saturate if floats are being used as boolean values. + */ + add->saturate = true; + } else { + /* Negating the result of the addition gives values on the range + * [-2, 0]. Zero stays zero, and negative values become 1.0. This + * is achieved using SLT. + */ + st_src_reg slt_src = result_src; + slt_src.negate = ~slt_src.negate; + emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); + } + } break; + } case ir_binop_logic_and: - /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */ - emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); + /* If native integers are disabled, the bool args are stored as float 0.0 + * or 1.0, so "mul" gives us "and". If they're enabled, just use the + * actual AND opcode. + */ + if (native_integers) + emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]); + else + emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); break; case ir_binop_dot: @@ -1447,37 +1726,63 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]); break; case ir_unop_i2f: - case ir_unop_b2f: - if (glsl_version >= 130) { + if (native_integers) { emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]); break; } - case ir_unop_b2i: - /* Booleans are stored as integers (or floats in GLSL 1.20 and lower). */ + /* fallthrough to next case otherwise */ + case ir_unop_b2f: + if (native_integers) { + emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_float(1.0)); + break; + } + /* fallthrough to next case otherwise */ + case ir_unop_i2u: + case ir_unop_u2i: + /* Converting between signed and unsigned integers is a no-op. */ result_src = op[0]; break; + case ir_unop_b2i: + if (native_integers) { + /* Booleans are stored as integers using ~0 for true and 0 for false. + * GLSL requires that int(bool) return 1 for true and 0 for false. + * This conversion is done with AND, but it could be done with NEG. + */ + emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1)); + } else { + /* Booleans and integers are both stored as floats when native + * integers are disabled. + */ + result_src = op[0]; + } + break; case ir_unop_f2i: - if (glsl_version >= 130) + if (native_integers) emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]); else emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); break; case ir_unop_f2b: + emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0)); + break; case ir_unop_i2b: - emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], - st_src_reg_for_type(result_dst.type, 0)); + if (native_integers) + emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]); + else + emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0)); break; case ir_unop_trunc: emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); break; case ir_unop_ceil: - op[0].negate = ~op[0].negate; - emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]); - result_src.negate = ~result_src.negate; + emit(ir, TGSI_OPCODE_CEIL, result_dst, op[0]); break; case ir_unop_floor: emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]); break; + case ir_unop_round_even: + emit(ir, TGSI_OPCODE_ROUND, result_dst, op[0]); + break; case ir_unop_fract: emit(ir, TGSI_OPCODE_FRC, result_dst, op[0]); break; @@ -1493,41 +1798,41 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) break; case ir_unop_bit_not: - if (glsl_version >= 130) { + if (native_integers) { emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]); break; } case ir_unop_u2f: - if (glsl_version >= 130) { + if (native_integers) { emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]); break; } case ir_binop_lshift: - if (glsl_version >= 130) { - emit(ir, TGSI_OPCODE_SHL, result_dst, op[0]); + if (native_integers) { + emit(ir, TGSI_OPCODE_SHL, result_dst, op[0], op[1]); break; } case ir_binop_rshift: - if (glsl_version >= 130) { - emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0]); + if (native_integers) { + emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0], op[1]); break; } case ir_binop_bit_and: - if (glsl_version >= 130) { - emit(ir, TGSI_OPCODE_AND, result_dst, op[0]); + if (native_integers) { + emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]); break; } case ir_binop_bit_xor: - if (glsl_version >= 130) { - emit(ir, TGSI_OPCODE_XOR, result_dst, op[0]); + if (native_integers) { + emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]); break; } case ir_binop_bit_or: - if (glsl_version >= 130) { - emit(ir, TGSI_OPCODE_OR, result_dst, op[0]); + if (native_integers) { + emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]); break; } - case ir_unop_round_even: + assert(!"GLSL 1.30 features unsupported"); break; @@ -1613,20 +1918,12 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) entry = new(mem_ctx) variable_storage(var, PROGRAM_INPUT, var->location); - if (this->prog->Target == GL_VERTEX_PROGRAM_ARB && - var->location >= VERT_ATTRIB_GENERIC0) { - _mesa_add_attribute(this->prog->Attributes, - var->name, - _mesa_sizeof_glsl_type(var->type->gl_type), - var->type->gl_type, - var->location - VERT_ATTRIB_GENERIC0); - } break; case ir_var_out: assert(var->location != -1); entry = new(mem_ctx) variable_storage(var, PROGRAM_OUTPUT, - var->location); + var->location + var->index); break; case ir_var_system_value: entry = new(mem_ctx) variable_storage(var, @@ -1650,7 +1947,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) } this->result = st_src_reg(entry->file, entry->index, var->type); - if (glsl_version <= 120) + if (!native_integers) this->result.type = GLSL_TYPE_FLOAT; } @@ -1669,7 +1966,6 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) if (index) { src.index += index->value.i[0] * element_size; } else { - st_src_reg array_base = this->result; /* Variable index array dereference. It eats the "vec4" of the * base of the array and an index that offsets the TGSI register * index. @@ -1681,10 +1977,24 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) if (element_size == 1) { index_reg = this->result; } else { - index_reg = get_temp(glsl_type::float_type); + index_reg = get_temp(native_integers ? + glsl_type::int_type : glsl_type::float_type); emit(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg), - this->result, st_src_reg_for_float(element_size)); + this->result, st_src_reg_for_type(index_reg.type, element_size)); + } + + /* If there was already a relative address register involved, add the + * new and the old together to get the new offset. + */ + if (src.reladdr != NULL) { + st_src_reg accum_reg = get_temp(native_integers ? + glsl_type::int_type : glsl_type::float_type); + + emit(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg), + index_reg, *src.reladdr); + + index_reg = accum_reg; } src.reladdr = ralloc(mem_ctx, st_src_reg); @@ -1900,12 +2210,25 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) for (i = 0; i < type_size(ir->lhs->type); i++) { st_src_reg l_src = st_src_reg(l); + st_src_reg condition_temp = condition; l_src.swizzle = swizzle_for_size(ir->lhs->type->vector_elements); + if (native_integers) { + /* This is necessary because TGSI's CMP instruction expects the + * condition to be a float, and we store booleans as integers. + * If TGSI had a UCMP instruction or similar, this extra + * instruction would not be necessary. + */ + condition_temp = get_temp(glsl_type::vec4_type); + condition.negate = 0; + emit(ir, TGSI_OPCODE_I2F, st_dst_reg(condition_temp), condition); + condition_temp.swizzle = condition.swizzle; + } + if (switch_order) { - emit(ir, TGSI_OPCODE_CMP, l, condition, l_src, r); + emit(ir, TGSI_OPCODE_CMP, l, condition_temp, l_src, r); } else { - emit(ir, TGSI_OPCODE_CMP, l, condition, r, l_src); + emit(ir, TGSI_OPCODE_CMP, l, condition_temp, r, l_src); } l.index++; @@ -1914,15 +2237,18 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) } else if (ir->rhs->as_expression() && this->instructions.get_tail() && ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir && - type_size(ir->lhs->type) == 1) { + type_size(ir->lhs->type) == 1 && + l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst.writemask) { /* To avoid emitting an extra MOV when assigning an expression to a * variable, emit the last instruction of the expression again, but * replace the destination register with the target of the assignment. * Dead code elimination will remove the original instruction. */ - glsl_to_tgsi_instruction *inst; + glsl_to_tgsi_instruction *inst, *new_inst; inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); - emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]); + new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]); + new_inst->saturate = inst->saturate; + inst->dead_mask = inst->dst.writemask; } else { for (i = 0; i < type_size(ir->lhs->type); i++) { emit(ir, TGSI_OPCODE_MOV, l, r); @@ -1941,12 +2267,8 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) gl_constant_value *values = (gl_constant_value *) stack_vals; GLenum gl_type = GL_NONE; unsigned int i; - gl_register_file file; - gl_program_parameter_list *param_list; static int in_array = 0; - - file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE; - param_list = in_array ? this->prog->Parameters : this->immediates; + gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE; /* Unfortunately, 4 floats is all we can get into * _mesa_add_typed_unnamed_constant. So, make a temp to store an @@ -2009,11 +2331,11 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements]; src = st_src_reg(file, -1, ir->type->base_type); - src.index = _mesa_add_typed_unnamed_constant(param_list, - values, - ir->type->vector_elements, - GL_FLOAT, - &src.swizzle); + src.index = add_constant(file, + values, + ir->type->vector_elements, + GL_FLOAT, + &src.swizzle); emit(ir, TGSI_OPCODE_MOV, mat_column, src); mat_column.index++; @@ -2031,28 +2353,28 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) } break; case GLSL_TYPE_UINT: - gl_type = glsl_version >= 130 ? GL_UNSIGNED_INT : GL_FLOAT; + gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT; for (i = 0; i < ir->type->vector_elements; i++) { - if (glsl_version >= 130) + if (native_integers) values[i].u = ir->value.u[i]; else values[i].f = ir->value.u[i]; } break; case GLSL_TYPE_INT: - gl_type = glsl_version >= 130 ? GL_INT : GL_FLOAT; + gl_type = native_integers ? GL_INT : GL_FLOAT; for (i = 0; i < ir->type->vector_elements; i++) { - if (glsl_version >= 130) + if (native_integers) values[i].i = ir->value.i[i]; else values[i].f = ir->value.i[i]; } break; case GLSL_TYPE_BOOL: - gl_type = glsl_version >= 130 ? GL_BOOL : GL_FLOAT; + gl_type = native_integers ? GL_BOOL : GL_FLOAT; for (i = 0; i < ir->type->vector_elements; i++) { - if (glsl_version >= 130) - values[i].b = ir->value.b[i]; + if (native_integers) + values[i].u = ir->value.b[i] ? ~0 : 0; else values[i].f = ir->value.b[i]; } @@ -2062,9 +2384,11 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) } this->result = st_src_reg(file, -1, ir->type); - this->result.index = _mesa_add_typed_unnamed_constant(param_list, - values, ir->type->vector_elements, gl_type, - &this->result.swizzle); + this->result.index = add_constant(file, + values, + ir->type->vector_elements, + gl_type, + &this->result.swizzle); } function_entry * @@ -2113,7 +2437,7 @@ void glsl_to_tgsi_visitor::visit(ir_call *ir) { glsl_to_tgsi_instruction *call_inst; - ir_function_signature *sig = ir->get_callee(); + ir_function_signature *sig = ir->callee; function_entry *entry = get_function_signature(sig); int i; @@ -2192,21 +2516,23 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) void glsl_to_tgsi_visitor::visit(ir_texture *ir) { - st_src_reg result_src, coord, lod_info, projector, dx, dy; + st_src_reg result_src, coord, lod_info, projector, dx, dy, offset; st_dst_reg result_dst, coord_dst; glsl_to_tgsi_instruction *inst = NULL; unsigned opcode = TGSI_OPCODE_NOP; - ir->coordinate->accept(this); + if (ir->coordinate) { + ir->coordinate->accept(this); - /* Put our coords in a temp. We'll need to modify them for shadow, - * projection, or LOD, so the only case we'd use it as is is if - * we're doing plain old texturing. The optimization passes on - * glsl_to_tgsi_visitor should handle cleaning up our mess in that case. - */ - coord = get_temp(glsl_type::vec4_type); - coord_dst = st_dst_reg(coord); - emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); + /* Put our coords in a temp. We'll need to modify them for shadow, + * projection, or LOD, so the only case we'd use it as is is if + * we're doing plain old texturing. The optimization passes on + * glsl_to_tgsi_visitor should handle cleaning up our mess in that case. + */ + coord = get_temp(glsl_type::vec4_type); + coord_dst = st_dst_reg(coord); + emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); + } if (ir->projector) { ir->projector->accept(this); @@ -2240,11 +2566,24 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) ir->lod_info.grad.dPdy->accept(this); dy = this->result; break; - case ir_txf: /* TODO: use TGSI_OPCODE_TXF here */ - assert(!"GLSL 1.30 features unsupported"); + case ir_txs: + opcode = TGSI_OPCODE_TXQ; + ir->lod_info.lod->accept(this); + lod_info = this->result; + break; + case ir_txf: + opcode = TGSI_OPCODE_TXF; + ir->lod_info.lod->accept(this); + lod_info = this->result; + if (ir->offset) { + ir->offset->accept(this); + offset = this->result; + } break; } + const glsl_type *sampler_type = ir->sampler->type; + if (ir->projector) { if (opcode == TGSI_OPCODE_TEX) { /* Slot the projector in as the last component of the coord. */ @@ -2276,6 +2615,9 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) tmp_src = get_temp(glsl_type::vec4_type); st_dst_reg tmp_dst = st_dst_reg(tmp_src); + /* Projective division not allowed for array samplers. */ + assert(!sampler_type->sampler_array); + tmp_dst.writemask = WRITEMASK_Z; emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result); @@ -2300,12 +2642,22 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) * coord. */ ir->shadow_comparitor->accept(this); - coord_dst.writemask = WRITEMASK_Z; + + /* XXX This will need to be updated for cubemap array samplers. */ + if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D && + sampler_type->sampler_array) || + sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) { + coord_dst.writemask = WRITEMASK_W; + } else { + coord_dst.writemask = WRITEMASK_Z; + } + emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); coord_dst.writemask = WRITEMASK_XYZW; } - if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB) { + if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB || + opcode == TGSI_OPCODE_TXF) { /* TGSI stores LOD or LOD bias in the last channel of the coords. */ coord_dst.writemask = WRITEMASK_W; emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info); @@ -2314,7 +2666,11 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) if (opcode == TGSI_OPCODE_TXD) inst = emit(ir, opcode, result_dst, coord, dx, dy); - else + else if (opcode == TGSI_OPCODE_TXQ) + inst = emit(ir, opcode, result_dst, lod_info); + else if (opcode == TGSI_OPCODE_TXF) { + inst = emit(ir, opcode, result_dst, coord); + } else inst = emit(ir, opcode, result_dst, coord); if (ir->shadow_comparitor) @@ -2324,7 +2680,14 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) this->shader_program, this->prog); - const glsl_type *sampler_type = ir->sampler->type; + if (ir->offset) { + inst->tex_offset_num_offset = 1; + inst->tex_offsets[0].Index = offset.index; + inst->tex_offsets[0].File = offset.file; + inst->tex_offsets[0].SwizzleX = GET_SWZ(offset.swizzle, 0); + inst->tex_offsets[0].SwizzleY = GET_SWZ(offset.swizzle, 1); + inst->tex_offsets[0].SwizzleZ = GET_SWZ(offset.swizzle, 2); + } switch (sampler_type->sampler_dimensionality) { case GLSL_SAMPLER_DIM_1D: @@ -2347,6 +2710,9 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) case GLSL_SAMPLER_DIM_BUF: assert(!"FINISHME: Implement ARB_texture_buffer_object"); break; + case GLSL_SAMPLER_DIM_EXTERNAL: + inst->tex_target = TEXTURE_EXTERNAL_INDEX; + break; default: assert(!"Should not get here."); } @@ -2397,7 +2763,7 @@ glsl_to_tgsi_visitor::visit(ir_discard *ir) void glsl_to_tgsi_visitor::visit(ir_if *ir) { - glsl_to_tgsi_instruction *cond_inst, *if_inst, *else_inst = NULL; + glsl_to_tgsi_instruction *cond_inst, *if_inst; glsl_to_tgsi_instruction *prev_inst; prev_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); @@ -2429,7 +2795,7 @@ glsl_to_tgsi_visitor::visit(ir_if *ir) visit_exec_list(&ir->then_instructions, this); if (!ir->else_instructions.is_empty()) { - else_inst = emit(ir->condition, TGSI_OPCODE_ELSE); + emit(ir->condition, TGSI_OPCODE_ELSE); visit_exec_list(&ir->else_instructions, this); } @@ -2441,17 +2807,20 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() result.file = PROGRAM_UNDEFINED; next_temp = 1; next_signature_id = 1; + num_immediates = 0; current_function = NULL; num_address_regs = 0; indirect_addr_temps = false; indirect_addr_consts = false; - immediates = _mesa_new_parameter_list(); mem_ctx = ralloc_context(NULL); + ctx = NULL; + prog = NULL; + shader_program = NULL; + options = NULL; } glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor() { - _mesa_free_parameter_list(immediates); ralloc_free(mem_ctx); } @@ -2476,8 +2845,6 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) if (is_tex_instruction(inst->op)) { v->samplers_used |= 1 << inst->sampler; - prog->SamplerTargets[inst->sampler] = - (gl_texture_index)inst->tex_target; if (inst->tex_shadow) { prog->ShadowSamplers |= 1 << inst->sampler; } @@ -2485,172 +2852,9 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) } prog->SamplersUsed = v->samplers_used; - _mesa_update_shader_textures_used(prog); -} - -/** - * Check if the given vertex/fragment/shader program is within the - * resource limits of the context (number of texture units, etc). - * If any of those checks fail, record a linker error. - * - * XXX more checks are needed... - */ -static void -check_resources(const struct gl_context *ctx, - struct gl_shader_program *shader_program, - glsl_to_tgsi_visitor *prog, - struct gl_program *proginfo) -{ - switch (proginfo->Target) { - case GL_VERTEX_PROGRAM_ARB: - if (_mesa_bitcount(prog->samplers_used) > - ctx->Const.MaxVertexTextureImageUnits) { - fail_link(shader_program, "Too many vertex shader texture samplers"); - } - if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) { - fail_link(shader_program, "Too many vertex shader constants"); - } - break; - case MESA_GEOMETRY_PROGRAM: - if (_mesa_bitcount(prog->samplers_used) > - ctx->Const.MaxGeometryTextureImageUnits) { - fail_link(shader_program, "Too many geometry shader texture samplers"); - } - if (proginfo->Parameters->NumParameters > - MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) { - fail_link(shader_program, "Too many geometry shader constants"); - } - break; - case GL_FRAGMENT_PROGRAM_ARB: - if (_mesa_bitcount(prog->samplers_used) > - ctx->Const.MaxTextureImageUnits) { - fail_link(shader_program, "Too many fragment shader texture samplers"); - } - if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) { - fail_link(shader_program, "Too many fragment shader constants"); - } - break; - default: - _mesa_problem(ctx, "unexpected program type in check_resources()"); - } -} - - - -struct uniform_sort { - struct gl_uniform *u; - int pos; -}; - -/* The shader_program->Uniforms list is almost sorted in increasing - * uniform->{Frag,Vert}Pos locations, but not quite when there are - * uniforms shared between targets. We need to add parameters in - * increasing order for the targets. - */ -static int -sort_uniforms(const void *a, const void *b) -{ - struct uniform_sort *u1 = (struct uniform_sort *)a; - struct uniform_sort *u2 = (struct uniform_sort *)b; - - return u1->pos - u2->pos; -} - -/* Add the uniforms to the parameters. The linker chose locations - * in our parameters lists (which weren't created yet), which the - * uniforms code will use to poke values into our parameters list - * when uniforms are updated. - */ -static void -add_uniforms_to_parameters_list(struct gl_shader_program *shader_program, - struct gl_shader *shader, - struct gl_program *prog) -{ - unsigned int i; - unsigned int next_sampler = 0, num_uniforms = 0; - struct uniform_sort *sorted_uniforms; - - sorted_uniforms = ralloc_array(NULL, struct uniform_sort, - shader_program->Uniforms->NumUniforms); - - for (i = 0; i < shader_program->Uniforms->NumUniforms; i++) { - struct gl_uniform *uniform = shader_program->Uniforms->Uniforms + i; - int parameter_index = -1; - - switch (shader->Type) { - case GL_VERTEX_SHADER: - parameter_index = uniform->VertPos; - break; - case GL_FRAGMENT_SHADER: - parameter_index = uniform->FragPos; - break; - case GL_GEOMETRY_SHADER: - parameter_index = uniform->GeomPos; - break; - } - - /* Only add uniforms used in our target. */ - if (parameter_index != -1) { - sorted_uniforms[num_uniforms].pos = parameter_index; - sorted_uniforms[num_uniforms].u = uniform; - num_uniforms++; - } - } - - qsort(sorted_uniforms, num_uniforms, sizeof(struct uniform_sort), - sort_uniforms); - - for (i = 0; i < num_uniforms; i++) { - struct gl_uniform *uniform = sorted_uniforms[i].u; - int parameter_index = sorted_uniforms[i].pos; - const glsl_type *type = uniform->Type; - unsigned int size; - - if (type->is_vector() || - type->is_scalar()) { - size = type->vector_elements; - } else { - size = type_size(type) * 4; - } - - gl_register_file file; - if (type->is_sampler() || - (type->is_array() && type->fields.array->is_sampler())) { - file = PROGRAM_SAMPLER; - } else { - file = PROGRAM_UNIFORM; - } - - GLint index = _mesa_lookup_parameter_index(prog->Parameters, -1, - uniform->Name); - - if (index < 0) { - index = _mesa_add_parameter(prog->Parameters, file, - uniform->Name, size, type->gl_type, - NULL, NULL, 0x0); - - /* Sampler uniform values are stored in prog->SamplerUnits, - * and the entry in that array is selected by this index we - * store in ParameterValues[]. - */ - if (file == PROGRAM_SAMPLER) { - for (unsigned int j = 0; j < size / 4; j++) - prog->Parameters->ParameterValues[index + j][0].f = next_sampler++; - } - - /* The location chosen in the Parameters list here (returned - * from _mesa_add_uniform) has to match what the linker chose. - */ - if (index != parameter_index) { - fail_link(shader_program, "Allocation of uniform `%s' to target " - "failed (%d vs %d)\n", - uniform->Name, index, parameter_index); - } - } - } - - ralloc_free(sorted_uniforms); + if (v->shader_program != NULL) + _mesa_update_shader_textures_used(v->shader_program, prog); } static void @@ -2714,119 +2918,12 @@ set_uniform_initializer(struct gl_context *ctx, void *mem_ctx, element_type->matrix_columns, element_type->vector_elements, loc, 1, GL_FALSE, (GLfloat *)values); - loc += element_type->matrix_columns; } else { _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns, values, element_type->gl_type); - loc += type_size(element_type); } - } -} -static void -set_uniform_initializers(struct gl_context *ctx, - struct gl_shader_program *shader_program) -{ - void *mem_ctx = NULL; - - for (unsigned int i = 0; i < MESA_SHADER_TYPES; i++) { - struct gl_shader *shader = shader_program->_LinkedShaders[i]; - - if (shader == NULL) - continue; - - foreach_iter(exec_list_iterator, iter, *shader->ir) { - ir_instruction *ir = (ir_instruction *)iter.get(); - ir_variable *var = ir->as_variable(); - - if (!var || var->mode != ir_var_uniform || !var->constant_value) - continue; - - if (!mem_ctx) - mem_ctx = ralloc_context(NULL); - - set_uniform_initializer(ctx, mem_ctx, shader_program, var->name, - var->type, var->constant_value); - } - } - - ralloc_free(mem_ctx); -} - -/* - * Scan/rewrite program to remove reads of custom (output) registers. - * The passed type has to be either PROGRAM_OUTPUT or PROGRAM_VARYING - * (for vertex shaders). - * In GLSL shaders, varying vars can be read and written. - * On some hardware, trying to read an output register causes trouble. - * So, rewrite the program to use a temporary register in this case. - * - * Based on _mesa_remove_output_reads from programopt.c. - */ -void -glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) -{ - GLuint i; - GLint outputMap[VERT_RESULT_MAX]; - GLint outputTypes[VERT_RESULT_MAX]; - GLuint numVaryingReads = 0; - GLboolean usedTemps[MAX_TEMPS]; - GLuint firstTemp = 0; - - _mesa_find_used_registers(prog, PROGRAM_TEMPORARY, - usedTemps, MAX_TEMPS); - - assert(type == PROGRAM_VARYING || type == PROGRAM_OUTPUT); - assert(prog->Target == GL_VERTEX_PROGRAM_ARB || type != PROGRAM_VARYING); - - for (i = 0; i < VERT_RESULT_MAX; i++) - outputMap[i] = -1; - - /* look for instructions which read from varying vars */ - foreach_iter(exec_list_iterator, iter, this->instructions) { - glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); - const GLuint numSrc = num_inst_src_regs(inst->op); - GLuint j; - for (j = 0; j < numSrc; j++) { - if (inst->src[j].file == type) { - /* replace the read with a temp reg */ - const GLuint var = inst->src[j].index; - if (outputMap[var] == -1) { - numVaryingReads++; - outputMap[var] = _mesa_find_free_register(usedTemps, - MAX_TEMPS, - firstTemp); - outputTypes[var] = inst->src[j].type; - firstTemp = outputMap[var] + 1; - } - inst->src[j].file = PROGRAM_TEMPORARY; - inst->src[j].index = outputMap[var]; - } - } - } - - if (numVaryingReads == 0) - return; /* nothing to be done */ - - /* look for instructions which write to the varying vars identified above */ - foreach_iter(exec_list_iterator, iter, this->instructions) { - glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); - if (inst->dst.file == type && outputMap[inst->dst.index] >= 0) { - /* change inst to write to the temp reg, instead of the varying */ - inst->dst.file = PROGRAM_TEMPORARY; - inst->dst.index = outputMap[inst->dst.index]; - } - } - - /* insert new MOV instructions at the end */ - for (i = 0; i < VERT_RESULT_MAX; i++) { - if (outputMap[i] >= 0) { - /* MOV VAR[i], TEMP[tmp]; */ - st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i], outputTypes[i]); - st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW, outputTypes[i]); - dst.index = i; - this->emit(NULL, TGSI_OPCODE_MOV, dst, src); - } + loc++; } } @@ -2876,10 +2973,14 @@ get_src_arg_mask(st_dst_reg dst, st_src_reg src) void glsl_to_tgsi_visitor::simplify_cmp(void) { - unsigned tempWrites[MAX_TEMPS]; + unsigned *tempWrites; unsigned outputWrites[MAX_PROGRAM_OUTPUTS]; - memset(tempWrites, 0, sizeof(tempWrites)); + tempWrites = new unsigned[MAX_TEMPS]; + if (!tempWrites) { + return; + } + memset(tempWrites, 0, sizeof(unsigned) * MAX_TEMPS); memset(outputWrites, 0, sizeof(outputWrites)); foreach_iter(exec_list_iterator, iter, this->instructions) { @@ -2894,7 +2995,7 @@ glsl_to_tgsi_visitor::simplify_cmp(void) inst->op == TGSI_OPCODE_END || inst->op == TGSI_OPCODE_ENDSUB || inst->op == TGSI_OPCODE_RET) { - return; + break; } if (inst->dst.file == PROGRAM_OUTPUT) { @@ -2919,6 +3020,8 @@ glsl_to_tgsi_visitor::simplify_cmp(void) inst->src[0] = inst->src[1]; } } + + delete [] tempWrites; } /* Replaces all references to a temporary register index with another index. */ @@ -3326,34 +3429,37 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) switch (inst->op) { case TGSI_OPCODE_BGNLOOP: case TGSI_OPCODE_ENDLOOP: + case TGSI_OPCODE_CONT: + case TGSI_OPCODE_BRK: /* End of a basic block, clear the write array entirely. - * FIXME: This keeps us from killing dead code when the writes are + * + * This keeps us from killing dead code when the writes are * on either side of a loop, even when the register isn't touched - * inside the loop. + * inside the loop. However, glsl_to_tgsi_visitor doesn't seem to emit + * dead code of this type, so it shouldn't make a difference as long as + * the dead code elimination pass in the GLSL compiler does its job. */ memset(writes, 0, sizeof(*writes) * this->next_temp * 4); break; case TGSI_OPCODE_ENDIF: - --level; - break; - case TGSI_OPCODE_ELSE: - /* Clear all channels written inside the preceding if block from the - * write array, but leave those that were not touched. - * - * FIXME: This destroys opportunities to remove dead code inside of - * IF blocks that are followed by an ELSE block. + /* Promote the recorded level of all channels written inside the + * preceding if or else block to the level above the if/else block. */ for (int r = 0; r < this->next_temp; r++) { for (int c = 0; c < 4; c++) { if (!writes[4 * r + c]) continue; - if (write_level[4 * r + c] >= level) - writes[4 * r + c] = NULL; + if (write_level[4 * r + c] == level) + write_level[4 * r + c] = level-1; } } + + if(inst->op == TGSI_OPCODE_ENDIF) + --level; + break; case TGSI_OPCODE_IF: @@ -3364,7 +3470,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) /* Continuing the block, clear any channels from the write array that * are read by this instruction. */ - for (int i = 0; i < 4; i++) { + for (unsigned i = 0; i < Elements(inst->src); i++) { if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){ /* Any temporary might be read, so no dead code elimination * across this instruction. @@ -3426,7 +3532,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) if (!inst->dead_mask || !inst->dst.writemask) continue; - else if (inst->dead_mask == inst->dst.writemask) { + else if ((inst->dst.writemask & ~inst->dead_mask) == 0) { iter.remove(); delete inst; removed++; @@ -3531,15 +3637,17 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */ v->ctx = original->ctx; v->prog = prog; + v->shader_program = NULL; v->glsl_version = original->glsl_version; + v->native_integers = original->native_integers; v->options = original->options; v->next_temp = original->next_temp; v->num_address_regs = original->num_address_regs; v->samplers_used = prog->SamplersUsed = original->samplers_used; v->indirect_addr_temps = original->indirect_addr_temps; v->indirect_addr_consts = original->indirect_addr_consts; - _mesa_free_parameter_list(v->immediates); - v->immediates = _mesa_clone_parameter_list(original->immediates); + memcpy(&v->immediates, &original->immediates, sizeof(v->immediates)); + v->num_immediates = original->num_immediates; /* * Get initial pixel color from the texture. @@ -3552,7 +3660,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, inst->sampler = 0; inst->tex_target = TEXTURE_2D_INDEX; - prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0); + prog->InputsRead |= FRAG_BIT_TEX0; prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */ v->samplers_used |= (1 << 0); @@ -3609,6 +3717,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, * new visitor. */ foreach_iter(exec_list_iterator, iter, original->instructions) { glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + glsl_to_tgsi_instruction *newinst; st_src_reg src_regs[3]; if (inst->dst.file == PROGRAM_OUTPUT) @@ -3623,17 +3732,16 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, src_regs[i].index = src0.index; } else if (src_regs[i].file == PROGRAM_INPUT) - prog->InputsRead |= (1 << src_regs[i].index); + prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index); } - v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); + newinst = v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); + newinst->tex_target = inst->tex_target; } /* Make modifications to fragment program info. */ prog->Parameters = _mesa_combine_parameter_lists(params, original->prog->Parameters); - prog->Attributes = _mesa_clone_parameter_list(original->prog->Attributes); - prog->Varying = _mesa_clone_parameter_list(original->prog->Varying); _mesa_free_parameter_list(params); count_resources(v, prog); fp->glsl_to_tgsi = v; @@ -3660,15 +3768,17 @@ get_bitmap_visitor(struct st_fragment_program *fp, /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */ v->ctx = original->ctx; v->prog = prog; + v->shader_program = NULL; v->glsl_version = original->glsl_version; + v->native_integers = original->native_integers; v->options = original->options; v->next_temp = original->next_temp; v->num_address_regs = original->num_address_regs; v->samplers_used = prog->SamplersUsed = original->samplers_used; v->indirect_addr_temps = original->indirect_addr_temps; v->indirect_addr_consts = original->indirect_addr_consts; - _mesa_free_parameter_list(v->immediates); - v->immediates = _mesa_clone_parameter_list(original->immediates); + memcpy(&v->immediates, &original->immediates, sizeof(v->immediates)); + v->num_immediates = original->num_immediates; /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */ coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type); @@ -3678,7 +3788,7 @@ get_bitmap_visitor(struct st_fragment_program *fp, inst->sampler = samplerIndex; inst->tex_target = TEXTURE_2D_INDEX; - prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0); + prog->InputsRead |= FRAG_BIT_TEX0; prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */ v->samplers_used |= (1 << samplerIndex); @@ -3692,6 +3802,7 @@ get_bitmap_visitor(struct st_fragment_program *fp, * new visitor. */ foreach_iter(exec_list_iterator, iter, original->instructions) { glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + glsl_to_tgsi_instruction *newinst; st_src_reg src_regs[3]; if (inst->dst.file == PROGRAM_OUTPUT) @@ -3700,16 +3811,15 @@ get_bitmap_visitor(struct st_fragment_program *fp, for (int i=0; i<3; i++) { src_regs[i] = inst->src[i]; if (src_regs[i].file == PROGRAM_INPUT) - prog->InputsRead |= (1 << src_regs[i].index); + prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index); } - v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); + newinst = v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); + newinst->tex_target = inst->tex_target; } /* Make modifications to fragment program info. */ prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters); - prog->Attributes = _mesa_clone_parameter_list(original->prog->Attributes); - prog->Varying = _mesa_clone_parameter_list(original->prog->Varying); count_resources(v, prog); fp->glsl_to_tgsi = v; } @@ -3735,12 +3845,6 @@ struct st_translate { struct ureg_src samplers[PIPE_MAX_SAMPLERS]; struct ureg_src systemValues[SYSTEM_VALUE_MAX]; - /* Extra info for handling point size clamping in vertex shader */ - struct ureg_dst pointSizeResult; /**< Actual point size output register */ - struct ureg_src pointSizeConst; /**< Point size range constant register */ - GLint pointSizeOutIndex; /**< Temp point size output register */ - GLboolean prevInstWrotePointSize; - const GLuint *inputMapping; const GLuint *outputMapping; @@ -3768,6 +3872,7 @@ struct st_translate { /** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */ static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = { TGSI_SEMANTIC_FACE, + TGSI_SEMANTIC_VERTEXID, TGSI_SEMANTIC_INSTANCEID }; @@ -3822,32 +3927,20 @@ static void set_insn_start(struct st_translate *t, unsigned start) */ static struct ureg_src emit_immediate(struct st_translate *t, - struct gl_program_parameter_list *params, - int index) + gl_constant_value values[4], + int type, int size) { struct ureg_program *ureg = t->ureg; - switch(params->Parameters[index].DataType) + switch(type) { case GL_FLOAT: - case GL_FLOAT_VEC2: - case GL_FLOAT_VEC3: - case GL_FLOAT_VEC4: - return ureg_DECL_immediate(ureg, (float *)params->ParameterValues[index], 4); + return ureg_DECL_immediate(ureg, &values[0].f, size); case GL_INT: - case GL_INT_VEC2: - case GL_INT_VEC3: - case GL_INT_VEC4: - return ureg_DECL_immediate_int(ureg, (int *)params->ParameterValues[index], 4); + return ureg_DECL_immediate_int(ureg, &values[0].i, size); case GL_UNSIGNED_INT: - case GL_UNSIGNED_INT_VEC2: - case GL_UNSIGNED_INT_VEC3: - case GL_UNSIGNED_INT_VEC4: case GL_BOOL: - case GL_BOOL_VEC2: - case GL_BOOL_VEC3: - case GL_BOOL_VEC4: - return ureg_DECL_immediate_uint(ureg, (unsigned *)params->ParameterValues[index], 4); + return ureg_DECL_immediate_uint(ureg, &values[0].u, size); default: assert(!"should not get here - type must be float, int, uint, or bool"); return ureg_src_undef(); @@ -3868,14 +3961,11 @@ dst_register(struct st_translate *t, case PROGRAM_TEMPORARY: if (ureg_dst_is_undef(t->temps[index])) - t->temps[index] = ureg_DECL_temporary(t->ureg); + t->temps[index] = ureg_DECL_local_temporary(t->ureg); return t->temps[index]; case PROGRAM_OUTPUT: - if (t->procType == TGSI_PROCESSOR_VERTEX && index == VERT_RESULT_PSIZ) - t->prevInstWrotePointSize = GL_TRUE; - if (t->procType == TGSI_PROCESSOR_VERTEX) assert(index < VERT_RESULT_MAX); else if (t->procType == TGSI_PROCESSOR_FRAGMENT) @@ -3912,7 +4002,7 @@ src_register(struct st_translate *t, assert(index >= 0); assert(index < Elements(t->temps)); if (ureg_dst_is_undef(t->temps[index])) - t->temps[index] = ureg_DECL_temporary(t->ureg); + t->temps[index] = ureg_DECL_local_temporary(t->ureg); return ureg_src(t->temps[index]); case PROGRAM_NAMED_PARAM: @@ -3958,7 +4048,7 @@ src_register(struct st_translate *t, static struct ureg_dst translate_dst(struct st_translate *t, const st_dst_reg *dst_reg, - bool saturate) + bool saturate, bool clamp_color) { struct ureg_dst dst = dst_register(t, dst_reg->file, @@ -3968,6 +4058,27 @@ translate_dst(struct st_translate *t, if (saturate) dst = ureg_saturate(dst); + else if (clamp_color && dst_reg->file == PROGRAM_OUTPUT) { + /* Clamp colors for ARB_color_buffer_float. */ + switch (t->procType) { + case TGSI_PROCESSOR_VERTEX: + /* XXX if the geometry shader is present, this must be done there + * instead of here. */ + if (dst_reg->index == VERT_RESULT_COL0 || + dst_reg->index == VERT_RESULT_COL1 || + dst_reg->index == VERT_RESULT_BFC0 || + dst_reg->index == VERT_RESULT_BFC1) { + dst = ureg_saturate(dst); + } + break; + + case TGSI_PROCESSOR_FRAGMENT: + if (dst_reg->index >= FRAG_RESULT_COLOR) { + dst = ureg_saturate(dst); + } + break; + } + } if (dst_reg->reladdr != NULL) dst = ureg_dst_indirect(dst, ureg_src(t->address[0])); @@ -4018,14 +4129,34 @@ translate_src(struct st_translate *t, const st_src_reg *src_reg) return src; } +static struct tgsi_texture_offset +translate_tex_offset(struct st_translate *t, + const struct tgsi_texture_offset *in_offset) +{ + struct tgsi_texture_offset offset; + + assert(in_offset->File == PROGRAM_IMMEDIATE); + + offset.File = TGSI_FILE_IMMEDIATE; + offset.Index = in_offset->Index; + offset.SwizzleX = in_offset->SwizzleX; + offset.SwizzleY = in_offset->SwizzleY; + offset.SwizzleZ = in_offset->SwizzleZ; + + return offset; +} + static void compile_tgsi_instruction(struct st_translate *t, - const struct glsl_to_tgsi_instruction *inst) + const glsl_to_tgsi_instruction *inst, + bool clamp_dst_color_output) { struct ureg_program *ureg = t->ureg; GLuint i; struct ureg_dst dst[1]; struct ureg_src src[4]; + struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET]; + unsigned num_dst; unsigned num_src; @@ -4035,7 +4166,8 @@ compile_tgsi_instruction(struct st_translate *t, if (num_dst) dst[0] = translate_dst(t, &inst->dst, - inst->saturate); + inst->saturate, + clamp_dst_color_output); for (i = 0; i < num_src; i++) src[i] = translate_src(t, &inst->src[i]); @@ -4059,11 +4191,17 @@ compile_tgsi_instruction(struct st_translate *t, case TGSI_OPCODE_TXD: case TGSI_OPCODE_TXL: case TGSI_OPCODE_TXP: + case TGSI_OPCODE_TXQ: + case TGSI_OPCODE_TXF: src[num_src++] = t->samplers[inst->sampler]; + for (i = 0; i < inst->tex_offset_num_offset; i++) { + texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i]); + } ureg_tex_insn(ureg, inst->op, dst, num_dst, - translate_texture_target(inst->tex_target, inst->tex_shadow), + st_translate_texture_target(inst->tex_target, inst->tex_shadow), + texoffsets, inst->tex_offset_num_offset, src, num_src); return; @@ -4082,37 +4220,15 @@ compile_tgsi_instruction(struct st_translate *t, } /** - * Emit the TGSI instructions to adjust the WPOS pixel center convention - * Basically, add (adjX, adjY) to the fragment position. - */ -static void -emit_adjusted_wpos(struct st_translate *t, - const struct gl_program *program, - float adjX, float adjY) -{ - struct ureg_program *ureg = t->ureg; - struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg); - struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; - - /* Note that we bias X and Y and pass Z and W through unchanged. - * The shader might also use gl_FragCoord.w and .z. - */ - ureg_ADD(ureg, wpos_temp, wpos_input, - ureg_imm4f(ureg, adjX, adjY, 0.0f, 0.0f)); - - t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); -} - - -/** - * Emit the TGSI instructions for inverting the WPOS y coordinate. + * Emit the TGSI instructions for inverting and adjusting WPOS. * This code is unavoidable because it also depends on whether * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM). */ static void -emit_wpos_inversion(struct st_translate *t, - const struct gl_program *program, - bool invert) +emit_wpos_adjustment( struct st_translate *t, + const struct gl_program *program, + boolean invert, + GLfloat adjX, GLfloat adjY[2]) { struct ureg_program *ureg = t->ureg; @@ -4131,35 +4247,55 @@ emit_wpos_inversion(struct st_translate *t, unsigned wposTransConst = _mesa_add_state_reference(program->Parameters, wposTransformState); - struct ureg_src wpostrans = ureg_DECL_constant(ureg, wposTransConst); - struct ureg_dst wpos_temp; + struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst ); + struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg ); struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; - /* MOV wpos_temp, input[wpos] - */ - if (wpos_input.File == TGSI_FILE_TEMPORARY) - wpos_temp = ureg_dst(wpos_input); - else { - wpos_temp = ureg_DECL_temporary(ureg); - ureg_MOV(ureg, wpos_temp, wpos_input); + /* First, apply the coordinate shift: */ + if (adjX || adjY[0] || adjY[1]) { + if (adjY[0] != adjY[1]) { + /* Adjust the y coordinate by adjY[1] or adjY[0] respectively + * depending on whether inversion is actually going to be applied + * or not, which is determined by testing against the inversion + * state variable used below, which will be either +1 or -1. + */ + struct ureg_dst adj_temp = ureg_DECL_local_temporary(ureg); + + ureg_CMP(ureg, adj_temp, + ureg_scalar(wpostrans, invert ? 2 : 0), + ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f), + ureg_imm4f(ureg, adjX, adjY[1], 0.0f, 0.0f)); + ureg_ADD(ureg, wpos_temp, wpos_input, ureg_src(adj_temp)); + } else { + ureg_ADD(ureg, wpos_temp, wpos_input, + ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f)); + } + wpos_input = ureg_src(wpos_temp); + } else { + /* MOV wpos_temp, input[wpos] + */ + ureg_MOV( ureg, wpos_temp, wpos_input ); } + /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be + * inversion/identity, or the other way around if we're drawing to an FBO. + */ if (invert) { /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy */ - ureg_MAD(ureg, - ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y), - wpos_input, - ureg_scalar(wpostrans, 0), - ureg_scalar(wpostrans, 1)); + ureg_MAD( ureg, + ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), + wpos_input, + ureg_scalar(wpostrans, 0), + ureg_scalar(wpostrans, 1)); } else { /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww */ - ureg_MAD(ureg, - ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y), - wpos_input, - ureg_scalar(wpostrans, 2), - ureg_scalar(wpostrans, 3)); + ureg_MAD( ureg, + ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), + wpos_input, + ureg_scalar(wpostrans, 2), + ureg_scalar(wpostrans, 3)); } /* Use wpos_temp as position input from here on: @@ -4180,8 +4316,37 @@ emit_wpos(struct st_context *st, const struct gl_fragment_program *fp = (const struct gl_fragment_program *) program; struct pipe_screen *pscreen = st->pipe->screen; + GLfloat adjX = 0.0f; + GLfloat adjY[2] = { 0.0f, 0.0f }; boolean invert = FALSE; + /* Query the pixel center conventions supported by the pipe driver and set + * adjX, adjY to help out if it cannot handle the requested one internally. + * + * The bias of the y-coordinate depends on whether y-inversion takes place + * (adjY[1]) or not (adjY[0]), which is in turn dependent on whether we are + * drawing to an FBO (causes additional inversion), and whether the the pipe + * driver origin and the requested origin differ (the latter condition is + * stored in the 'invert' variable). + * + * For height = 100 (i = integer, h = half-integer, l = lower, u = upper): + * + * center shift only: + * i -> h: +0.5 + * h -> i: -0.5 + * + * inversion only: + * l,i -> u,i: ( 0.0 + 1.0) * -1 + 100 = 99 + * l,h -> u,h: ( 0.5 + 0.0) * -1 + 100 = 99.5 + * u,i -> l,i: (99.0 + 1.0) * -1 + 100 = 0 + * u,h -> l,h: (99.5 + 0.0) * -1 + 100 = 0.5 + * + * inversion and center shift: + * l,i -> u,h: ( 0.0 + 0.5) * -1 + 100 = 99.5 + * l,h -> u,i: ( 0.5 + 0.5) * -1 + 100 = 99 + * u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5 + * u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0 + */ if (fp->OriginUpperLeft) { /* Fragment shader wants origin in upper-left */ if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) { @@ -4209,12 +4374,17 @@ emit_wpos(struct st_context *st, if (fp->PixelCenterInteger) { /* Fragment shader wants pixel center integer */ - if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) + if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { /* the driver supports pixel center integer */ + adjY[1] = 1.0f; ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); - else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) + } + else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) { /* the driver supports pixel center half integer, need to bias X,Y */ - emit_adjusted_wpos(t, program, 0.5f, invert ? 0.5f : -0.5f); + adjX = -0.5f; + adjY[0] = -0.5f; + adjY[1] = 0.5f; + } else assert(0); } @@ -4225,8 +4395,8 @@ emit_wpos(struct st_context *st, } else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { /* the driver supports pixel center integer, need to bias X,Y */ + adjX = adjY[0] = adjY[1] = 0.5f; ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); - emit_adjusted_wpos(t, program, 0.5f, invert ? -0.5f : 0.5f); } else assert(0); @@ -4234,7 +4404,7 @@ emit_wpos(struct st_context *st, /* we invert after adjustment so that we avoid the MOV to temporary, * and reuse the adjustment ADD instead */ - emit_wpos_inversion(t, program, invert); + emit_wpos_adjustment(t, program, invert, adjX, adjY); } /** @@ -4303,24 +4473,37 @@ st_translate_program( const GLuint outputMapping[], const ubyte outputSemanticName[], const ubyte outputSemanticIndex[], - boolean passthrough_edgeflags) + boolean passthrough_edgeflags, + boolean clamp_color) { - struct st_translate translate, *t; + struct st_translate *t; unsigned i; enum pipe_error ret = PIPE_OK; assert(numInputs <= Elements(t->inputs)); assert(numOutputs <= Elements(t->outputs)); - t = &translate; + t = CALLOC_STRUCT(st_translate); + if (!t) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto out; + } + memset(t, 0, sizeof *t); t->procType = procType; t->inputMapping = inputMapping; t->outputMapping = outputMapping; t->ureg = ureg; - t->pointSizeOutIndex = -1; - t->prevInstWrotePointSize = GL_FALSE; + + if (program->shader_program) { + for (i = 0; i < program->shader_program->NumUserUniformStorage; i++) { + struct gl_uniform_storage *const storage = + &program->shader_program->UniformStorage[i]; + + _mesa_uniform_detach_all_driver_storage(storage); + } + } /* * Declare input attributes. @@ -4367,7 +4550,8 @@ st_translate_program( break; default: assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR"); - return PIPE_ERROR_BAD_INPUT; + ret = PIPE_ERROR_BAD_INPUT; + goto out; } } } @@ -4393,27 +4577,16 @@ st_translate_program( } for (i = 0; i < numOutputs; i++) { - t->outputs[i] = ureg_DECL_output(ureg, - outputSemanticName[i], - outputSemanticIndex[i]); - if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && proginfo->Id) { - /* Writing to the point size result register requires special - * handling to implement clamping. - */ - static const gl_state_index pointSizeClampState[STATE_LENGTH] - = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 }; - /* XXX: note we are modifying the incoming shader here! Need to - * do this before emitting the constant decls below, or this - * will be missed. - */ - unsigned pointSizeClampConst = - _mesa_add_state_reference(proginfo->Parameters, - pointSizeClampState); - struct ureg_dst psizregtemp = ureg_DECL_temporary(ureg); - t->pointSizeConst = ureg_DECL_constant(ureg, pointSizeClampConst); - t->pointSizeResult = t->outputs[i]; - t->pointSizeOutIndex = i; - t->outputs[i] = psizregtemp; + if (outputSemanticName[i] == TGSI_SEMANTIC_CLIPDIST) { + int mask = ((1 << (program->num_clip_distances - 4*outputSemanticIndex[i])) - 1) & TGSI_WRITEMASK_XYZW; + t->outputs[i] = ureg_DECL_output_masked(ureg, + outputSemanticName[i], + outputSemanticIndex[i], + mask); + } else { + t->outputs[i] = ureg_DECL_output(ureg, + outputSemanticName[i], + outputSemanticIndex[i]); } } if (passthrough_edgeflags) @@ -4449,7 +4622,7 @@ st_translate_program( */ for (i = 0; i < (unsigned)program->next_temp; i++) { /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */ - t->temps[i] = ureg_DECL_temporary(t->ureg); + t->temps[i] = ureg_DECL_local_temporary(t->ureg); } } @@ -4483,7 +4656,10 @@ st_translate_program( if (program->indirect_addr_consts) t->constants[i] = ureg_DECL_constant(ureg, i); else - t->constants[i] = emit_immediate(t, proginfo->Parameters, i); + t->constants[i] = emit_immediate(t, + proginfo->Parameters->ParameterValues[i], + proginfo->Parameters->Parameters[i].DataType, + 4); break; default: break; @@ -4493,15 +4669,18 @@ st_translate_program( /* Emit immediate values. */ - t->immediates = (struct ureg_src *)CALLOC(program->immediates->NumParameters * sizeof(struct ureg_src)); + t->immediates = (struct ureg_src *)CALLOC(program->num_immediates * sizeof(struct ureg_src)); if (t->immediates == NULL) { ret = PIPE_ERROR_OUT_OF_MEMORY; goto out; } - for (i = 0; i < program->immediates->NumParameters; i++) { - assert(program->immediates->Parameters[i].Type == PROGRAM_IMMEDIATE); - t->immediates[i] = emit_immediate(t, program->immediates, i); + i = 0; + foreach_iter(exec_list_iterator, iter, program->immediates) { + immediate_storage *imm = (immediate_storage *)iter.get(); + assert(i < program->num_immediates); + t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size); } + assert(i == program->num_immediates); /* texture samplers */ for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { @@ -4514,26 +4693,8 @@ st_translate_program( */ foreach_iter(exec_list_iterator, iter, program->instructions) { set_insn_start(t, ureg_get_instruction_number(ureg)); - compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get()); - - if (t->prevInstWrotePointSize && proginfo->Id) { - /* The previous instruction wrote to the (fake) vertex point size - * result register. Now we need to clamp that value to the min/max - * point size range, putting the result into the real point size - * register. - * Note that we can't do this easily at the end of program due to - * possible early return. - */ - set_insn_start(t, ureg_get_instruction_number(ureg)); - ureg_MAX(t->ureg, - ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X), - ureg_src(t->outputs[t->pointSizeOutIndex]), - ureg_swizzle(t->pointSizeConst, 1,1,1,1)); - ureg_MIN(t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X), - ureg_src(t->outputs[t->pointSizeOutIndex]), - ureg_swizzle(t->pointSizeConst, 2,2,2,2)); - } - t->prevInstWrotePointSize = GL_FALSE; + compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get(), + clamp_color); } /* Fix up all emitted labels: @@ -4543,14 +4704,32 @@ st_translate_program( t->insn[t->labels[i].branch_target]); } + if (program->shader_program) { + /* This has to be done last. Any operation the can cause + * prog->ParameterValues to get reallocated (e.g., anything that adds a + * program constant) has to happen before creating this linkage. + */ + for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { + if (program->shader_program->_LinkedShaders[i] == NULL) + continue; + + _mesa_associate_uniform_storage(ctx, program->shader_program, + program->shader_program->_LinkedShaders[i]->Program->Parameters); + } + } + out: - FREE(t->insn); - FREE(t->labels); - FREE(t->constants); - FREE(t->immediates); + if (t) { + FREE(t->insn); + FREE(t->labels); + FREE(t->constants); + FREE(t->immediates); + + if (t->error) { + debug_printf("%s: translate error flag set\n", __FUNCTION__); + } - if (t->error) { - debug_printf("%s: translate error flag set\n", __FUNCTION__); + FREE(t); } return ret; @@ -4564,7 +4743,8 @@ out: static struct gl_program * get_mesa_program(struct gl_context *ctx, struct gl_shader_program *shader_program, - struct gl_shader *shader) + struct gl_shader *shader, + int num_clip_distances) { glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor(); struct gl_program *prog; @@ -4598,15 +4778,19 @@ get_mesa_program(struct gl_context *ctx, if (!prog) return NULL; prog->Parameters = _mesa_new_parameter_list(); - prog->Varying = _mesa_new_parameter_list(); - prog->Attributes = _mesa_new_parameter_list(); v->ctx = ctx; v->prog = prog; v->shader_program = shader_program; v->options = options; v->glsl_version = ctx->Const.GLSLVersion; + v->native_integers = ctx->Const.NativeIntegers; + v->num_clip_distances = num_clip_distances; + + _mesa_generate_parameters_list_for_uniforms(shader_program, shader, + prog->Parameters); - add_uniforms_to_parameters_list(shader_program, shader, prog); + /* Remove reads from output registers. */ + lower_output_reads(shader->ir); /* Emit intermediate IR for main(). */ visit_exec_list(shader->ir, v); @@ -4654,11 +4838,6 @@ get_mesa_program(struct gl_context *ctx, } #endif - /* Remove reads to output registers, and to varyings in vertex shaders. */ - v->remove_output_reads(PROGRAM_OUTPUT); - if (target == GL_VERTEX_PROGRAM_ARB) - v->remove_output_reads(PROGRAM_VARYING); - /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */ v->simplify_cmp(); v->copy_propagate(); @@ -4685,18 +4864,26 @@ get_mesa_program(struct gl_context *ctx, _mesa_print_ir(shader->ir, NULL); printf("\n"); printf("\n"); + fflush(stdout); } prog->Instructions = NULL; prog->NumInstructions = 0; - do_set_program_inouts(shader->ir, prog); + do_set_program_inouts(shader->ir, prog, shader->Type == GL_FRAGMENT_SHADER); count_resources(v, prog); - check_resources(ctx, shader_program, v, prog); - _mesa_reference_program(ctx, &shader->Program, prog); + /* This has to be done last. Any operation the can cause + * prog->ParameterValues to get reallocated (e.g., anything that adds a + * program constant) has to happen before creating this linkage. + */ + _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters); + if (!shader_program->LinkStatus) { + return NULL; + } + struct st_vertex_program *stvp; struct st_fragment_program *stfp; struct st_geometry_program *stgp; @@ -4722,6 +4909,25 @@ get_mesa_program(struct gl_context *ctx, return prog; } +/** + * Searches through the IR for a declaration of gl_ClipDistance and returns the + * declared size of the gl_ClipDistance array. Returns 0 if gl_ClipDistance is + * not declared in the IR. + */ +int get_clip_distance_size(exec_list *ir) +{ + foreach_iter (exec_list_iterator, iter, *ir) { + ir_instruction *inst = (ir_instruction *)iter.get(); + ir_variable *var = inst->as_variable(); + if (var == NULL) continue; + if (!strcmp(var->name, "gl_ClipDistance")) { + return var->type->length; + } + } + + return 0; +} + extern "C" { struct gl_shader * @@ -4760,6 +4966,7 @@ st_new_shader_program(struct gl_context *ctx, GLuint name) GLboolean st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) { + int num_clip_distances[MESA_SHADER_TYPES]; assert(prog->LinkStatus); for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { @@ -4771,25 +4978,38 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) const struct gl_shader_compiler_options *options = &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)]; + /* We have to determine the length of the gl_ClipDistance array before + * the array is lowered to two vec4s by lower_clip_distance(). + */ + num_clip_distances[i] = get_clip_distance_size(ir); + do { + unsigned what_to_lower = MOD_TO_FRACT | DIV_TO_MUL_RCP | + EXP_TO_EXP2 | LOG_TO_LOG2; + if (options->EmitNoPow) + what_to_lower |= POW_TO_EXP2; + if (!ctx->Const.NativeIntegers) + what_to_lower |= INT_DIV_TO_MUL_RCP; + progress = false; /* Lowering */ do_mat_op_to_vec(ir); - lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2 - | LOG_TO_LOG2 - | ((options->EmitNoPow) ? POW_TO_EXP2 : 0))); + lower_instructions(ir, what_to_lower); progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress; - progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress; + progress = do_common_optimization(ir, true, true, + options->MaxUnrollIterations) + || progress; - progress = lower_quadop_vector(ir, true) || progress; + progress = lower_quadop_vector(ir, false) || progress; + progress = lower_clip_distance(ir) || progress; - if (options->EmitNoIfs) { + if (options->MaxIfDepth == 0) progress = lower_discard(ir) || progress; - progress = lower_if_to_cond_assign(ir) || progress; - } + + progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress; if (options->EmitNoNoise) progress = lower_noise(ir) || progress; @@ -4819,32 +5039,22 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) if (prog->_LinkedShaders[i] == NULL) continue; - linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]); + linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i], + num_clip_distances[i]); if (linked_prog) { - bool ok = true; - - switch (prog->_LinkedShaders[i]->Type) { - case GL_VERTEX_SHADER: - _mesa_reference_vertprog(ctx, &prog->VertexProgram, - (struct gl_vertex_program *)linked_prog); - ok = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB, - linked_prog); - break; - case GL_FRAGMENT_SHADER: - _mesa_reference_fragprog(ctx, &prog->FragmentProgram, - (struct gl_fragment_program *)linked_prog); - ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB, - linked_prog); - break; - case GL_GEOMETRY_SHADER: - _mesa_reference_geomprog(ctx, &prog->GeometryProgram, - (struct gl_geometry_program *)linked_prog); - ok = ctx->Driver.ProgramStringNotify(ctx, GL_GEOMETRY_PROGRAM_NV, - linked_prog); - break; - } - if (!ok) { + static const GLenum targets[] = { + GL_VERTEX_PROGRAM_ARB, + GL_FRAGMENT_PROGRAM_ARB, + GL_GEOMETRY_PROGRAM_NV + }; + + _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program, + linked_prog); + if (!ctx->Driver.ProgramStringNotify(ctx, targets[i], linked_prog)) { + _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program, + NULL); + _mesa_reference_program(ctx, &linked_prog, NULL); return GL_FALSE; } } @@ -4855,53 +5065,28 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) return GL_TRUE; } - -/** - * Link a GLSL shader program. Called via glLinkProgram(). - */ void -st_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) +st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi, + const GLuint outputMapping[], + struct pipe_stream_output_info *so) { - unsigned int i; - - _mesa_clear_shader_program_data(ctx, prog); - - prog->LinkStatus = GL_TRUE; - - for (i = 0; i < prog->NumShaders; i++) { - if (!prog->Shaders[i]->CompileStatus) { - fail_link(prog, "linking with uncompiled shader"); - prog->LinkStatus = GL_FALSE; - } - } - - prog->Varying = _mesa_new_parameter_list(); - _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL); - _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL); - _mesa_reference_geomprog(ctx, &prog->GeometryProgram, NULL); - - if (prog->LinkStatus) { - link_shaders(ctx, prog); - } - - if (prog->LinkStatus) { - if (!ctx->Driver.LinkShader(ctx, prog)) { - prog->LinkStatus = GL_FALSE; - } + unsigned i; + struct gl_transform_feedback_info *info = + &glsl_to_tgsi->shader_program->LinkedTransformFeedback; + + for (i = 0; i < info->NumOutputs; i++) { + so->output[i].register_index = + outputMapping[info->Outputs[i].OutputRegister]; + so->output[i].start_component = info->Outputs[i].ComponentOffset; + so->output[i].num_components = info->Outputs[i].NumComponents; + so->output[i].output_buffer = info->Outputs[i].OutputBuffer; + so->output[i].dst_offset = info->Outputs[i].DstOffset; } - set_uniform_initializers(ctx, prog); - - if (ctx->Shader.Flags & GLSL_DUMP) { - if (!prog->LinkStatus) { - printf("GLSL shader program %d failed to link\n", prog->Name); - } - - if (prog->InfoLog && prog->InfoLog[0] != 0) { - printf("GLSL shader program %d info log:\n", prog->Name); - printf("%s\n", prog->InfoLog); - } + for (i = 0; i < PIPE_MAX_SO_BUFFERS; i++) { + so->stride[i] = info->BufferStride[i]; } + so->num_outputs = info->NumOutputs; } } /* extern "C" */