X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fstate_tracker%2Fst_glsl_to_tgsi.cpp;h=a4e2c8da58697e6da6366c2284409d86af06e40b;hb=edcba62655635e0c1d355e5e3f7c24e895d34005;hp=a1ee24f66931bd0360fb6ba4ec20940da7ac2404;hpb=088494aa032bf32db8b67f1fb07e5797603a473d;p=mesa.git diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index a1ee24f6693..a4e2c8da586 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -74,21 +74,12 @@ extern "C" { (1 << PROGRAM_CONSTANT) | \ (1 << PROGRAM_UNIFORM)) -/** - * Maximum number of temporary registers. - * - * It is too big for stack allocated arrays -- it will cause stack overflow on - * Windows and likely Mac OS X. - */ -#define MAX_TEMPS 4096 - /** * Maximum number of arrays */ #define MAX_ARRAYS 256 -/* will be 4 for GLSL 4.00 */ -#define MAX_GLSL_TEXTURE_OFFSET 1 +#define MAX_GLSL_TEXTURE_OFFSET 4 class st_src_reg; class st_dst_reg; @@ -241,15 +232,17 @@ public: unsigned op; st_dst_reg dst; - st_src_reg src[3]; + st_src_reg src[4]; /** Pointer to the ir source this tree came from for debugging */ ir_instruction *ir; GLboolean cond_update; bool saturate; - int sampler; /**< sampler index */ + st_src_reg sampler; /**< sampler register */ + int sampler_array_size; /**< 1-based size of sampler array, 1 if not array */ int tex_target; /**< One of TEXTURE_*_INDEX */ GLboolean tex_shadow; - struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET]; + + st_src_reg tex_offsets[MAX_GLSL_TEXTURE_OFFSET]; unsigned tex_offset_num_offset; int dead_mask; /**< Used in dead code elimination */ @@ -325,6 +318,7 @@ public: struct gl_context *ctx; struct gl_program *prog; struct gl_shader_program *shader_program; + struct gl_shader *shader; struct gl_shader_compiler_options *options; int next_temp; @@ -410,7 +404,12 @@ public: glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0, st_src_reg src1, st_src_reg src2); - + + glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, + st_dst_reg dst, + st_src_reg src0, st_src_reg src1, + st_src_reg src2, st_src_reg src3); + unsigned get_opcode(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0, st_src_reg src1); @@ -439,7 +438,6 @@ public: int mul_operand); bool try_emit_mad_for_and_not(ir_expression *ir, int mul_operand); - bool try_emit_sat(ir_expression *ir); void emit_swz(ir_expression *ir); @@ -454,8 +452,7 @@ public: int get_last_temp_write(int index); void copy_propagate(void); - void eliminate_dead_code(void); - int eliminate_dead_code_advanced(void); + int eliminate_dead_code(void); void merge_registers(void); void renumber_registers(void); @@ -471,6 +468,7 @@ static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_T static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 0); static st_dst_reg address_reg2 = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 1); +static st_dst_reg sampler_reladdr = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 2); static void fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3); @@ -523,8 +521,9 @@ num_inst_src_regs(unsigned opcode) glsl_to_tgsi_instruction * glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, - st_dst_reg dst, - st_src_reg src0, st_src_reg src1, st_src_reg src2) + st_dst_reg dst, + st_src_reg src0, st_src_reg src1, + st_src_reg src2, st_src_reg src3) { glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction(); int num_reladdr = 0, i; @@ -539,7 +538,9 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, num_reladdr += src0.reladdr != NULL || src0.reladdr2 != NULL; num_reladdr += src1.reladdr != NULL || src1.reladdr2 != NULL; num_reladdr += src2.reladdr != NULL || src2.reladdr2 != NULL; + num_reladdr += src3.reladdr != NULL || src3.reladdr2 != NULL; + reladdr_to_temp(ir, &src3, &num_reladdr); reladdr_to_temp(ir, &src2, &num_reladdr); reladdr_to_temp(ir, &src1, &num_reladdr); reladdr_to_temp(ir, &src0, &num_reladdr); @@ -555,6 +556,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, inst->src[0] = src0; inst->src[1] = src1; inst->src[2] = src2; + inst->src[3] = src3; inst->ir = ir; inst->dead_mask = 0; @@ -576,7 +578,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, } } else { - for (i=0; i<3; i++) { + for (i=0; i<4; i++) { if(inst->src[i].reladdr) { switch(inst->src[i].file) { case PROGRAM_STATE_VAR: @@ -599,12 +601,19 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, return inst; } +glsl_to_tgsi_instruction * +glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, + st_dst_reg dst, st_src_reg src0, + st_src_reg src1, st_src_reg src2) +{ + return emit(ir, op, dst, src0, src1, src2, undef_src); +} glsl_to_tgsi_instruction * glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0, st_src_reg src1) { - return emit(ir, op, dst, src0, src1, undef_src); + return emit(ir, op, dst, src0, src1, undef_src, undef_src); } glsl_to_tgsi_instruction * @@ -612,13 +621,13 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0) { assert(dst.writemask != 0); - return emit(ir, op, dst, src0, undef_src, undef_src); + return emit(ir, op, dst, src0, undef_src, undef_src, undef_src); } glsl_to_tgsi_instruction * glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op) { - return emit(ir, op, undef_dst, undef_src, undef_src, undef_src); + return emit(ir, op, undef_dst, undef_src, undef_src, undef_src, undef_src); } /** @@ -631,7 +640,10 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, st_src_reg src0, st_src_reg src1) { int type = GLSL_TYPE_FLOAT; - + + if (op == TGSI_OPCODE_MOV) + return op; + assert(src0.type != GLSL_TYPE_ARRAY); assert(src0.type != GLSL_TYPE_STRUCT); assert(src1.type != GLSL_TYPE_ARRAY); @@ -686,7 +698,10 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, case2fi(SSG, ISSG); case3(ABS, IABS, IABS); - + + case2iu(IBFE, UBFE); + case2iu(IMSB, UMSB); + case2iu(IMUL_HI, UMUL_HI); default: break; } @@ -897,9 +912,7 @@ glsl_to_tgsi_visitor::add_constant(gl_register_file file, /* Search immediate storage to see if we already have an identical * immediate that we can use instead of adding a duplicate entry. */ - foreach_iter(exec_list_iterator, iter, this->immediates) { - entry = (immediate_storage *)iter.get(); - + foreach_in_list(immediate_storage, entry, &this->immediates) { if (entry->size == size && entry->type == datatype && !memcmp(entry->values, values, size * sizeof(gl_constant_value))) { @@ -983,6 +996,7 @@ type_size(const struct glsl_type *type) } return size; case GLSL_TYPE_SAMPLER: + case GLSL_TYPE_IMAGE: /* Samplers take up one slot in UNIFORMS[], but they're baked in * at link time. */ @@ -1038,11 +1052,7 @@ variable_storage * glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var) { - variable_storage *entry; - - foreach_iter(exec_list_iterator, iter, this->variables) { - entry = (variable_storage *)iter.get(); - + foreach_in_list(variable_storage, entry, &this->variables) { if (entry->var == var) return entry; } @@ -1056,14 +1066,14 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) if (strcmp(ir->name, "gl_FragCoord") == 0) { struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; - fp->OriginUpperLeft = ir->origin_upper_left; - fp->PixelCenterInteger = ir->pixel_center_integer; + fp->OriginUpperLeft = ir->data.origin_upper_left; + fp->PixelCenterInteger = ir->data.pixel_center_integer; } - if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) { + if (ir->data.mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) { unsigned int i; - const ir_state_slot *const slots = ir->state_slots; - assert(ir->state_slots != NULL); + const ir_state_slot *const slots = ir->get_state_slots(); + assert(slots != NULL); /* Check if this statevar's setup in the STATE file exactly * matches how we'll want to reference it as a @@ -1071,7 +1081,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) * temporary storage and hope that it'll get copy-propagated * out. */ - for (i = 0; i < ir->num_state_slots; i++) { + for (i = 0; i < ir->get_num_state_slots(); i++) { if (slots[i].swizzle != SWIZZLE_XYZW) { break; } @@ -1079,7 +1089,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) variable_storage *storage; st_dst_reg dst; - if (i == ir->num_state_slots) { + if (i == ir->get_num_state_slots()) { /* We'll set the index later. */ storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1); this->variables.push_tail(storage); @@ -1090,7 +1100,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) * of the type. However, this had better match the number of state * elements that we're going to copy into the new temporary. */ - assert((int) ir->num_state_slots == type_size(ir->type)); + assert((int) ir->get_num_state_slots() == type_size(ir->type)); dst = st_dst_reg(get_temp(ir->type)); @@ -1100,7 +1110,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) } - for (unsigned int i = 0; i < ir->num_state_slots; i++) { + for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) { int index = _mesa_add_state_reference(this->prog->Parameters, (gl_state_index *)slots[i].tokens); @@ -1125,7 +1135,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) } if (storage->file == PROGRAM_TEMPORARY && - dst.index != storage->index + (int) ir->num_state_slots) { + dst.index != storage->index + (int) ir->get_num_state_slots()) { fail_link(this->shader_program, "failed to load builtin uniform `%s' (%d/%d regs loaded)\n", ir->name, dst.index - storage->index, @@ -1175,13 +1185,11 @@ glsl_to_tgsi_visitor::visit(ir_function *ir) const ir_function_signature *sig; exec_list empty; - sig = ir->matching_signature(NULL, &empty); + sig = ir->matching_signature(NULL, &empty, false); assert(sig); - foreach_iter(exec_list_iterator, iter, sig->body) { - ir_instruction *ir = (ir_instruction *)iter.get(); - + foreach_in_list(ir_instruction, ir, &sig->body) { ir->accept(this); } } @@ -1253,53 +1261,6 @@ glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operan return true; } -bool -glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir) -{ - /* Emit saturates in the vertex shader only if SM 3.0 is supported. - */ - if (this->prog->Target == GL_VERTEX_PROGRAM_ARB && - !st_context(this->ctx)->has_shader_model3) { - return false; - } - - ir_rvalue *sat_src = ir->as_rvalue_to_saturate(); - if (!sat_src) - return false; - - sat_src->accept(this); - st_src_reg src = this->result; - - /* If we generated an expression instruction into a temporary in - * processing the saturate's operand, apply the saturate to that - * instruction. Otherwise, generate a MOV to do the saturate. - * - * Note that we have to be careful to only do this optimization if - * the instruction in question was what generated src->result. For - * example, ir_dereference_array might generate a MUL instruction - * to create the reladdr, and return us a src reg using that - * reladdr. That MUL result is not the value we're trying to - * saturate. - */ - ir_expression *sat_src_expr = sat_src->as_expression(); - if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul || - sat_src_expr->operation == ir_binop_add || - sat_src_expr->operation == ir_binop_dot)) { - glsl_to_tgsi_instruction *new_inst; - new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); - new_inst->saturate = true; - } else { - this->result = get_temp(ir->type); - st_dst_reg result_dst = st_dst_reg(this->result); - result_dst.writemask = (1 << ir->type->vector_elements) - 1; - glsl_to_tgsi_instruction *inst; - inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src); - inst->saturate = true; - } - - return true; -} - void glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr) @@ -1339,16 +1300,13 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b)) */ - if (ir->operation == ir_binop_logic_and) { + if (!native_integers && ir->operation == ir_binop_logic_and) { if (try_emit_mad_for_and_not(ir, 1)) return; if (try_emit_mad_for_and_not(ir, 0)) return; } - if (try_emit_sat(ir)) - return; - if (ir->operation == ir_quadop_vector) assert(!"ir_quadop_vector should have been lowered"); @@ -1443,11 +1401,23 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) case ir_unop_cos_reduced: emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]); break; + case ir_unop_saturate: { + glsl_to_tgsi_instruction *inst; + inst = emit(ir, TGSI_OPCODE_MOV, result_dst, op[0]); + inst->saturate = true; + break; + } case ir_unop_dFdx: + case ir_unop_dFdx_coarse: emit(ir, TGSI_OPCODE_DDX, result_dst, op[0]); break; + case ir_unop_dFdx_fine: + emit(ir, TGSI_OPCODE_DDX_FINE, result_dst, op[0]); + break; case ir_unop_dFdy: + case ir_unop_dFdy_coarse: + case ir_unop_dFdy_fine: { /* The X component contains 1 or -1 depending on whether the framebuffer * is a FBO or the window system buffer, respectively. @@ -1468,7 +1438,8 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) st_src_reg temp = get_temp(glsl_type::vec4_type); emit(ir, TGSI_OPCODE_MUL, st_dst_reg(temp), transform_y, op[0]); - emit(ir, TGSI_OPCODE_DDY, result_dst, temp); + emit(ir, ir->operation == ir_unop_dFdy_fine ? + TGSI_OPCODE_DDY_FINE : TGSI_OPCODE_DDY, result_dst, temp); break; } @@ -1648,30 +1619,82 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) case ir_unop_any: { assert(ir->operands[0]->type->is_vector()); - /* After the dot-product, the value will be an integer on the - * range [0,4]. Zero stays zero, and positive values become 1.0. - */ - glsl_to_tgsi_instruction *const dp = - emit_dp(ir, result_dst, op[0], op[0], - ir->operands[0]->type->vector_elements); - if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB && - result_dst.type == GLSL_TYPE_FLOAT) { - /* The clamping to [0,1] can be done for free in the fragment - * shader with a saturate. - */ - dp->saturate = true; - } else if (result_dst.type == GLSL_TYPE_FLOAT) { - /* Negating the result of the dot-product gives values on the range - * [-4, 0]. Zero stays zero, and negative values become 1.0. This - * is achieved using SLT. - */ - st_src_reg slt_src = result_src; - slt_src.negate = ~slt_src.negate; - emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); - } - else { - /* Use SNE 0 if integers are being used as boolean values. */ - emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0)); + if (native_integers) { + int dst_swizzle = 0, op0_swizzle, i; + st_src_reg accum = op[0]; + + op0_swizzle = op[0].swizzle; + accum.swizzle = MAKE_SWIZZLE4(GET_SWZ(op0_swizzle, 0), + GET_SWZ(op0_swizzle, 0), + GET_SWZ(op0_swizzle, 0), + GET_SWZ(op0_swizzle, 0)); + for (i = 0; i < 4; i++) { + if (result_dst.writemask & (1 << i)) { + dst_swizzle = MAKE_SWIZZLE4(i, i, i, i); + break; + } + } + assert(i != 4); + assert(ir->operands[0]->type->is_boolean()); + + /* OR all the components together, since they should be either 0 or ~0 + */ + switch (ir->operands[0]->type->vector_elements) { + case 4: + op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(op0_swizzle, 3), + GET_SWZ(op0_swizzle, 3), + GET_SWZ(op0_swizzle, 3), + GET_SWZ(op0_swizzle, 3)); + emit(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]); + accum = st_src_reg(result_dst); + accum.swizzle = dst_swizzle; + /* fallthrough */ + case 3: + op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(op0_swizzle, 2), + GET_SWZ(op0_swizzle, 2), + GET_SWZ(op0_swizzle, 2), + GET_SWZ(op0_swizzle, 2)); + emit(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]); + accum = st_src_reg(result_dst); + accum.swizzle = dst_swizzle; + /* fallthrough */ + case 2: + op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(op0_swizzle, 1), + GET_SWZ(op0_swizzle, 1), + GET_SWZ(op0_swizzle, 1), + GET_SWZ(op0_swizzle, 1)); + emit(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]); + break; + default: + assert(!"Unexpected vector size"); + break; + } + } else { + /* After the dot-product, the value will be an integer on the + * range [0,4]. Zero stays zero, and positive values become 1.0. + */ + glsl_to_tgsi_instruction *const dp = + emit_dp(ir, result_dst, op[0], op[0], + ir->operands[0]->type->vector_elements); + if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB && + result_dst.type == GLSL_TYPE_FLOAT) { + /* The clamping to [0,1] can be done for free in the fragment + * shader with a saturate. + */ + dp->saturate = true; + } else if (result_dst.type == GLSL_TYPE_FLOAT) { + /* Negating the result of the dot-product gives values on the range + * [-4, 0]. Zero stays zero, and negative values become 1.0. This + * is achieved using SLT. + */ + st_src_reg slt_src = result_src; + slt_src.negate = ~slt_src.negate; + emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); + } + else { + /* Use SNE 0 if integers are being used as boolean values. */ + emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0)); + } } break; } @@ -1880,25 +1903,46 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) break; case ir_binop_ubo_load: { - ir_constant *uniform_block = ir->operands[0]->as_constant(); + ir_constant *const_uniform_block = ir->operands[0]->as_constant(); ir_constant *const_offset_ir = ir->operands[1]->as_constant(); unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0; + unsigned const_block = const_uniform_block ? const_uniform_block->value.u[0] + 1 : 0; st_src_reg index_reg = get_temp(glsl_type::uint_type); st_src_reg cbuf; cbuf.type = glsl_type::vec4_type->base_type; cbuf.file = PROGRAM_CONSTANT; cbuf.index = 0; - cbuf.index2D = uniform_block->value.u[0] + 1; cbuf.reladdr = NULL; cbuf.negate = 0; assert(ir->type->is_vector() || ir->type->is_scalar()); if (const_offset_ir) { - index_reg = st_src_reg_for_int(const_offset / 16); - } else { - emit(ir, TGSI_OPCODE_USHR, st_dst_reg(index_reg), op[1], st_src_reg_for_int(4)); + /* Constant index into constant buffer */ + cbuf.reladdr = NULL; + cbuf.index = const_offset / 16; + } + else { + /* Relative/variable index into constant buffer */ + emit(ir, TGSI_OPCODE_USHR, st_dst_reg(index_reg), op[1], + st_src_reg_for_int(4)); + cbuf.reladdr = ralloc(mem_ctx, st_src_reg); + memcpy(cbuf.reladdr, &index_reg, sizeof(index_reg)); + } + + if (const_uniform_block) { + /* Constant constant buffer */ + cbuf.reladdr2 = NULL; + cbuf.index2D = const_block; + cbuf.has_index2 = true; + } + else { + /* Relative/variable constant buffer */ + cbuf.reladdr2 = ralloc(mem_ctx, st_src_reg); + cbuf.index2D = 1; + memcpy(cbuf.reladdr2, &op[0], sizeof(st_src_reg)); + cbuf.has_index2 = true; } cbuf.swizzle = swizzle_for_size(ir->type->vector_elements); @@ -1907,9 +1951,6 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) const_offset % 16 / 4, const_offset % 16 / 4); - cbuf.reladdr = ralloc(mem_ctx, st_src_reg); - memcpy(cbuf.reladdr, &index_reg, sizeof(index_reg)); - if (ir->type->base_type == GLSL_TYPE_BOOL) { emit(ir, TGSI_OPCODE_USNE, result_dst, cbuf, st_src_reg_for_int(0)); } else { @@ -1929,6 +1970,42 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) emit(ir, TGSI_OPCODE_CMP, result_dst, op[0], op[1], op[2]); } break; + case ir_triop_bitfield_extract: + emit(ir, TGSI_OPCODE_IBFE, result_dst, op[0], op[1], op[2]); + break; + case ir_quadop_bitfield_insert: + emit(ir, TGSI_OPCODE_BFI, result_dst, op[0], op[1], op[2], op[3]); + break; + case ir_unop_bitfield_reverse: + emit(ir, TGSI_OPCODE_BREV, result_dst, op[0]); + break; + case ir_unop_bit_count: + emit(ir, TGSI_OPCODE_POPC, result_dst, op[0]); + break; + case ir_unop_find_msb: + emit(ir, TGSI_OPCODE_IMSB, result_dst, op[0]); + break; + case ir_unop_find_lsb: + emit(ir, TGSI_OPCODE_LSB, result_dst, op[0]); + break; + case ir_binop_imul_high: + emit(ir, TGSI_OPCODE_IMUL_HI, result_dst, op[0], op[1]); + break; + case ir_triop_fma: + /* NOTE: Perhaps there should be a special opcode that enforces fused + * mul-add. Just use MAD for now. + */ + emit(ir, TGSI_OPCODE_MAD, result_dst, op[0], op[1], op[2]); + break; + case ir_unop_interpolate_at_centroid: + emit(ir, TGSI_OPCODE_INTERP_CENTROID, result_dst, op[0]); + break; + case ir_binop_interpolate_at_offset: + emit(ir, TGSI_OPCODE_INTERP_OFFSET, result_dst, op[0], op[1]); + break; + case ir_binop_interpolate_at_sample: + emit(ir, TGSI_OPCODE_INTERP_SAMPLE, result_dst, op[0], op[1]); + break; case ir_unop_pack_snorm_2x16: case ir_unop_pack_unorm_2x16: case ir_unop_pack_half_2x16: @@ -1942,22 +2019,14 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) case ir_unop_unpack_snorm_4x8: case ir_unop_unpack_unorm_4x8: case ir_binop_pack_half_2x16_split: - case ir_unop_bitfield_reverse: - case ir_unop_bit_count: - case ir_unop_find_msb: - case ir_unop_find_lsb: case ir_binop_bfm: - case ir_triop_fma: case ir_triop_bfi: - case ir_triop_bitfield_extract: - case ir_quadop_bitfield_insert: case ir_quadop_vector: case ir_binop_vector_extract: case ir_triop_vector_insert: case ir_binop_ldexp: case ir_binop_carry: case ir_binop_borrow: - case ir_binop_imul_high: /* This operation is not supported, or should have already been handled. */ assert(!"Invalid ir opcode in glsl_to_tgsi_visitor::visit()"); @@ -2020,10 +2089,10 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) ir_variable *var = ir->var; if (!entry) { - switch (var->mode) { + switch (var->data.mode) { case ir_var_uniform: entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM, - var->location); + var->data.location); this->variables.push_tail(entry); break; case ir_var_shader_in: @@ -2032,21 +2101,22 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) * generic attributes (glBindVertexLocation), and * user-defined varyings. */ - assert(var->location != -1); + assert(var->data.location != -1); entry = new(mem_ctx) variable_storage(var, PROGRAM_INPUT, - var->location); + var->data.location); break; case ir_var_shader_out: - assert(var->location != -1); + assert(var->data.location != -1); entry = new(mem_ctx) variable_storage(var, PROGRAM_OUTPUT, - var->location + var->index); + var->data.location + + var->data.index); break; case ir_var_system_value: entry = new(mem_ctx) variable_storage(var, PROGRAM_SYSTEM_VALUE, - var->location); + var->data.location); break; case ir_var_auto: case ir_var_temporary: @@ -2345,7 +2415,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); l.writemask = WRITEMASK_XYZW; } else if (ir->lhs->type->is_scalar() && - ir->lhs->variable_referenced()->mode == ir_var_shader_out) { + ir->lhs->variable_referenced()->data.mode == ir_var_shader_out) { /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the * FINISHME: W component of fragment shader output zero, work correctly. */ @@ -2454,8 +2524,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) st_src_reg temp_base = get_temp(ir->type); st_dst_reg temp = st_dst_reg(temp_base); - foreach_iter(exec_list_iterator, iter, ir->components) { - ir_constant *field_value = (ir_constant *)iter.get(); + foreach_in_list(ir_constant, field_value, &ir->components) { int size = type_size(field_value->type); assert(size > 0); @@ -2548,10 +2617,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) case GLSL_TYPE_BOOL: gl_type = native_integers ? GL_BOOL : GL_FLOAT; for (i = 0; i < ir->type->vector_elements; i++) { - if (native_integers) - values[i].u = ir->value.b[i] ? ~0 : 0; - else - values[i].f = ir->value.b[i]; + values[i].u = ir->value.b[i] ? ctx->Const.UniformBooleanTrue : 0; } break; default: @@ -2569,11 +2635,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) function_entry * glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig) { - function_entry *entry; - - foreach_iter(exec_list_iterator, iter, this->function_signatures) { - entry = (function_entry *)iter.get(); - + foreach_in_list_use_after(function_entry, entry, &this->function_signatures) { if (entry->sig == sig) return entry; } @@ -2584,8 +2646,7 @@ glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig) entry->bgn_inst = NULL; /* Allocate storage for all the parameters. */ - foreach_iter(exec_list_iterator, iter, sig->parameters) { - ir_variable *param = (ir_variable *)iter.get(); + foreach_in_list(ir_variable, param, &sig->parameters) { variable_storage *storage; storage = find_variable_storage(param); @@ -2616,13 +2677,13 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) int i; /* Process in parameters. */ - exec_list_iterator sig_iter = sig->parameters.iterator(); - foreach_iter(exec_list_iterator, iter, *ir) { - ir_rvalue *param_rval = (ir_rvalue *)iter.get(); - ir_variable *param = (ir_variable *)sig_iter.get(); + foreach_two_lists(formal_node, &sig->parameters, + actual_node, &ir->actual_parameters) { + ir_rvalue *param_rval = (ir_rvalue *) actual_node; + ir_variable *param = (ir_variable *) formal_node; - if (param->mode == ir_var_function_in || - param->mode == ir_var_function_inout) { + if (param->data.mode == ir_var_function_in || + param->data.mode == ir_var_function_inout) { variable_storage *storage = find_variable_storage(param); assert(storage); @@ -2642,23 +2703,20 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) r.index++; } } - - sig_iter.next(); } - assert(!sig_iter.has_next()); /* Emit call instruction */ call_inst = emit(ir, TGSI_OPCODE_CAL); call_inst->function = entry; /* Process out parameters. */ - sig_iter = sig->parameters.iterator(); - foreach_iter(exec_list_iterator, iter, *ir) { - ir_rvalue *param_rval = (ir_rvalue *)iter.get(); - ir_variable *param = (ir_variable *)sig_iter.get(); + foreach_two_lists(formal_node, &sig->parameters, + actual_node, &ir->actual_parameters) { + ir_rvalue *param_rval = (ir_rvalue *) actual_node; + ir_variable *param = (ir_variable *) formal_node; - if (param->mode == ir_var_function_out || - param->mode == ir_var_function_inout) { + if (param->data.mode == ir_var_function_out || + param->data.mode == ir_var_function_inout) { variable_storage *storage = find_variable_storage(param); assert(storage); @@ -2678,10 +2736,7 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) r.index++; } } - - sig_iter.next(); } - assert(!sig_iter.has_next()); /* Process return value. */ this->result = entry->return_reg; @@ -2690,12 +2745,17 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) void glsl_to_tgsi_visitor::visit(ir_texture *ir) { - st_src_reg result_src, coord, cube_sc, lod_info, projector, dx, dy, offset, sample_index; + st_src_reg result_src, coord, cube_sc, lod_info, projector, dx, dy; + st_src_reg offset[MAX_GLSL_TEXTURE_OFFSET], sample_index, component; + st_src_reg levels_src; st_dst_reg result_dst, coord_dst, cube_sc_dst; glsl_to_tgsi_instruction *inst = NULL; unsigned opcode = TGSI_OPCODE_NOP; const glsl_type *sampler_type = ir->sampler->type; + ir_rvalue *sampler_index = + _mesa_get_sampler_array_nonconst_index(ir->sampler); bool is_cube_array = false; + unsigned i; /* if we are a cube array sampler */ if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE && @@ -2733,16 +2793,22 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) opcode = (is_cube_array && ir->shadow_comparitor) ? TGSI_OPCODE_TEX2 : TGSI_OPCODE_TEX; if (ir->offset) { ir->offset->accept(this); - offset = this->result; + offset[0] = this->result; } break; case ir_txb: - opcode = is_cube_array ? TGSI_OPCODE_TXB2 : TGSI_OPCODE_TXB; + if (is_cube_array || + sampler_type == glsl_type::samplerCubeShadow_type) { + opcode = TGSI_OPCODE_TXB2; + } + else { + opcode = TGSI_OPCODE_TXB; + } ir->lod_info.bias->accept(this); lod_info = this->result; if (ir->offset) { ir->offset->accept(this); - offset = this->result; + offset[0] = this->result; } break; case ir_txl: @@ -2751,7 +2817,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) lod_info = this->result; if (ir->offset) { ir->offset->accept(this); - offset = this->result; + offset[0] = this->result; } break; case ir_txd: @@ -2762,7 +2828,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) dy = this->result; if (ir->offset) { ir->offset->accept(this); - offset = this->result; + offset[0] = this->result; } break; case ir_txs: @@ -2770,13 +2836,18 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) ir->lod_info.lod->accept(this); lod_info = this->result; break; + case ir_query_levels: + opcode = TGSI_OPCODE_TXQ; + lod_info = st_src_reg(PROGRAM_IMMEDIATE, 0, GLSL_TYPE_INT); + levels_src = get_temp(ir->type); + break; case ir_txf: opcode = TGSI_OPCODE_TXF; ir->lod_info.lod->accept(this); lod_info = this->result; if (ir->offset) { ir->offset->accept(this); - offset = this->result; + offset[0] = this->result; } break; case ir_txf_ms: @@ -2784,14 +2855,27 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) ir->lod_info.sample_index->accept(this); sample_index = this->result; break; - case ir_lod: - assert(!"Unexpected ir_lod opcode"); - break; case ir_tg4: - assert(!"Unexpected ir_tg4 opcode"); + opcode = TGSI_OPCODE_TG4; + ir->lod_info.component->accept(this); + component = this->result; + if (ir->offset) { + ir->offset->accept(this); + if (ir->offset->type->base_type == GLSL_TYPE_ARRAY) { + const glsl_type *elt_type = ir->offset->type->fields.array; + for (i = 0; i < ir->offset->type->length; i++) { + offset[i] = this->result; + offset[i].index += i * type_size(elt_type); + offset[i].type = elt_type->base_type; + offset[i].swizzle = swizzle_for_size(elt_type->vector_elements); + } + } else { + offset[0] = this->result; + } + } break; - case ir_query_levels: - assert(!"Unexpected ir_query_levels opcode"); + case ir_lod: + opcode = TGSI_OPCODE_LODQ; break; } @@ -2887,33 +2971,56 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) coord_dst.writemask = WRITEMASK_XYZW; } + if (sampler_index) { + sampler_index->accept(this); + emit_arl(ir, sampler_reladdr, this->result); + } + if (opcode == TGSI_OPCODE_TXD) inst = emit(ir, opcode, result_dst, coord, dx, dy); - else if (opcode == TGSI_OPCODE_TXQ) - inst = emit(ir, opcode, result_dst, lod_info); - else if (opcode == TGSI_OPCODE_TXF) { + else if (opcode == TGSI_OPCODE_TXQ) { + if (ir->op == ir_query_levels) { + /* the level is stored in W */ + inst = emit(ir, opcode, st_dst_reg(levels_src), lod_info); + result_dst.writemask = WRITEMASK_X; + levels_src.swizzle = SWIZZLE_WWWW; + emit(ir, TGSI_OPCODE_MOV, result_dst, levels_src); + } else + inst = emit(ir, opcode, result_dst, lod_info); + } else if (opcode == TGSI_OPCODE_TXF) { inst = emit(ir, opcode, result_dst, coord); } else if (opcode == TGSI_OPCODE_TXL2 || opcode == TGSI_OPCODE_TXB2) { inst = emit(ir, opcode, result_dst, coord, lod_info); } else if (opcode == TGSI_OPCODE_TEX2) { inst = emit(ir, opcode, result_dst, coord, cube_sc); - } else + } else if (opcode == TGSI_OPCODE_TG4) { + if (is_cube_array && ir->shadow_comparitor) { + inst = emit(ir, opcode, result_dst, coord, cube_sc); + } else { + inst = emit(ir, opcode, result_dst, coord, component); + } + } else inst = emit(ir, opcode, result_dst, coord); if (ir->shadow_comparitor) inst->tex_shadow = GL_TRUE; - inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler, - this->shader_program, - this->prog); + inst->sampler.index = _mesa_get_sampler_uniform_value(ir->sampler, + this->shader_program, + this->prog); + if (sampler_index) { + inst->sampler.reladdr = ralloc(mem_ctx, st_src_reg); + memcpy(inst->sampler.reladdr, &sampler_reladdr, sizeof(sampler_reladdr)); + inst->sampler_array_size = + ir->sampler->as_dereference_array()->array->type->array_size(); + } else { + inst->sampler_array_size = 1; + } if (ir->offset) { - inst->tex_offset_num_offset = 1; - inst->tex_offsets[0].Index = offset.index; - inst->tex_offsets[0].File = offset.file; - inst->tex_offsets[0].SwizzleX = GET_SWZ(offset.swizzle, 0); - inst->tex_offsets[0].SwizzleY = GET_SWZ(offset.swizzle, 1); - inst->tex_offsets[0].SwizzleZ = GET_SWZ(offset.swizzle, 2); + for (i = 0; i < MAX_GLSL_TEXTURE_OFFSET && offset[i].file != PROGRAM_UNDEFINED; i++) + inst->tex_offsets[i] = offset[i]; + inst->tex_offset_num_offset = i; } switch (sampler_type->sampler_dimensionality) { @@ -2981,8 +3088,18 @@ glsl_to_tgsi_visitor::visit(ir_discard *ir) { if (ir->condition) { ir->condition->accept(this); - this->result.negate = ~this->result.negate; - emit(ir, TGSI_OPCODE_KILL_IF, undef_dst, this->result); + st_src_reg condition = this->result; + + /* Convert the bool condition to a float so we can negate. */ + if (native_integers) { + st_src_reg temp = get_temp(ir->condition->type); + emit(ir, TGSI_OPCODE_AND, st_dst_reg(temp), + condition, st_src_reg_for_float(1.0)); + condition = temp; + } + + condition.negate = ~condition.negate; + emit(ir, TGSI_OPCODE_KILL_IF, undef_dst, condition); } else { /* unconditional kil */ emit(ir, TGSI_OPCODE_KILL); @@ -3019,14 +3136,18 @@ void glsl_to_tgsi_visitor::visit(ir_emit_vertex *ir) { assert(this->prog->Target == GL_GEOMETRY_PROGRAM_NV); - emit(ir, TGSI_OPCODE_EMIT); + + ir->stream->accept(this); + emit(ir, TGSI_OPCODE_EMIT, undef_dst, this->result); } void glsl_to_tgsi_visitor::visit(ir_end_primitive *ir) { assert(this->prog->Target == GL_GEOMETRY_PROGRAM_NV); - emit(ir, TGSI_OPCODE_ENDPRIM); + + ir->stream->accept(this); + emit(ir, TGSI_OPCODE_ENDPRIM, undef_dst, this->result); } glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() @@ -3046,7 +3167,9 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() ctx = NULL; prog = NULL; shader_program = NULL; + shader = NULL; options = NULL; + have_sqrt = false; } glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor() @@ -3069,14 +3192,14 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) { v->samplers_used = 0; - foreach_iter(exec_list_iterator, iter, v->instructions) { - glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); - + foreach_in_list(glsl_to_tgsi_instruction, inst, &v->instructions) { if (is_tex_instruction(inst->op)) { - v->samplers_used |= 1 << inst->sampler; + for (int i = 0; i < inst->sampler_array_size; i++) { + v->samplers_used |= 1 << (inst->sampler.index + i); - if (inst->tex_shadow) { - prog->ShadowSamplers |= 1 << inst->sampler; + if (inst->tex_shadow) { + prog->ShadowSamplers |= 1 << (inst->sampler.index + i); + } } } } @@ -3087,78 +3210,6 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) _mesa_update_shader_textures_used(v->shader_program, prog); } -static void -set_uniform_initializer(struct gl_context *ctx, void *mem_ctx, - struct gl_shader_program *shader_program, - const char *name, const glsl_type *type, - ir_constant *val) -{ - if (type->is_record()) { - ir_constant *field_constant; - - field_constant = (ir_constant *)val->components.get_head(); - - for (unsigned int i = 0; i < type->length; i++) { - const glsl_type *field_type = type->fields.structure[i].type; - const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name, - type->fields.structure[i].name); - set_uniform_initializer(ctx, mem_ctx, shader_program, field_name, - field_type, field_constant); - field_constant = (ir_constant *)field_constant->next; - } - return; - } - - unsigned offset; - unsigned index = _mesa_get_uniform_location(ctx, shader_program, name, - &offset); - if (offset == GL_INVALID_INDEX) { - fail_link(shader_program, - "Couldn't find uniform for initializer %s\n", name); - return; - } - int loc = _mesa_uniform_merge_location_offset(shader_program, index, offset); - - for (unsigned int i = 0; i < (type->is_array() ? type->length : 1); i++) { - ir_constant *element; - const glsl_type *element_type; - if (type->is_array()) { - element = val->array_elements[i]; - element_type = type->fields.array; - } else { - element = val; - element_type = type; - } - - void *values; - - if (element_type->base_type == GLSL_TYPE_BOOL) { - int *conv = ralloc_array(mem_ctx, int, element_type->components()); - for (unsigned int j = 0; j < element_type->components(); j++) { - conv[j] = element->value.b[j]; - } - values = (void *)conv; - element_type = glsl_type::get_instance(GLSL_TYPE_INT, - element_type->vector_elements, - 1); - } else { - values = &element->value; - } - - if (element_type->is_matrix()) { - _mesa_uniform_matrix(ctx, shader_program, - element_type->matrix_columns, - element_type->vector_elements, - loc, 1, GL_FALSE, (GLfloat *)values); - } else { - _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns, - values, element_type->gl_type); - } - - loc++; - } -} - /** * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which * are read from the given src in this instruction @@ -3205,18 +3256,13 @@ get_src_arg_mask(st_dst_reg dst, st_src_reg src) void glsl_to_tgsi_visitor::simplify_cmp(void) { - unsigned *tempWrites; + int tempWritesSize = 0; + unsigned *tempWrites = NULL; unsigned outputWrites[MAX_PROGRAM_OUTPUTS]; - tempWrites = new unsigned[MAX_TEMPS]; - if (!tempWrites) { - return; - } - memset(tempWrites, 0, sizeof(unsigned) * MAX_TEMPS); memset(outputWrites, 0, sizeof(outputWrites)); - foreach_iter(exec_list_iterator, iter, this->instructions) { - glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { unsigned prevWriteMask = 0; /* Give up if we encounter relative addressing or flow control. */ @@ -3235,7 +3281,19 @@ glsl_to_tgsi_visitor::simplify_cmp(void) prevWriteMask = outputWrites[inst->dst.index]; outputWrites[inst->dst.index] |= inst->dst.writemask; } else if (inst->dst.file == PROGRAM_TEMPORARY) { - assert(inst->dst.index < MAX_TEMPS); + if (inst->dst.index >= tempWritesSize) { + const int inc = 4096; + + tempWrites = (unsigned*) + realloc(tempWrites, + (tempWritesSize + inc) * sizeof(unsigned)); + if (!tempWrites) + return; + + memset(tempWrites + tempWritesSize, 0, inc * sizeof(unsigned)); + tempWritesSize += inc; + } + prevWriteMask = tempWrites[inst->dst.index]; tempWrites[inst->dst.index] |= inst->dst.writemask; } else @@ -3254,15 +3312,14 @@ glsl_to_tgsi_visitor::simplify_cmp(void) } } - delete [] tempWrites; + free(tempWrites); } /* Replaces all references to a temporary register index with another index. */ void glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index) { - foreach_iter(exec_list_iterator, iter, this->instructions) { - glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { unsigned j; for (j=0; j < num_inst_src_regs(inst->op); j++) { @@ -3271,6 +3328,13 @@ glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index) inst->src[j].index = new_index; } } + + for (j=0; j < inst->tex_offset_num_offset; j++) { + if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY && + inst->tex_offsets[j].index == index) { + inst->tex_offsets[j].index = new_index; + } + } if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) { inst->dst.index = new_index; @@ -3285,15 +3349,19 @@ glsl_to_tgsi_visitor::get_first_temp_read(int index) int loop_start = -1; /* index of the first active BGNLOOP (if any) */ unsigned i = 0, j; - foreach_iter(exec_list_iterator, iter, this->instructions) { - glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); - + foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { for (j=0; j < num_inst_src_regs(inst->op); j++) { if (inst->src[j].file == PROGRAM_TEMPORARY && inst->src[j].index == index) { return (depth == 0) ? i : loop_start; } } + for (j=0; j < inst->tex_offset_num_offset; j++) { + if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY && + inst->tex_offsets[j].index == index) { + return (depth == 0) ? i : loop_start; + } + } if (inst->op == TGSI_OPCODE_BGNLOOP) { if(depth++ == 0) @@ -3317,9 +3385,7 @@ glsl_to_tgsi_visitor::get_first_temp_write(int index) int loop_start = -1; /* index of the first active BGNLOOP (if any) */ int i = 0; - foreach_iter(exec_list_iterator, iter, this->instructions) { - glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); - + foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) { return (depth == 0) ? i : loop_start; } @@ -3346,15 +3412,18 @@ glsl_to_tgsi_visitor::get_last_temp_read(int index) int last = -1; /* index of last instruction that reads the temporary */ unsigned i = 0, j; - foreach_iter(exec_list_iterator, iter, this->instructions) { - glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); - + foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { for (j=0; j < num_inst_src_regs(inst->op); j++) { if (inst->src[j].file == PROGRAM_TEMPORARY && inst->src[j].index == index) { last = (depth == 0) ? i : -2; } } + for (j=0; j < inst->tex_offset_num_offset; j++) { + if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY && + inst->tex_offsets[j].index == index) + last = (depth == 0) ? i : -2; + } if (inst->op == TGSI_OPCODE_BGNLOOP) depth++; @@ -3377,9 +3446,7 @@ glsl_to_tgsi_visitor::get_last_temp_write(int index) int last = -1; /* index of last instruction that writes to the temporary */ int i = 0; - foreach_iter(exec_list_iterator, iter, this->instructions) { - glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); - + foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) last = (depth == 0) ? i : -2; @@ -3426,9 +3493,7 @@ glsl_to_tgsi_visitor::copy_propagate(void) int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); int level = 0; - foreach_iter(exec_list_iterator, iter, this->instructions) { - glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); - + foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { assert(inst->dst.file != PROGRAM_TEMPORARY || inst->dst.index < this->next_temp); @@ -3597,7 +3662,8 @@ glsl_to_tgsi_visitor::copy_propagate(void) } /* - * Tracks available PROGRAM_TEMPORARY registers for dead code elimination. + * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead + * code elimination. * * The glsl_to_tgsi_visitor lazily produces code assuming that this pass * will occur. As an example, a TXP production after copy propagation but @@ -3610,48 +3676,9 @@ glsl_to_tgsi_visitor::copy_propagate(void) * and after this pass: * * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; - * - * FIXME: assumes that all functions are inlined (no support for BGNSUB/ENDSUB) - * FIXME: doesn't eliminate all dead code inside of loops; it steps around them - */ -void -glsl_to_tgsi_visitor::eliminate_dead_code(void) -{ - int i; - - for (i=0; i < this->next_temp; i++) { - int last_read = get_last_temp_read(i); - int j = 0; - - foreach_iter(exec_list_iterator, iter, this->instructions) { - glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); - - if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == i && - j > last_read) - { - iter.remove(); - delete inst; - } - - j++; - } - } -} - -/* - * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead - * code elimination. This is less primitive than eliminate_dead_code(), as it - * is per-channel and can detect consecutive writes without a read between them - * as dead code. However, there is some dead code that can be eliminated by - * eliminate_dead_code() but not this function - for example, this function - * cannot eliminate an instruction writing to a register that is never read and - * is the only instruction writing to that register. - * - * The glsl_to_tgsi_visitor lazily produces code assuming that this pass - * will occur. */ int -glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) +glsl_to_tgsi_visitor::eliminate_dead_code(void) { glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx, glsl_to_tgsi_instruction *, @@ -3660,9 +3687,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) int level = 0; int removed = 0; - foreach_iter(exec_list_iterator, iter, this->instructions) { - glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); - + foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { assert(inst->dst.file != PROGRAM_TEMPORARY || inst->dst.index < this->next_temp); @@ -3731,6 +3756,26 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) } } } + for (unsigned i = 0; i < inst->tex_offset_num_offset; i++) { + if (inst->tex_offsets[i].file == PROGRAM_TEMPORARY && inst->tex_offsets[i].reladdr){ + /* Any temporary might be read, so no dead code elimination + * across this instruction. + */ + memset(writes, 0, sizeof(*writes) * this->next_temp * 4); + } else if (inst->tex_offsets[i].file == PROGRAM_TEMPORARY) { + /* Clear where it's used as src. */ + int src_chans = 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 0); + src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 1); + src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 2); + src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 3); + + for (int c = 0; c < 4; c++) { + if (src_chans & (1 << c)) { + writes[4 * inst->tex_offsets[i].index + c] = NULL; + } + } + } + } break; } @@ -3768,13 +3813,11 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) /* Now actually remove the instructions that are completely dead and update * the writemask of other instructions with dead channels. */ - foreach_iter(exec_list_iterator, iter, this->instructions) { - glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); - + foreach_in_list_safe(glsl_to_tgsi_instruction, inst, &this->instructions) { if (!inst->dead_mask || !inst->dst.writemask) continue; else if ((inst->dst.writemask & ~inst->dead_mask) == 0) { - iter.remove(); + inst->remove(); delete inst; removed++; } else @@ -3879,6 +3922,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, v->ctx = original->ctx; v->prog = prog; v->shader_program = NULL; + v->shader = NULL; v->glsl_version = original->glsl_version; v->native_integers = original->native_integers; v->options = original->options; @@ -3897,7 +3941,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, src0 = v->get_temp(glsl_type::vec4_type); dst0 = st_dst_reg(src0); inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord); - inst->sampler = 0; + inst->sampler_array_size = 1; inst->tex_target = TEXTURE_2D_INDEX; prog->InputsRead |= VARYING_BIT_TEX0; @@ -3936,14 +3980,16 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */ temp_dst.writemask = WRITEMASK_XY; /* write R,G */ inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0); - inst->sampler = 1; + inst->sampler.index = 1; + inst->sampler_array_size = 1; inst->tex_target = TEXTURE_2D_INDEX; /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */ src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W); temp_dst.writemask = WRITEMASK_ZW; /* write B,A */ inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0); - inst->sampler = 1; + inst->sampler.index = 1; + inst->sampler_array_size = 1; inst->tex_target = TEXTURE_2D_INDEX; prog->SamplersUsed |= (1 << 1); /* mark sampler 1 as used */ @@ -3955,8 +4001,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, /* Now copy the instructions from the original glsl_to_tgsi_visitor into the * new visitor. */ - foreach_iter(exec_list_iterator, iter, original->instructions) { - glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + foreach_in_list(glsl_to_tgsi_instruction, inst, &original->instructions) { glsl_to_tgsi_instruction *newinst; st_src_reg src_regs[3]; @@ -4009,6 +4054,7 @@ get_bitmap_visitor(struct st_fragment_program *fp, v->ctx = original->ctx; v->prog = prog; v->shader_program = NULL; + v->shader = NULL; v->glsl_version = original->glsl_version; v->native_integers = original->native_integers; v->options = original->options; @@ -4024,7 +4070,8 @@ get_bitmap_visitor(struct st_fragment_program *fp, src0 = v->get_temp(glsl_type::vec4_type); dst0 = st_dst_reg(src0); inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord); - inst->sampler = samplerIndex; + inst->sampler.index = samplerIndex; + inst->sampler_array_size = 1; inst->tex_target = TEXTURE_2D_INDEX; prog->InputsRead |= VARYING_BIT_TEX0; @@ -4039,8 +4086,7 @@ get_bitmap_visitor(struct st_fragment_program *fp, /* Now copy the instructions from the original glsl_to_tgsi_visitor into the * new visitor. */ - foreach_iter(exec_list_iterator, iter, original->instructions) { - glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + foreach_in_list(glsl_to_tgsi_instruction, inst, &original->instructions) { glsl_to_tgsi_instruction *newinst; st_src_reg src_regs[3]; @@ -4075,16 +4121,18 @@ struct label { struct st_translate { struct ureg_program *ureg; - struct ureg_dst temps[MAX_TEMPS]; + unsigned temps_size; + struct ureg_dst *temps; + struct ureg_dst arrays[MAX_ARRAYS]; struct ureg_src *constants; struct ureg_src *immediates; struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS]; struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; - struct ureg_dst address[2]; + struct ureg_dst address[3]; struct ureg_src samplers[PIPE_MAX_SAMPLERS]; struct ureg_src systemValues[SYSTEM_VALUE_MAX]; - + struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET]; unsigned array_sizes[MAX_ARRAYS]; const GLuint *inputMapping; @@ -4112,10 +4160,24 @@ struct st_translate { }; /** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */ -static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = { - TGSI_SEMANTIC_FACE, +const unsigned _mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = { + /* Vertex shader + */ TGSI_SEMANTIC_VERTEXID, - TGSI_SEMANTIC_INSTANCEID + TGSI_SEMANTIC_INSTANCEID, + 0, + 0, + + /* Geometry shader + */ + TGSI_SEMANTIC_INVOCATIONID, + + /* Fragment shader + */ + TGSI_SEMANTIC_FACE, + TGSI_SEMANTIC_SAMPLEID, + TGSI_SEMANTIC_SAMPLEPOS, + TGSI_SEMANTIC_SAMPLEMASK, }; /** @@ -4204,9 +4266,20 @@ dst_register(struct st_translate *t, return ureg_dst_undef(); case PROGRAM_TEMPORARY: - assert(index >= 0); - assert(index < (int) Elements(t->temps)); - + /* Allocate space for temporaries on demand. */ + if (index >= t->temps_size) { + const int inc = 4096; + + t->temps = (struct ureg_dst*) + realloc(t->temps, + (t->temps_size + inc) * sizeof(struct ureg_dst)); + if (!t->temps) + return ureg_dst_undef(); + + memset(t->temps + t->temps_size, 0, inc * sizeof(struct ureg_dst)); + t->temps_size += inc; + } + if (ureg_dst_is_undef(t->temps[index])) t->temps[index] = ureg_DECL_local_temporary(t->ureg); @@ -4215,8 +4288,7 @@ dst_register(struct st_translate *t, case PROGRAM_ARRAY: array = index >> 16; - assert(array >= 0); - assert(array < (int) Elements(t->arrays)); + assert(array < Elements(t->arrays)); if (ureg_dst_is_undef(t->arrays[array])) t->arrays[array] = ureg_DECL_array_temporary( @@ -4250,51 +4322,45 @@ dst_register(struct st_translate *t, * Map a glsl_to_tgsi src register to a TGSI ureg_src register. */ static struct ureg_src -src_register(struct st_translate *t, - gl_register_file file, - GLint index, GLint index2D) +src_register(struct st_translate *t, const struct st_src_reg *reg) { - switch(file) { + switch(reg->file) { case PROGRAM_UNDEFINED: return ureg_src_undef(); case PROGRAM_TEMPORARY: case PROGRAM_ARRAY: - return ureg_src(dst_register(t, file, index)); + return ureg_src(dst_register(t, reg->file, reg->index)); case PROGRAM_UNIFORM: - assert(index >= 0); - return t->constants[index]; + assert(reg->index >= 0); + return t->constants[reg->index]; case PROGRAM_STATE_VAR: case PROGRAM_CONSTANT: /* ie, immediate */ - if (index2D) { - struct ureg_src src; - src = ureg_src_register(TGSI_FILE_CONSTANT, 0); - src.Dimension = 1; - src.DimensionIndex = index2D; - return src; - } else if (index < 0) + if (reg->has_index2) + return ureg_src_register(TGSI_FILE_CONSTANT, reg->index); + else if (reg->index < 0) return ureg_DECL_constant(t->ureg, 0); else - return t->constants[index]; + return t->constants[reg->index]; case PROGRAM_IMMEDIATE: - return t->immediates[index]; + return t->immediates[reg->index]; case PROGRAM_INPUT: - assert(t->inputMapping[index] < Elements(t->inputs)); - return t->inputs[t->inputMapping[index]]; + assert(t->inputMapping[reg->index] < Elements(t->inputs)); + return t->inputs[t->inputMapping[reg->index]]; case PROGRAM_OUTPUT: - assert(t->outputMapping[index] < Elements(t->outputs)); - return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */ + assert(t->outputMapping[reg->index] < Elements(t->outputs)); + return ureg_src(t->outputs[t->outputMapping[reg->index]]); /* not needed? */ case PROGRAM_ADDRESS: - return ureg_src(t->address[index]); + return ureg_src(t->address[reg->index]); case PROGRAM_SYSTEM_VALUE: - assert(index < (int) Elements(t->systemValues)); - return t->systemValues[index]; + assert(reg->index < (int) Elements(t->systemValues)); + return t->systemValues[reg->index]; default: assert(!"unknown src register file"); @@ -4333,7 +4399,8 @@ translate_dst(struct st_translate *t, break; case TGSI_PROCESSOR_FRAGMENT: - if (dst_reg->index >= FRAG_RESULT_COLOR) { + if (dst_reg->index == FRAG_RESULT_COLOR || + dst_reg->index >= FRAG_RESULT_DATA0) { dst = ureg_saturate(dst); } break; @@ -4354,10 +4421,12 @@ translate_dst(struct st_translate *t, static struct ureg_src translate_src(struct st_translate *t, const st_src_reg *src_reg) { - struct ureg_src src = src_register(t, src_reg->file, src_reg->index, src_reg->index2D); + struct ureg_src src = src_register(t, src_reg); - if (t->procType == TGSI_PROCESSOR_GEOMETRY && src_reg->has_index2) { - src = src_register(t, src_reg->file, src_reg->index, src_reg->index2D); + if (src_reg->has_index2) { + /* 2D indexes occur with geometry shader inputs (attrib, vertex) + * and UBO constant buffers (buffer, position). + */ if (src_reg->reladdr2) src = ureg_src_dimension_indirect(src, ureg_src(t->address[1]), src_reg->index2D); @@ -4384,22 +4453,50 @@ translate_src(struct st_translate *t, const st_src_reg *src_reg) static struct tgsi_texture_offset translate_tex_offset(struct st_translate *t, - const struct tgsi_texture_offset *in_offset) + const st_src_reg *in_offset, int idx) { struct tgsi_texture_offset offset; struct ureg_src imm_src; + struct ureg_dst dst; + int array; - assert(in_offset->File == PROGRAM_IMMEDIATE); - imm_src = t->immediates[in_offset->Index]; + switch (in_offset->file) { + case PROGRAM_IMMEDIATE: + imm_src = t->immediates[in_offset->index]; + + offset.File = imm_src.File; + offset.Index = imm_src.Index; + offset.SwizzleX = imm_src.SwizzleX; + offset.SwizzleY = imm_src.SwizzleY; + offset.SwizzleZ = imm_src.SwizzleZ; + offset.Padding = 0; + break; + case PROGRAM_TEMPORARY: + imm_src = ureg_src(t->temps[in_offset->index]); + offset.File = imm_src.File; + offset.Index = imm_src.Index; + offset.SwizzleX = GET_SWZ(in_offset->swizzle, 0); + offset.SwizzleY = GET_SWZ(in_offset->swizzle, 1); + offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2); + offset.Padding = 0; + break; + case PROGRAM_ARRAY: + array = in_offset->index >> 16; - offset.File = imm_src.File; - offset.Index = imm_src.Index; - offset.SwizzleX = imm_src.SwizzleX; - offset.SwizzleY = imm_src.SwizzleY; - offset.SwizzleZ = imm_src.SwizzleZ; - offset.File = TGSI_FILE_IMMEDIATE; - offset.Padding = 0; + assert(array >= 0); + assert(array < (int) Elements(t->arrays)); + dst = t->arrays[array]; + offset.File = dst.File; + offset.Index = dst.Index + (in_offset->index & 0xFFFF) - 0x8000; + offset.SwizzleX = GET_SWZ(in_offset->swizzle, 0); + offset.SwizzleY = GET_SWZ(in_offset->swizzle, 1); + offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2); + offset.Padding = 0; + break; + default: + break; + } return offset; } @@ -4455,9 +4552,15 @@ compile_tgsi_instruction(struct st_translate *t, case TGSI_OPCODE_TEX2: case TGSI_OPCODE_TXB2: case TGSI_OPCODE_TXL2: - src[num_src++] = t->samplers[inst->sampler]; + case TGSI_OPCODE_TG4: + case TGSI_OPCODE_LODQ: + src[num_src] = t->samplers[inst->sampler.index]; + if (inst->sampler.reladdr) + src[num_src] = + ureg_src_indirect(src[num_src], ureg_src(t->address[2])); + num_src++; for (i = 0; i < inst->tex_offset_num_offset; i++) { - texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i]); + texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i], i); } tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow); @@ -4678,15 +4781,19 @@ emit_wpos(struct st_context *st, * saturating the value to [0,1] does the job. */ static void -emit_face_var(struct st_translate *t) +emit_face_var(struct gl_context *ctx, struct st_translate *t) { struct ureg_program *ureg = t->ureg; struct ureg_dst face_temp = ureg_DECL_temporary(ureg); struct ureg_src face_input = t->inputs[t->inputMapping[VARYING_SLOT_FACE]]; - /* MOV_SAT face_temp, input[face] */ - face_temp = ureg_saturate(face_temp); - ureg_MOV(ureg, face_temp, face_input); + if (ctx->Const.NativeIntegers) { + ureg_FSGE(ureg, face_temp, face_input, ureg_imm1f(ureg, 0)); + } + else { + /* MOV_SAT face_temp, input[face] */ + ureg_MOV(ureg, ureg_saturate(face_temp), face_input); + } /* Use face_temp as face input from here on: */ t->inputs[t->inputMapping[VARYING_SLOT_FACE]] = ureg_src(face_temp); @@ -4712,6 +4819,7 @@ emit_edgeflags(struct st_translate *t) * \param inputSemanticIndex the semantic index (ex: which texcoord) for * each input * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input + * \param interpLocation the TGSI_INTERPOLATE_LOC_* location for each input * \param numOutputs number of output registers used * \param outputMapping maps Mesa fragment program outputs to TGSI * generic outputs @@ -4733,7 +4841,7 @@ st_translate_program( const ubyte inputSemanticName[], const ubyte inputSemanticIndex[], const GLuint interpMode[], - const GLboolean is_centroid[], + const GLuint interpLocation[], GLuint numOutputs, const GLuint outputMapping[], const ubyte outputSemanticName[], @@ -4748,6 +4856,21 @@ st_translate_program( assert(numInputs <= Elements(t->inputs)); assert(numOutputs <= Elements(t->outputs)); + assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_FRONT_FACE] == + TGSI_SEMANTIC_FACE); + assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_VERTEX_ID] == + TGSI_SEMANTIC_VERTEXID); + assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_INSTANCE_ID] == + TGSI_SEMANTIC_INSTANCEID); + assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_SAMPLE_ID] == + TGSI_SEMANTIC_SAMPLEID); + assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_SAMPLE_POS] == + TGSI_SEMANTIC_SAMPLEPOS); + assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_SAMPLE_MASK_IN] == + TGSI_SEMANTIC_SAMPLEMASK); + assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_INVOCATION_ID] == + TGSI_SEMANTIC_INVOCATIONID); + t = CALLOC_STRUCT(st_translate); if (!t) { ret = PIPE_ERROR_OUT_OF_MEMORY; @@ -4779,7 +4902,7 @@ st_translate_program( inputSemanticName[i], inputSemanticIndex[i], interpMode[i], 0, - is_centroid[i]); + interpLocation[i]); } if (proginfo->InputsRead & VARYING_BIT_POS) { @@ -4790,7 +4913,7 @@ st_translate_program( } if (proginfo->InputsRead & VARYING_BIT_FACE) - emit_face_var(t); + emit_face_var(ctx, t); /* * Declare output attributes. @@ -4814,6 +4937,15 @@ st_translate_program( TGSI_SEMANTIC_COLOR, outputSemanticIndex[i]); break; + case TGSI_SEMANTIC_SAMPLEMASK: + t->outputs[i] = ureg_DECL_output(ureg, + TGSI_SEMANTIC_SAMPLEMASK, + outputSemanticIndex[i]); + /* TODO: If we ever support more than 32 samples, this will have + * to become an array. + */ + t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X); + break; default: assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR"); ret = PIPE_ERROR_BAD_INPUT; @@ -4861,10 +4993,9 @@ st_translate_program( /* Declare address register. */ if (program->num_address_regs > 0) { - assert(program->num_address_regs <= 2); - t->address[0] = ureg_DECL_address(ureg); - if (program->num_address_regs == 2) - t->address[1] = ureg_DECL_address(ureg); + assert(program->num_address_regs <= 3); + for (int i = 0; i < program->num_address_regs; i++) + t->address[i] = ureg_DECL_address(ureg); } /* Declare misc input registers @@ -4874,7 +5005,7 @@ st_translate_program( unsigned numSys = 0; for (i = 0; sysInputs; i++) { if (sysInputs & (1 << i)) { - unsigned semName = mesa_sysval_to_semantic[i]; + unsigned semName = _mesa_sysval_to_semantic[i]; t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0); if (semName == TGSI_SEMANTIC_INSTANCEID || semName == TGSI_SEMANTIC_VERTEXID) { @@ -4944,11 +5075,17 @@ st_translate_program( } } - if (program->shader_program) { - unsigned num_ubos = program->shader_program->NumUniformBlocks; + if (program->shader) { + unsigned num_ubos = program->shader->NumUniformBlocks; for (i = 0; i < num_ubos; i++) { - ureg_DECL_constant2D(t->ureg, 0, program->shader_program->UniformBlocks[i].UniformBufferSize / 4, i + 1); + unsigned size = program->shader->UniformBlocks[i].UniformBufferSize; + unsigned num_const_vecs = (size + 15) / 16; + unsigned first, last; + assert(num_const_vecs > 0); + first = 0; + last = num_const_vecs > 0 ? num_const_vecs - 1 : 0; + ureg_DECL_constant2D(t->ureg, first, last, i + 1); } } @@ -4961,15 +5098,14 @@ st_translate_program( goto out; } i = 0; - foreach_iter(exec_list_iterator, iter, program->immediates) { - immediate_storage *imm = (immediate_storage *)iter.get(); + foreach_in_list(immediate_storage, imm, &program->immediates) { assert(i < program->num_immediates); t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size); } assert(i == program->num_immediates); /* texture samplers */ - for (i = 0; i < ctx->Const.FragmentProgram.MaxTextureImageUnits; i++) { + for (i = 0; i < ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; i++) { if (program->samplers_used & (1 << i)) { t->samplers[i] = ureg_DECL_sampler(ureg, i); } @@ -4977,10 +5113,9 @@ st_translate_program( /* Emit each instruction in turn: */ - foreach_iter(exec_list_iterator, iter, program->instructions) { + foreach_in_list(glsl_to_tgsi_instruction, inst, &program->instructions) { set_insn_start(t, ureg_get_instruction_number(ureg)); - compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get(), - clamp_color); + compile_tgsi_instruction(t, inst, clamp_color); } /* Fix up all emitted labels: @@ -4995,7 +5130,7 @@ st_translate_program( * prog->ParameterValues to get reallocated (e.g., anything that adds a * program constant) has to happen before creating this linkage. */ - for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { if (program->shader_program->_LinkedShaders[i] == NULL) continue; @@ -5006,6 +5141,7 @@ st_translate_program( out: if (t) { + free(t->temps); free(t->insn); free(t->labels); free(t->constants); @@ -5022,6 +5158,26 @@ out: } /* ----------------------------- End TGSI code ------------------------------ */ + +static unsigned +shader_stage_to_ptarget(gl_shader_stage stage) +{ + switch (stage) { + case MESA_SHADER_VERTEX: + return PIPE_SHADER_VERTEX; + case MESA_SHADER_FRAGMENT: + return PIPE_SHADER_FRAGMENT; + case MESA_SHADER_GEOMETRY: + return PIPE_SHADER_GEOMETRY; + case MESA_SHADER_COMPUTE: + return PIPE_SHADER_COMPUTE; + } + + assert(!"should not be reached"); + return PIPE_SHADER_VERTEX; +} + + /** * Convert a shader's GLSL IR into a Mesa gl_program, although without * generating Mesa IR. @@ -5033,30 +5189,12 @@ get_mesa_program(struct gl_context *ctx, { glsl_to_tgsi_visitor* v; struct gl_program *prog; - GLenum target; + GLenum target = _mesa_shader_stage_to_program(shader->Stage); bool progress; struct gl_shader_compiler_options *options = - &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)]; + &ctx->Const.ShaderCompilerOptions[_mesa_shader_enum_to_shader_stage(shader->Type)]; struct pipe_screen *pscreen = ctx->st->pipe->screen; - unsigned ptarget; - - switch (shader->Type) { - case GL_VERTEX_SHADER: - target = GL_VERTEX_PROGRAM_ARB; - ptarget = PIPE_SHADER_VERTEX; - break; - case GL_FRAGMENT_SHADER: - target = GL_FRAGMENT_PROGRAM_ARB; - ptarget = PIPE_SHADER_FRAGMENT; - break; - case GL_GEOMETRY_SHADER: - target = GL_GEOMETRY_PROGRAM_NV; - ptarget = PIPE_SHADER_GEOMETRY; - break; - default: - assert(!"should not be reached"); - return NULL; - } + unsigned ptarget = shader_stage_to_ptarget(shader->Stage); validate_ir_tree(shader->ir); @@ -5068,6 +5206,7 @@ get_mesa_program(struct gl_context *ctx, v->ctx = ctx; v->prog = prog; v->shader_program = shader_program; + v->shader = shader; v->options = options; v->glsl_version = ctx->Const.GLSLVersion; v->native_integers = ctx->Const.NativeIntegers; @@ -5075,6 +5214,7 @@ get_mesa_program(struct gl_context *ctx, v->have_sqrt = pscreen->get_shader_param(pscreen, ptarget, PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED); + _mesa_copy_linked_program_data(shader->Stage, shader_program, prog); _mesa_generate_parameters_list_for_uniforms(shader_program, shader, prog->Parameters); @@ -5088,9 +5228,7 @@ get_mesa_program(struct gl_context *ctx, do { progress = GL_FALSE; - foreach_iter(exec_list_iterator, iter, v->function_signatures) { - function_entry *entry = (function_entry *)iter.get(); - + foreach_in_list(function_entry, entry, &v->function_signatures) { if (!entry->bgn_inst) { v->current_function = entry; @@ -5130,21 +5268,20 @@ get_mesa_program(struct gl_context *ctx, /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */ v->simplify_cmp(); v->copy_propagate(); - while (v->eliminate_dead_code_advanced()); + while (v->eliminate_dead_code()); - v->eliminate_dead_code(); v->merge_registers(); v->renumber_registers(); /* Write the END instruction. */ v->emit(NULL, TGSI_OPCODE_END); - if (ctx->Shader.Flags & GLSL_DUMP) { + if (ctx->_Shader->Flags & GLSL_DUMP) { printf("\n"); printf("GLSL IR for linked %s program %d:\n", - _mesa_glsl_shader_target_name(shader->Type), + _mesa_shader_stage_to_string(shader->Stage), shader_program->Name); - _mesa_print_ir(shader->ir, NULL); + _mesa_print_ir(stdout, shader->ir, NULL); printf("\n"); printf("\n"); fflush(stdout); @@ -5153,7 +5290,7 @@ get_mesa_program(struct gl_context *ctx, prog->Instructions = NULL; prog->NumInstructions = 0; - do_set_program_inouts(shader->ir, prog, shader->Type); + do_set_program_inouts(shader->ir, prog, shader->Stage); count_resources(v, prog); _mesa_reference_program(ctx, &shader->Program, prog); @@ -5183,9 +5320,6 @@ get_mesa_program(struct gl_context *ctx, case GL_GEOMETRY_SHADER: stgp = (struct st_geometry_program *)prog; stgp->glsl_to_tgsi = v; - stgp->Base.InputType = shader_program->Geom.InputType; - stgp->Base.OutputType = shader_program->Geom.OutputType; - stgp->Base.VerticesOut = shader_program->Geom.VerticesOut; break; default: assert(!"should not be reached"); @@ -5197,33 +5331,6 @@ get_mesa_program(struct gl_context *ctx, extern "C" { -struct gl_shader * -st_new_shader(struct gl_context *ctx, GLuint name, GLuint type) -{ - struct gl_shader *shader; - assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER || - type == GL_GEOMETRY_SHADER_ARB); - shader = rzalloc(NULL, struct gl_shader); - if (shader) { - shader->Type = type; - shader->Name = name; - _mesa_init_shader(ctx, shader); - } - return shader; -} - -struct gl_shader_program * -st_new_shader_program(struct gl_context *ctx, GLuint name) -{ - struct gl_shader_program *shProg; - shProg = rzalloc(NULL, struct gl_shader_program); - if (shProg) { - shProg->Name = name; - _mesa_init_shader_program(ctx, shProg); - } - return shProg; -} - /** * Link a shader. * Called via ctx->Driver.LinkShader() @@ -5233,16 +5340,17 @@ st_new_shader_program(struct gl_context *ctx, GLuint name) GLboolean st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) { + struct pipe_screen *pscreen = ctx->st->pipe->screen; assert(prog->LinkStatus); - for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { if (prog->_LinkedShaders[i] == NULL) continue; bool progress; exec_list *ir = prog->_LinkedShaders[i]->ir; const struct gl_shader_compiler_options *options = - &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)]; + &ctx->Const.ShaderCompilerOptions[_mesa_shader_enum_to_shader_stage(prog->_LinkedShaders[i]->Type)]; /* If there are forms of indirect addressing that the driver * cannot handle, perform the lowering pass. @@ -5271,14 +5379,23 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) lower_packing_builtins(ir, lower_inst); } + if (!pscreen->get_param(pscreen, PIPE_CAP_TEXTURE_GATHER_OFFSETS)) + lower_offset_arrays(ir); do_mat_op_to_vec(ir); + /* Emit saturates in the vertex shader only if SM 3.0 is supported. */ + bool vs_sm3 = (_mesa_shader_stage_to_program(prog->_LinkedShaders[i]->Stage) == + GL_VERTEX_PROGRAM_ARB) && st_context(ctx)->has_shader_model3; lower_instructions(ir, MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2 | LOG_TO_LOG2 | + LDEXP_TO_ARITH | + CARRY_TO_ARITH | + BORROW_TO_ARITH | (options->EmitNoPow ? POW_TO_EXP2 : 0) | - (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0)); + (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0) | + (vs_sm3 ? SAT_TO_CLAMP : 0)); lower_ubo_reference(prog->_LinkedShaders[i], ir); do_vec_index_to_cond_assign(ir); @@ -5294,8 +5411,8 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress; - progress = do_common_optimization(ir, true, true, - options->MaxUnrollIterations, options) + progress = do_common_optimization(ir, true, true, options, + ctx->Const.NativeIntegers) || progress; progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress; @@ -5305,7 +5422,7 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) validate_ir_tree(ir); } - for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { struct gl_program *linked_prog; if (prog->_LinkedShaders[i] == NULL) @@ -5317,7 +5434,7 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program, linked_prog); if (!ctx->Driver.ProgramStringNotify(ctx, - _mesa_program_index_to_target(i), + _mesa_shader_stage_to_program(i), linked_prog)) { _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program, NULL); @@ -5348,6 +5465,7 @@ st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi, so->output[i].num_components = info->Outputs[i].NumComponents; so->output[i].output_buffer = info->Outputs[i].OutputBuffer; so->output[i].dst_offset = info->Outputs[i].DstOffset; + so->output[i].stream = info->Outputs[i].StreamId; } for (i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {