X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_fs_visitor.cpp;h=033255ed5d37dd69b82ffe90481f6beee95f97d5;hb=ffe582aa2076bc06f9d06e36287bdded45ab5b98;hp=ce6d3dad1e59b9a8b783274d5c7c8934f5513452;hpb=dd2e5d3999e456c33dbc9a710e211a7bdbe3330f;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index ce6d3dad1e5..033255ed5d3 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -36,7 +36,7 @@ extern "C" { #include "program/prog_parameter.h" #include "program/prog_print.h" #include "program/prog_optimize.h" -#include "program/register_allocate.h" +#include "util/register_allocate.h" #include "program/sampler.h" #include "program/hash_table.h" #include "brw_context.h" @@ -60,7 +60,7 @@ fs_visitor::visit(ir_variable *ir) if (!strcmp(ir->name, "gl_FragCoord")) { reg = emit_fragcoord_interpolation(ir); } else if (!strcmp(ir->name, "gl_FrontFacing")) { - reg = emit_frontfacing_interpolation(ir); + reg = emit_frontfacing_interpolation(); } else { reg = emit_general_interpolation(ir); } @@ -74,9 +74,12 @@ fs_visitor::visit(ir_variable *ir) assert(ir->data.location == FRAG_RESULT_DATA0); assert(ir->data.index == 1); this->dual_src_output = *reg; + this->do_dual_src = true; } else if (ir->data.location == FRAG_RESULT_COLOR) { /* Writing gl_FragColor outputs to all color regions. */ - for (unsigned int i = 0; i < MAX2(c->key.nr_color_regions, 1); i++) { + assert(stage == MESA_SHADER_FRAGMENT); + brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; + for (unsigned int i = 0; i < MAX2(key->nr_color_regions, 1); i++) { this->outputs[i] = *reg; this->output_components[i] = 4; } @@ -96,8 +99,7 @@ fs_visitor::visit(ir_variable *ir) /* General color output. */ for (unsigned int i = 0; i < MAX2(1, ir->type->length); i++) { int output = ir->data.location - FRAG_RESULT_DATA0 + i; - this->outputs[output] = *reg; - this->outputs[output].reg_offset += vector_elements * i; + this->outputs[output] = offset(*reg, vector_elements * i); this->output_components[output] = vector_elements; } } @@ -108,10 +110,10 @@ fs_visitor::visit(ir_variable *ir) * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO * variables, so no need for them to be in variable_ht. * - * Atomic counters take no uniform storage, no need to do - * anything here. + * Some uniforms, such as samplers and atomic counters, have no actual + * storage, so we should ignore them. */ - if (ir->is_in_uniform_block() || ir->type->contains_atomic()) + if (ir->is_in_uniform_block() || type_size(ir->type) == 0) return; if (dispatch_width == 16) { @@ -133,11 +135,14 @@ fs_visitor::visit(ir_variable *ir) } else if (ir->data.mode == ir_var_system_value) { if (ir->data.location == SYSTEM_VALUE_SAMPLE_POS) { - reg = emit_samplepos_setup(ir); + reg = emit_samplepos_setup(); } else if (ir->data.location == SYSTEM_VALUE_SAMPLE_ID) { - reg = emit_sampleid_setup(ir); + reg = emit_sampleid_setup(); } else if (ir->data.location == SYSTEM_VALUE_SAMPLE_MASK_IN) { - reg = emit_samplemaskin_setup(ir); + assert(brw->gen >= 7); + reg = new(mem_ctx) + fs_reg(retype(brw_vec8_grf(payload.sample_mask_in_reg, 0), + BRW_REGISTER_TYPE_D)); } } @@ -151,6 +156,12 @@ void fs_visitor::visit(ir_dereference_variable *ir) { fs_reg *reg = variable_storage(ir->var); + + if (!reg) { + fail("Failed to find variable storage for %s\n", ir->var->name); + this->result = fs_reg(reg_null_d); + return; + } this->result = *reg; } @@ -161,13 +172,13 @@ fs_visitor::visit(ir_dereference_record *ir) ir->record->accept(this); - unsigned int offset = 0; + unsigned int off = 0; for (unsigned int i = 0; i < struct_type->length; i++) { if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) break; - offset += type_size(struct_type->fields.structure[i].type); + off += type_size(struct_type->fields.structure[i].type); } - this->result.reg_offset += offset; + this->result = offset(this->result, off); this->result.type = brw_type_for_base_type(ir->type); } @@ -185,8 +196,8 @@ fs_visitor::visit(ir_dereference_array *ir) src.type = brw_type_for_base_type(ir->type); if (constant_index) { - assert(src.file == UNIFORM || src.file == GRF); - src.reg_offset += constant_index->value.i[0] * element_size; + assert(src.file == UNIFORM || src.file == GRF || src.file == HW_REG); + src = offset(src, constant_index->value.i[0] * element_size); } else { /* Variable index array dereference. We attach the variable index * component to the reg as a pointer to a register containing the @@ -241,7 +252,7 @@ fs_visitor::emit_lrp(const fs_reg &dst, const fs_reg &x, const fs_reg &y, } void -fs_visitor::emit_minmax(uint32_t conditionalmod, const fs_reg &dst, +fs_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, const fs_reg &dst, const fs_reg &src0, const fs_reg &src1) { fs_inst *inst; @@ -257,17 +268,14 @@ fs_visitor::emit_minmax(uint32_t conditionalmod, const fs_reg &dst, } } -/* Instruction selection: Produce a MOV.sat instead of - * MIN(MAX(val, 0), 1) when possible. - */ bool fs_visitor::try_emit_saturate(ir_expression *ir) { - ir_rvalue *sat_val = ir->as_rvalue_to_saturate(); - - if (!sat_val) + if (ir->operation != ir_unop_saturate) return false; + ir_rvalue *sat_val = ir->operands[0]; + fs_inst *pre_inst = (fs_inst *) this->instructions.get_tail(); sat_val->accept(this); @@ -275,25 +283,22 @@ fs_visitor::try_emit_saturate(ir_expression *ir) fs_inst *last_inst = (fs_inst *) this->instructions.get_tail(); - /* If the last instruction from our accept() didn't generate our - * src, generate a saturated MOV + /* If the last instruction from our accept() generated our + * src, just set the saturate flag instead of emmitting a separate mov. */ fs_inst *modify = get_instruction_generating_reg(pre_inst, last_inst, src); - if (!modify || modify->regs_written != 1) { - this->result = fs_reg(this, ir->type); - fs_inst *inst = emit(MOV(this->result, src)); - inst->saturate = true; - } else { + if (modify && modify->regs_written == modify->dst.width / 8 && + modify->can_do_saturate()) { modify->saturate = true; this->result = src; + return true; } - - return true; + return false; } bool -fs_visitor::try_emit_mad(ir_expression *ir, int mul_arg) +fs_visitor::try_emit_mad(ir_expression *ir) { /* 3-src instructions were introduced in gen6. */ if (brw->gen < 6) @@ -303,11 +308,16 @@ fs_visitor::try_emit_mad(ir_expression *ir, int mul_arg) if (ir->type != glsl_type::float_type) return false; - ir_rvalue *nonmul = ir->operands[1 - mul_arg]; - ir_expression *mul = ir->operands[mul_arg]->as_expression(); + ir_rvalue *nonmul = ir->operands[1]; + ir_expression *mul = ir->operands[0]->as_expression(); - if (!mul || mul->operation != ir_binop_mul) - return false; + if (!mul || mul->operation != ir_binop_mul) { + nonmul = ir->operands[0]; + mul = ir->operands[1]->as_expression(); + + if (!mul || mul->operation != ir_binop_mul) + return false; + } if (nonmul->as_constant() || mul->operands[0]->as_constant() || @@ -329,6 +339,135 @@ fs_visitor::try_emit_mad(ir_expression *ir, int mul_arg) return true; } +static int +pack_pixel_offset(float x) +{ + /* Clamp upper end of the range to +7/16. See explanation in non-constant + * offset case below. */ + int n = MIN2((int)(x * 16), 7); + return n & 0xf; +} + +void +fs_visitor::emit_interpolate_expression(ir_expression *ir) +{ + /* in SIMD16 mode, the pixel interpolator returns coords interleaved + * 8 channels at a time, same as the barycentric coords presented in + * the FS payload. this requires a bit of extra work to support. + */ + no16("interpolate_at_* not yet supported in SIMD16 mode."); + + assert(stage == MESA_SHADER_FRAGMENT); + brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; + + ir_dereference * deref = ir->operands[0]->as_dereference(); + ir_swizzle * swiz = NULL; + if (!deref) { + /* the api does not allow a swizzle here, but the varying packing code + * may have pushed one into here. + */ + swiz = ir->operands[0]->as_swizzle(); + assert(swiz); + deref = swiz->val->as_dereference(); + } + assert(deref); + ir_variable * var = deref->variable_referenced(); + assert(var); + + /* 1. collect interpolation factors */ + + fs_reg dst_x = fs_reg(this, glsl_type::get_instance(ir->type->base_type, 2, 1)); + fs_reg dst_y = offset(dst_x, 1); + + /* for most messages, we need one reg of ignored data; the hardware requires mlen==1 + * even when there is no payload. in the per-slot offset case, we'll replace this with + * the proper source data. */ + fs_reg src = fs_reg(this, glsl_type::float_type); + int mlen = 1; /* one reg unless overriden */ + int reg_width = dispatch_width / 8; + fs_inst *inst; + + switch (ir->operation) { + case ir_unop_interpolate_at_centroid: + inst = emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, dst_x, src, fs_reg(0u)); + break; + + case ir_binop_interpolate_at_sample: { + ir_constant *sample_num = ir->operands[1]->as_constant(); + assert(sample_num || !"nonconstant sample number should have been lowered."); + + unsigned msg_data = sample_num->value.i[0] << 4; + inst = emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_x, src, fs_reg(msg_data)); + break; + } + + case ir_binop_interpolate_at_offset: { + ir_constant *const_offset = ir->operands[1]->as_constant(); + if (const_offset) { + unsigned msg_data = pack_pixel_offset(const_offset->value.f[0]) | + (pack_pixel_offset(const_offset->value.f[1]) << 4); + inst = emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_x, src, + fs_reg(msg_data)); + } else { + /* pack the operands: hw wants offsets as 4 bit signed ints */ + ir->operands[1]->accept(this); + src = fs_reg(this, glsl_type::ivec2_type); + fs_reg src2 = src; + for (int i = 0; i < 2; i++) { + fs_reg temp = fs_reg(this, glsl_type::float_type); + emit(MUL(temp, this->result, fs_reg(16.0f))); + emit(MOV(src2, temp)); /* float to int */ + + /* Clamp the upper end of the range to +7/16. ARB_gpu_shader5 requires + * that we support a maximum offset of +0.5, which isn't representable + * in a S0.4 value -- if we didn't clamp it, we'd end up with -8/16, + * which is the opposite of what the shader author wanted. + * + * This is legal due to ARB_gpu_shader5's quantization rules: + * + * "Not all values of may be supported; x and y offsets may + * be rounded to fixed-point values with the number of fraction bits + * given by the implementation-dependent constant + * FRAGMENT_INTERPOLATION_OFFSET_BITS" + */ + + fs_inst *inst = emit(BRW_OPCODE_SEL, src2, src2, fs_reg(7)); + inst->conditional_mod = BRW_CONDITIONAL_L; /* min(src2, 7) */ + + src2 = offset(src2, 1); + this->result = offset(this->result, 1); + } + + mlen = 2 * reg_width; + inst = emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_x, src, + fs_reg(0u)); + } + break; + } + + default: + unreachable("not reached"); + } + + inst->mlen = mlen; + inst->regs_written = 2 * reg_width; /* 2 floats per slot returned */ + inst->pi_noperspective = var->determine_interpolation_mode(key->flat_shade) == + INTERP_QUALIFIER_NOPERSPECTIVE; + + /* 2. emit linterp */ + + fs_reg res(this, ir->type); + this->result = res; + + for (int i = 0; i < ir->type->vector_elements; i++) { + int ch = swiz ? ((*(int *)&swiz->mask) >> 2*i) & 3 : i; + emit(FS_OPCODE_LINTERP, res, + dst_x, dst_y, + fs_reg(interp_reg(var->data.location, ch))); + res = offset(res, 1); + } +} + void fs_visitor::visit(ir_expression *ir) { @@ -340,9 +479,22 @@ fs_visitor::visit(ir_expression *ir) if (try_emit_saturate(ir)) return; - if (ir->operation == ir_binop_add) { - if (try_emit_mad(ir, 0) || try_emit_mad(ir, 1)) - return; + + /* Deal with the real oddball stuff first */ + switch (ir->operation) { + case ir_binop_add: + if (try_emit_mad(ir)) + return; + break; + + case ir_unop_interpolate_at_centroid: + case ir_binop_interpolate_at_offset: + case ir_binop_interpolate_at_sample: + emit_interpolate_expression(ir); + return; + + default: + break; } for (operand = 0; operand < ir->get_num_operands(); operand++) { @@ -371,10 +523,11 @@ fs_visitor::visit(ir_expression *ir) switch (ir->operation) { case ir_unop_logic_not: - /* Note that BRW_OPCODE_NOT is not appropriate here, since it is - * ones complement of the whole register, not just bit 0. - */ - emit(XOR(this->result, op[0], fs_reg(1))); + if (ctx->Const.UniformBooleanTrue != 1) { + emit(NOT(this->result, op[0])); + } else { + emit(XOR(this->result, op[0], fs_reg(1))); + } break; case ir_unop_neg: op[0].negate = !op[0].negate; @@ -427,8 +580,7 @@ fs_visitor::visit(ir_expression *ir) break; case ir_unop_exp: case ir_unop_log: - assert(!"not reached: should be handled by ir_explog_to_explog2"); - break; + unreachable("not reached: should be handled by ir_explog_to_explog2"); case ir_unop_sin: case ir_unop_sin_reduced: emit_math(SHADER_OPCODE_SIN, this->result, op[0]); @@ -439,18 +591,29 @@ fs_visitor::visit(ir_expression *ir) break; case ir_unop_dFdx: - emit(FS_OPCODE_DDX, this->result, op[0]); + emit(FS_OPCODE_DDX, this->result, op[0], fs_reg(BRW_DERIVATIVE_BY_HINT)); + break; + case ir_unop_dFdx_coarse: + emit(FS_OPCODE_DDX, this->result, op[0], fs_reg(BRW_DERIVATIVE_COARSE)); + break; + case ir_unop_dFdx_fine: + emit(FS_OPCODE_DDX, this->result, op[0], fs_reg(BRW_DERIVATIVE_FINE)); break; case ir_unop_dFdy: - emit(FS_OPCODE_DDY, this->result, op[0]); + emit(FS_OPCODE_DDY, this->result, op[0], fs_reg(BRW_DERIVATIVE_BY_HINT)); + break; + case ir_unop_dFdy_coarse: + emit(FS_OPCODE_DDY, this->result, op[0], fs_reg(BRW_DERIVATIVE_COARSE)); + break; + case ir_unop_dFdy_fine: + emit(FS_OPCODE_DDY, this->result, op[0], fs_reg(BRW_DERIVATIVE_FINE)); break; case ir_binop_add: emit(ADD(this->result, op[0], op[1])); break; case ir_binop_sub: - assert(!"not reached: should be handled by ir_sub_to_add_neg"); - break; + unreachable("not reached: should be handled by ir_sub_to_add_neg"); case ir_binop_mul: if (brw->gen < 8 && ir->type->is_integer()) { @@ -458,30 +621,62 @@ fs_visitor::visit(ir_expression *ir) * of one of the operands (src0 on gen6, src1 on gen7). The * MACH accumulates in the contribution of the upper 16 bits * of that operand. - * - * FINISHME: Emit just the MUL if we know an operand is small - * enough. - */ - if (brw->gen >= 7) - no16("SIMD16 explicit accumulator operands unsupported\n"); - - struct brw_reg acc = retype(brw_acc_reg(), this->result.type); - - emit(MUL(acc, op[0], op[1])); - emit(MACH(reg_null_d, op[0], op[1])); - emit(MOV(this->result, fs_reg(acc))); + */ + if (ir->operands[0]->is_uint16_constant()) { + if (brw->gen < 7) + emit(MUL(this->result, op[0], op[1])); + else + emit(MUL(this->result, op[1], op[0])); + } else if (ir->operands[1]->is_uint16_constant()) { + if (brw->gen < 7) + emit(MUL(this->result, op[1], op[0])); + else + emit(MUL(this->result, op[0], op[1])); + } else { + if (brw->gen >= 7) + no16("SIMD16 explicit accumulator operands unsupported\n"); + + struct brw_reg acc = retype(brw_acc_reg(dispatch_width), + this->result.type); + + emit(MUL(acc, op[0], op[1])); + emit(MACH(reg_null_d, op[0], op[1])); + emit(MOV(this->result, fs_reg(acc))); + } } else { emit(MUL(this->result, op[0], op[1])); } break; case ir_binop_imul_high: { - if (brw->gen >= 7) + if (brw->gen == 7) no16("SIMD16 explicit accumulator operands unsupported\n"); - struct brw_reg acc = retype(brw_acc_reg(), this->result.type); + struct brw_reg acc = retype(brw_acc_reg(dispatch_width), + this->result.type); - emit(MUL(acc, op[0], op[1])); + fs_inst *mul = emit(MUL(acc, op[0], op[1])); emit(MACH(this->result, op[0], op[1])); + + /* Until Gen8, integer multiplies read 32-bits from one source, and + * 16-bits from the other, and relying on the MACH instruction to + * generate the high bits of the result. + * + * On Gen8, the multiply instruction does a full 32x32-bit multiply, + * but in order to do a 64x64-bit multiply we have to simulate the + * previous behavior and then use a MACH instruction. + * + * FINISHME: Don't use source modifiers on src1. + */ + if (brw->gen >= 8) { + assert(mul->src[1].type == BRW_REGISTER_TYPE_D || + mul->src[1].type == BRW_REGISTER_TYPE_UD); + if (mul->src[1].type == BRW_REGISTER_TYPE_D) { + mul->src[1].type = BRW_REGISTER_TYPE_W; + } else { + mul->src[1].type = BRW_REGISTER_TYPE_UW; + } + } + break; } case ir_binop_div: @@ -490,20 +685,22 @@ fs_visitor::visit(ir_expression *ir) emit_math(SHADER_OPCODE_INT_QUOTIENT, this->result, op[0], op[1]); break; case ir_binop_carry: { - if (brw->gen >= 7) + if (brw->gen == 7) no16("SIMD16 explicit accumulator operands unsupported\n"); - struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD); + struct brw_reg acc = retype(brw_acc_reg(dispatch_width), + BRW_REGISTER_TYPE_UD); emit(ADDC(reg_null_ud, op[0], op[1])); emit(MOV(this->result, fs_reg(acc))); break; } case ir_binop_borrow: { - if (brw->gen >= 7) + if (brw->gen == 7) no16("SIMD16 explicit accumulator operands unsupported\n"); - struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD); + struct brw_reg acc = retype(brw_acc_reg(dispatch_width), + BRW_REGISTER_TYPE_UD); emit(SUBB(reg_null_ud, op[0], op[1])); emit(MOV(this->result, fs_reg(acc))); @@ -523,8 +720,10 @@ fs_visitor::visit(ir_expression *ir) case ir_binop_all_equal: case ir_binop_nequal: case ir_binop_any_nequal: - resolve_bool_comparison(ir->operands[0], &op[0]); - resolve_bool_comparison(ir->operands[1], &op[1]); + if (ctx->Const.UniformBooleanTrue == 1) { + resolve_bool_comparison(ir->operands[0], &op[0]); + resolve_bool_comparison(ir->operands[1], &op[1]); + } emit(CMP(this->result, op[0], op[1], brw_conditional_for_comparison(ir->operation))); @@ -544,28 +743,22 @@ fs_visitor::visit(ir_expression *ir) case ir_binop_dot: case ir_unop_any: - assert(!"not reached: should be handled by brw_fs_channel_expressions"); - break; + unreachable("not reached: should be handled by brw_fs_channel_expressions"); case ir_unop_noise: - assert(!"not reached: should be handled by lower_noise"); - break; + unreachable("not reached: should be handled by lower_noise"); case ir_quadop_vector: - assert(!"not reached: should be handled by lower_quadop_vector"); - break; + unreachable("not reached: should be handled by lower_quadop_vector"); case ir_binop_vector_extract: - assert(!"not reached: should be handled by lower_vec_index_to_cond_assign()"); - break; + unreachable("not reached: should be handled by lower_vec_index_to_cond_assign()"); case ir_triop_vector_insert: - assert(!"not reached: should be handled by lower_vector_insert()"); - break; + unreachable("not reached: should be handled by lower_vector_insert()"); case ir_binop_ldexp: - assert(!"not reached: should be handled by ldexp_to_arith()"); - break; + unreachable("not reached: should be handled by ldexp_to_arith()"); case ir_unop_sqrt: emit_math(SHADER_OPCODE_SQRT, this->result, op[0]); @@ -601,9 +794,16 @@ fs_visitor::visit(ir_expression *ir) emit(AND(this->result, op[0], fs_reg(1))); break; case ir_unop_b2f: - temp = fs_reg(this, glsl_type::int_type); - emit(AND(temp, op[0], fs_reg(1))); - emit(MOV(this->result, temp)); + if (ctx->Const.UniformBooleanTrue != 1) { + op[0].type = BRW_REGISTER_TYPE_UD; + this->result.type = BRW_REGISTER_TYPE_UD; + emit(AND(this->result, op[0], fs_reg(0x3f800000u))); + this->result.type = BRW_REGISTER_TYPE_F; + } else { + temp = fs_reg(this, glsl_type::int_type); + emit(AND(temp, op[0], fs_reg(1))); + emit(MOV(this->result, temp)); + } break; case ir_unop_f2b: @@ -649,8 +849,7 @@ fs_visitor::visit(ir_expression *ir) case ir_unop_unpack_unorm_4x8: case ir_unop_unpack_half_2x16: case ir_unop_pack_half_2x16: - assert(!"not reached: should be handled by lower_packing_builtins"); - break; + unreachable("not reached: should be handled by lower_packing_builtins"); case ir_unop_unpack_half_2x16_split_x: emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, this->result, op[0]); break; @@ -687,6 +886,10 @@ fs_visitor::visit(ir_expression *ir) case ir_unop_find_lsb: emit(FBL(this->result, op[0])); break; + case ir_unop_saturate: + inst = emit(MOV(this->result, op[0])); + inst->saturate = true; + break; case ir_triop_bitfield_extract: /* Note that the instruction's argument order is reversed from GLSL * and the IR. @@ -700,9 +903,8 @@ fs_visitor::visit(ir_expression *ir) emit(BFI2(this->result, op[0], op[1], op[2])); break; case ir_quadop_bitfield_insert: - assert(!"not reached: should be handled by " + unreachable("not reached: should be handled by " "lower_instructions::bitfield_insert_to_bfm_bfi"); - break; case ir_unop_bit_not: emit(NOT(this->result, op[0])); @@ -734,16 +936,40 @@ fs_visitor::visit(ir_expression *ir) /* This IR node takes a constant uniform block and a constant or * variable byte offset within the block and loads a vector from that. */ - ir_constant *uniform_block = ir->operands[0]->as_constant(); + ir_constant *const_uniform_block = ir->operands[0]->as_constant(); ir_constant *const_offset = ir->operands[1]->as_constant(); - fs_reg surf_index = fs_reg(c->prog_data.base.binding_table.ubo_start + - uniform_block->value.u[0]); + fs_reg surf_index; + + if (const_uniform_block) { + /* The block index is a constant, so just emit the binding table entry + * as an immediate. + */ + surf_index = fs_reg(stage_prog_data->binding_table.ubo_start + + const_uniform_block->value.u[0]); + } else { + /* The block index is not a constant. Evaluate the index expression + * per-channel and add the base UBO index; the generator will select + * a value from any live channel. + */ + surf_index = fs_reg(this, glsl_type::uint_type); + emit(ADD(surf_index, op[0], + fs_reg(stage_prog_data->binding_table.ubo_start))) + ->force_writemask_all = true; + + /* Assume this may touch any UBO. It would be nice to provide + * a tighter bound, but the array information is already lowered away. + */ + brw_mark_surface_used(prog_data, + stage_prog_data->binding_table.ubo_start + + shader_prog->NumUniformBlocks - 1); + } + if (const_offset) { fs_reg packed_consts = fs_reg(this, glsl_type::float_type); packed_consts.type = result.type; fs_reg const_offset_reg = fs_reg(const_offset->value.u[0] & ~15); - emit(new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, + emit(new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, 8, packed_consts, surf_index, const_offset_reg)); for (int i = 0; i < ir->type->vector_elements; i++) { @@ -763,7 +989,7 @@ fs_visitor::visit(ir_expression *ir) emit(MOV(result, packed_consts)); } - result.reg_offset++; + result = offset(result, 1); } } else { /* Turn the byte offset into a dword offset. */ @@ -777,7 +1003,7 @@ fs_visitor::visit(ir_expression *ir) if (ir->type->base_type == GLSL_TYPE_BOOL) emit(CMP(result, result, fs_reg(0), BRW_CONDITIONAL_NZ)); - result.reg_offset++; + result = offset(result, 1); } } @@ -801,6 +1027,12 @@ fs_visitor::visit(ir_expression *ir) inst = emit(BRW_OPCODE_SEL, this->result, op[1], op[2]); inst->predicate = BRW_PREDICATE_NORMAL; break; + + case ir_unop_interpolate_at_centroid: + case ir_binop_interpolate_at_offset: + case ir_binop_interpolate_at_sample: + unreachable("already handled above"); + break; } } @@ -822,8 +1054,8 @@ fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r, inst->predicate = predicated ? BRW_PREDICATE_NORMAL : BRW_PREDICATE_NONE; } - l.reg_offset++; - r.reg_offset++; + l = offset(l, 1); + r = offset(r, 1); } break; case GLSL_TYPE_ARRAY: @@ -847,8 +1079,7 @@ fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r, case GLSL_TYPE_VOID: case GLSL_TYPE_ERROR: case GLSL_TYPE_INTERFACE: - assert(!"not reached"); - break; + unreachable("not reached"); } } @@ -925,9 +1156,9 @@ fs_visitor::visit(ir_assignment *ir) inst = emit(MOV(l, r)); if (ir->condition) inst->predicate = BRW_PREDICATE_NORMAL; - r.reg_offset++; + r = offset(r, 1); } - l.reg_offset++; + l = offset(l, 1); } } else { emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL); @@ -935,8 +1166,11 @@ fs_visitor::visit(ir_assignment *ir) } fs_inst * -fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, - fs_reg shadow_c, fs_reg lod, fs_reg dPdy) +fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst, + fs_reg coordinate, int coord_components, + fs_reg shadow_c, + fs_reg lod, fs_reg dPdy, int grad_components, + uint32_t sampler) { int mlen; int base_mrf = 1; @@ -946,55 +1180,55 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, /* g0 header. */ mlen = 1; - if (ir->shadow_comparitor) { - for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { + if (shadow_c.file != BAD_FILE) { + for (int i = 0; i < coord_components; i++) { emit(MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate)); - coordinate.reg_offset++; + coordinate = offset(coordinate, 1); } /* gen4's SIMD8 sampler always has the slots for u,v,r present. * the unused slots must be zeroed. */ - for (int i = ir->coordinate->type->vector_elements; i < 3; i++) { + for (int i = coord_components; i < 3; i++) { emit(MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f))); } mlen += 3; - if (ir->op == ir_tex) { + if (op == ir_tex) { /* There's no plain shadow compare message, so we use shadow * compare with a bias of 0.0. */ emit(MOV(fs_reg(MRF, base_mrf + mlen), fs_reg(0.0f))); mlen++; - } else if (ir->op == ir_txb || ir->op == ir_txl) { + } else if (op == ir_txb || op == ir_txl) { emit(MOV(fs_reg(MRF, base_mrf + mlen), lod)); mlen++; } else { - assert(!"Should not get here."); + unreachable("Should not get here."); } emit(MOV(fs_reg(MRF, base_mrf + mlen), shadow_c)); mlen++; - } else if (ir->op == ir_tex) { - for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { + } else if (op == ir_tex) { + for (int i = 0; i < coord_components; i++) { emit(MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate)); - coordinate.reg_offset++; + coordinate = offset(coordinate, 1); } /* zero the others. */ - for (int i = ir->coordinate->type->vector_elements; i<3; i++) { + for (int i = coord_components; i<3; i++) { emit(MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f))); } /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ mlen += 3; - } else if (ir->op == ir_txd) { + } else if (op == ir_txd) { fs_reg &dPdx = lod; - for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { + for (int i = 0; i < coord_components; i++) { emit(MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate)); - coordinate.reg_offset++; + coordinate = offset(coordinate, 1); } /* the slots for u and v are always present, but r is optional */ - mlen += MAX2(ir->coordinate->type->vector_elements, 2); + mlen += MAX2(coord_components, 2); /* P = u, v, r * dPdx = dudx, dvdx, drdx @@ -1010,18 +1244,18 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, * dPdx.x dPdx.y dPdx.z dPdy.x dPdy.y dPdy.z * m5 m6 m7 m8 m9 m10 */ - for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) { + for (int i = 0; i < grad_components; i++) { emit(MOV(fs_reg(MRF, base_mrf + mlen), dPdx)); - dPdx.reg_offset++; + dPdx = offset(dPdx, 1); } - mlen += MAX2(ir->lod_info.grad.dPdx->type->vector_elements, 2); + mlen += MAX2(grad_components, 2); - for (int i = 0; i < ir->lod_info.grad.dPdy->type->vector_elements; i++) { + for (int i = 0; i < grad_components; i++) { emit(MOV(fs_reg(MRF, base_mrf + mlen), dPdy)); - dPdy.reg_offset++; + dPdy = offset(dPdy, 1); } - mlen += MAX2(ir->lod_info.grad.dPdy->type->vector_elements, 2); - } else if (ir->op == ir_txs) { + mlen += MAX2(grad_components, 2); + } else if (op == ir_txs) { /* There's no SIMD8 resinfo message on Gen4. Use SIMD16 instead. */ simd16 = true; emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod)); @@ -1031,18 +1265,18 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, * instructions. We'll need to do SIMD16 here. */ simd16 = true; - assert(ir->op == ir_txb || ir->op == ir_txl || ir->op == ir_txf); + assert(op == ir_txb || op == ir_txl || op == ir_txf); - for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { + for (int i = 0; i < coord_components; i++) { emit(MOV(fs_reg(MRF, base_mrf + mlen + i * 2, coordinate.type), coordinate)); - coordinate.reg_offset++; + coordinate = offset(coordinate, 1); } /* Initialize the rest of u/v/r with 0.0. Empirically, this seems to * be necessary for TXF (ld), but seems wise to do for all messages. */ - for (int i = ir->coordinate->type->vector_elements; i < 3; i++) { + for (int i = coord_components; i < 3; i++) { emit(MOV(fs_reg(MRF, base_mrf + mlen + i * 2), fs_reg(0.0f))); } @@ -1062,35 +1296,22 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, * this weirdness around to the expected layout. */ orig_dst = dst; - dst = fs_reg(GRF, virtual_grf_alloc(8), - (brw->is_g4x ? - brw_type_for_base_type(ir->type) : - BRW_REGISTER_TYPE_F)); + dst = fs_reg(GRF, virtual_grf_alloc(8), orig_dst.type); } - fs_inst *inst = NULL; - switch (ir->op) { - case ir_tex: - inst = emit(SHADER_OPCODE_TEX, dst); - break; - case ir_txb: - inst = emit(FS_OPCODE_TXB, dst); - break; - case ir_txl: - inst = emit(SHADER_OPCODE_TXL, dst); - break; - case ir_txd: - inst = emit(SHADER_OPCODE_TXD, dst); - break; - case ir_txs: - inst = emit(SHADER_OPCODE_TXS, dst); - break; - case ir_txf: - inst = emit(SHADER_OPCODE_TXF, dst); - break; + enum opcode opcode; + switch (op) { + case ir_tex: opcode = SHADER_OPCODE_TEX; break; + case ir_txb: opcode = FS_OPCODE_TXB; break; + case ir_txl: opcode = SHADER_OPCODE_TXL; break; + case ir_txd: opcode = SHADER_OPCODE_TXD; break; + case ir_txs: opcode = SHADER_OPCODE_TXS; break; + case ir_txf: opcode = SHADER_OPCODE_TXF; break; default: - fail("unrecognized texture opcode"); + unreachable("not reached"); } + + fs_inst *inst = emit(opcode, dst, reg_undef, fs_reg(sampler)); inst->base_mrf = base_mrf; inst->mlen = mlen; inst->header_present = true; @@ -1099,8 +1320,8 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, if (simd16) { for (int i = 0; i < 4; i++) { emit(MOV(orig_dst, dst)); - orig_dst.reg_offset++; - dst.reg_offset += 2; + orig_dst = offset(orig_dst, 1); + dst = offset(dst, 2); } } @@ -1116,62 +1337,59 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, * surprising in the disassembly. */ fs_inst * -fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, - fs_reg shadow_c, fs_reg lod, fs_reg lod2, - fs_reg sample_index) +fs_visitor::emit_texture_gen5(ir_texture_opcode op, fs_reg dst, + fs_reg coordinate, int vector_elements, + fs_reg shadow_c, + fs_reg lod, fs_reg lod2, int grad_components, + fs_reg sample_index, uint32_t sampler, + bool has_offset) { - int mlen = 0; - int base_mrf = 2; int reg_width = dispatch_width / 8; bool header_present = false; - const int vector_elements = - ir->coordinate ? ir->coordinate->type->vector_elements : 0; - if (ir->offset) { + fs_reg message(MRF, 2, BRW_REGISTER_TYPE_F, dispatch_width); + fs_reg msg_coords = message; + + if (has_offset) { /* The offsets set up by the ir_texture visitor are in the * m1 header, so we can't go headerless. */ header_present = true; - mlen++; - base_mrf--; + message.reg--; } for (int i = 0; i < vector_elements; i++) { - emit(MOV(fs_reg(MRF, base_mrf + mlen + i * reg_width, coordinate.type), - coordinate)); - coordinate.reg_offset++; + emit(MOV(retype(offset(msg_coords, i), coordinate.type), coordinate)); + coordinate = offset(coordinate, 1); } - mlen += vector_elements * reg_width; + fs_reg msg_end = offset(msg_coords, vector_elements); + fs_reg msg_lod = offset(msg_coords, 4); - if (ir->shadow_comparitor) { - mlen = MAX2(mlen, header_present + 4 * reg_width); - - emit(MOV(fs_reg(MRF, base_mrf + mlen), shadow_c)); - mlen += reg_width; + if (shadow_c.file != BAD_FILE) { + fs_reg msg_shadow = msg_lod; + emit(MOV(msg_shadow, shadow_c)); + msg_lod = offset(msg_shadow, 1); + msg_end = msg_lod; } - fs_inst *inst = NULL; - switch (ir->op) { + enum opcode opcode; + switch (op) { case ir_tex: - inst = emit(SHADER_OPCODE_TEX, dst); + opcode = SHADER_OPCODE_TEX; break; case ir_txb: - mlen = MAX2(mlen, header_present + 4 * reg_width); - emit(MOV(fs_reg(MRF, base_mrf + mlen), lod)); - mlen += reg_width; + emit(MOV(msg_lod, lod)); + msg_end = offset(msg_lod, 1); - inst = emit(FS_OPCODE_TXB, dst); + opcode = FS_OPCODE_TXB; break; case ir_txl: - mlen = MAX2(mlen, header_present + 4 * reg_width); - emit(MOV(fs_reg(MRF, base_mrf + mlen), lod)); - mlen += reg_width; + emit(MOV(msg_lod, lod)); + msg_end = offset(msg_lod, 1); - inst = emit(SHADER_OPCODE_TXL, dst); + opcode = SHADER_OPCODE_TXL; break; case ir_txd: { - mlen = MAX2(mlen, header_present + 4 * reg_width); /* skip over 'ai' */ - /** * P = u, v, r * dPdx = dudx, dvdx, drdx @@ -1181,60 +1399,68 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, * - dudx dudy dvdx dvdy drdx drdy * - dPdx.x dPdy.x dPdx.y dPdy.y dPdx.z dPdy.z */ - for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) { - emit(MOV(fs_reg(MRF, base_mrf + mlen), lod)); - lod.reg_offset++; - mlen += reg_width; - - emit(MOV(fs_reg(MRF, base_mrf + mlen), lod2)); - lod2.reg_offset++; - mlen += reg_width; + msg_end = msg_lod; + for (int i = 0; i < grad_components; i++) { + emit(MOV(msg_end, lod)); + lod = offset(lod, 1); + msg_end = offset(msg_end, 1); + + emit(MOV(msg_end, lod2)); + lod2 = offset(lod2, 1); + msg_end = offset(msg_end, 1); } - inst = emit(SHADER_OPCODE_TXD, dst); + opcode = SHADER_OPCODE_TXD; break; } case ir_txs: - emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod)); - mlen += reg_width; - inst = emit(SHADER_OPCODE_TXS, dst); + msg_lod = retype(msg_end, BRW_REGISTER_TYPE_UD); + emit(MOV(msg_lod, lod)); + msg_end = offset(msg_lod, 1); + + opcode = SHADER_OPCODE_TXS; break; case ir_query_levels: - emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), fs_reg(0u))); - mlen += reg_width; - inst = emit(SHADER_OPCODE_TXS, dst); + msg_lod = msg_end; + emit(MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u))); + msg_end = offset(msg_lod, 1); + + opcode = SHADER_OPCODE_TXS; break; case ir_txf: - mlen = header_present + 4 * reg_width; - emit(MOV(fs_reg(MRF, base_mrf + mlen - reg_width, BRW_REGISTER_TYPE_UD), lod)); - inst = emit(SHADER_OPCODE_TXF, dst); + msg_lod = offset(msg_coords, 3); + emit(MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), lod)); + msg_end = offset(msg_lod, 1); + + opcode = SHADER_OPCODE_TXF; break; case ir_txf_ms: - mlen = header_present + 4 * reg_width; - + msg_lod = offset(msg_coords, 3); /* lod */ - emit(MOV(fs_reg(MRF, base_mrf + mlen - reg_width, BRW_REGISTER_TYPE_UD), fs_reg(0))); + emit(MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u))); /* sample index */ - emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), sample_index)); - mlen += reg_width; - inst = emit(SHADER_OPCODE_TXF_CMS, dst); + emit(MOV(retype(offset(msg_lod, 1), BRW_REGISTER_TYPE_UD), sample_index)); + msg_end = offset(msg_lod, 2); + + opcode = SHADER_OPCODE_TXF_CMS; break; case ir_lod: - inst = emit(SHADER_OPCODE_LOD, dst); + opcode = SHADER_OPCODE_LOD; break; case ir_tg4: - inst = emit(SHADER_OPCODE_TG4, dst); + opcode = SHADER_OPCODE_TG4; break; default: - fail("unrecognized texture opcode"); - break; + unreachable("not reached"); } - inst->base_mrf = base_mrf; - inst->mlen = mlen; + + fs_inst *inst = emit(opcode, dst, reg_undef, fs_reg(sampler)); + inst->base_mrf = message.reg; + inst->mlen = msg_end.reg - message.reg; inst->header_present = header_present; - inst->regs_written = 4; + inst->regs_written = 4 * reg_width; - if (mlen > MAX_SAMPLER_MESSAGE_SIZE) { + if (inst->mlen > MAX_SAMPLER_MESSAGE_SIZE) { fail("Message length >" STRINGIFY(MAX_SAMPLER_MESSAGE_SIZE) " disallowed by hardware\n"); } @@ -1242,18 +1468,34 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, return inst; } +static bool +is_high_sampler(struct brw_context *brw, fs_reg sampler) +{ + if (brw->gen < 8 && !brw->is_haswell) + return false; + + return sampler.file != IMM || sampler.fixed_hw_reg.dw1.ud >= 16; +} + fs_inst * -fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, - fs_reg shadow_c, fs_reg lod, fs_reg lod2, - fs_reg sample_index, fs_reg mcs, int sampler) +fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst, + fs_reg coordinate, int coord_components, + fs_reg shadow_c, + fs_reg lod, fs_reg lod2, int grad_components, + fs_reg sample_index, fs_reg mcs, fs_reg sampler, + fs_reg offset_value) { int reg_width = dispatch_width / 8; bool header_present = false; - fs_reg payload = fs_reg(this, glsl_type::float_type); - fs_reg next = payload; + fs_reg *sources = ralloc_array(mem_ctx, fs_reg, MAX_SAMPLER_MESSAGE_SIZE); + for (int i = 0; i < MAX_SAMPLER_MESSAGE_SIZE; i++) { + sources[i] = fs_reg(this, glsl_type::float_type); + } + int length = 0; - if (ir->op == ir_tg4 || (ir->offset && ir->op != ir_txf) || sampler >= 16) { + if (op == ir_tg4 || offset_value.file != BAD_FILE || + is_high_sampler(brw, sampler)) { /* For general texture offsets (no txf workaround), we need a header to * put them in. Note that for SIMD16 we're making space for two actual * hardware registers here, so the emit will have to fix up for this. @@ -1265,29 +1507,31 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, * need to offset the Sampler State Pointer in the header. */ header_present = true; - next.reg_offset++; + sources[0] = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD); + length++; } - if (ir->shadow_comparitor) { - emit(MOV(next, shadow_c)); - next.reg_offset++; + if (shadow_c.file != BAD_FILE) { + emit(MOV(sources[length], shadow_c)); + length++; } - bool has_nonconstant_offset = ir->offset && !ir->offset->as_constant(); + bool has_nonconstant_offset = + offset_value.file != BAD_FILE && offset_value.file != IMM; bool coordinate_done = false; /* Set up the LOD info */ - switch (ir->op) { + switch (op) { case ir_tex: case ir_lod: break; case ir_txb: - emit(MOV(next, lod)); - next.reg_offset++; + emit(MOV(sources[length], lod)); + length++; break; case ir_txl: - emit(MOV(next, lod)); - next.reg_offset++; + emit(MOV(sources[length], lod)); + length++; break; case ir_txd: { no16("Gen7 does not support sample_d/sample_d_c in SIMD16 mode."); @@ -1295,22 +1539,22 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, /* Load dPdx and the coordinate together: * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z */ - for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { - emit(MOV(next, coordinate)); - coordinate.reg_offset++; - next.reg_offset++; + for (int i = 0; i < coord_components; i++) { + emit(MOV(sources[length], coordinate)); + coordinate = offset(coordinate, 1); + length++; /* For cube map array, the coordinate is (u,v,r,ai) but there are * only derivatives for (u, v, r). */ - if (i < ir->lod_info.grad.dPdx->type->vector_elements) { - emit(MOV(next, lod)); - lod.reg_offset++; - next.reg_offset++; - - emit(MOV(next, lod2)); - lod2.reg_offset++; - next.reg_offset++; + if (i < grad_components) { + emit(MOV(sources[length], lod)); + lod = offset(lod, 1); + length++; + + emit(MOV(sources[length], lod2)); + lod2 = offset(lod2, 1); + length++; } } @@ -1318,74 +1562,71 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, break; } case ir_txs: - emit(MOV(retype(next, BRW_REGISTER_TYPE_UD), lod)); - next.reg_offset++; + emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), lod)); + length++; break; case ir_query_levels: - emit(MOV(retype(next, BRW_REGISTER_TYPE_UD), fs_reg(0u))); - next.reg_offset++; + emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), fs_reg(0u))); + length++; break; case ir_txf: /* Unfortunately, the parameters for LD are intermixed: u, lod, v, r. */ - emit(MOV(retype(next, BRW_REGISTER_TYPE_D), coordinate)); - coordinate.reg_offset++; - next.reg_offset++; + emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate)); + coordinate = offset(coordinate, 1); + length++; - emit(MOV(retype(next, BRW_REGISTER_TYPE_D), lod)); - next.reg_offset++; + emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), lod)); + length++; - for (int i = 1; i < ir->coordinate->type->vector_elements; i++) { - emit(MOV(retype(next, BRW_REGISTER_TYPE_D), coordinate)); - coordinate.reg_offset++; - next.reg_offset++; + for (int i = 1; i < coord_components; i++) { + emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate)); + coordinate = offset(coordinate, 1); + length++; } coordinate_done = true; break; case ir_txf_ms: - emit(MOV(retype(next, BRW_REGISTER_TYPE_UD), sample_index)); - next.reg_offset++; + emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), sample_index)); + length++; /* data from the multisample control surface */ - emit(MOV(retype(next, BRW_REGISTER_TYPE_UD), mcs)); - next.reg_offset++; + emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), mcs)); + length++; /* there is no offsetting for this message; just copy in the integer * texture coordinates */ - for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { - emit(MOV(retype(next, BRW_REGISTER_TYPE_D), coordinate)); - coordinate.reg_offset++; - next.reg_offset++; + for (int i = 0; i < coord_components; i++) { + emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate)); + coordinate = offset(coordinate, 1); + length++; } coordinate_done = true; break; case ir_tg4: if (has_nonconstant_offset) { - if (ir->shadow_comparitor) + if (shadow_c.file != BAD_FILE) no16("Gen7 does not support gather4_po_c in SIMD16 mode."); /* More crazy intermixing */ - ir->offset->accept(this); - fs_reg offset_value = this->result; - for (int i = 0; i < 2; i++) { /* u, v */ - emit(MOV(next, coordinate)); - coordinate.reg_offset++; - next.reg_offset++; + emit(MOV(sources[length], coordinate)); + coordinate = offset(coordinate, 1); + length++; } for (int i = 0; i < 2; i++) { /* offu, offv */ - emit(MOV(retype(next, BRW_REGISTER_TYPE_D), offset_value)); - offset_value.reg_offset++; - next.reg_offset++; + emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), offset_value)); + offset_value = offset(offset_value, 1); + length++; } - if (ir->coordinate->type->vector_elements == 3) { /* r if present */ - emit(MOV(next, coordinate)); - coordinate.reg_offset++; - next.reg_offset++; + if (coord_components == 3) { /* r if present */ + emit(MOV(sources[length], coordinate)); + coordinate = offset(coordinate, 1); + length++; } coordinate_done = true; @@ -1394,42 +1635,51 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, } /* Set up the coordinate (except for cases where it was done above) */ - if (ir->coordinate && !coordinate_done) { - for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { - emit(MOV(next, coordinate)); - coordinate.reg_offset++; - next.reg_offset++; + if (!coordinate_done) { + for (int i = 0; i < coord_components; i++) { + emit(MOV(sources[length], coordinate)); + coordinate = offset(coordinate, 1); + length++; } } + int mlen; + if (reg_width == 2) + mlen = length * reg_width - header_present; + else + mlen = length * reg_width; + + fs_reg src_payload = fs_reg(GRF, virtual_grf_alloc(mlen), + BRW_REGISTER_TYPE_F); + emit(LOAD_PAYLOAD(src_payload, sources, length)); + /* Generate the SEND */ - fs_inst *inst = NULL; - switch (ir->op) { - case ir_tex: inst = emit(SHADER_OPCODE_TEX, dst, payload); break; - case ir_txb: inst = emit(FS_OPCODE_TXB, dst, payload); break; - case ir_txl: inst = emit(SHADER_OPCODE_TXL, dst, payload); break; - case ir_txd: inst = emit(SHADER_OPCODE_TXD, dst, payload); break; - case ir_txf: inst = emit(SHADER_OPCODE_TXF, dst, payload); break; - case ir_txf_ms: inst = emit(SHADER_OPCODE_TXF_CMS, dst, payload); break; - case ir_txs: inst = emit(SHADER_OPCODE_TXS, dst, payload); break; - case ir_query_levels: inst = emit(SHADER_OPCODE_TXS, dst, payload); break; - case ir_lod: inst = emit(SHADER_OPCODE_LOD, dst, payload); break; + enum opcode opcode; + switch (op) { + case ir_tex: opcode = SHADER_OPCODE_TEX; break; + case ir_txb: opcode = FS_OPCODE_TXB; break; + case ir_txl: opcode = SHADER_OPCODE_TXL; break; + case ir_txd: opcode = SHADER_OPCODE_TXD; break; + case ir_txf: opcode = SHADER_OPCODE_TXF; break; + case ir_txf_ms: opcode = SHADER_OPCODE_TXF_CMS; break; + case ir_txs: opcode = SHADER_OPCODE_TXS; break; + case ir_query_levels: opcode = SHADER_OPCODE_TXS; break; + case ir_lod: opcode = SHADER_OPCODE_LOD; break; case ir_tg4: if (has_nonconstant_offset) - inst = emit(SHADER_OPCODE_TG4_OFFSET, dst, payload); + opcode = SHADER_OPCODE_TG4_OFFSET; else - inst = emit(SHADER_OPCODE_TG4, dst, payload); + opcode = SHADER_OPCODE_TG4; break; + default: + unreachable("not reached"); } + fs_inst *inst = emit(opcode, dst, src_payload, sampler); inst->base_mrf = -1; - if (reg_width == 2) - inst->mlen = next.reg_offset * reg_width - header_present; - else - inst->mlen = next.reg_offset * reg_width; + inst->mlen = mlen; inst->header_present = header_present; - inst->regs_written = 4; + inst->regs_written = 4 * reg_width; - virtual_grf_sizes[payload.reg] = next.reg_offset; if (inst->mlen > MAX_SAMPLER_MESSAGE_SIZE) { fail("Message length >" STRINGIFY(MAX_SAMPLER_MESSAGE_SIZE) " disallowed by hardware\n"); @@ -1439,12 +1689,16 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, } fs_reg -fs_visitor::rescale_texcoord(ir_texture *ir, fs_reg coordinate, - bool is_rect, int sampler, int texunit) +fs_visitor::rescale_texcoord(fs_reg coordinate, const glsl_type *coord_type, + bool is_rect, uint32_t sampler, int texunit) { fs_inst *inst = NULL; bool needs_gl_clamp = true; fs_reg scale_x, scale_y; + const struct brw_sampler_prog_key_data *tex = + (stage == MESA_SHADER_FRAGMENT) ? + &((brw_wm_prog_key*) this->key)->tex : NULL; + assert(tex); /* The 965 requires the EU to do the normalization of GL rectangle * texture coordinates. We use the program parameter state @@ -1452,8 +1706,8 @@ fs_visitor::rescale_texcoord(ir_texture *ir, fs_reg coordinate, */ if (is_rect && (brw->gen < 6 || - (brw->gen >= 6 && (c->key.tex.gl_clamp_mask[0] & (1 << sampler) || - c->key.tex.gl_clamp_mask[1] & (1 << sampler))))) { + (brw->gen >= 6 && (tex->gl_clamp_mask[0] & (1 << sampler) || + tex->gl_clamp_mask[1] & (1 << sampler))))) { struct gl_program_parameter_list *params = prog->Parameters; int tokens[STATE_LENGTH] = { STATE_INTERNAL, @@ -1468,15 +1722,28 @@ fs_visitor::rescale_texcoord(ir_texture *ir, fs_reg coordinate, return coordinate; } - scale_x = fs_reg(UNIFORM, uniforms); - scale_y = fs_reg(UNIFORM, uniforms + 1); - GLuint index = _mesa_add_state_reference(params, (gl_state_index *)tokens); - stage_prog_data->param[uniforms++] = - &prog->Parameters->ParameterValues[index][0].f; - stage_prog_data->param[uniforms++] = - &prog->Parameters->ParameterValues[index][1].f; + /* Try to find existing copies of the texrect scale uniforms. */ + for (unsigned i = 0; i < uniforms; i++) { + if (stage_prog_data->param[i] == + &prog->Parameters->ParameterValues[index][0]) { + scale_x = fs_reg(UNIFORM, i); + scale_y = fs_reg(UNIFORM, i + 1); + break; + } + } + + /* If we didn't already set them up, do so now. */ + if (scale_x.file == BAD_FILE) { + scale_x = fs_reg(UNIFORM, uniforms); + scale_y = fs_reg(UNIFORM, uniforms + 1); + + stage_prog_data->param[uniforms++] = + &prog->Parameters->ParameterValues[index][0]; + stage_prog_data->param[uniforms++] = + &prog->Parameters->ParameterValues[index][1]; + } } /* The 965 requires the EU to do the normalization of GL rectangle @@ -1484,13 +1751,13 @@ fs_visitor::rescale_texcoord(ir_texture *ir, fs_reg coordinate, * tracking to get the scaling factor. */ if (brw->gen < 6 && is_rect) { - fs_reg dst = fs_reg(this, ir->coordinate->type); + fs_reg dst = fs_reg(this, coord_type); fs_reg src = coordinate; coordinate = dst; emit(MUL(dst, src, scale_x)); - dst.reg_offset++; - src.reg_offset++; + dst = offset(dst, 1); + src = offset(src, 1); emit(MUL(dst, src, scale_y)); } else if (is_rect) { /* On gen6+, the sampler handles the rectangle coordinates @@ -1501,11 +1768,11 @@ fs_visitor::rescale_texcoord(ir_texture *ir, fs_reg coordinate, needs_gl_clamp = false; for (int i = 0; i < 2; i++) { - if (c->key.tex.gl_clamp_mask[i] & (1 << sampler)) { + if (tex->gl_clamp_mask[i] & (1 << sampler)) { fs_reg chan = coordinate; - chan.reg_offset += i; + chan = offset(chan, i); - inst = emit(BRW_OPCODE_SEL, chan, chan, brw_imm_f(0.0)); + inst = emit(BRW_OPCODE_SEL, chan, chan, fs_reg(0.0f)); inst->conditional_mod = BRW_CONDITIONAL_G; /* Our parameter comes in as 1.0/width or 1.0/height, @@ -1524,12 +1791,11 @@ fs_visitor::rescale_texcoord(ir_texture *ir, fs_reg coordinate, } } - if (ir->coordinate && needs_gl_clamp) { - for (unsigned int i = 0; - i < MIN2(ir->coordinate->type->vector_elements, 3); i++) { - if (c->key.tex.gl_clamp_mask[i] & (1 << sampler)) { + if (coord_type && needs_gl_clamp) { + for (unsigned int i = 0; i < MIN2(coord_type->vector_elements, 3); i++) { + if (tex->gl_clamp_mask[i] & (1 << sampler)) { fs_reg chan = coordinate; - chan.reg_offset += i; + chan = offset(chan, i); fs_inst *inst = emit(MOV(chan, chan)); inst->saturate = true; @@ -1541,52 +1807,61 @@ fs_visitor::rescale_texcoord(ir_texture *ir, fs_reg coordinate, /* Sample from the MCS surface attached to this multisample texture. */ fs_reg -fs_visitor::emit_mcs_fetch(ir_texture *ir, fs_reg coordinate, int sampler) +fs_visitor::emit_mcs_fetch(fs_reg coordinate, int components, fs_reg sampler) { int reg_width = dispatch_width / 8; - fs_reg payload = fs_reg(this, glsl_type::float_type); + fs_reg payload = fs_reg(GRF, virtual_grf_alloc(components * reg_width), + BRW_REGISTER_TYPE_F); fs_reg dest = fs_reg(this, glsl_type::uvec4_type); - fs_reg next = payload; + fs_reg *sources = ralloc_array(mem_ctx, fs_reg, components); - /* parameters are: u, v, r, lod; missing parameters are treated as zero */ - for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { - emit(MOV(retype(next, BRW_REGISTER_TYPE_D), coordinate)); - coordinate.reg_offset++; - next.reg_offset++; + /* parameters are: u, v, r; missing parameters are treated as zero */ + for (int i = 0; i < components; i++) { + sources[i] = fs_reg(this, glsl_type::float_type); + emit(MOV(retype(sources[i], BRW_REGISTER_TYPE_D), coordinate)); + coordinate = offset(coordinate, 1); } - fs_inst *inst = emit(SHADER_OPCODE_TXF_MCS, dest, payload); - virtual_grf_sizes[payload.reg] = next.reg_offset; + emit(LOAD_PAYLOAD(payload, sources, components)); + + fs_inst *inst = emit(SHADER_OPCODE_TXF_MCS, dest, payload, sampler); inst->base_mrf = -1; - inst->mlen = next.reg_offset * reg_width; + inst->mlen = components * reg_width; inst->header_present = false; - inst->regs_written = 4 * reg_width; /* we only care about one reg of response, - * but the sampler always writes 4/8 + inst->regs_written = 4 * reg_width; /* we only care about one reg of + * response, but the sampler always + * writes 4/8 */ - inst->sampler = sampler; return dest; } void -fs_visitor::visit(ir_texture *ir) +fs_visitor::emit_texture(ir_texture_opcode op, + const glsl_type *dest_type, + fs_reg coordinate, const struct glsl_type *coord_type, + fs_reg shadow_c, + fs_reg lod, fs_reg lod2, int grad_components, + fs_reg sample_index, + fs_reg offset_value, unsigned offset_components, + fs_reg mcs, + int gather_component, + bool is_cube_array, + bool is_rect, + uint32_t sampler, + fs_reg sampler_reg, int texunit) { + const struct brw_sampler_prog_key_data *tex = + (stage == MESA_SHADER_FRAGMENT) ? + &((brw_wm_prog_key*) this->key)->tex : NULL; + assert(tex); fs_inst *inst = NULL; - int sampler = - _mesa_get_sampler_uniform_value(ir->sampler, shader_prog, prog); - /* FINISHME: We're failing to recompile our programs when the sampler is - * updated. This only matters for the texture rectangle scale parameters - * (pre-gen6, or gen6+ with GL_CLAMP). - */ - int texunit = prog->SamplerUnits[sampler]; - - if (ir->op == ir_tg4) { + if (op == ir_tg4) { /* When tg4 is used with the degenerate ZERO/ONE swizzles, don't bother * emitting anything other than setting up the constant result. */ - ir_constant *chan = ir->lod_info.component->as_constant(); - int swiz = GET_SWZ(c->key.tex.swizzles[sampler], chan->value.i[0]); + int swiz = GET_SWZ(tex->swizzles[sampler], gather_component); if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) { fs_reg res = fs_reg(this, glsl_type::vec4_type); @@ -1594,12 +1869,130 @@ fs_visitor::visit(ir_texture *ir) for (int i=0; i<4; i++) { emit(MOV(res, fs_reg(swiz == SWIZZLE_ZERO ? 0.0f : 1.0f))); - res.reg_offset++; + res = offset(res, 1); } return; } } + if (coordinate.file != BAD_FILE) { + /* FINISHME: Texture coordinate rescaling doesn't work with non-constant + * samplers. This should only be a problem with GL_CLAMP on Gen7. + */ + coordinate = rescale_texcoord(coordinate, coord_type, is_rect, + sampler, texunit); + } + + /* Writemasking doesn't eliminate channels on SIMD8 texture + * samples, so don't worry about them. + */ + fs_reg dst(this, glsl_type::get_instance(dest_type->base_type, 4, 1)); + + int coord_components = coord_type ? coord_type->vector_elements : 0; + + if (brw->gen >= 7) { + inst = emit_texture_gen7(op, dst, coordinate, coord_components, + shadow_c, lod, lod2, grad_components, + sample_index, mcs, sampler_reg, + offset_value); + } else if (brw->gen >= 5) { + inst = emit_texture_gen5(op, dst, coordinate, coord_components, + shadow_c, lod, lod2, grad_components, + sample_index, sampler, + offset_value.file != BAD_FILE); + } else { + inst = emit_texture_gen4(op, dst, coordinate, coord_components, + shadow_c, lod, lod2, grad_components, + sampler); + } + + if (shadow_c.file != BAD_FILE) + inst->shadow_compare = true; + + if (offset_value.file == IMM) + inst->texture_offset = offset_value.fixed_hw_reg.dw1.ud; + + if (op == ir_tg4) { + inst->texture_offset |= + gather_channel(gather_component, sampler) << 16; /* M0.2:16-17 */ + + if (brw->gen == 6) + emit_gen6_gather_wa(tex->gen6_gather_wa[sampler], dst); + } + + /* fixup #layers for cube map arrays */ + if (op == ir_txs && is_cube_array) { + fs_reg depth = offset(dst, 2); + fs_reg fixed_depth = fs_reg(this, glsl_type::int_type); + emit_math(SHADER_OPCODE_INT_QUOTIENT, fixed_depth, depth, fs_reg(6)); + + fs_reg *fixed_payload = ralloc_array(mem_ctx, fs_reg, inst->regs_written); + int components = inst->regs_written / (dst.width / 8); + for (int i = 0; i < components; i++) { + if (i == 2) { + fixed_payload[i] = fixed_depth; + } else { + fixed_payload[i] = offset(dst, i); + } + } + emit(LOAD_PAYLOAD(dst, fixed_payload, components)); + } + + swizzle_result(op, dest_type->vector_elements, dst, sampler); +} + +void +fs_visitor::visit(ir_texture *ir) +{ + const struct brw_sampler_prog_key_data *tex = + (stage == MESA_SHADER_FRAGMENT) ? + &((brw_wm_prog_key*) this->key)->tex : NULL; + assert(tex); + + uint32_t sampler = + _mesa_get_sampler_uniform_value(ir->sampler, shader_prog, prog); + + ir_rvalue *nonconst_sampler_index = + _mesa_get_sampler_array_nonconst_index(ir->sampler); + + /* Handle non-constant sampler array indexing */ + fs_reg sampler_reg; + if (nonconst_sampler_index) { + /* The highest sampler which may be used by this operation is + * the last element of the array. Mark it here, because the generator + * doesn't have enough information to determine the bound. + */ + uint32_t array_size = ir->sampler->as_dereference_array() + ->array->type->array_size(); + + uint32_t max_used = sampler + array_size - 1; + if (ir->op == ir_tg4 && brw->gen < 8) { + max_used += stage_prog_data->binding_table.gather_texture_start; + } else { + max_used += stage_prog_data->binding_table.texture_start; + } + + brw_mark_surface_used(prog_data, max_used); + + /* Emit code to evaluate the actual indexing expression */ + nonconst_sampler_index->accept(this); + fs_reg temp(this, glsl_type::uint_type); + emit(ADD(temp, this->result, fs_reg(sampler))) + ->force_writemask_all = true; + sampler_reg = temp; + } else { + /* Single sampler, or constant array index; the indexing expression + * is just an immediate. + */ + sampler_reg = fs_reg(sampler); + } + + /* FINISHME: We're failing to recompile our programs when the sampler is + * updated. This only matters for the texture rectangle scale parameters + * (pre-gen6, or gen6+ with GL_CLAMP). + */ + int texunit = prog->SamplerUnits[sampler]; + /* Should be lowered by do_lower_texture_projection */ assert(!ir->projector); @@ -1611,13 +2004,11 @@ fs_visitor::visit(ir_texture *ir) * generating these values may involve SEND messages that need the MRFs. */ fs_reg coordinate; + const glsl_type *coord_type = NULL; if (ir->coordinate) { + coord_type = ir->coordinate->type; ir->coordinate->accept(this); - - coordinate = rescale_texcoord(ir, this->result, - ir->sampler->type->sampler_dimensionality == - GLSL_SAMPLER_DIM_RECT, - sampler, texunit); + coordinate = this->result; } fs_reg shadow_comparitor; @@ -1626,7 +2017,27 @@ fs_visitor::visit(ir_texture *ir) shadow_comparitor = this->result; } + fs_reg offset_value; + int offset_components = 0; + if (ir->offset) { + ir_constant *const_offset = ir->offset->as_constant(); + if (const_offset) { + /* Store the header bitfield in an IMM register. This allows us to + * use offset_value.file to distinguish between no offset, a constant + * offset, and a non-constant offset. + */ + offset_value = + fs_reg(brw_texture_offset(ctx, const_offset->value.i, + const_offset->type->vector_elements)); + } else { + ir->offset->accept(this); + offset_value = this->result; + } + offset_components = ir->offset->type->vector_elements; + } + fs_reg lod, lod2, sample_index, mcs; + int grad_components = 0; switch (ir->op) { case ir_tex: case ir_lod: @@ -1643,6 +2054,8 @@ fs_visitor::visit(ir_texture *ir) ir->lod_info.grad.dPdy->accept(this); lod2 = this->result; + + grad_components = ir->lod_info.grad.dPdx->type->vector_elements; break; case ir_txf: case ir_txl: @@ -1654,58 +2067,31 @@ fs_visitor::visit(ir_texture *ir) ir->lod_info.sample_index->accept(this); sample_index = this->result; - if (brw->gen >= 7 && c->key.tex.compressed_multisample_layout_mask & (1<gen >= 7 && tex->compressed_multisample_layout_mask & (1<coordinate->type->vector_elements, + sampler_reg); else mcs = fs_reg(0u); break; default: - assert(!"Unrecognized texture opcode"); + unreachable("Unrecognized texture opcode"); }; - /* Writemasking doesn't eliminate channels on SIMD8 texture - * samples, so don't worry about them. - */ - fs_reg dst = fs_reg(this, glsl_type::get_instance(ir->type->base_type, 4, 1)); - - if (brw->gen >= 7) { - inst = emit_texture_gen7(ir, dst, coordinate, shadow_comparitor, - lod, lod2, sample_index, mcs, sampler); - } else if (brw->gen >= 5) { - inst = emit_texture_gen5(ir, dst, coordinate, shadow_comparitor, - lod, lod2, sample_index); - } else { - inst = emit_texture_gen4(ir, dst, coordinate, shadow_comparitor, - lod, lod2); - } - - if (ir->offset != NULL && ir->op != ir_txf) - inst->texture_offset = brw_texture_offset(ctx, ir->offset->as_constant()); - + int gather_component = 0; if (ir->op == ir_tg4) - inst->texture_offset |= gather_channel(ir, sampler) << 16; // M0.2:16-17 - - inst->sampler = sampler; + gather_component = ir->lod_info.component->as_constant()->value.i[0]; - if (ir->shadow_comparitor) - inst->shadow_compare = true; - - /* fixup #layers for cube map arrays */ - if (ir->op == ir_txs) { - glsl_type const *type = ir->sampler->type; - if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE && - type->sampler_array) { - fs_reg depth = dst; - depth.reg_offset = 2; - emit_math(SHADER_OPCODE_INT_QUOTIENT, depth, depth, fs_reg(6)); - } - } + bool is_rect = + ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT; - if (brw->gen == 6 && ir->op == ir_tg4) { - emit_gen6_gather_wa(c->key.tex.gen6_gather_wa[sampler], dst); - } + bool is_cube_array = + ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE && + ir->sampler->type->sampler_array; - swizzle_result(ir, dst, sampler); + emit_texture(ir->op, ir->type, coordinate, coord_type, shadow_comparitor, + lod, lod2, grad_components, sample_index, offset_value, + offset_components, mcs, gather_component, + is_cube_array, is_rect, sampler, sampler_reg, texunit); } /** @@ -1734,7 +2120,7 @@ fs_visitor::emit_gen6_gather_wa(uint8_t wa, fs_reg dst) emit(ASR(dst, dst, fs_reg(32 - width))); } - dst.reg_offset++; + dst = offset(dst, 1); } } @@ -1742,24 +2128,26 @@ fs_visitor::emit_gen6_gather_wa(uint8_t wa, fs_reg dst) * Set up the gather channel based on the swizzle, for gather4. */ uint32_t -fs_visitor::gather_channel(ir_texture *ir, int sampler) +fs_visitor::gather_channel(int orig_chan, uint32_t sampler) { - ir_constant *chan = ir->lod_info.component->as_constant(); - int swiz = GET_SWZ(c->key.tex.swizzles[sampler], chan->value.i[0]); + const struct brw_sampler_prog_key_data *tex = + (stage == MESA_SHADER_FRAGMENT) ? + &((brw_wm_prog_key*) this->key)->tex : NULL; + assert(tex); + int swiz = GET_SWZ(tex->swizzles[sampler], orig_chan); switch (swiz) { case SWIZZLE_X: return 0; case SWIZZLE_Y: /* gather4 sampler is broken for green channel on RG32F -- * we must ask for blue instead. */ - if (c->key.tex.gather_channel_quirk_mask & (1<gather_channel_quirk_mask & (1<op == ir_query_levels) { + if (op == ir_query_levels) { /* # levels is in .w */ - orig_val.reg_offset += 3; - this->result = orig_val; + this->result = offset(orig_val, 3); return; } @@ -1782,28 +2170,32 @@ fs_visitor::swizzle_result(ir_texture *ir, fs_reg orig_val, int sampler) /* txs,lod don't actually sample the texture, so swizzling the result * makes no sense. */ - if (ir->op == ir_txs || ir->op == ir_lod || ir->op == ir_tg4) + if (op == ir_txs || op == ir_lod || op == ir_tg4) return; - if (ir->type == glsl_type::float_type) { + const struct brw_sampler_prog_key_data *tex = + (stage == MESA_SHADER_FRAGMENT) ? + &((brw_wm_prog_key*) this->key)->tex : NULL; + assert(tex); + + if (dest_components == 1) { /* Ignore DEPTH_TEXTURE_MODE swizzling. */ - assert(ir->sampler->type->sampler_shadow); - } else if (c->key.tex.swizzles[sampler] != SWIZZLE_NOOP) { + } else if (tex->swizzles[sampler] != SWIZZLE_NOOP) { fs_reg swizzled_result = fs_reg(this, glsl_type::vec4_type); + swizzled_result.type = orig_val.type; for (int i = 0; i < 4; i++) { - int swiz = GET_SWZ(c->key.tex.swizzles[sampler], i); + int swiz = GET_SWZ(tex->swizzles[sampler], i); fs_reg l = swizzled_result; - l.reg_offset += i; + l = offset(l, i); if (swiz == SWIZZLE_ZERO) { emit(MOV(l, fs_reg(0.0f))); } else if (swiz == SWIZZLE_ONE) { emit(MOV(l, fs_reg(1.0f))); } else { - fs_reg r = orig_val; - r.reg_offset += GET_SWZ(c->key.tex.swizzles[sampler], i); - emit(MOV(l, r)); + emit(MOV(l, offset(orig_val, + GET_SWZ(tex->swizzles[sampler], i)))); } } this->result = swizzled_result; @@ -1817,7 +2209,7 @@ fs_visitor::visit(ir_swizzle *ir) fs_reg val = this->result; if (ir->type->vector_elements == 1) { - this->result.reg_offset += ir->mask.x; + this->result = offset(this->result, ir->mask.x); return; } @@ -1843,9 +2235,8 @@ fs_visitor::visit(ir_swizzle *ir) break; } - channel.reg_offset += swiz; - emit(MOV(result, channel)); - result.reg_offset++; + emit(MOV(result, offset(channel, swiz))); + result = offset(result, 1); } } @@ -1868,15 +2259,14 @@ fs_visitor::visit(ir_discard *ir) if (brw->gen >= 6) { /* For performance, after a discard, jump to the end of the shader. - * However, many people will do foliage by discarding based on a - * texture's alpha mask, and then continue on to texture with the - * remaining pixels. To avoid trashing the derivatives for those - * texture samples, we'll only jump if all of the pixels in the subspan - * have been discarded. + * Only jump if all relevant channels have been discarded. */ fs_inst *discard_jump = emit(FS_OPCODE_DISCARD_JUMP); discard_jump->flag_subreg = 1; - discard_jump->predicate = BRW_PREDICATE_ALIGN1_ANY4H; + + discard_jump->predicate = (dispatch_width == 8) + ? BRW_PREDICATE_ALIGN1_ANY8H + : BRW_PREDICATE_ALIGN1_ANY16H; discard_jump->predicate_inverse = true; } } @@ -1904,13 +2294,12 @@ fs_visitor::visit(ir_constant *ir) dst_reg.type = src_reg.type; for (unsigned j = 0; j < size; j++) { emit(MOV(dst_reg, src_reg)); - src_reg.reg_offset++; - dst_reg.reg_offset++; + src_reg = offset(src_reg, 1); + dst_reg = offset(dst_reg, 1); } } } else if (ir->type->is_record()) { - foreach_list(node, &ir->components) { - ir_constant *const field = (ir_constant *) node; + foreach_in_list(ir_constant, field, &ir->components) { const unsigned size = type_size(field->type); field->accept(this); @@ -1919,8 +2308,8 @@ fs_visitor::visit(ir_constant *ir) dst_reg.type = src_reg.type; for (unsigned j = 0; j < size; j++) { emit(MOV(dst_reg, src_reg)); - src_reg.reg_offset++; - dst_reg.reg_offset++; + src_reg = offset(src_reg, 1); + dst_reg = offset(dst_reg, 1); } } } else { @@ -1938,12 +2327,14 @@ fs_visitor::visit(ir_constant *ir) emit(MOV(dst_reg, fs_reg(ir->value.i[i]))); break; case GLSL_TYPE_BOOL: - emit(MOV(dst_reg, fs_reg((int)ir->value.b[i]))); + emit(MOV(dst_reg, + fs_reg(ir->value.b[i] != 0 ? ctx->Const.UniformBooleanTrue + : 0))); break; default: - assert(!"Non-float/uint/int/bool constant"); + unreachable("Non-float/uint/int/bool constant"); } - dst_reg.reg_offset++; + dst_reg = offset(dst_reg, 1); } } @@ -1955,74 +2346,123 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir) { ir_expression *expr = ir->as_expression(); - if (expr && - expr->operation != ir_binop_logic_and && - expr->operation != ir_binop_logic_or && - expr->operation != ir_binop_logic_xor) { - fs_reg op[2]; - fs_inst *inst; + if (!expr || expr->operation == ir_binop_ubo_load) { + ir->accept(this); - assert(expr->get_num_operands() <= 2); - for (unsigned int i = 0; i < expr->get_num_operands(); i++) { - assert(expr->operands[i]->type->is_scalar()); + fs_inst *inst = emit(AND(reg_null_d, this->result, fs_reg(1))); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + } - expr->operands[i]->accept(this); - op[i] = this->result; + fs_reg op[3]; + fs_inst *inst; - resolve_ud_negate(&op[i]); - } + assert(expr->get_num_operands() <= 3); + for (unsigned int i = 0; i < expr->get_num_operands(); i++) { + assert(expr->operands[i]->type->is_scalar()); - switch (expr->operation) { - case ir_unop_logic_not: - inst = emit(AND(reg_null_d, op[0], fs_reg(1))); - inst->conditional_mod = BRW_CONDITIONAL_Z; - break; + expr->operands[i]->accept(this); + op[i] = this->result; - case ir_unop_f2b: - if (brw->gen >= 6) { - emit(CMP(reg_null_d, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ)); - } else { - inst = emit(MOV(reg_null_f, op[0])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - } - break; + resolve_ud_negate(&op[i]); + } - case ir_unop_i2b: - if (brw->gen >= 6) { - emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_NZ)); - } else { - inst = emit(MOV(reg_null_d, op[0])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - } - break; + switch (expr->operation) { + case ir_unop_logic_not: + inst = emit(AND(reg_null_d, op[0], fs_reg(1))); + inst->conditional_mod = BRW_CONDITIONAL_Z; + break; - case ir_binop_greater: - case ir_binop_gequal: - case ir_binop_less: - case ir_binop_lequal: - case ir_binop_equal: - case ir_binop_all_equal: - case ir_binop_nequal: - case ir_binop_any_nequal: - resolve_bool_comparison(expr->operands[0], &op[0]); - resolve_bool_comparison(expr->operands[1], &op[1]); + case ir_binop_logic_xor: + if (ctx->Const.UniformBooleanTrue == 1) { + fs_reg dst = fs_reg(this, glsl_type::uint_type); + emit(XOR(dst, op[0], op[1])); + inst = emit(AND(reg_null_d, dst, fs_reg(1))); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + } else { + inst = emit(XOR(reg_null_d, op[0], op[1])); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + } + break; - emit(CMP(reg_null_d, op[0], op[1], - brw_conditional_for_comparison(expr->operation))); - break; + case ir_binop_logic_or: + if (ctx->Const.UniformBooleanTrue == 1) { + fs_reg dst = fs_reg(this, glsl_type::uint_type); + emit(OR(dst, op[0], op[1])); + inst = emit(AND(reg_null_d, dst, fs_reg(1))); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + } else { + inst = emit(OR(reg_null_d, op[0], op[1])); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + } + break; - default: - assert(!"not reached"); - fail("bad cond code\n"); - break; + case ir_binop_logic_and: + if (ctx->Const.UniformBooleanTrue == 1) { + fs_reg dst = fs_reg(this, glsl_type::uint_type); + emit(AND(dst, op[0], op[1])); + inst = emit(AND(reg_null_d, dst, fs_reg(1))); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + } else { + inst = emit(AND(reg_null_d, op[0], op[1])); + inst->conditional_mod = BRW_CONDITIONAL_NZ; } - return; - } + break; - ir->accept(this); + case ir_unop_f2b: + if (brw->gen >= 6) { + emit(CMP(reg_null_d, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ)); + } else { + inst = emit(MOV(reg_null_f, op[0])); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + } + break; - fs_inst *inst = emit(AND(reg_null_d, this->result, fs_reg(1))); - inst->conditional_mod = BRW_CONDITIONAL_NZ; + case ir_unop_i2b: + if (brw->gen >= 6) { + emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_NZ)); + } else { + inst = emit(MOV(reg_null_d, op[0])); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + } + break; + + case ir_binop_greater: + case ir_binop_gequal: + case ir_binop_less: + case ir_binop_lequal: + case ir_binop_equal: + case ir_binop_all_equal: + case ir_binop_nequal: + case ir_binop_any_nequal: + if (ctx->Const.UniformBooleanTrue == 1) { + resolve_bool_comparison(expr->operands[0], &op[0]); + resolve_bool_comparison(expr->operands[1], &op[1]); + } + + emit(CMP(reg_null_d, op[0], op[1], + brw_conditional_for_comparison(expr->operation))); + break; + + case ir_triop_csel: { + /* Expand the boolean condition into the flag register. */ + inst = emit(MOV(reg_null_d, op[0])); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + + /* Select which boolean to return. */ + fs_reg temp(this, expr->operands[1]->type); + inst = emit(SEL(temp, op[1], op[2])); + inst->predicate = BRW_PREDICATE_NORMAL; + + /* Expand the result to a condition code. */ + inst = emit(MOV(reg_null_d, temp)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + break; + } + + default: + unreachable("not reached"); + } } /** @@ -2034,12 +2474,12 @@ fs_visitor::emit_if_gen6(ir_if *ir) { ir_expression *expr = ir->condition->as_expression(); - if (expr) { - fs_reg op[2]; + if (expr && expr->operation != ir_binop_ubo_load) { + fs_reg op[3]; fs_inst *inst; fs_reg temp; - assert(expr->get_num_operands() <= 2); + assert(expr->get_num_operands() <= 3); for (unsigned int i = 0; i < expr->get_num_operands(); i++) { assert(expr->operands[i]->type->is_scalar()); @@ -2049,14 +2489,24 @@ fs_visitor::emit_if_gen6(ir_if *ir) switch (expr->operation) { case ir_unop_logic_not: + emit(IF(op[0], fs_reg(0), BRW_CONDITIONAL_Z)); + return; + case ir_binop_logic_xor: + emit(IF(op[0], op[1], BRW_CONDITIONAL_NZ)); + return; + case ir_binop_logic_or: + temp = fs_reg(this, glsl_type::bool_type); + emit(OR(temp, op[0], op[1])); + emit(IF(temp, fs_reg(0), BRW_CONDITIONAL_NZ)); + return; + case ir_binop_logic_and: - /* For operations on bool arguments, only the low bit of the bool is - * valid, and the others are undefined. Fall back to the condition - * code path. - */ - break; + temp = fs_reg(this, glsl_type::bool_type); + emit(AND(temp, op[0], op[1])); + emit(IF(temp, fs_reg(0), BRW_CONDITIONAL_NZ)); + return; case ir_unop_f2b: inst = emit(BRW_OPCODE_IF, reg_null_f, op[0], fs_reg(0)); @@ -2075,23 +2525,36 @@ fs_visitor::emit_if_gen6(ir_if *ir) case ir_binop_all_equal: case ir_binop_nequal: case ir_binop_any_nequal: - resolve_bool_comparison(expr->operands[0], &op[0]); - resolve_bool_comparison(expr->operands[1], &op[1]); + if (ctx->Const.UniformBooleanTrue == 1) { + resolve_bool_comparison(expr->operands[0], &op[0]); + resolve_bool_comparison(expr->operands[1], &op[1]); + } emit(IF(op[0], op[1], brw_conditional_for_comparison(expr->operation))); return; - default: - assert(!"not reached"); - emit(IF(op[0], fs_reg(0), BRW_CONDITIONAL_NZ)); - fail("bad condition\n"); + + case ir_triop_csel: { + /* Expand the boolean condition into the flag register. */ + fs_inst *inst = emit(MOV(reg_null_d, op[0])); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + + /* Select which boolean to use as the result. */ + fs_reg temp(this, expr->operands[1]->type); + inst = emit(SEL(temp, op[1], op[2])); + inst->predicate = BRW_PREDICATE_NORMAL; + + emit(IF(temp, fs_reg(0), BRW_CONDITIONAL_NZ)); return; } + + default: + unreachable("not reached"); + } } - emit_bool_to_cond_code(ir->condition); - fs_inst *inst = emit(BRW_OPCODE_IF); - inst->predicate = BRW_PREDICATE_NORMAL; + ir->condition->accept(this); + emit(IF(this->result, fs_reg(0), BRW_CONDITIONAL_NZ)); } /** @@ -2150,8 +2613,8 @@ fs_visitor::try_replace_with_sel() /* Remove the matched instructions; we'll emit a SEL to replace them. */ while (!if_inst->next->is_tail_sentinel()) - if_inst->next->remove(); - if_inst->remove(); + if_inst->next->exec_node::remove(); + if_inst->exec_node::remove(); /* Only the last source register can be a constant, so if the MOV in * the "then" clause uses a constant, we need to put it in a temporary. @@ -2199,21 +2662,17 @@ fs_visitor::visit(ir_if *ir) emit(IF(BRW_PREDICATE_NORMAL)); } - foreach_list(node, &ir->then_instructions) { - ir_instruction *ir = (ir_instruction *)node; - this->base_ir = ir; - - ir->accept(this); + foreach_in_list(ir_instruction, ir_, &ir->then_instructions) { + this->base_ir = ir_; + ir_->accept(this); } if (!ir->else_instructions.is_empty()) { emit(BRW_OPCODE_ELSE); - foreach_list(node, &ir->else_instructions) { - ir_instruction *ir = (ir_instruction *)node; - this->base_ir = ir; - - ir->accept(this); + foreach_in_list(ir_instruction, ir_, &ir->else_instructions) { + this->base_ir = ir_; + ir_->accept(this); } } @@ -2232,11 +2691,9 @@ fs_visitor::visit(ir_loop *ir) this->base_ir = NULL; emit(BRW_OPCODE_DO); - foreach_list(node, &ir->body_instructions) { - ir_instruction *ir = (ir_instruction *)node; - - this->base_ir = ir; - ir->accept(this); + foreach_in_list(ir_instruction, ir_, &ir->body_instructions) { + this->base_ir = ir_; + ir_->accept(this); } this->base_ir = NULL; @@ -2262,8 +2719,8 @@ fs_visitor::visit_atomic_counter_intrinsic(ir_call *ir) ir_dereference *deref = static_cast( ir->actual_parameters.get_head()); ir_variable *location = deref->variable_referenced(); - unsigned surf_index = (c->prog_data.base.binding_table.abo_start + - location->data.atomic.buffer_index); + unsigned surf_index = (stage_prog_data->binding_table.abo_start + + location->data.binding); /* Calculate the surface offset */ fs_reg offset(this, glsl_type::uint_type); @@ -2273,10 +2730,10 @@ fs_visitor::visit_atomic_counter_intrinsic(ir_call *ir) deref_array->array_index->accept(this); fs_reg tmp(this, glsl_type::uint_type); - emit(MUL(tmp, this->result, ATOMIC_COUNTER_SIZE)); - emit(ADD(offset, tmp, location->data.atomic.offset)); + emit(MUL(tmp, this->result, fs_reg(ATOMIC_COUNTER_SIZE))); + emit(ADD(offset, tmp, fs_reg(location->data.atomic.offset))); } else { - offset = location->data.atomic.offset; + offset = fs_reg(location->data.atomic.offset); } /* Emit the appropriate machine instruction */ @@ -2307,14 +2764,14 @@ fs_visitor::visit(ir_call *ir) !strcmp("__intrinsic_atomic_predecrement", callee)) { visit_atomic_counter_intrinsic(ir); } else { - assert(!"Unsupported intrinsic."); + unreachable("Unsupported intrinsic."); } } void -fs_visitor::visit(ir_return *ir) +fs_visitor::visit(ir_return *) { - assert(!"FINISHME"); + unreachable("FINISHME"); } void @@ -2327,36 +2784,33 @@ fs_visitor::visit(ir_function *ir) const ir_function_signature *sig; exec_list empty; - sig = ir->matching_signature(NULL, &empty); + sig = ir->matching_signature(NULL, &empty, false); assert(sig); - foreach_list(node, &sig->body) { - ir_instruction *ir = (ir_instruction *)node; - this->base_ir = ir; - - ir->accept(this); + foreach_in_list(ir_instruction, ir_, &sig->body) { + this->base_ir = ir_; + ir_->accept(this); } } } void -fs_visitor::visit(ir_function_signature *ir) +fs_visitor::visit(ir_function_signature *) { - assert(!"not reached"); - (void)ir; + unreachable("not reached"); } void fs_visitor::visit(ir_emit_vertex *) { - assert(!"not reached"); + unreachable("not reached"); } void fs_visitor::visit(ir_end_primitive *) { - assert(!"not reached"); + unreachable("not reached"); } void @@ -2364,85 +2818,105 @@ fs_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index, fs_reg dst, fs_reg offset, fs_reg src0, fs_reg src1) { - const unsigned operand_len = dispatch_width / 8; - unsigned mlen = 0; + bool uses_kill = + (stage == MESA_SHADER_FRAGMENT) && + ((brw_wm_prog_data*) this->prog_data)->uses_kill; + int reg_width = dispatch_width / 8; + int length = 0; + + fs_reg *sources = ralloc_array(mem_ctx, fs_reg, 4); + sources[0] = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD); /* Initialize the sample mask in the message header. */ - emit(MOV(brw_uvec_mrf(8, mlen, 0), brw_imm_ud(0))) + emit(MOV(sources[0], fs_reg(0u))) ->force_writemask_all = true; - if (fp->UsesKill) { - emit(MOV(brw_uvec_mrf(1, mlen, 7), brw_flag_reg(0, 1))) + if (uses_kill) { + emit(MOV(component(sources[0], 7), brw_flag_reg(0, 1))) ->force_writemask_all = true; } else { - emit(MOV(brw_uvec_mrf(1, mlen, 7), + emit(MOV(component(sources[0], 7), retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD))) ->force_writemask_all = true; } - - mlen++; + length++; /* Set the atomic operation offset. */ - emit(MOV(brw_uvec_mrf(dispatch_width, mlen, 0), offset)); - mlen += operand_len; + sources[1] = fs_reg(this, glsl_type::uint_type); + emit(MOV(sources[1], offset)); + length++; /* Set the atomic operation arguments. */ if (src0.file != BAD_FILE) { - emit(MOV(brw_uvec_mrf(dispatch_width, mlen, 0), src0)); - mlen += operand_len; + sources[length] = fs_reg(this, glsl_type::uint_type); + emit(MOV(sources[length], src0)); + length++; } if (src1.file != BAD_FILE) { - emit(MOV(brw_uvec_mrf(dispatch_width, mlen, 0), src1)); - mlen += operand_len; + sources[length] = fs_reg(this, glsl_type::uint_type); + emit(MOV(sources[length], src1)); + length++; } + int mlen = 1 + (length - 1) * reg_width; + fs_reg src_payload = fs_reg(GRF, virtual_grf_alloc(mlen), + BRW_REGISTER_TYPE_UD); + emit(LOAD_PAYLOAD(src_payload, sources, length)); + /* Emit the instruction. */ - fs_inst *inst = new(mem_ctx) fs_inst(SHADER_OPCODE_UNTYPED_ATOMIC, dst, - atomic_op, surf_index); - inst->base_mrf = 0; + fs_inst *inst = emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst, src_payload, + fs_reg(atomic_op), fs_reg(surf_index)); inst->mlen = mlen; - emit(inst); } void fs_visitor::emit_untyped_surface_read(unsigned surf_index, fs_reg dst, fs_reg offset) { - const unsigned operand_len = dispatch_width / 8; - unsigned mlen = 0; + bool uses_kill = + (stage == MESA_SHADER_FRAGMENT) && + ((brw_wm_prog_data*) this->prog_data)->uses_kill; + int reg_width = dispatch_width / 8; + + fs_reg *sources = ralloc_array(mem_ctx, fs_reg, 2); + sources[0] = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD); /* Initialize the sample mask in the message header. */ - emit(MOV(brw_uvec_mrf(8, mlen, 0), brw_imm_ud(0))) + emit(MOV(sources[0], fs_reg(0u))) ->force_writemask_all = true; - if (fp->UsesKill) { - emit(MOV(brw_uvec_mrf(1, mlen, 7), brw_flag_reg(0, 1))) + if (uses_kill) { + emit(MOV(component(sources[0], 7), brw_flag_reg(0, 1))) ->force_writemask_all = true; } else { - emit(MOV(brw_uvec_mrf(1, mlen, 7), + emit(MOV(component(sources[0], 7), retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD))) ->force_writemask_all = true; } - mlen++; - /* Set the surface read offset. */ - emit(MOV(brw_uvec_mrf(dispatch_width, mlen, 0), offset)); - mlen += operand_len; + sources[1] = fs_reg(this, glsl_type::uint_type); + emit(MOV(sources[1], offset)); + + int mlen = 1 + reg_width; + fs_reg src_payload = fs_reg(GRF, virtual_grf_alloc(mlen), + BRW_REGISTER_TYPE_UD); + fs_inst *inst = emit(LOAD_PAYLOAD(src_payload, sources, 2)); /* Emit the instruction. */ - fs_inst *inst = new(mem_ctx) - fs_inst(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst, surf_index); - inst->base_mrf = 0; + inst = emit(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst, src_payload, + fs_reg(surf_index)); inst->mlen = mlen; - emit(inst); } fs_inst * fs_visitor::emit(fs_inst *inst) { if (force_uncompressed_stack > 0) + inst->exec_size = 8; + + if (dispatch_width == 16 && inst->exec_size == 8) inst->force_uncompressed = true; inst->annotation = this->current_annotation; @@ -2456,9 +2930,8 @@ fs_visitor::emit(fs_inst *inst) void fs_visitor::emit(exec_list list) { - foreach_list_safe(node, &list) { - fs_inst *inst = (fs_inst *)node; - inst->remove(); + foreach_in_list_safe(fs_inst, inst, &list) { + inst->exec_node::remove(); emit(inst); } } @@ -2489,10 +2962,12 @@ fs_visitor::emit_dummy_fs() struct brw_reg fs_visitor::interp_reg(int location, int channel) { - int regnr = c->prog_data.urb_setup[location] * 2 + channel / 2; + assert(stage == MESA_SHADER_FRAGMENT); + brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data; + int regnr = prog_data->urb_setup[location] * 2 + channel / 2; int stride = (channel & 1) * 4; - assert(c->prog_data.urb_setup[location] != -1); + assert(prog_data->urb_setup[location] != -1); return brw_vec1_grf(regnr, stride); } @@ -2515,8 +2990,7 @@ fs_visitor::emit_interpolation_setup_gen4() this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] = fs_reg(this, glsl_type::vec2_type); this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] = - this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC]; - this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg_offset++; + offset(this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], 1); } else { this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] = fs_reg(this, glsl_type::float_type); @@ -2572,12 +3046,12 @@ fs_visitor::emit_interpolation_setup_gen6() emit(MOV(this->pixel_y, int_pixel_y)); this->current_annotation = "compute pos.w"; - this->pixel_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0)); + this->pixel_w = fs_reg(brw_vec8_grf(payload.source_w_reg, 0)); this->wpos_w = fs_reg(this, glsl_type::float_type); emit_math(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w); for (int i = 0; i < BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT; ++i) { - uint8_t reg = c->barycentric_coord_reg[i]; + uint8_t reg = payload.barycentric_coord_reg[i]; this->delta_x[i] = fs_reg(brw_vec8_grf(reg, 0)); this->delta_y[i] = fs_reg(brw_vec8_grf(reg + 1, 0)); } @@ -2585,19 +3059,24 @@ fs_visitor::emit_interpolation_setup_gen6() this->current_annotation = NULL; } -void -fs_visitor::emit_color_write(int target, int index, int first_color_mrf) +int +fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components) { - int reg_width = dispatch_width / 8; + brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; fs_inst *inst; - fs_reg color = outputs[target]; - fs_reg mrf; - /* If there's no color data to be written, skip it. */ - if (color.file == BAD_FILE) - return; + if (color.file == BAD_FILE) { + return 4 * (dispatch_width / 8); + } - color.reg_offset += index; + uint8_t colors_enabled; + if (components == 0) { + /* We want to write one component to the alpha channel */ + colors_enabled = 0x8; + } else { + /* Enable the first components-many channels */ + colors_enabled = (1 << components) - 1; + } if (dispatch_width == 8 || brw->gen >= 6) { /* SIMD8 write looks like: @@ -2616,10 +3095,20 @@ fs_visitor::emit_color_write(int target, int index, int first_color_mrf) * m + 6: a0 * m + 7: a1 */ - inst = emit(MOV(fs_reg(MRF, first_color_mrf + index * reg_width, - color.type), - color)); - inst->saturate = c->key.clamp_fragment_color; + int len = 0; + for (unsigned i = 0; i < 4; ++i) { + if (colors_enabled & (1 << i)) { + dst[len] = fs_reg(GRF, virtual_grf_alloc(color.width / 8), + color.type, color.width); + inst = emit(MOV(dst[len], offset(color, i))); + inst->saturate = key->clamp_fragment_color; + } else if (color.width == 16) { + /* We need two BAD_FILE slots for a 16-wide color */ + len++; + } + len++; + } + return len; } else { /* pre-gen6 SIMD16 single source DP write looks like: * m + 0: r0 @@ -2631,32 +3120,23 @@ fs_visitor::emit_color_write(int target, int index, int first_color_mrf) * m + 6: b1 * m + 7: a1 */ - if (brw->has_compr4) { - /* By setting the high bit of the MRF register number, we - * indicate that we want COMPR4 mode - instead of doing the - * usual destination + 1 for the second half we get - * destination + 4. - */ - inst = emit(MOV(fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index, - color.type), - color)); - inst->saturate = c->key.clamp_fragment_color; - } else { - push_force_uncompressed(); - inst = emit(MOV(fs_reg(MRF, first_color_mrf + index, color.type), - color)); - inst->saturate = c->key.clamp_fragment_color; - pop_force_uncompressed(); - - inst = emit(MOV(fs_reg(MRF, first_color_mrf + index + 4, color.type), - half(color, 1))); - inst->force_sechalf = true; - inst->saturate = c->key.clamp_fragment_color; + for (unsigned i = 0; i < 4; ++i) { + if (colors_enabled & (1 << i)) { + dst[i] = fs_reg(GRF, virtual_grf_alloc(1), color.type); + inst = emit(MOV(dst[i], half(offset(color, i), 0))); + inst->saturate = key->clamp_fragment_color; + + dst[i + 4] = fs_reg(GRF, virtual_grf_alloc(1), color.type); + inst = emit(MOV(dst[i + 4], half(offset(color, i), 1))); + inst->saturate = key->clamp_fragment_color; + inst->force_sechalf = true; + } } + return 8; } } -static int +static enum brw_conditional_mod cond_for_alpha_func(GLenum func) { switch(func) { @@ -2673,8 +3153,7 @@ cond_for_alpha_func(GLenum func) case GL_NOTEQUAL: return BRW_CONDITIONAL_NEQ; default: - assert(!"Not reached"); - return 0; + unreachable("Not reached"); } } @@ -2685,13 +3164,15 @@ cond_for_alpha_func(GLenum func) void fs_visitor::emit_alpha_test() { + assert(stage == MESA_SHADER_FRAGMENT); + brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; this->current_annotation = "Alpha test"; fs_inst *cmp; - if (c->key.alpha_test_func == GL_ALWAYS) + if (key->alpha_test_func == GL_ALWAYS) return; - if (c->key.alpha_test_func == GL_NEVER) { + if (key->alpha_test_func == GL_NEVER) { /* f0.1 = 0 */ fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); @@ -2699,36 +3180,33 @@ fs_visitor::emit_alpha_test() BRW_CONDITIONAL_NEQ)); } else { /* RT0 alpha */ - fs_reg color = outputs[0]; - color.reg_offset += 3; + fs_reg color = offset(outputs[0], 3); /* f0.1 &= func(color, ref) */ - cmp = emit(CMP(reg_null_f, color, fs_reg(c->key.alpha_test_ref), - cond_for_alpha_func(c->key.alpha_test_func))); + cmp = emit(CMP(reg_null_f, color, fs_reg(key->alpha_test_ref), + cond_for_alpha_func(key->alpha_test_func))); } cmp->predicate = BRW_PREDICATE_NORMAL; cmp->flag_subreg = 1; } -void -fs_visitor::emit_fb_writes() +fs_inst * +fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1, + fs_reg src0_alpha, unsigned components) { + assert(stage == MESA_SHADER_FRAGMENT); + brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data; + brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; + this->current_annotation = "FB write header"; bool header_present = true; + int reg_size = dispatch_width / 8; + /* We can potentially have a message length of up to 15, so we have to set * base_mrf to either 0 or 1 in order to fit in m0..m15. */ - int base_mrf = 1; - int nr = base_mrf; - int reg_width = dispatch_width / 8; - bool do_dual_src = this->dual_src_output.file != BAD_FILE; - bool src0_alpha_to_render_target = false; - - if (do_dual_src) { - no16("GL_ARB_blend_func_extended not yet supported in SIMD16."); - if (dispatch_width == 16) - do_dual_src = false; - } + fs_reg *sources = ralloc_array(mem_ctx, fs_reg, 15); + int length = 0; /* From the Sandy Bridge PRM, volume 4, page 198: * @@ -2738,46 +3216,59 @@ fs_visitor::emit_fb_writes() * thread message and on all dual-source messages." */ if (brw->gen >= 6 && - (brw->is_haswell || brw->gen >= 8 || !this->fp->UsesKill) && - !do_dual_src && - c->key.nr_color_regions == 1) { + (brw->is_haswell || brw->gen >= 8 || !prog_data->uses_kill) && + color1.file == BAD_FILE && + key->nr_color_regions == 1) { header_present = false; } - if (header_present) { - src0_alpha_to_render_target = brw->gen >= 6 && - !do_dual_src && - c->key.replicate_alpha; - /* m2, m3 header */ - nr += 2; - } + if (header_present) + /* Allocate 2 registers for a header */ + length += 2; - if (c->aa_dest_stencil_reg) { - push_force_uncompressed(); - emit(MOV(fs_reg(MRF, nr++), - fs_reg(brw_vec8_grf(c->aa_dest_stencil_reg, 0)))); - pop_force_uncompressed(); + if (payload.aa_dest_stencil_reg) { + sources[length] = fs_reg(GRF, virtual_grf_alloc(1)); + emit(MOV(sources[length], + fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg, 0)))); + length++; } - c->prog_data.uses_omask = - fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK); - if(c->prog_data.uses_omask) { + prog_data->uses_omask = + prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK); + if (prog_data->uses_omask) { this->current_annotation = "FB write oMask"; assert(this->sample_mask.file != BAD_FILE); - /* Hand over gl_SampleMask. Only lower 16 bits are relevant. */ - emit(FS_OPCODE_SET_OMASK, fs_reg(MRF, nr, BRW_REGISTER_TYPE_UW), this->sample_mask); - nr += 1; + /* Hand over gl_SampleMask. Only lower 16 bits are relevant. Since + * it's unsinged single words, one vgrf is always 16-wide. + */ + sources[length] = fs_reg(GRF, virtual_grf_alloc(1), + BRW_REGISTER_TYPE_UW, 16); + emit(FS_OPCODE_SET_OMASK, sources[length], this->sample_mask); + length++; } - /* Reserve space for color. It'll be filled in per MRT below. */ - int color_mrf = nr; - nr += 4 * reg_width; - if (do_dual_src) - nr += 4; - if (src0_alpha_to_render_target) - nr += reg_width; + if (color0.file == BAD_FILE) { + /* Even if there's no color buffers enabled, we still need to send + * alpha out the pipeline to our null renderbuffer to support + * alpha-testing, alpha-to-coverage, and so on. + */ + length += setup_color_payload(sources + length, this->outputs[0], 0); + } else if (color1.file == BAD_FILE) { + if (src0_alpha.file != BAD_FILE) { + sources[length] = fs_reg(GRF, virtual_grf_alloc(reg_size), + src0_alpha.type, src0_alpha.width); + fs_inst *inst = emit(MOV(sources[length], src0_alpha)); + inst->saturate = key->clamp_fragment_color; + length++; + } - if (c->source_depth_to_render_target) { + length += setup_color_payload(sources + length, color0, components); + } else { + length += setup_color_payload(sources + length, color0, components); + length += setup_color_payload(sources + length, color1, components); + } + + if (source_depth_to_render_target) { if (brw->gen == 6) { /* For outputting oDepth on gen6, SIMD8 writes have to be * used. This would require SIMD8 moves of each half to @@ -2787,130 +3278,108 @@ fs_visitor::emit_fb_writes() no16("Missing support for simd16 depth writes on gen6\n"); } + sources[length] = fs_reg(this, glsl_type::float_type); if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { /* Hand over gl_FragDepth. */ assert(this->frag_depth.file != BAD_FILE); - emit(MOV(fs_reg(MRF, nr), this->frag_depth)); + emit(MOV(sources[length], this->frag_depth)); } else { /* Pass through the payload depth. */ - emit(MOV(fs_reg(MRF, nr), - fs_reg(brw_vec8_grf(c->source_depth_reg, 0)))); + emit(MOV(sources[length], + fs_reg(brw_vec8_grf(payload.source_depth_reg, 0)))); } - nr += reg_width; + length++; } - if (c->dest_depth_reg) { - emit(MOV(fs_reg(MRF, nr), - fs_reg(brw_vec8_grf(c->dest_depth_reg, 0)))); - nr += reg_width; + if (payload.dest_depth_reg) { + sources[length] = fs_reg(this, glsl_type::float_type); + emit(MOV(sources[length], + fs_reg(brw_vec8_grf(payload.dest_depth_reg, 0)))); + length++; } - if (do_dual_src) { - fs_reg src0 = this->outputs[0]; - fs_reg src1 = this->dual_src_output; + fs_inst *load; + fs_inst *write; + if (brw->gen >= 7) { + /* Send from the GRF */ + fs_reg payload = fs_reg(GRF, -1, BRW_REGISTER_TYPE_F); + load = emit(LOAD_PAYLOAD(payload, sources, length)); + payload.reg = virtual_grf_alloc(load->regs_written); + load->dst = payload; + write = emit(FS_OPCODE_FB_WRITE, reg_undef, payload); + write->base_mrf = -1; + } else { + /* Send from the MRF */ + load = emit(LOAD_PAYLOAD(fs_reg(MRF, 1, BRW_REGISTER_TYPE_F), + sources, length)); + write = emit(FS_OPCODE_FB_WRITE); + write->base_mrf = 1; + } - this->current_annotation = ralloc_asprintf(this->mem_ctx, - "FB write src0"); - for (int i = 0; i < 4; i++) { - fs_inst *inst = emit(MOV(fs_reg(MRF, color_mrf + i, src0.type), src0)); - src0.reg_offset++; - inst->saturate = c->key.clamp_fragment_color; - } + write->mlen = load->regs_written; + write->header_present = header_present; + if ((brw->gen >= 8 || brw->is_haswell) && prog_data->uses_kill) { + write->predicate = BRW_PREDICATE_NORMAL; + write->flag_subreg = 1; + } + return write; +} - this->current_annotation = ralloc_asprintf(this->mem_ctx, - "FB write src1"); - for (int i = 0; i < 4; i++) { - fs_inst *inst = emit(MOV(fs_reg(MRF, color_mrf + 4 + i, src1.type), - src1)); - src1.reg_offset++; - inst->saturate = c->key.clamp_fragment_color; - } +void +fs_visitor::emit_fb_writes() +{ + assert(stage == MESA_SHADER_FRAGMENT); + brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data; + brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; + if (do_dual_src) { + no16("GL_ARB_blend_func_extended not yet supported in SIMD16."); + if (dispatch_width == 16) + do_dual_src = false; + } + + fs_inst *inst; + if (do_dual_src) { if (INTEL_DEBUG & DEBUG_SHADER_TIME) emit_shader_time_end(); - fs_inst *inst = emit(FS_OPCODE_FB_WRITE); - inst->target = 0; - inst->base_mrf = base_mrf; - inst->mlen = nr - base_mrf; - inst->eot = true; - inst->header_present = header_present; - if ((brw->gen >= 8 || brw->is_haswell) && fp->UsesKill) { - inst->predicate = BRW_PREDICATE_NORMAL; - inst->flag_subreg = 1; - } - - c->prog_data.dual_src_blend = true; - this->current_annotation = NULL; - return; - } - - for (int target = 0; target < c->key.nr_color_regions; target++) { this->current_annotation = ralloc_asprintf(this->mem_ctx, - "FB write target %d", - target); - /* If src0_alpha_to_render_target is true, include source zero alpha - * data in RenderTargetWrite message for targets > 0. - */ - int write_color_mrf = color_mrf; - if (src0_alpha_to_render_target && target != 0) { - fs_inst *inst; - fs_reg color = outputs[0]; - color.reg_offset += 3; - - inst = emit(MOV(fs_reg(MRF, write_color_mrf, color.type), - color)); - inst->saturate = c->key.clamp_fragment_color; - write_color_mrf = color_mrf + reg_width; - } - - for (unsigned i = 0; i < this->output_components[target]; i++) - emit_color_write(target, i, write_color_mrf); - - bool eot = false; - if (target == c->key.nr_color_regions - 1) { - eot = true; - - if (INTEL_DEBUG & DEBUG_SHADER_TIME) + "FB dual-source write"); + inst = emit_single_fb_write(this->outputs[0], this->dual_src_output, + reg_undef, 4); + inst->target = 0; + prog_data->dual_src_blend = true; + } else if (key->nr_color_regions > 0) { + for (int target = 0; target < key->nr_color_regions; target++) { + this->current_annotation = ralloc_asprintf(this->mem_ctx, + "FB write target %d", + target); + fs_reg src0_alpha; + if (brw->gen >= 6 && key->replicate_alpha && target != 0) + src0_alpha = offset(outputs[0], 3); + + if (target == key->nr_color_regions - 1 && + (INTEL_DEBUG & DEBUG_SHADER_TIME)) emit_shader_time_end(); - } - fs_inst *inst = emit(FS_OPCODE_FB_WRITE); - inst->target = target; - inst->base_mrf = base_mrf; - if (src0_alpha_to_render_target && target == 0) - inst->mlen = nr - base_mrf - reg_width; - else - inst->mlen = nr - base_mrf; - inst->eot = eot; - inst->header_present = header_present; - if ((brw->gen >= 8 || brw->is_haswell) && fp->UsesKill) { - inst->predicate = BRW_PREDICATE_NORMAL; - inst->flag_subreg = 1; + inst = emit_single_fb_write(this->outputs[target], reg_undef, + src0_alpha, + this->output_components[target]); + inst->target = target; } - } + } else { + if (INTEL_DEBUG & DEBUG_SHADER_TIME) + emit_shader_time_end(); - if (c->key.nr_color_regions == 0) { /* Even if there's no color buffers enabled, we still need to send * alpha out the pipeline to our null renderbuffer to support * alpha-testing, alpha-to-coverage, and so on. */ - emit_color_write(0, 3, color_mrf); - - if (INTEL_DEBUG & DEBUG_SHADER_TIME) - emit_shader_time_end(); - - fs_inst *inst = emit(FS_OPCODE_FB_WRITE); - inst->base_mrf = base_mrf; - inst->mlen = nr - base_mrf; - inst->eot = true; - inst->header_present = header_present; - if ((brw->gen >= 8 || brw->is_haswell) && fp->UsesKill) { - inst->predicate = BRW_PREDICATE_NORMAL; - inst->flag_subreg = 1; - } + inst = emit_single_fb_write(reg_undef, reg_undef, reg_undef, 0); + inst->target = 0; } + inst->eot = true; this->current_annotation = NULL; } @@ -2929,6 +3398,8 @@ fs_visitor::resolve_ud_negate(fs_reg *reg) void fs_visitor::resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg) { + assert(ctx->Const.UniformBooleanTrue == 1); + if (rvalue->type != glsl_type::bool_type) return; @@ -2938,17 +3409,27 @@ fs_visitor::resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg) } fs_visitor::fs_visitor(struct brw_context *brw, - struct brw_wm_compile *c, + void *mem_ctx, + const struct brw_wm_prog_key *key, + struct brw_wm_prog_data *prog_data, struct gl_shader_program *shader_prog, struct gl_fragment_program *fp, unsigned dispatch_width) - : backend_visitor(brw, shader_prog, &fp->Base, &c->prog_data.base, + : backend_visitor(brw, shader_prog, &fp->Base, &prog_data->base, MESA_SHADER_FRAGMENT), + reg_null_f(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_F)), + reg_null_d(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_D)), + reg_null_ud(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_UD)), + key(key), prog_data(&prog_data->base), dispatch_width(dispatch_width) { - this->c = c; - this->fp = fp; - this->mem_ctx = ralloc_context(NULL); + this->mem_ctx = mem_ctx; + init(); +} + +void +fs_visitor::init() +{ this->failed = false; this->simd16_unsupported = false; this->no16_msg = NULL; @@ -2956,8 +3437,11 @@ fs_visitor::fs_visitor(struct brw_context *brw, hash_table_pointer_hash, hash_table_pointer_compare); + memset(&this->payload, 0, sizeof(this->payload)); memset(this->outputs, 0, sizeof(this->outputs)); memset(this->output_components, 0, sizeof(this->output_components)); + this->source_depth_to_render_target = false; + this->runtime_check_aads_emit = false; this->first_non_payload_grf = 0; this->max_grf = brw->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF; @@ -2973,12 +3457,14 @@ fs_visitor::fs_visitor(struct brw_context *brw, this->regs_live_at_ip = NULL; this->uniforms = 0; + this->last_scratch = 0; this->pull_constant_loc = NULL; this->push_constant_loc = NULL; this->force_uncompressed_stack = 0; this->spilled_any_registers = false; + this->do_dual_src = false; if (dispatch_width == 8) this->param_size = rzalloc_array(mem_ctx, int, stage_prog_data->nr_params); @@ -2986,6 +3472,5 @@ fs_visitor::fs_visitor(struct brw_context *brw, fs_visitor::~fs_visitor() { - ralloc_free(this->mem_ctx); hash_table_dtor(this->variable_ht); }