X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_vec4_visitor.cpp;h=473b3c53336374acb51bea4869c30564c736f4e7;hb=7865026c04f6cc36dc81f993bc32ddda2806ecb5;hp=15a6cbdd256aaad3c78b7151814e77a108bb1a9b;hpb=51000c2ff8a6b59b6dab51cbd63ef87ac6f2a317;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 15a6cbdd256..473b3c53336 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -30,8 +30,9 @@ extern "C" { namespace brw { vec4_instruction::vec4_instruction(vec4_visitor *v, - enum opcode opcode, dst_reg dst, - src_reg src0, src_reg src1, src_reg src2) + enum opcode opcode, const dst_reg &dst, + const src_reg &src0, const src_reg &src1, + const src_reg &src2) { this->opcode = opcode; this->dst = dst; @@ -42,8 +43,8 @@ vec4_instruction::vec4_instruction(vec4_visitor *v, this->force_writemask_all = false; this->no_dd_clear = false; this->no_dd_check = false; + this->writes_accumulator = false; this->conditional_mod = BRW_CONDITIONAL_NONE; - this->sampler = 0; this->texture_offset = 0; this->target = 0; this->shadow_compare = false; @@ -110,7 +111,7 @@ vec4_visitor::emit(enum opcode opcode) #define ALU1(op) \ vec4_instruction * \ - vec4_visitor::op(dst_reg dst, src_reg src0) \ + vec4_visitor::op(const dst_reg &dst, const src_reg &src0) \ { \ return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \ src0); \ @@ -118,16 +119,30 @@ vec4_visitor::emit(enum opcode opcode) #define ALU2(op) \ vec4_instruction * \ - vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1) \ + vec4_visitor::op(const dst_reg &dst, const src_reg &src0, \ + const src_reg &src1) \ { \ return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \ src0, src1); \ } +#define ALU2_ACC(op) \ + vec4_instruction * \ + vec4_visitor::op(const dst_reg &dst, const src_reg &src0, \ + const src_reg &src1) \ + { \ + vec4_instruction *inst = new(mem_ctx) vec4_instruction(this, \ + BRW_OPCODE_##op, dst, src0, src1); \ + inst->writes_accumulator = true; \ + return inst; \ + } + #define ALU3(op) \ vec4_instruction * \ - vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1, src_reg src2)\ + vec4_visitor::op(const dst_reg &dst, const src_reg &src0, \ + const src_reg &src1, const src_reg &src2) \ { \ + assert(brw->gen >= 6); \ return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \ src0, src1, src2); \ } @@ -142,7 +157,7 @@ ALU1(F32TO16) ALU1(F16TO32) ALU2(ADD) ALU2(MUL) -ALU2(MACH) +ALU2_ACC(MACH) ALU2(AND) ALU2(OR) ALU2(XOR) @@ -161,12 +176,13 @@ ALU1(FBH) ALU1(FBL) ALU1(CBIT) ALU3(MAD) -ALU2(ADDC) -ALU2(SUBB) +ALU2_ACC(ADDC) +ALU2_ACC(SUBB) +ALU2(MAC) /** Gen4 predicated IF. */ vec4_instruction * -vec4_visitor::IF(uint32_t predicate) +vec4_visitor::IF(enum brw_predicate predicate) { vec4_instruction *inst; @@ -178,7 +194,8 @@ vec4_visitor::IF(uint32_t predicate) /** Gen6 IF with embedded comparison. */ vec4_instruction * -vec4_visitor::IF(src_reg src0, src_reg src1, uint32_t condition) +vec4_visitor::IF(src_reg src0, src_reg src1, + enum brw_conditional_mod condition) { assert(brw->gen == 6); @@ -200,7 +217,8 @@ vec4_visitor::IF(src_reg src0, src_reg src1, uint32_t condition) * the flag register with the packed 16 bits of the result. 
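 *
 * For illustration, a boolean-producing comparison typically pairs this
 * CMP with a predicated SEL that consumes the packed flag bits rather
 * than the undefined upper bits of the destination. A rough sketch with
 * hypothetical register names, not the exact generator output:
 *
 *    cmp.ge.f0  tmp, src0, src1
 *    (+f0) sel  dst, 1.0f, 0.0f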
*/ vec4_instruction * -vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1, uint32_t condition) +vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1, + enum brw_conditional_mod condition) { vec4_instruction *inst; @@ -224,7 +242,7 @@ vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1, uint32_t condition) } vec4_instruction * -vec4_visitor::SCRATCH_READ(dst_reg dst, src_reg index) +vec4_visitor::SCRATCH_READ(const dst_reg &dst, const src_reg &index) { vec4_instruction *inst; @@ -237,7 +255,8 @@ vec4_visitor::SCRATCH_READ(dst_reg dst, src_reg index) } vec4_instruction * -vec4_visitor::SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index) +vec4_visitor::SCRATCH_WRITE(const dst_reg &dst, const src_reg &src, + const src_reg &index) { vec4_instruction *inst; @@ -276,6 +295,9 @@ vec4_visitor::fix_3src_operand(src_reg src) if (src.file != UNIFORM && src.file != IMM) return src; + if (src.file == UNIFORM && brw_is_single_value_swizzle(src.swizzle)) + return src; + dst_reg expanded = dst_reg(this, glsl_type::vec4_type); expanded.type = src.type; emit(MOV(expanded, src)); @@ -310,7 +332,7 @@ vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src) { src = fix_math_operand(src); - if (dst.writemask != WRITEMASK_XYZW) { + if (brw->gen == 6 && dst.writemask != WRITEMASK_XYZW) { /* The gen6 math instruction must be align1, so we can't do * writemasks. */ @@ -345,14 +367,15 @@ vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src) case SHADER_OPCODE_COS: break; default: - assert(!"not reached: bad math opcode"); - return; + unreachable("not reached: bad math opcode"); } - if (brw->gen >= 6) { - return emit_math1_gen6(opcode, dst, src); + if (brw->gen >= 8) { + emit(opcode, dst, src); + } else if (brw->gen >= 6) { + emit_math1_gen6(opcode, dst, src); } else { - return emit_math1_gen4(opcode, dst, src); + emit_math1_gen4(opcode, dst, src); } } @@ -363,7 +386,7 @@ vec4_visitor::emit_math2_gen6(enum opcode opcode, src0 = fix_math_operand(src0); src1 = fix_math_operand(src1); - if (dst.writemask != WRITEMASK_XYZW) { + if (brw->gen == 6 && dst.writemask != WRITEMASK_XYZW) { /* The gen6 math instruction must be align1, so we can't do * writemasks. 
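       *
       * Instead the strategy is roughly the following (a sketch with
       * hypothetical names, not the literal emitted code):
       *
       *    math tmp, src0, src1    // full-width math into a temporary
       *    mov  dst.mask, tmp      // then copy out the wanted channels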
*/ @@ -397,22 +420,24 @@ vec4_visitor::emit_math(enum opcode opcode, case SHADER_OPCODE_INT_REMAINDER: break; default: - assert(!"not reached: unsupported binary math opcode"); - return; + unreachable("not reached: unsupported binary math opcode"); } - if (brw->gen >= 6) { - return emit_math2_gen6(opcode, dst, src0, src1); + if (brw->gen >= 8) { + emit(opcode, dst, src0, src1); + } else if (brw->gen >= 6) { + emit_math2_gen6(opcode, dst, src0, src1); } else { - return emit_math2_gen4(opcode, dst, src0, src1); + emit_math2_gen4(opcode, dst, src0, src1); } } void vec4_visitor::emit_pack_half_2x16(dst_reg dst, src_reg src0) { - if (brw->gen < 7) - assert(!"ir_unop_pack_half_2x16 should be lowered"); + if (brw->gen < 7) { + unreachable("ir_unop_pack_half_2x16 should be lowered"); + } assert(dst.type == BRW_REGISTER_TYPE_UD); assert(src0.type == BRW_REGISTER_TYPE_F); @@ -473,22 +498,23 @@ vec4_visitor::emit_pack_half_2x16(dst_reg dst, src_reg src0) /* Give the write-channels of dst the form: * 0xhhhh0000 */ - tmp_src.swizzle = SWIZZLE_Y; + tmp_src.swizzle = BRW_SWIZZLE_YYYY; emit(SHL(dst, tmp_src, src_reg(16u))); /* Finally, give the write-channels of dst the form of packHalf2x16's * output: * 0xhhhhllll */ - tmp_src.swizzle = SWIZZLE_X; + tmp_src.swizzle = BRW_SWIZZLE_XXXX; emit(OR(dst, src_reg(dst), tmp_src)); } void vec4_visitor::emit_unpack_half_2x16(dst_reg dst, src_reg src0) { - if (brw->gen < 7) - assert(!"ir_unop_unpack_half_2x16 should be lowered"); + if (brw->gen < 7) { + unreachable("ir_unop_unpack_half_2x16 should be lowered"); + } assert(dst.type == BRW_REGISTER_TYPE_F); assert(src0.type == BRW_REGISTER_TYPE_UD); @@ -524,9 +550,7 @@ vec4_visitor::emit_unpack_half_2x16(dst_reg dst, src_reg src0) void vec4_visitor::visit_instructions(const exec_list *list) { - foreach_list(node, list) { - ir_instruction *ir = (ir_instruction *)node; - + foreach_in_list(ir_instruction, ir, list) { base_ir = ir; ir->accept(this); } @@ -564,17 +588,17 @@ type_size(const struct glsl_type *type) } return size; case GLSL_TYPE_SAMPLER: - /* Samplers take up one slot in UNIFORMS[], but they're baked in - * at link time. + /* Samplers take up no register space, since they're baked in at + * link time. 
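+       *
+       * For example (an illustrative declaration, not from this file):
+       *
+       *    uniform sampler2D tex;   // contributes 0 to type_size()
+       *
+       * so no UNIFORM slots are reserved for it.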
*/ - return 1; + return 0; case GLSL_TYPE_ATOMIC_UINT: return 0; + case GLSL_TYPE_IMAGE: case GLSL_TYPE_VOID: case GLSL_TYPE_ERROR: case GLSL_TYPE_INTERFACE: - assert(0); - break; + unreachable("not reached"); } return 0; @@ -662,16 +686,17 @@ vec4_visitor::setup_uniform_values(ir_variable *ir) storage->type->matrix_columns); for (unsigned s = 0; s < vector_count; s++) { + assert(uniforms < uniform_array_size); uniform_vector_size[uniforms] = storage->type->vector_elements; int i; for (i = 0; i < uniform_vector_size[uniforms]; i++) { - prog_data->param[uniforms * 4 + i] = &components->f; + stage_prog_data->param[uniforms * 4 + i] = components; components++; } for (; i < 4; i++) { - static float zero = 0; - prog_data->param[uniforms * 4 + i] = &zero; + static gl_constant_value zero = { 0.0 }; + stage_prog_data->param[uniforms * 4 + i] = &zero; } uniforms++; @@ -685,11 +710,13 @@ vec4_visitor::setup_uniform_clipplane_values() gl_clip_plane *clip_planes = brw_select_clip_planes(ctx); for (int i = 0; i < key->nr_userclip_plane_consts; ++i) { + assert(this->uniforms < uniform_array_size); this->uniform_vector_size[this->uniforms] = 4; this->userplane[i] = dst_reg(UNIFORM, this->uniforms); this->userplane[i].type = BRW_REGISTER_TYPE_F; for (int j = 0; j < 4; ++j) { - prog_data->param[this->uniforms * 4 + j] = &clip_planes[i][j]; + stage_prog_data->param[this->uniforms * 4 + j] = + (gl_constant_value *) &clip_planes[i][j]; } ++this->uniforms; } @@ -713,8 +740,10 @@ vec4_visitor::setup_builtin_uniform_values(ir_variable *ir) */ int index = _mesa_add_state_reference(this->prog->Parameters, (gl_state_index *)slots[i].tokens); - float *values = &this->prog->Parameters->ParameterValues[index][0].f; + gl_constant_value *values = + &this->prog->Parameters->ParameterValues[index][0]; + assert(this->uniforms < uniform_array_size); this->uniform_vector_size[this->uniforms] = 0; /* Add each of the unique swizzled channels of the element. * This will end up matching the size of the glsl_type of this field. @@ -724,7 +753,8 @@ vec4_visitor::setup_builtin_uniform_values(ir_variable *ir) int swiz = GET_SWZ(slots[i].swizzle, j); last_swiz = swiz; - prog_data->param[this->uniforms * 4 + j] = &values[swiz]; + stage_prog_data->param[this->uniforms * 4 + j] = &values[swiz]; + assert(this->uniforms < uniform_array_size); if (swiz <= last_swiz) this->uniform_vector_size[this->uniforms]++; } @@ -739,17 +769,18 @@ vec4_visitor::variable_storage(ir_variable *var) } void -vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir, uint32_t *predicate) +vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir, + enum brw_predicate *predicate) { ir_expression *expr = ir->as_expression(); *predicate = BRW_PREDICATE_NORMAL; - if (expr) { - src_reg op[2]; + if (expr && expr->operation != ir_binop_ubo_load) { + src_reg op[3]; vec4_instruction *inst; - assert(expr->get_num_operands() <= 2); + assert(expr->get_num_operands() <= 3); for (unsigned int i = 0; i < expr->get_num_operands(); i++) { expr->operands[i]->accept(this); op[i] = this->result; @@ -821,9 +852,24 @@ vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir, uint32_t *predicate) brw_conditional_for_comparison(expr->operation))); break; + case ir_triop_csel: { + /* Expand the boolean condition into the flag register. */ + inst = emit(MOV(dst_null_d(), op[0])); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + + /* Select which boolean to return. 
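+       * The whole csel(a, b, c) condition expands to roughly this
+       * sequence (a sketch of the three emits below):
+       *
+       *    mov.nz.f0  null, a      // condition -> flag
+       *    (+f0) sel  tmp, b, c    // pick one of the two booleans
+       *    mov.nz.f0  null, tmp    // selected boolean -> flag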
*/ + dst_reg temp(this, expr->operands[1]->type); + inst = emit(BRW_OPCODE_SEL, temp, op[1], op[2]); + inst->predicate = BRW_PREDICATE_NORMAL; + + /* Expand the result to a condition code. */ + inst = emit(MOV(dst_null_d(), src_reg(temp))); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + break; + } + default: - assert(!"not reached"); - break; + unreachable("not reached"); } return; } @@ -851,11 +897,11 @@ vec4_visitor::emit_if_gen6(ir_if *ir) { ir_expression *expr = ir->condition->as_expression(); - if (expr) { - src_reg op[2]; + if (expr && expr->operation != ir_binop_ubo_load) { + src_reg op[3]; dst_reg temp; - assert(expr->get_num_operands() <= 2); + assert(expr->get_num_operands() <= 3); for (unsigned int i = 0; i < expr->get_num_operands(); i++) { expr->operands[i]->accept(this); op[i] = this->result; @@ -915,10 +961,22 @@ vec4_visitor::emit_if_gen6(ir_if *ir) emit(IF(BRW_PREDICATE_ALIGN16_ANY4H)); return; + case ir_triop_csel: { + /* Expand the boolean condition into the flag register. */ + vec4_instruction *inst = emit(MOV(dst_null_d(), op[0])); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + + /* Select which boolean to return. */ + dst_reg temp(this, expr->operands[1]->type); + inst = emit(BRW_OPCODE_SEL, temp, op[1], op[2]); + inst->predicate = BRW_PREDICATE_NORMAL; + + emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ)); + return; + } + default: - assert(!"not reached"); - emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ)); - return; + unreachable("not reached"); } return; } @@ -928,15 +986,6 @@ vec4_visitor::emit_if_gen6(ir_if *ir) emit(IF(this->result, src_reg(0), BRW_CONDITIONAL_NZ)); } -dst_reg -with_writemask(dst_reg const & r, int mask) -{ - dst_reg result = r; - result.writemask = mask; - return result; -} - - void vec4_visitor::visit(ir_variable *ir) { @@ -974,15 +1023,16 @@ vec4_visitor::visit(ir_variable *ir) * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO * variables, so no need for them to be in variable_ht. * - * Atomic counters take no uniform storage, no need to do - * anything here. + * Some uniforms, such as samplers and atomic counters, have no actual + * storage, so we should ignore them. */ - if (ir->is_in_uniform_block() || ir->type->contains_atomic()) + if (ir->is_in_uniform_block() || type_size(ir->type) == 0) return; /* Track how big the whole uniform variable is, in case we need to put a * copy of its data into pull constants for array access. 
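    *
    * For example, a declaration such as (illustrative only)
    *
    *    uniform vec4 colors[8];
    *
    * records eight vec4 slots here; a later colors[i] access with a
    * non-constant i pushes the whole array into pull constants.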
*/ + assert(this->uniforms < uniform_array_size); this->uniform_size[this->uniforms] = type_size(ir->type); if (!strncmp(ir->name, "gl_", 3)) { @@ -997,7 +1047,7 @@ vec4_visitor::visit(ir_variable *ir) break; default: - assert(!"not reached"); + unreachable("not reached"); } reg->type = brw_type_for_base_type(ir->type); @@ -1034,10 +1084,9 @@ vec4_visitor::visit(ir_loop_jump *ir) void -vec4_visitor::visit(ir_function_signature *ir) +vec4_visitor::visit(ir_function_signature *) { - assert(0); - (void)ir; + unreachable("not reached"); } void @@ -1050,7 +1099,7 @@ vec4_visitor::visit(ir_function *ir) const ir_function_signature *sig; exec_list empty; - sig = ir->matching_signature(NULL, &empty); + sig = ir->matching_signature(NULL, &empty, false); assert(sig); @@ -1059,25 +1108,7 @@ vec4_visitor::visit(ir_function *ir) } bool -vec4_visitor::try_emit_sat(ir_expression *ir) -{ - ir_rvalue *sat_src = ir->as_rvalue_to_saturate(); - if (!sat_src) - return false; - - sat_src->accept(this); - src_reg src = this->result; - - this->result = src_reg(this, ir->type); - vec4_instruction *inst; - inst = emit(MOV(dst_reg(this->result), src)); - inst->saturate = true; - - return true; -} - -bool -vec4_visitor::try_emit_mad(ir_expression *ir, int mul_arg) +vec4_visitor::try_emit_mad(ir_expression *ir) { /* 3-src instructions were introduced in gen6. */ if (brw->gen < 6) @@ -1087,11 +1118,16 @@ vec4_visitor::try_emit_mad(ir_expression *ir, int mul_arg) if (ir->type->base_type != GLSL_TYPE_FLOAT) return false; - ir_rvalue *nonmul = ir->operands[1 - mul_arg]; - ir_expression *mul = ir->operands[mul_arg]->as_expression(); + ir_rvalue *nonmul = ir->operands[1]; + ir_expression *mul = ir->operands[0]->as_expression(); - if (!mul || mul->operation != ir_binop_mul) - return false; + if (!mul || mul->operation != ir_binop_mul) { + nonmul = ir->operands[0]; + mul = ir->operands[1]->as_expression(); + + if (!mul || mul->operation != ir_binop_mul) + return false; + } nonmul->accept(this); src_reg src0 = fix_3src_operand(this->result); @@ -1108,22 +1144,57 @@ vec4_visitor::try_emit_mad(ir_expression *ir, int mul_arg) return true; } -void -vec4_visitor::emit_bool_comparison(unsigned int op, - dst_reg dst, src_reg src0, src_reg src1) +bool +vec4_visitor::try_emit_b2f_of_compare(ir_expression *ir) { - /* original gen4 does destination conversion before comparison. */ - if (brw->gen < 5) - dst.type = src0.type; + /* This optimization relies on CMP setting the destination to 0 when + * false. Early hardware only sets the least significant bit, and + * leaves the other bits undefined. So we can't use it. + */ + if (brw->gen < 6) + return false; + + ir_expression *const cmp = ir->operands[0]->as_expression(); + + if (cmp == NULL) + return false; + + switch (cmp->operation) { + case ir_binop_less: + case ir_binop_greater: + case ir_binop_lequal: + case ir_binop_gequal: + case ir_binop_equal: + case ir_binop_nequal: + break; + + default: + return false; + } + + cmp->operands[0]->accept(this); + const src_reg cmp_src0 = this->result; - emit(CMP(dst, src0, src1, brw_conditional_for_comparison(op))); + cmp->operands[1]->accept(this); + const src_reg cmp_src1 = this->result; + + this->result = src_reg(this, ir->type); + + emit(CMP(dst_reg(this->result), cmp_src0, cmp_src1, + brw_conditional_for_comparison(cmp->operation))); + + /* If the comparison is false, this->result will just happen to be zero. 
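+    * The resulting two-instruction sequence is roughly (a sketch):
+    *
+    *    cmp.<cond>.f0  result, src0, src1    // true channels -> ~0
+    *    (-f0) sel      result, result, 1.0f  // false keeps 0.0,
+    *                                         // true becomes 1.0f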
+ */ + vec4_instruction *const inst = emit(BRW_OPCODE_SEL, dst_reg(this->result), + this->result, src_reg(1.0f)); + inst->predicate = BRW_PREDICATE_NORMAL; + inst->predicate_inverse = true; - dst.type = BRW_REGISTER_TYPE_D; - emit(AND(dst, src_reg(dst), src_reg(0x1))); + return true; } void -vec4_visitor::emit_minmax(uint32_t conditionalmod, dst_reg dst, +vec4_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst, src_reg src0, src_reg src1) { vec4_instruction *inst; @@ -1139,18 +1210,32 @@ vec4_visitor::emit_minmax(uint32_t conditionalmod, dst_reg dst, } } -static bool -is_16bit_constant(ir_rvalue *rvalue) +void +vec4_visitor::emit_lrp(const dst_reg &dst, + const src_reg &x, const src_reg &y, const src_reg &a) { - ir_constant *constant = rvalue->as_constant(); - if (!constant) - return false; - - if (constant->type != glsl_type::int_type && - constant->type != glsl_type::uint_type) - return false; + if (brw->gen >= 6) { + /* Note that the instruction's argument order is reversed from GLSL + * and the IR. + */ + emit(LRP(dst, + fix_3src_operand(a), fix_3src_operand(y), fix_3src_operand(x))); + } else { + /* Earlier generations don't support three source operations, so we + * need to emit x*(1-a) + y*a. + */ + dst_reg y_times_a = dst_reg(this, glsl_type::vec4_type); + dst_reg one_minus_a = dst_reg(this, glsl_type::vec4_type); + dst_reg x_times_one_minus_a = dst_reg(this, glsl_type::vec4_type); + y_times_a.writemask = dst.writemask; + one_minus_a.writemask = dst.writemask; + x_times_one_minus_a.writemask = dst.writemask; - return constant->value.u[0] < (1 << 16); + emit(MUL(y_times_a, y, a)); + emit(ADD(one_minus_a, negate(a), src_reg(1.0f))); + emit(MUL(x_times_one_minus_a, x, src_reg(one_minus_a))); + emit(ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a))); + } } void @@ -1162,11 +1247,13 @@ vec4_visitor::visit(ir_expression *ir) dst_reg result_dst; vec4_instruction *inst; - if (try_emit_sat(ir)) - return; - if (ir->operation == ir_binop_add) { - if (try_emit_mad(ir, 0) || try_emit_mad(ir, 1)) + if (try_emit_mad(ir)) + return; + } + + if (ir->operation == ir_unop_b2f) { + if (try_emit_b2f_of_compare(ir)) return; } @@ -1174,8 +1261,8 @@ vec4_visitor::visit(ir_expression *ir) this->result.file = BAD_FILE; ir->operands[operand]->accept(this); if (this->result.file == BAD_FILE) { - printf("Failed to get tree for expression operand:\n"); - ir->operands[operand]->print(); + fprintf(stderr, "Failed to get tree for expression operand:\n"); + ir->operands[operand]->fprint(stderr); exit(1); } op[operand] = this->result; @@ -1210,10 +1297,11 @@ vec4_visitor::visit(ir_expression *ir) switch (ir->operation) { case ir_unop_logic_not: - /* Note that BRW_OPCODE_NOT is not appropriate here, since it is - * ones complement of the whole register, not just bit 0. 
- */ - emit(XOR(result_dst, op[0], src_reg(1))); + if (ctx->Const.UniformBooleanTrue != 1) { + emit(NOT(result_dst, op[0])); + } else { + emit(XOR(result_dst, op[0], src_reg(1))); + } break; case ir_unop_neg: op[0].negate = !op[0].negate; @@ -1268,8 +1356,7 @@ vec4_visitor::visit(ir_expression *ir) break; case ir_unop_exp: case ir_unop_log: - assert(!"not reached: should be handled by ir_explog_to_explog2"); - break; + unreachable("not reached: should be handled by ir_explog_to_explog2"); case ir_unop_sin: case ir_unop_sin_reduced: emit_math(SHADER_OPCODE_SIN, result_dst, op[0]); @@ -1280,9 +1367,12 @@ vec4_visitor::visit(ir_expression *ir) break; case ir_unop_dFdx: + case ir_unop_dFdx_coarse: + case ir_unop_dFdx_fine: case ir_unop_dFdy: - assert(!"derivatives not valid in vertex shader"); - break; + case ir_unop_dFdy_coarse: + case ir_unop_dFdy_fine: + unreachable("derivatives not valid in vertex shader"); case ir_unop_bitfield_reverse: emit(BFREV(result_dst, op[0])); @@ -1316,17 +1406,19 @@ vec4_visitor::visit(ir_expression *ir) case ir_unop_find_lsb: emit(FBL(result_dst, op[0])); break; + case ir_unop_saturate: + inst = emit(MOV(result_dst, op[0])); + inst->saturate = true; + break; case ir_unop_noise: - assert(!"not reached: should be handled by lower_noise"); - break; + unreachable("not reached: should be handled by lower_noise"); case ir_binop_add: emit(ADD(result_dst, op[0], op[1])); break; case ir_binop_sub: - assert(!"not reached: should be handled by ir_sub_to_add_neg"); - break; + unreachable("not reached: should be handled by ir_sub_to_add_neg"); case ir_binop_mul: if (brw->gen < 8 && ir->type->is_integer()) { @@ -1336,12 +1428,12 @@ vec4_visitor::visit(ir_expression *ir) * operand. If we can determine that one of the args is in the low * 16 bits, though, we can just emit a single MUL. 
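       *
       * For example (illustrative GLSL), `int n = x * 42;` fits the
       * 16-bit case and becomes a single MUL, while a general 32x32
       * multiply needs the longer MUL/MACH accumulator sequence.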
*/ - if (is_16bit_constant(ir->operands[0])) { + if (ir->operands[0]->is_uint16_constant()) { if (brw->gen < 7) emit(MUL(result_dst, op[0], op[1])); else emit(MUL(result_dst, op[1], op[0])); - } else if (is_16bit_constant(ir->operands[1])) { + } else if (ir->operands[1]->is_uint16_constant()) { if (brw->gen < 7) emit(MUL(result_dst, op[1], op[0])); else @@ -1397,7 +1489,9 @@ vec4_visitor::visit(ir_expression *ir) case ir_binop_nequal: { emit(CMP(result_dst, op[0], op[1], brw_conditional_for_comparison(ir->operation))); - emit(AND(result_dst, result_src, src_reg(0x1))); + if (ctx->Const.UniformBooleanTrue == 1) { + emit(AND(result_dst, result_src, src_reg(1))); + } break; } @@ -1407,11 +1501,13 @@ vec4_visitor::visit(ir_expression *ir) ir->operands[1]->type->is_vector()) { emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z)); emit(MOV(result_dst, src_reg(0))); - inst = emit(MOV(result_dst, src_reg(1))); + inst = emit(MOV(result_dst, src_reg(ctx->Const.UniformBooleanTrue))); inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H; } else { emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_Z)); - emit(AND(result_dst, result_src, src_reg(0x1))); + if (ctx->Const.UniformBooleanTrue == 1) { + emit(AND(result_dst, result_src, src_reg(1))); + } } break; case ir_binop_any_nequal: @@ -1421,11 +1517,13 @@ vec4_visitor::visit(ir_expression *ir) emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ)); emit(MOV(result_dst, src_reg(0))); - inst = emit(MOV(result_dst, src_reg(1))); + inst = emit(MOV(result_dst, src_reg(ctx->Const.UniformBooleanTrue))); inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; } else { emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_NZ)); - emit(AND(result_dst, result_src, src_reg(0x1))); + if (ctx->Const.UniformBooleanTrue == 1) { + emit(AND(result_dst, result_src, src_reg(1))); + } } break; @@ -1433,7 +1531,7 @@ vec4_visitor::visit(ir_expression *ir) emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ)); emit(MOV(result_dst, src_reg(0))); - inst = emit(MOV(result_dst, src_reg(1))); + inst = emit(MOV(result_dst, src_reg(ctx->Const.UniformBooleanTrue))); inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; break; @@ -1482,18 +1580,34 @@ vec4_visitor::visit(ir_expression *ir) case ir_unop_i2u: case ir_unop_u2i: case ir_unop_u2f: - case ir_unop_b2f: - case ir_unop_b2i: case ir_unop_f2i: case ir_unop_f2u: emit(MOV(result_dst, op[0])); break; + case ir_unop_b2i: + if (ctx->Const.UniformBooleanTrue != 1) { + emit(AND(result_dst, op[0], src_reg(1))); + } else { + emit(MOV(result_dst, op[0])); + } + break; + case ir_unop_b2f: + if (ctx->Const.UniformBooleanTrue != 1) { + op[0].type = BRW_REGISTER_TYPE_UD; + result_dst.type = BRW_REGISTER_TYPE_UD; + emit(AND(result_dst, op[0], src_reg(0x3f800000u))); + result_dst.type = BRW_REGISTER_TYPE_F; + } else { + emit(MOV(result_dst, op[0])); + } + break; case ir_unop_f2b: - case ir_unop_i2b: { + case ir_unop_i2b: emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ)); - emit(AND(result_dst, result_src, src_reg(1))); + if (ctx->Const.UniformBooleanTrue == 1) { + emit(AND(result_dst, result_src, src_reg(1))); + } break; - } case ir_unop_trunc: emit(RNDZ(result_dst, op[0])); @@ -1553,7 +1667,7 @@ vec4_visitor::visit(ir_expression *ir) break; case ir_binop_ubo_load: { - ir_constant *uniform_block = ir->operands[0]->as_constant(); + ir_constant *const_uniform_block = ir->operands[0]->as_constant(); ir_constant *const_offset_ir = ir->operands[1]->as_constant(); unsigned const_offset = const_offset_ir ? 
const_offset_ir->value.u[0] : 0; src_reg offset; @@ -1563,8 +1677,31 @@ vec4_visitor::visit(ir_expression *ir) src_reg packed_consts = src_reg(this, glsl_type::vec4_type); packed_consts.type = result.type; - src_reg surf_index = - src_reg(prog_data->base.binding_table.ubo_start + uniform_block->value.u[0]); + src_reg surf_index; + + if (const_uniform_block) { + /* The block index is a constant, so just emit the binding table entry + * as an immediate. + */ + surf_index = src_reg(prog_data->base.binding_table.ubo_start + + const_uniform_block->value.u[0]); + } else { + /* The block index is not a constant. Evaluate the index expression + * per-channel and add the base UBO index; the generator will select + * a value from any live channel. + */ + surf_index = src_reg(this, glsl_type::uint_type); + emit(ADD(dst_reg(surf_index), op[0], + src_reg(prog_data->base.binding_table.ubo_start))); + + /* Assume this may touch any UBO. It would be nice to provide + * a tighter bound, but the array information is already lowered away. + */ + brw_mark_surface_used(&prog_data->base, + prog_data->base.binding_table.ubo_start + + shader_prog->NumUniformBlocks - 1); + } + if (const_offset_ir) { if (brw->gen >= 8) { /* Store the offset in a GRF so we can send-from-GRF. */ @@ -1581,14 +1718,27 @@ vec4_visitor::visit(ir_expression *ir) emit(SHR(dst_reg(offset), op[1], src_reg(4))); } - vec4_instruction *pull = + if (brw->gen >= 7) { + dst_reg grf_offset = dst_reg(this, glsl_type::int_type); + grf_offset.type = offset.type; + + emit(MOV(grf_offset, offset)); + emit(new(mem_ctx) vec4_instruction(this, - VS_OPCODE_PULL_CONSTANT_LOAD, + VS_OPCODE_PULL_CONSTANT_LOAD_GEN7, dst_reg(packed_consts), surf_index, - offset)); - pull->base_mrf = 14; - pull->mlen = 1; + src_reg(grf_offset))); + } else { + vec4_instruction *pull = + emit(new(mem_ctx) vec4_instruction(this, + VS_OPCODE_PULL_CONSTANT_LOAD, + dst_reg(packed_consts), + surf_index, + offset)); + pull->base_mrf = 14; + pull->mlen = 1; + } packed_consts.swizzle = swizzle_for_size(ir->type->vector_elements); packed_consts.swizzle += BRW_SWIZZLE4(const_offset % 16 / 4, @@ -1596,11 +1746,15 @@ vec4_visitor::visit(ir_expression *ir) const_offset % 16 / 4, const_offset % 16 / 4); - /* UBO bools are any nonzero int. We store bools as either 0 or 1. */ + /* UBO bools are any nonzero int. We need to convert them to use the + * value of true stored in ctx->Const.UniformBooleanTrue. + */ if (ir->type->base_type == GLSL_TYPE_BOOL) { emit(CMP(result_dst, packed_consts, src_reg(0u), BRW_CONDITIONAL_NZ)); - emit(AND(result_dst, result, src_reg(0x1))); + if (ctx->Const.UniformBooleanTrue == 1) { + emit(AND(result_dst, result, src_reg(1))); + } } else { emit(MOV(result_dst, packed_consts)); } @@ -1608,8 +1762,7 @@ vec4_visitor::visit(ir_expression *ir) } case ir_binop_vector_extract: - assert(!"should have been lowered by vec_index_to_cond_assign"); - break; + unreachable("should have been lowered by vec_index_to_cond_assign"); case ir_triop_fma: op[0] = fix_3src_operand(op[0]); @@ -1622,13 +1775,7 @@ vec4_visitor::visit(ir_expression *ir) break; case ir_triop_lrp: - op[0] = fix_3src_operand(op[0]); - op[1] = fix_3src_operand(op[1]); - op[2] = fix_3src_operand(op[2]); - /* Note that the instruction's argument order is reversed from GLSL - * and the IR. 
- */ - emit(LRP(result_dst, op[2], op[1], op[0])); + emit_lrp(result_dst, op[0], op[1], op[2]); break; case ir_triop_csel: @@ -1655,17 +1802,14 @@ vec4_visitor::visit(ir_expression *ir) break; case ir_triop_vector_insert: - assert(!"should have been lowered by lower_vector_insert"); - break; + unreachable("should have been lowered by lower_vector_insert"); case ir_quadop_bitfield_insert: - assert(!"not reached: should be handled by " + unreachable("not reached: should be handled by " "bitfield_insert_to_bfm_bfi\n"); - break; case ir_quadop_vector: - assert(!"not reached: should be handled by lower_quadop_vector"); - break; + unreachable("not reached: should be handled by lower_quadop_vector"); case ir_unop_pack_half_2x16: emit_pack_half_2x16(result_dst, op[0]); @@ -1681,16 +1825,16 @@ vec4_visitor::visit(ir_expression *ir) case ir_unop_unpack_snorm_4x8: case ir_unop_unpack_unorm_2x16: case ir_unop_unpack_unorm_4x8: - assert(!"not reached: should be handled by lower_packing_builtins"); - break; + unreachable("not reached: should be handled by lower_packing_builtins"); case ir_unop_unpack_half_2x16_split_x: case ir_unop_unpack_half_2x16_split_y: case ir_binop_pack_half_2x16_split: - assert(!"not reached: should not occur in vertex shader"); - break; + case ir_unop_interpolate_at_centroid: + case ir_binop_interpolate_at_sample: + case ir_binop_interpolate_at_offset: + unreachable("not reached: should not occur in vertex shader"); case ir_binop_ldexp: - assert(!"not reached: should be handled by ldexp_to_arith()"); - break; + unreachable("not reached: should be handled by ldexp_to_arith()"); } } @@ -1875,7 +2019,8 @@ get_assignment_lhs(ir_dereference *ir, vec4_visitor *v) void vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src, - const struct glsl_type *type, uint32_t predicate) + const struct glsl_type *type, + enum brw_predicate predicate) { if (type->base_type == GLSL_TYPE_STRUCT) { for (unsigned int i = 0; i < type->length; i++) { @@ -1981,7 +2126,7 @@ void vec4_visitor::visit(ir_assignment *ir) { dst_reg dst = get_assignment_lhs(ir->lhs, this); - uint32_t predicate = BRW_PREDICATE_NONE; + enum brw_predicate predicate = BRW_PREDICATE_NONE; if (!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()) { @@ -2068,9 +2213,7 @@ void vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir) { if (ir->type->base_type == GLSL_TYPE_STRUCT) { - foreach_list(node, &ir->components) { - ir_constant *field_value = (ir_constant *)node; - + foreach_in_list(ir_constant, field_value, &ir->components) { emit_constant_values(dst, field_value); } return; @@ -2135,11 +2278,12 @@ vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir) emit(MOV(*dst, src_reg(ir->value.u[i]))); break; case GLSL_TYPE_BOOL: - emit(MOV(*dst, src_reg(ir->value.b[i]))); + emit(MOV(*dst, + src_reg(ir->value.b[i] != 0 ? 
ctx->Const.UniformBooleanTrue + : 0))); break; default: - assert(!"Non-float/uint/int/bool constant"); - break; + unreachable("Non-float/uint/int/bool constant"); } remaining_writemask &= ~dst->writemask; @@ -2163,7 +2307,7 @@ vec4_visitor::visit_atomic_counter_intrinsic(ir_call *ir) ir->actual_parameters.get_head()); ir_variable *location = deref->variable_referenced(); unsigned surf_index = (prog_data->base.binding_table.abo_start + - location->data.atomic.buffer_index); + location->data.binding); /* Calculate the surface offset */ src_reg offset(this, glsl_type::uint_type); @@ -2205,20 +2349,21 @@ vec4_visitor::visit(ir_call *ir) !strcmp("__intrinsic_atomic_predecrement", callee)) { visit_atomic_counter_intrinsic(ir); } else { - assert(!"Unsupported intrinsic."); + unreachable("Unsupported intrinsic."); } } src_reg -vec4_visitor::emit_mcs_fetch(ir_texture *ir, src_reg coordinate, int sampler) +vec4_visitor::emit_mcs_fetch(ir_texture *ir, src_reg coordinate, src_reg sampler) { vec4_instruction *inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXF_MCS); inst->base_mrf = 2; inst->mlen = 1; - inst->sampler = sampler; inst->dst = dst_reg(this, glsl_type::uvec4_type); inst->dst.writemask = WRITEMASK_XYZW; + inst->src[1] = sampler; + /* parameters are: u, v, r, lod; lod will always be zero due to api restrictions */ int param_base = inst->base_mrf; int coord_mask = (1 << ir->coordinate->type->vector_elements) - 1; @@ -2234,12 +2379,56 @@ vec4_visitor::emit_mcs_fetch(ir_texture *ir, src_reg coordinate, int sampler) return src_reg(inst->dst); } +static bool +is_high_sampler(struct brw_context *brw, src_reg sampler) +{ + if (brw->gen < 8 && !brw->is_haswell) + return false; + + return sampler.file != IMM || sampler.fixed_hw_reg.dw1.ud >= 16; +} + void vec4_visitor::visit(ir_texture *ir) { - int sampler = + uint32_t sampler = _mesa_get_sampler_uniform_value(ir->sampler, shader_prog, prog); + ir_rvalue *nonconst_sampler_index = + _mesa_get_sampler_array_nonconst_index(ir->sampler); + + /* Handle non-constant sampler array indexing */ + src_reg sampler_reg; + if (nonconst_sampler_index) { + /* The highest sampler which may be used by this operation is + * the last element of the array. Mark it here, because the generator + * doesn't have enough information to determine the bound. + */ + uint32_t array_size = ir->sampler->as_dereference_array() + ->array->type->array_size(); + + uint32_t max_used = sampler + array_size - 1; + if (ir->op == ir_tg4 && brw->gen < 8) { + max_used += prog_data->base.binding_table.gather_texture_start; + } else { + max_used += prog_data->base.binding_table.texture_start; + } + + brw_mark_surface_used(&prog_data->base, max_used); + + /* Emit code to evaluate the actual indexing expression */ + nonconst_sampler_index->accept(this); + dst_reg temp(this, glsl_type::uint_type); + emit(ADD(temp, this->result, src_reg(sampler))) + ->force_writemask_all = true; + sampler_reg = src_reg(temp); + } else { + /* Single sampler, or constant array index; the indexing expression + * is just an immediate. + */ + sampler_reg = src_reg(sampler); + } + /* When tg4 is used with the degenerate ZERO/ONE swizzles, don't bother * emitting anything other than setting up the constant result. 
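    *
    * E.g. a gather from a channel swizzled to SWIZZLE_ONE is known to
    * be vec4(1) up front, so it reduces to a single constant MOV and
    * no sampler message is sent.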
    */
@@ -2307,7 +2496,7 @@ vec4_visitor::visit(ir_texture *ir)
       sample_index_type = ir->lod_info.sample_index->type;
 
      if (brw->gen >= 7 && key->tex.compressed_multisample_layout_mask & (1<<sampler))
-         mcs = emit_mcs_fetch(ir, coordinate, sampler);
+         mcs = emit_mcs_fetch(ir, coordinate, sampler_reg);
      else
         mcs = src_reg(0u);
   }
 
-   vec4_instruction *inst = NULL;
+   enum opcode opcode;
+
    switch (ir->op) {
-   case ir_tex:
-   case ir_txl:
-      inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXL);
-      break;
-   case ir_txd:
-      inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXD);
-      break;
-   case ir_txf:
-      inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXF);
-      break;
-   case ir_txf_ms:
-      inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXF_MS);
-      break;
-   case ir_txs:
-      inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXS);
-      break;
-   case ir_tg4:
-      if (has_nonconstant_offset)
-         inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TG4_OFFSET);
-      else
-         inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TG4);
-      break;
-   case ir_query_levels:
-      inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXS);
-      break;
+   case ir_tex: opcode = SHADER_OPCODE_TXL; break;
+   case ir_txl: opcode = SHADER_OPCODE_TXL; break;
+   case ir_txd: opcode = SHADER_OPCODE_TXD; break;
+   case ir_txf: opcode = SHADER_OPCODE_TXF; break;
+   case ir_txf_ms: opcode = SHADER_OPCODE_TXF_CMS; break;
+   case ir_txs: opcode = SHADER_OPCODE_TXS; break;
+   case ir_tg4: opcode = has_nonconstant_offset
+                         ? SHADER_OPCODE_TG4_OFFSET : SHADER_OPCODE_TG4; break;
+   case ir_query_levels: opcode = SHADER_OPCODE_TXS; break;
    case ir_txb:
-      assert(!"TXB is not valid for vertex shaders.");
-      break;
+      unreachable("TXB is not valid for vertex shaders.");
    case ir_lod:
-      assert(!"LOD is not valid for vertex shaders.");
-      break;
+      unreachable("LOD is not valid for vertex shaders.");
    default:
-      assert(!"Unrecognized tex op");
+      unreachable("Unrecognized tex op");
    }
 
-   bool use_texture_offset = ir->offset != NULL && ir->op != ir_txf;
+   vec4_instruction *inst = new(mem_ctx) vec4_instruction(this, opcode);
 
-   /* Texel offsets go in the message header; Gen4 also requires headers. */
-   inst->header_present = use_texture_offset || brw->gen < 5 || ir->op == ir_tg4;
+   if (ir->offset != NULL && ir->op != ir_txf)
+      inst->texture_offset = brw_texture_offset(ctx, ir->offset->as_constant());
+
+   /* Stuff the channel select bits in the top of the texture offset */
+   if (ir->op == ir_tg4)
+      inst->texture_offset |= gather_channel(ir, sampler) << 16;
+
+   /* The message header is necessary for:
+    * - Gen4 (always)
+    * - Texel offsets
+    * - Gather channel selection
+    * - Sampler indices too large to fit in a 4-bit value. 
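+    *
+    * The last case is what is_high_sampler() above tests: the message
+    * descriptor's sampler index field is only 4 bits, so an index of
+    * 16 or more (or any dynamically computed, non-immediate index)
+    * has to be routed through the header instead.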
+ */ + inst->header_present = + brw->gen < 5 || inst->texture_offset != 0 || ir->op == ir_tg4 || + is_high_sampler(brw, sampler_reg); inst->base_mrf = 2; inst->mlen = inst->header_present + 1; /* always at least one */ - inst->sampler = sampler; inst->dst = dst_reg(this, ir->type); inst->dst.writemask = WRITEMASK_XYZW; inst->shadow_compare = ir->shadow_comparitor != NULL; - if (use_texture_offset) - inst->texture_offset = brw_texture_offset(ctx, ir->offset->as_constant()); - - /* Stuff the channel select bits in the top of the texture offset */ - if (ir->op == ir_tg4) - inst->texture_offset |= gather_channel(ir, sampler)<<16; + inst->src[1] = sampler_reg; /* MRF for the first parameter */ int param_base = inst->base_mrf + inst->header_present; @@ -2430,7 +2609,7 @@ vec4_visitor::visit(ir_texture *ir) } else if (ir->op == ir_txf_ms) { emit(MOV(dst_reg(MRF, param_base + 1, sample_index_type, WRITEMASK_X), sample_index)); - if (brw->gen >= 7) + if (brw->gen >= 7) { /* MCS data is in the first channel of `mcs`, but we need to get it into * the .y channel of the second vec4 of params, so replicate .x across * the whole vec4 and then mask off everything except .y @@ -2438,6 +2617,7 @@ vec4_visitor::visit(ir_texture *ir) mcs.swizzle = BRW_SWIZZLE_XXXX; emit(MOV(dst_reg(MRF, param_base + 1, glsl_type::uint_type, WRITEMASK_Y), mcs)); + } inst->mlen++; } else if (ir->op == ir_txd) { const glsl_type *type = lod_type; @@ -2489,19 +2669,50 @@ vec4_visitor::visit(ir_texture *ir) if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE && type->sampler_array) { emit_math(SHADER_OPCODE_INT_QUOTIENT, - with_writemask(inst->dst, WRITEMASK_Z), + writemask(inst->dst, WRITEMASK_Z), src_reg(inst->dst), src_reg(6)); } } + if (brw->gen == 6 && ir->op == ir_tg4) { + emit_gen6_gather_wa(key->tex.gen6_gather_wa[sampler], inst->dst); + } + swizzle_result(ir, src_reg(inst->dst), sampler); } +/** + * Apply workarounds for Gen6 gather with UINT/SINT + */ +void +vec4_visitor::emit_gen6_gather_wa(uint8_t wa, dst_reg dst) +{ + if (!wa) + return; + + int width = (wa & WA_8BIT) ? 8 : 16; + dst_reg dst_f = dst; + dst_f.type = BRW_REGISTER_TYPE_F; + + /* Convert from UNORM to UINT */ + emit(MUL(dst_f, src_reg(dst_f), src_reg((float)((1 << width) - 1)))); + emit(MOV(dst, src_reg(dst_f))); + + if (wa & WA_SIGN) { + /* Reinterpret the UINT value as a signed INT value by + * shifting the sign bit into place, then shifting back + * preserving sign. + */ + emit(SHL(dst, src_reg(dst), src_reg(32 - width))); + emit(ASR(dst, src_reg(dst), src_reg(32 - width))); + } +} + /** * Set up the gather channel based on the swizzle, for gather4. 
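 *
 * For example, a texture whose swizzle selects SWIZZLE_Y for the
 * gather component yields channel 1, which the caller above packs
 * into bits 16-17 of texture_offset (the `<< 16`) for the sampler
 * message.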
*/ uint32_t -vec4_visitor::gather_channel(ir_texture *ir, int sampler) +vec4_visitor::gather_channel(ir_texture *ir, uint32_t sampler) { ir_constant *chan = ir->lod_info.component->as_constant(); int swiz = GET_SWZ(key->tex.swizzles[sampler], chan->value.i[0]); @@ -2517,13 +2728,12 @@ vec4_visitor::gather_channel(ir_texture *ir, int sampler) case SWIZZLE_Z: return 2; case SWIZZLE_W: return 3; default: - assert(!"Not reached"); /* zero, one swizzles handled already */ - return 0; + unreachable("Not reached"); /* zero, one swizzles handled already */ } } void -vec4_visitor::swizzle_result(ir_texture *ir, src_reg orig_val, int sampler) +vec4_visitor::swizzle_result(ir_texture *ir, src_reg orig_val, uint32_t sampler) { int s = key->tex.swizzles[sampler]; @@ -2580,15 +2790,15 @@ vec4_visitor::swizzle_result(ir_texture *ir, src_reg orig_val, int sampler) } void -vec4_visitor::visit(ir_return *ir) +vec4_visitor::visit(ir_return *) { - assert(!"not reached"); + unreachable("not reached"); } void -vec4_visitor::visit(ir_discard *ir) +vec4_visitor::visit(ir_discard *) { - assert(!"not reached"); + unreachable("not reached"); } void @@ -2602,7 +2812,7 @@ vec4_visitor::visit(ir_if *ir) if (brw->gen == 6) { emit_if_gen6(ir); } else { - uint32_t predicate; + enum brw_predicate predicate; emit_bool_to_cond_code(ir->condition, &predicate); emit(IF(predicate)); } @@ -2623,13 +2833,13 @@ vec4_visitor::visit(ir_if *ir) void vec4_visitor::visit(ir_emit_vertex *) { - assert(!"not reached"); + unreachable("not reached"); } void vec4_visitor::visit(ir_end_primitive *) { - assert(!"not reached"); + unreachable("not reached"); } void @@ -3104,9 +3314,7 @@ vec4_visitor::move_grf_array_access_to_scratch() * to scratch due to having any array access on them, and where in * scratch. */ - foreach_list(node, &this->instructions) { - vec4_instruction *inst = (vec4_instruction *)node; - + foreach_in_list(vec4_instruction, inst, &instructions) { if (inst->dst.file == GRF && inst->dst.reladdr && scratch_loc[inst->dst.reg] == -1) { scratch_loc[inst->dst.reg] = c->last_scratch; @@ -3129,9 +3337,7 @@ vec4_visitor::move_grf_array_access_to_scratch() * we may generate a new scratch_write instruction after the one * we're processing. */ - foreach_list_safe(node, &this->instructions) { - vec4_instruction *inst = (vec4_instruction *)node; - + foreach_in_list_safe(vec4_instruction, inst, &instructions) { /* Set up the annotation tracking for new generated instructions. */ base_ir = inst->ir; current_annotation = inst->annotation; @@ -3215,9 +3421,7 @@ vec4_visitor::move_uniform_array_access_to_pull_constants() * Note that we don't move constant-indexed accesses to arrays. No * testing has been done of the performance impact of this choice. */ - foreach_list_safe(node, &this->instructions) { - vec4_instruction *inst = (vec4_instruction *)node; - + foreach_in_list_safe(vec4_instruction, inst, &instructions) { for (int i = 0 ; i < 3; i++) { if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr) continue; @@ -3228,12 +3432,14 @@ vec4_visitor::move_uniform_array_access_to_pull_constants() * add it. 
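       *
       * For example (illustrative): a `uniform vec4 data[16];` accessed
       * as data[i] with a non-constant i gets all 16 vec4s appended to
       * pull_param the first time such an access is seen.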
*/ if (pull_constant_loc[uniform] == -1) { - const float **values = &prog_data->param[uniform * 4]; + const gl_constant_value **values = + &stage_prog_data->param[uniform * 4]; - pull_constant_loc[uniform] = prog_data->nr_pull_params / 4; + pull_constant_loc[uniform] = stage_prog_data->nr_pull_params / 4; + assert(uniform < uniform_array_size); for (int j = 0; j < uniform_size[uniform] * 4; j++) { - prog_data->pull_param[prog_data->nr_pull_params++] + stage_prog_data->pull_param[stage_prog_data->nr_pull_params++] = values[j]; } } @@ -3280,14 +3486,18 @@ vec4_visitor::vec4_visitor(struct brw_context *brw, const struct brw_vec4_prog_key *key, struct brw_vec4_prog_data *prog_data, struct gl_shader_program *shader_prog, - struct brw_shader *shader, + gl_shader_stage stage, void *mem_ctx, bool debug_flag, bool no_spills, shader_time_shader_type st_base, shader_time_shader_type st_written, shader_time_shader_type st_reset) - : sanity_param_count(0), + : backend_visitor(brw, shader_prog, prog, &prog_data->base, stage), + c(c), + key(key), + prog_data(prog_data), + sanity_param_count(0), fail_msg(NULL), first_non_payload_grf(0), need_all_constants_in_pull_buffer(false), @@ -3297,11 +3507,6 @@ vec4_visitor::vec4_visitor(struct brw_context *brw, st_written(st_written), st_reset(st_reset) { - this->brw = brw; - this->ctx = &brw->ctx; - this->shader_prog = shader_prog; - this->shader = shader; - this->mem_ctx = mem_ctx; this->failed = false; @@ -3309,12 +3514,6 @@ vec4_visitor::vec4_visitor(struct brw_context *brw, this->current_annotation = NULL; memset(this->output_reg_annotation, 0, sizeof(this->output_reg_annotation)); - this->c = c; - this->prog = prog; - this->key = key; - this->prog_data = prog_data; - this->stage_prog_data = &prog_data->base; - this->variable_ht = hash_table_ctor(0, hash_table_pointer_hash, hash_table_pointer_compare); @@ -3331,6 +3530,17 @@ vec4_visitor::vec4_visitor(struct brw_context *brw, this->max_grf = brw->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF; this->uniforms = 0; + + /* Initialize uniform_array_size to at least 1 because pre-gen6 VS requires + * at least one. See setup_uniforms() in brw_vec4.cpp. + */ + this->uniform_array_size = 1; + if (prog_data) { + this->uniform_array_size = MAX2(stage_prog_data->nr_params, 1); + } + + this->uniform_size = rzalloc_array(mem_ctx, int, this->uniform_array_size); + this->uniform_vector_size = rzalloc_array(mem_ctx, int, this->uniform_array_size); } vec4_visitor::~vec4_visitor()