diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index f9447d7c391..f9a08a011f2 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -25,6 +25,7 @@
 extern "C" {
 #include "main/macros.h"
 #include "program/prog_parameter.h"
+#include "program/sampler.h"
 }
 
 namespace brw {
@@ -38,6 +39,7 @@ src_reg::src_reg(dst_reg reg)
    this->reg_offset = reg.reg_offset;
    this->type = reg.type;
    this->reladdr = reg.reladdr;
+   this->fixed_hw_reg = reg.fixed_hw_reg;
 
    int swizzles[4];
    int next_chan = 0;
@@ -68,45 +70,182 @@ dst_reg::dst_reg(src_reg reg)
    this->type = reg.type;
    this->writemask = WRITEMASK_XYZW;
    this->reladdr = reg.reladdr;
+   this->fixed_hw_reg = reg.fixed_hw_reg;
+}
+
+vec4_instruction::vec4_instruction(vec4_visitor *v,
+                                   enum opcode opcode, dst_reg dst,
+                                   src_reg src0, src_reg src1, src_reg src2)
+{
+   this->opcode = opcode;
+   this->dst = dst;
+   this->src[0] = src0;
+   this->src[1] = src1;
+   this->src[2] = src2;
+   this->ir = v->base_ir;
+   this->annotation = v->current_annotation;
 }
 
 vec4_instruction *
-vec4_visitor::emit(enum opcode opcode, dst_reg dst,
-                   src_reg src0, src_reg src1, src_reg src2)
+vec4_visitor::emit(vec4_instruction *inst)
 {
-   vec4_instruction *inst = new(mem_ctx) vec4_instruction();
+   this->instructions.push_tail(inst);
 
-   inst->opcode = opcode;
-   inst->dst = dst;
-   inst->src[0] = src0;
-   inst->src[1] = src1;
-   inst->src[2] = src2;
-   inst->ir = this->base_ir;
-   inst->annotation = this->current_annotation;
+   return inst;
+}
 
-   this->instructions.push_tail(inst);
+vec4_instruction *
+vec4_visitor::emit_before(vec4_instruction *inst, vec4_instruction *new_inst)
+{
+   new_inst->ir = inst->ir;
+   new_inst->annotation = inst->annotation;
+
+   inst->insert_before(new_inst);
 
    return inst;
 }
 
+vec4_instruction *
+vec4_visitor::emit(enum opcode opcode, dst_reg dst,
+                   src_reg src0, src_reg src1, src_reg src2)
+{
+   return emit(new(mem_ctx) vec4_instruction(this, opcode, dst,
+                                             src0, src1, src2));
+}
+
 vec4_instruction *
 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
 {
-   return emit(opcode, dst, src0, src1, src_reg());
+   return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0, src1));
 }
 
 vec4_instruction *
 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
 {
-   assert(dst.writemask != 0);
-   return emit(opcode, dst, src0, src_reg(), src_reg());
+   return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0));
 }
 
 vec4_instruction *
 vec4_visitor::emit(enum opcode opcode)
 {
-   return emit(opcode, dst_reg(), src_reg(), src_reg(), src_reg());
+   return emit(new(mem_ctx) vec4_instruction(this, opcode, dst_reg()));
+}
+
+#define ALU1(op)							\
+   vec4_instruction *							\
+   vec4_visitor::op(dst_reg dst, src_reg src0)				\
+   {									\
+      return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst,	\
+					   src0);			\
+   }
+
+#define ALU2(op)							\
+   vec4_instruction *							\
+   vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1)		\
+   {									\
+      return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst,	\
+					   src0, src1);			\
+   }
+
+ALU1(NOT)
+ALU1(MOV)
+ALU1(FRC)
+ALU1(RNDD)
+ALU1(RNDE)
+ALU1(RNDZ)
+ALU2(ADD)
+ALU2(MUL)
+ALU2(MACH)
+ALU2(AND)
+ALU2(OR)
+ALU2(XOR)
+ALU2(DP3)
+ALU2(DP4)
+
+/** Gen4 predicated IF. */
+vec4_instruction *
+vec4_visitor::IF(uint32_t predicate)
+{
+   vec4_instruction *inst;
+
+   inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF);
+   inst->predicate = predicate;
+
+   return inst;
+}
+
+/** Gen6+ IF with embedded comparison. */
+vec4_instruction *
+vec4_visitor::IF(src_reg src0, src_reg src1, uint32_t condition)
+{
+   assert(intel->gen >= 6);
+
+   vec4_instruction *inst;
+
+   resolve_ud_negate(&src0);
+   resolve_ud_negate(&src1);
+
+   inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF, dst_null_d(),
+                                        src0, src1);
+   inst->conditional_mod = condition;
+
+   return inst;
+}
+
+/**
+ * CMP: Sets the low bit of the destination channels with the result
+ * of the comparison, while the upper bits are undefined, and updates
+ * the flag register with the packed 16 bits of the result.
+ */
+vec4_instruction *
+vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1, uint32_t condition)
+{
+   vec4_instruction *inst;
+
+   /* original gen4 does type conversion to the destination type
+    * before comparison, producing garbage results for floating
+    * point comparisons.
+    */
+   if (intel->gen == 4) {
+      dst.type = src0.type;
+      if (dst.file == HW_REG)
+         dst.fixed_hw_reg.type = dst.type;
+   }
+
+   resolve_ud_negate(&src0);
+   resolve_ud_negate(&src1);
+
+   inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_CMP, dst, src0, src1);
+   inst->conditional_mod = condition;
+
+   return inst;
+}
+
+vec4_instruction *
+vec4_visitor::SCRATCH_READ(dst_reg dst, src_reg index)
+{
+   vec4_instruction *inst;
+
+   inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_READ,
+                                        dst, index);
+   inst->base_mrf = 14;
+   inst->mlen = 1;
+
+   return inst;
+}
+
+vec4_instruction *
+vec4_visitor::SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index)
+{
+   vec4_instruction *inst;
+
+   inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_WRITE,
+                                        dst, src, index);
+   inst->base_mrf = 13;
+   inst->mlen = 2;
+
+   return inst;
 }
 
 void
@@ -125,9 +264,15 @@ vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
    /* The gen6 math instruction ignores the source modifiers --
    * swizzle, abs, negate, and at least some parts of the register
    * region description.
+    *
+    * While it would seem that this MOV could be avoided at this point
+    * in the case that the swizzle is matched up with the destination
+    * writemask, note that uniform packing and register allocation
+    * could rearrange our swizzle, so let's leave this matter up to
+    * copy propagation later.
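+    *
+    * (Illustration, not from the original patch: the pair emitted here,
+    *
+    *    MOV tmp.xyzw, src.<swizzle>
+    *    math dst, tmp
+    *
+    * is exactly the pattern a later copy-propagation pass can collapse
+    * back into a single instruction once the swizzle proves safe.)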
*/ src_reg temp_src = src_reg(this, glsl_type::vec4_type); - emit(BRW_OPCODE_MOV, dst_reg(temp_src), src); + emit(MOV(dst_reg(temp_src), src)); if (dst.writemask != WRITEMASK_XYZW) { /* The gen6 math instruction must be align1, so we can't do @@ -137,7 +282,7 @@ vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src) emit(opcode, temp_dst, temp_src); - emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst)); + emit(MOV(dst, src_reg(temp_dst))); } else { emit(opcode, dst, temp_src); } @@ -168,7 +313,9 @@ vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src) return; } - if (intel->gen >= 6) { + if (intel->gen >= 7) { + emit(opcode, dst, src); + } else if (intel->gen == 6) { return emit_math1_gen6(opcode, dst, src); } else { return emit_math1_gen4(opcode, dst, src); @@ -188,11 +335,13 @@ vec4_visitor::emit_math2_gen6(enum opcode opcode, */ expanded = src_reg(this, glsl_type::vec4_type); - emit(BRW_OPCODE_MOV, dst_reg(expanded), src0); + expanded.type = src0.type; + emit(MOV(dst_reg(expanded), src0)); src0 = expanded; expanded = src_reg(this, glsl_type::vec4_type); - emit(BRW_OPCODE_MOV, dst_reg(expanded), src1); + expanded.type = src1.type; + emit(MOV(dst_reg(expanded), src1)); src1 = expanded; if (dst.writemask != WRITEMASK_XYZW) { @@ -200,10 +349,11 @@ vec4_visitor::emit_math2_gen6(enum opcode opcode, * writemasks. */ dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type); + temp_dst.type = dst.type; emit(opcode, temp_dst, src0, src1); - emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst)); + emit(MOV(dst, src_reg(temp_dst))); } else { emit(opcode, dst, src0, src1); } @@ -222,9 +372,19 @@ void vec4_visitor::emit_math(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1) { - assert(opcode == SHADER_OPCODE_POW); + switch (opcode) { + case SHADER_OPCODE_POW: + case SHADER_OPCODE_INT_QUOTIENT: + case SHADER_OPCODE_INT_REMAINDER: + break; + default: + assert(!"not reached: unsupported binary math opcode"); + return; + } - if (intel->gen >= 6) { + if (intel->gen >= 7) { + emit(opcode, dst, src0, src1); + } else if (intel->gen == 6) { return emit_math2_gen6(opcode, dst, src0, src1); } else { return emit_math2_gen4(opcode, dst, src0, src1); @@ -234,8 +394,8 @@ vec4_visitor::emit_math(enum opcode opcode, void vec4_visitor::visit_instructions(const exec_list *list) { - foreach_iter(exec_list_iterator, iter, *list) { - ir_instruction *ir = (ir_instruction *)iter.get(); + foreach_list(node, list) { + ir_instruction *ir = (ir_instruction *)node; base_ir = ir; ir->accept(this); @@ -294,7 +454,11 @@ vec4_visitor::virtual_grf_alloc(int size) virtual_grf_array_size *= 2; virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int, virtual_grf_array_size); + virtual_grf_reg_map = reralloc(mem_ctx, virtual_grf_reg_map, int, + virtual_grf_array_size); } + virtual_grf_reg_map[virtual_grf_count] = virtual_grf_reg_count; + virtual_grf_reg_count += size; virtual_grf_sizes[virtual_grf_count] = size; return virtual_grf_count++; } @@ -343,9 +507,7 @@ vec4_visitor::setup_uniform_values(int loc, const glsl_type *type) float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f; if (type->is_matrix()) { - const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT, - type->vector_elements, - 1); + const glsl_type *column = type->column_type(); for (unsigned int i = 0; i < type->matrix_columns; i++) { offset += setup_uniform_values(loc + offset, column); @@ -360,35 +522,20 @@ vec4_visitor::setup_uniform_values(int loc, const glsl_type *type) case GLSL_TYPE_INT: case GLSL_TYPE_BOOL: for 
(unsigned int i = 0; i < type->vector_elements; i++) { - int slot = this->uniforms * 4 + i; - switch (type->base_type) { - case GLSL_TYPE_FLOAT: - c->prog_data.param_convert[slot] = PARAM_NO_CONVERT; - break; - case GLSL_TYPE_UINT: - c->prog_data.param_convert[slot] = PARAM_CONVERT_F2U; - break; - case GLSL_TYPE_INT: - c->prog_data.param_convert[slot] = PARAM_CONVERT_F2I; - break; - case GLSL_TYPE_BOOL: - c->prog_data.param_convert[slot] = PARAM_CONVERT_F2B; - break; - default: - assert(!"not reached"); - c->prog_data.param_convert[slot] = PARAM_NO_CONVERT; - break; - } - c->prog_data.param[slot] = &values[i]; + c->prog_data.param[this->uniforms * 4 + i] = &values[i]; } + /* Set up pad elements to get things aligned to a vec4 boundary. */ for (unsigned int i = type->vector_elements; i < 4; i++) { - c->prog_data.param_convert[this->uniforms * 4 + i] = - PARAM_CONVERT_ZERO; - c->prog_data.param[this->uniforms * 4 + i] = NULL; + static float zero = 0; + + c->prog_data.param[this->uniforms * 4 + i] = &zero; } - this->uniform_size[this->uniforms] = type->vector_elements; + /* Track the size of this uniform vector, for future packing of + * uniforms. + */ + this->uniform_vector_size[this->uniforms] = type->vector_elements; this->uniforms++; return 1; @@ -416,6 +563,37 @@ vec4_visitor::setup_uniform_values(int loc, const glsl_type *type) } } +void +vec4_visitor::setup_uniform_clipplane_values() +{ + gl_clip_plane *clip_planes = brw_select_clip_planes(ctx); + + /* Pre-Gen6, we compact clip planes. For example, if the user + * enables just clip planes 0, 1, and 3, we will enable clip planes + * 0, 1, and 2 in the hardware, and we'll move clip plane 3 to clip + * plane 2. This simplifies the implementation of the Gen6 clip + * thread. + * + * In Gen6 and later, we don't compact clip planes, because this + * simplifies the implementation of gl_ClipDistance. + */ + int compacted_clipplane_index = 0; + for (int i = 0; i < c->key.nr_userclip_plane_consts; ++i) { + if (intel->gen < 6 && + !(c->key.userclip_planes_enabled_gen_4_5 & (1 << i))) { + continue; + } + this->uniform_vector_size[this->uniforms] = 4; + this->userplane[compacted_clipplane_index] = dst_reg(UNIFORM, this->uniforms); + this->userplane[compacted_clipplane_index].type = BRW_REGISTER_TYPE_F; + for (int j = 0; j < 4; ++j) { + c->prog_data.param[this->uniforms * 4 + j] = &clip_planes[i][j]; + } + ++compacted_clipplane_index; + ++this->uniforms; + } +} + /* Our support for builtin uniforms is even scarier than non-builtin. * It sits on top of the PROG_STATE_VAR parameters that are * automatically updated from GL context state. @@ -436,20 +614,18 @@ vec4_visitor::setup_builtin_uniform_values(ir_variable *ir) (gl_state_index *)slots[i].tokens); float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f; - this->uniform_size[this->uniforms] = 0; + this->uniform_vector_size[this->uniforms] = 0; /* Add each of the unique swizzled channels of the element. * This will end up matching the size of the glsl_type of this field. 
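+       * (Reference note, not part of the original patch: GET_SWZ(swz, j)
+       * extracts the 3-bit channel selector for component j, so a scalar
+       * state value broadcast with an XXXX swizzle yields swiz == 0 for
+       * every j.)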
*/ int last_swiz = -1; for (unsigned int j = 0; j < 4; j++) { int swiz = GET_SWZ(slots[i].swizzle, j); - if (swiz == last_swiz) - break; last_swiz = swiz; c->prog_data.param[this->uniforms * 4 + j] = &values[swiz]; - c->prog_data.param_convert[this->uniforms * 4 + j] = PARAM_NO_CONVERT; - this->uniform_size[this->uniforms]++; + if (swiz <= last_swiz) + this->uniform_vector_size[this->uniforms]++; } this->uniforms++; } @@ -462,59 +638,76 @@ vec4_visitor::variable_storage(ir_variable *var) } void -vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir) +vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir, uint32_t *predicate) { ir_expression *expr = ir->as_expression(); + *predicate = BRW_PREDICATE_NORMAL; + if (expr) { src_reg op[2]; vec4_instruction *inst; assert(expr->get_num_operands() <= 2); for (unsigned int i = 0; i < expr->get_num_operands(); i++) { - assert(expr->operands[i]->type->is_scalar()); - expr->operands[i]->accept(this); op[i] = this->result; + + resolve_ud_negate(&op[i]); } switch (expr->operation) { case ir_unop_logic_not: - inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], src_reg(1)); + inst = emit(AND(dst_null_d(), op[0], src_reg(1))); inst->conditional_mod = BRW_CONDITIONAL_Z; break; case ir_binop_logic_xor: - inst = emit(BRW_OPCODE_XOR, dst_null_d(), op[0], op[1]); + inst = emit(XOR(dst_null_d(), op[0], op[1])); inst->conditional_mod = BRW_CONDITIONAL_NZ; break; case ir_binop_logic_or: - inst = emit(BRW_OPCODE_OR, dst_null_d(), op[0], op[1]); + inst = emit(OR(dst_null_d(), op[0], op[1])); inst->conditional_mod = BRW_CONDITIONAL_NZ; break; case ir_binop_logic_and: - inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], op[1]); + inst = emit(AND(dst_null_d(), op[0], op[1])); inst->conditional_mod = BRW_CONDITIONAL_NZ; break; case ir_unop_f2b: if (intel->gen >= 6) { - inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0.0f)); + emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ)); } else { - inst = emit(BRW_OPCODE_MOV, dst_null_f(), op[0]); + inst = emit(MOV(dst_null_f(), op[0])); + inst->conditional_mod = BRW_CONDITIONAL_NZ; } - inst->conditional_mod = BRW_CONDITIONAL_NZ; break; case ir_unop_i2b: if (intel->gen >= 6) { - inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0)); + emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ)); } else { - inst = emit(BRW_OPCODE_MOV, dst_null_d(), op[0]); + inst = emit(MOV(dst_null_d(), op[0])); + inst->conditional_mod = BRW_CONDITIONAL_NZ; } - inst->conditional_mod = BRW_CONDITIONAL_NZ; + break; + + case ir_binop_all_equal: + inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z)); + *predicate = BRW_PREDICATE_ALIGN16_ALL4H; + break; + + case ir_binop_any_nequal: + inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ)); + *predicate = BRW_PREDICATE_ALIGN16_ANY4H; + break; + + case ir_unop_any: + inst = emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ)); + *predicate = BRW_PREDICATE_ALIGN16_ANY4H; break; case ir_binop_greater: @@ -522,12 +715,9 @@ vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir) case ir_binop_less: case ir_binop_lequal: case ir_binop_equal: - case ir_binop_all_equal: case ir_binop_nequal: - case ir_binop_any_nequal: - inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); - inst->conditional_mod = - brw_conditional_for_comparison(expr->operation); + emit(CMP(dst_null_d(), op[0], op[1], + brw_conditional_for_comparison(expr->operation))); break; default: @@ -539,12 +729,14 @@ vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir) ir->accept(this); + 
resolve_ud_negate(&this->result); + if (intel->gen >= 6) { - vec4_instruction *inst = emit(BRW_OPCODE_AND, dst_null_d(), - this->result, src_reg(1)); + vec4_instruction *inst = emit(AND(dst_null_d(), + this->result, src_reg(1))); inst->conditional_mod = BRW_CONDITIONAL_NZ; } else { - vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst_null_d(), this->result); + vec4_instruction *inst = emit(MOV(dst_null_d(), this->result)); inst->conditional_mod = BRW_CONDITIONAL_NZ; } } @@ -560,52 +752,41 @@ vec4_visitor::emit_if_gen6(ir_if *ir) if (expr) { src_reg op[2]; - vec4_instruction *inst; dst_reg temp; assert(expr->get_num_operands() <= 2); for (unsigned int i = 0; i < expr->get_num_operands(); i++) { - assert(expr->operands[i]->type->is_scalar() || - expr->operation == ir_binop_any_nequal || - expr->operation == ir_binop_all_equal); - expr->operands[i]->accept(this); op[i] = this->result; } switch (expr->operation) { case ir_unop_logic_not: - inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); - inst->conditional_mod = BRW_CONDITIONAL_Z; + emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_Z)); return; case ir_binop_logic_xor: - inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]); - inst->conditional_mod = BRW_CONDITIONAL_NZ; + emit(IF(op[0], op[1], BRW_CONDITIONAL_NZ)); return; case ir_binop_logic_or: temp = dst_reg(this, glsl_type::bool_type); - emit(BRW_OPCODE_OR, temp, op[0], op[1]); - inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0)); - inst->conditional_mod = BRW_CONDITIONAL_NZ; + emit(OR(temp, op[0], op[1])); + emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ)); return; case ir_binop_logic_and: temp = dst_reg(this, glsl_type::bool_type); - emit(BRW_OPCODE_AND, temp, op[0], op[1]); - inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0)); - inst->conditional_mod = BRW_CONDITIONAL_NZ; + emit(AND(temp, op[0], op[1])); + emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ)); return; case ir_unop_f2b: - inst = emit(BRW_OPCODE_IF, dst_null_f(), op[0], src_reg(0)); - inst->conditional_mod = BRW_CONDITIONAL_NZ; + emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ)); return; case ir_unop_i2b: - inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); - inst->conditional_mod = BRW_CONDITIONAL_NZ; + emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ)); return; case ir_binop_greater: @@ -614,31 +795,28 @@ vec4_visitor::emit_if_gen6(ir_if *ir) case ir_binop_lequal: case ir_binop_equal: case ir_binop_nequal: - inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]); - inst->conditional_mod = - brw_conditional_for_comparison(expr->operation); + emit(IF(op[0], op[1], + brw_conditional_for_comparison(expr->operation))); return; case ir_binop_all_equal: - inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]); - inst->conditional_mod = BRW_CONDITIONAL_Z; - - inst = emit(BRW_OPCODE_IF); - inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H; + emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z)); + emit(IF(BRW_PREDICATE_ALIGN16_ALL4H)); return; case ir_binop_any_nequal: - inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]); - inst->conditional_mod = BRW_CONDITIONAL_NZ; + emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ)); + emit(IF(BRW_PREDICATE_ALIGN16_ANY4H)); + return; - inst = emit(BRW_OPCODE_IF); - inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; + case ir_unop_any: + emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ)); + emit(IF(BRW_PREDICATE_ALIGN16_ANY4H)); return; default: assert(!"not reached"); - inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], 
src_reg(0)); - inst->conditional_mod = BRW_CONDITIONAL_NZ; + emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ)); return; } return; @@ -646,9 +824,7 @@ vec4_visitor::emit_if_gen6(ir_if *ir) ir->condition->accept(this); - vec4_instruction *inst = emit(BRW_OPCODE_IF, dst_null_d(), - this->result, src_reg(0)); - inst->conditional_mod = BRW_CONDITIONAL_NZ; + emit(IF(this->result, src_reg(0), BRW_CONDITIONAL_NZ)); } void @@ -662,6 +838,19 @@ vec4_visitor::visit(ir_variable *ir) switch (ir->mode) { case ir_var_in: reg = new(mem_ctx) dst_reg(ATTR, ir->location); + + /* Do GL_FIXED rescaling for GLES2.0. Our GL_FIXED attributes + * come in as floating point conversions of the integer values. + */ + for (int i = ir->location; i < ir->location + type_size(ir->type); i++) { + if (!c->key.gl_fixed_input_size[i]) + continue; + + dst_reg dst = *reg; + dst.type = brw_type_for_base_type(ir->type); + dst.writemask = (1 << c->key.gl_fixed_input_size[i]) - 1; + emit(MUL(dst, src_reg(dst), src_reg(1.0f / 65536.0f))); + } break; case ir_var_out: @@ -670,7 +859,9 @@ vec4_visitor::visit(ir_variable *ir) for (int i = 0; i < type_size(ir->type); i++) { output_reg[ir->location + i] = *reg; output_reg[ir->location + i].reg_offset = i; - output_reg[ir->location + i].type = BRW_REGISTER_TYPE_F; + output_reg[ir->location + i].type = + brw_type_for_base_type(ir->type->get_scalar_type()); + output_reg_annotation[ir->location + i] = ir->name; } break; @@ -682,6 +873,11 @@ vec4_visitor::visit(ir_variable *ir) case ir_var_uniform: reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms); + /* Track how big the whole uniform variable is, in case we need to put a + * copy of its data into pull constants for array access. + */ + this->uniform_size[this->uniforms] = type_size(ir->type); + if (!strncmp(ir->name, "gl_", 3)) { setup_builtin_uniform_values(ir); } else { @@ -689,6 +885,27 @@ vec4_visitor::visit(ir_variable *ir) } break; + case ir_var_system_value: + /* VertexID is stored by the VF as the last vertex element, but + * we don't represent it with a flag in inputs_read, so we call + * it VERT_ATTRIB_MAX, which setup_attributes() picks up on. + */ + reg = new(mem_ctx) dst_reg(ATTR, VERT_ATTRIB_MAX); + prog_data->uses_vertexid = true; + + switch (ir->location) { + case SYSTEM_VALUE_VERTEX_ID: + reg->writemask = WRITEMASK_X; + break; + case SYSTEM_VALUE_INSTANCE_ID: + reg->writemask = WRITEMASK_Y; + break; + default: + assert(!"not reached"); + break; + } + break; + default: assert(!"not reached"); } @@ -700,58 +917,46 @@ vec4_visitor::visit(ir_variable *ir) void vec4_visitor::visit(ir_loop *ir) { - ir_dereference_variable *counter = NULL; - - fail("not yet\n"); + dst_reg counter; /* We don't want debugging output to print the whole body of the * loop as the annotation. 
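+    * (Note, not part of the original patch: base_ir feeds the
+    * per-instruction annotations, which is why the code below re-points
+    * it at ir->counter, ir->from, ir->to, and ir->increment as each
+    * sub-expression is visited.)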
*/ this->base_ir = NULL; - if (ir->counter != NULL) - counter = new(ir) ir_dereference_variable(ir->counter); - - if (ir->from != NULL) { - assert(ir->counter != NULL); + if (ir->counter != NULL) { + this->base_ir = ir->counter; + ir->counter->accept(this); + counter = *(variable_storage(ir->counter)); - ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL); + if (ir->from != NULL) { + this->base_ir = ir->from; + ir->from->accept(this); - a->accept(this); - delete a; + emit(MOV(counter, this->result)); + } } emit(BRW_OPCODE_DO); if (ir->to) { - ir_expression *e = - new(ir) ir_expression(ir->cmp, glsl_type::bool_type, - counter, ir->to); - ir_if *if_stmt = new(ir) ir_if(e); + this->base_ir = ir->to; + ir->to->accept(this); - ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break); + emit(CMP(dst_null_d(), src_reg(counter), this->result, + brw_conditional_for_comparison(ir->cmp))); - if_stmt->then_instructions.push_tail(brk); - - if_stmt->accept(this); - - delete if_stmt; - delete e; - delete brk; + vec4_instruction *inst = emit(BRW_OPCODE_BREAK); + inst->predicate = BRW_PREDICATE_NORMAL; } visit_instructions(&ir->body_instructions); - if (ir->increment) { - ir_expression *e = - new(ir) ir_expression(ir_binop_add, counter->type, - counter, ir->increment); - ir_assignment *a = new(ir) ir_assignment(counter, e, NULL); - - a->accept(this); - delete a; - delete e; + if (ir->increment) { + this->base_ir = ir->increment; + ir->increment->accept(this); + emit(ADD(counter, src_reg(counter), this->result)); } emit(BRW_OPCODE_WHILE); @@ -796,7 +1001,7 @@ vec4_visitor::visit(ir_function *ir) } } -GLboolean +bool vec4_visitor::try_emit_sat(ir_expression *ir) { ir_rvalue *sat_src = ir->as_rvalue_to_saturate(); @@ -808,7 +1013,7 @@ vec4_visitor::try_emit_sat(ir_expression *ir) this->result = src_reg(this, ir->type); vec4_instruction *inst; - inst = emit(BRW_OPCODE_MOV, dst_reg(this->result), src); + inst = emit(MOV(dst_reg(this->result), src)); inst->saturate = true; return true; @@ -822,11 +1027,10 @@ vec4_visitor::emit_bool_comparison(unsigned int op, if (intel->gen < 5) dst.type = src0.type; - vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst, src0, src1); - inst->conditional_mod = brw_conditional_for_comparison(op); + emit(CMP(dst, src0, src1, brw_conditional_for_comparison(op))); dst.type = BRW_REGISTER_TYPE_D; - emit(BRW_OPCODE_AND, dst, src_reg(dst), src_reg(0x1)); + emit(AND(dst, src_reg(dst), src_reg(0x1))); } void @@ -884,7 +1088,7 @@ vec4_visitor::visit(ir_expression *ir) /* Note that BRW_OPCODE_NOT is not appropriate here, since it is * ones complement of the whole register, not just bit 0. 
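+       * (Worked example, not part of the original patch: for a canonical
+       * boolean 1, XOR with 1 yields 0, while NOT would yield 0xfffffffe,
+       * which is nonzero and therefore still "true" under an NZ flag
+       * test.)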
*/ - emit(BRW_OPCODE_XOR, result_dst, op[0], src_reg(1)); + emit(XOR(result_dst, op[0], src_reg(1))); break; case ir_unop_neg: op[0].negate = !op[0].negate; @@ -897,16 +1101,14 @@ vec4_visitor::visit(ir_expression *ir) break; case ir_unop_sign: - emit(BRW_OPCODE_MOV, result_dst, src_reg(0.0f)); + emit(MOV(result_dst, src_reg(0.0f))); - inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f)); - inst->conditional_mod = BRW_CONDITIONAL_G; - inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1.0f)); + emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_G)); + inst = emit(MOV(result_dst, src_reg(1.0f))); inst->predicate = BRW_PREDICATE_NORMAL; - inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f)); - inst->conditional_mod = BRW_CONDITIONAL_L; - inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(-1.0f)); + emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_L)); + inst = emit(MOV(result_dst, src_reg(-1.0f))); inst->predicate = BRW_PREDICATE_NORMAL; break; @@ -944,19 +1146,40 @@ vec4_visitor::visit(ir_expression *ir) break; case ir_binop_add: - emit(BRW_OPCODE_ADD, result_dst, op[0], op[1]); + emit(ADD(result_dst, op[0], op[1])); break; case ir_binop_sub: assert(!"not reached: should be handled by ir_sub_to_add_neg"); break; case ir_binop_mul: - emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]); + if (ir->type->is_integer()) { + /* For integer multiplication, the MUL uses the low 16 bits + * of one of the operands (src0 on gen6, src1 on gen7). The + * MACH accumulates in the contribution of the upper 16 bits + * of that operand. + * + * FINISHME: Emit just the MUL if we know an operand is small + * enough. + */ + struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D); + + emit(MUL(acc, op[0], op[1])); + emit(MACH(dst_null_d(), op[0], op[1])); + emit(MOV(result_dst, src_reg(acc))); + } else { + emit(MUL(result_dst, op[0], op[1])); + } break; case ir_binop_div: - assert(!"not reached: should be handled by ir_div_to_mul_rcp"); + /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */ + assert(ir->type->is_integer()); + emit_math(SHADER_OPCODE_INT_QUOTIENT, result_dst, op[0], op[1]); + break; case ir_binop_mod: - assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); + /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */ + assert(ir->type->is_integer()); + emit_math(SHADER_OPCODE_INT_REMAINDER, result_dst, op[0], op[1]); break; case ir_binop_less: @@ -965,14 +1188,9 @@ vec4_visitor::visit(ir_expression *ir) case ir_binop_gequal: case ir_binop_equal: case ir_binop_nequal: { - dst_reg temp = result_dst; - /* original gen4 does implicit conversion before comparison. */ - if (intel->gen < 5) - temp.type = op[0].type; - - inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); - inst->conditional_mod = brw_conditional_for_comparison(ir->operation); - emit(BRW_OPCODE_AND, result_dst, this->result, src_reg(0x1)); + emit(CMP(result_dst, op[0], op[1], + brw_conditional_for_comparison(ir->operation))); + emit(AND(result_dst, result_src, src_reg(0x1))); break; } @@ -980,65 +1198,48 @@ vec4_visitor::visit(ir_expression *ir) /* "==" operator producing a scalar boolean. 
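+       * (Note, not part of the original patch: the ALIGN16_ALL4H
+       * predicate used below fires only when the preceding CMP set the
+       * flag in all four channels, collapsing the vector comparison to
+       * one boolean.)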
*/ if (ir->operands[0]->type->is_vector() || ir->operands[1]->type->is_vector()) { - inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); - inst->conditional_mod = BRW_CONDITIONAL_Z; - - emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); - inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); + emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z)); + emit(MOV(result_dst, src_reg(0))); + inst = emit(MOV(result_dst, src_reg(1))); inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H; } else { - dst_reg temp = result_dst; - /* original gen4 does implicit conversion before comparison. */ - if (intel->gen < 5) - temp.type = op[0].type; - - inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1)); + emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_Z)); + emit(AND(result_dst, result_src, src_reg(0x1))); } break; case ir_binop_any_nequal: /* "!=" operator producing a scalar boolean. */ if (ir->operands[0]->type->is_vector() || ir->operands[1]->type->is_vector()) { - inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); - inst->conditional_mod = BRW_CONDITIONAL_NZ; + emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ)); - emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); - inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); + emit(MOV(result_dst, src_reg(0))); + inst = emit(MOV(result_dst, src_reg(1))); inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; } else { - dst_reg temp = result_dst; - /* original gen4 does implicit conversion before comparison. */ - if (intel->gen < 5) - temp.type = op[0].type; - - inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1)); + emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_NZ)); + emit(AND(result_dst, result_src, src_reg(0x1))); } break; case ir_unop_any: - inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0)); - inst->conditional_mod = BRW_CONDITIONAL_NZ; + emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ)); + emit(MOV(result_dst, src_reg(0))); - emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); - - inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); + inst = emit(MOV(result_dst, src_reg(1))); inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; break; case ir_binop_logic_xor: - emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]); + emit(XOR(result_dst, op[0], op[1])); break; case ir_binop_logic_or: - emit(BRW_OPCODE_OR, result_dst, op[0], op[1]); + emit(OR(result_dst, op[0], op[1])); break; case ir_binop_logic_and: - emit(BRW_OPCODE_AND, result_dst, op[0], op[1]); + emit(AND(result_dst, op[0], op[1])); break; case ir_binop_dot: @@ -1060,52 +1261,54 @@ vec4_visitor::visit(ir_expression *ir) case ir_unop_b2f: case ir_unop_b2i: case ir_unop_f2i: - emit(BRW_OPCODE_MOV, result_dst, op[0]); + emit(MOV(result_dst, op[0])); break; case ir_unop_f2b: case ir_unop_i2b: { - dst_reg temp = result_dst; - /* original gen4 does implicit conversion before comparison. 
*/ - if (intel->gen < 5) - temp.type = op[0].type; - - inst = emit(BRW_OPCODE_CMP, temp, op[0], src_reg(0.0f)); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - inst = emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(1)); + emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ)); + emit(AND(result_dst, result_src, src_reg(1))); break; } case ir_unop_trunc: - emit(BRW_OPCODE_RNDZ, result_dst, op[0]); + emit(RNDZ(result_dst, op[0])); break; case ir_unop_ceil: op[0].negate = !op[0].negate; - inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]); + inst = emit(RNDD(result_dst, op[0])); this->result.negate = true; break; case ir_unop_floor: - inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]); + inst = emit(RNDD(result_dst, op[0])); break; case ir_unop_fract: - inst = emit(BRW_OPCODE_FRC, result_dst, op[0]); + inst = emit(FRC(result_dst, op[0])); break; case ir_unop_round_even: - emit(BRW_OPCODE_RNDE, result_dst, op[0]); + emit(RNDE(result_dst, op[0])); break; case ir_binop_min: - inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]); - inst->conditional_mod = BRW_CONDITIONAL_L; + if (intel->gen >= 6) { + inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_L; + } else { + emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_L)); - inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]); - inst->predicate = BRW_PREDICATE_NORMAL; + inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]); + inst->predicate = BRW_PREDICATE_NORMAL; + } break; case ir_binop_max: - inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]); - inst->conditional_mod = BRW_CONDITIONAL_G; + if (intel->gen >= 6) { + inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_G; + } else { + emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_G)); - inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]); - inst->predicate = BRW_PREDICATE_NORMAL; + inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]); + inst->predicate = BRW_PREDICATE_NORMAL; + } break; case ir_binop_pow: @@ -1113,21 +1316,27 @@ vec4_visitor::visit(ir_expression *ir) break; case ir_unop_bit_not: - inst = emit(BRW_OPCODE_NOT, result_dst, op[0]); + inst = emit(NOT(result_dst, op[0])); break; case ir_binop_bit_and: - inst = emit(BRW_OPCODE_AND, result_dst, op[0], op[1]); + inst = emit(AND(result_dst, op[0], op[1])); break; case ir_binop_bit_xor: - inst = emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]); + inst = emit(XOR(result_dst, op[0], op[1])); break; case ir_binop_bit_or: - inst = emit(BRW_OPCODE_OR, result_dst, op[0], op[1]); + inst = emit(OR(result_dst, op[0], op[1])); break; case ir_binop_lshift: + inst = emit(BRW_OPCODE_SHL, result_dst, op[0], op[1]); + break; + case ir_binop_rshift: - assert(!"GLSL 1.30 features unsupported"); + if (ir->type->base_type == GLSL_TYPE_INT) + inst = emit(BRW_OPCODE_ASR, result_dst, op[0], op[1]); + else + inst = emit(BRW_OPCODE_SHR, result_dst, op[0], op[1]); break; case ir_quadop_vector: @@ -1225,14 +1434,13 @@ vec4_visitor::visit(ir_dereference_array *ir) } else { index_reg = src_reg(this, glsl_type::int_type); - emit(BRW_OPCODE_MUL, dst_reg(index_reg), - this->result, src_reg(element_size)); + emit(MUL(dst_reg(index_reg), this->result, src_reg(element_size))); } if (src.reladdr) { src_reg temp = src_reg(this, glsl_type::int_type); - emit(BRW_OPCODE_ADD, dst_reg(temp), *src.reladdr, index_reg); + emit(ADD(dst_reg(temp), *src.reladdr, index_reg)); index_reg = temp; } @@ -1303,18 +1511,18 @@ get_assignment_lhs(ir_dereference *ir, vec4_visitor *v) void 
vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src, - const struct glsl_type *type, bool predicated) + const struct glsl_type *type, uint32_t predicate) { if (type->base_type == GLSL_TYPE_STRUCT) { for (unsigned int i = 0; i < type->length; i++) { - emit_block_move(dst, src, type->fields.structure[i].type, predicated); + emit_block_move(dst, src, type->fields.structure[i].type, predicate); } return; } if (type->is_array()) { for (unsigned int i = 0; i < type->length; i++) { - emit_block_move(dst, src, type->fields.array, predicated); + emit_block_move(dst, src, type->fields.array, predicate); } return; } @@ -1326,7 +1534,7 @@ vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src, type->vector_elements, 1); for (int i = 0; i < type->matrix_columns; i++) { - emit_block_move(dst, src, vec_type, predicated); + emit_block_move(dst, src, vec_type, predicate); } return; } @@ -1338,22 +1546,78 @@ vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src, dst->writemask = (1 << type->vector_elements) - 1; - /* Do we need to worry about swizzling a swizzle? */ - assert(src->swizzle = BRW_SWIZZLE_NOOP); src->swizzle = swizzle_for_size(type->vector_elements); - vec4_instruction *inst = emit(BRW_OPCODE_MOV, *dst, *src); - if (predicated) - inst->predicate = BRW_PREDICATE_NORMAL; + vec4_instruction *inst = emit(MOV(*dst, *src)); + inst->predicate = predicate; dst->reg_offset++; src->reg_offset++; } + +/* If the RHS processing resulted in an instruction generating a + * temporary value, and it would be easy to rewrite the instruction to + * generate its result right into the LHS instead, do so. This ends + * up reliably removing instructions where it can be tricky to do so + * later without real UD chain information. + */ +bool +vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir, + dst_reg dst, + src_reg src, + vec4_instruction *pre_rhs_inst, + vec4_instruction *last_rhs_inst) +{ + /* This could be supported, but it would take more smarts. */ + if (ir->condition) + return false; + + if (pre_rhs_inst == last_rhs_inst) + return false; /* No instructions generated to work with. */ + + /* Make sure the last instruction generated our source reg. */ + if (src.file != GRF || + src.file != last_rhs_inst->dst.file || + src.reg != last_rhs_inst->dst.reg || + src.reg_offset != last_rhs_inst->dst.reg_offset || + src.reladdr || + src.abs || + src.negate || + last_rhs_inst->predicate != BRW_PREDICATE_NONE) + return false; + + /* Check that that last instruction fully initialized the channels + * we want to use, in the order we want to use them. We could + * potentially reswizzle the operands of many instructions so that + * we could handle out of order channels, but don't yet. + */ + + for (unsigned i = 0; i < 4; i++) { + if (dst.writemask & (1 << i)) { + if (!(last_rhs_inst->dst.writemask & (1 << i))) + return false; + + if (BRW_GET_SWZ(src.swizzle, i) != i) + return false; + } + } + + /* Success! Rewrite the instruction. 
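+    * (Illustration, not part of the original patch: for "x = a + b"
+    * this turns "ADD tmp, a, b; MOV x, tmp" into a single "ADD x, a, b".)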
*/ + last_rhs_inst->dst.file = dst.file; + last_rhs_inst->dst.reg = dst.reg; + last_rhs_inst->dst.reg_offset = dst.reg_offset; + last_rhs_inst->dst.reladdr = dst.reladdr; + last_rhs_inst->dst.writemask &= dst.writemask; + + return true; +} + void vec4_visitor::visit(ir_assignment *ir) { dst_reg dst = get_assignment_lhs(ir->lhs, this); + uint32_t predicate = BRW_PREDICATE_NONE; if (!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()) { @@ -1361,17 +1625,32 @@ vec4_visitor::visit(ir_assignment *ir) src_reg src = this->result; if (ir->condition) { - emit_bool_to_cond_code(ir->condition); + emit_bool_to_cond_code(ir->condition, &predicate); } - emit_block_move(&dst, &src, ir->rhs->type, ir->condition != NULL); + /* emit_block_move doesn't account for swizzles in the source register. + * This should be ok, since the source register is a structure or an + * array, and those can't be swizzled. But double-check to be sure. + */ + assert(src.swizzle == + (ir->rhs->type->is_matrix() + ? swizzle_for_size(ir->rhs->type->vector_elements) + : BRW_SWIZZLE_NOOP)); + + emit_block_move(&dst, &src, ir->rhs->type, predicate); return; } /* Now we're down to just a scalar/vector with writemasks. */ int i; + vec4_instruction *pre_rhs_inst, *last_rhs_inst; + pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail(); + ir->rhs->accept(this); + + last_rhs_inst = (vec4_instruction *)this->instructions.get_tail(); + src_reg src = this->result; int swizzles[4]; @@ -1404,15 +1683,17 @@ vec4_visitor::visit(ir_assignment *ir) src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1], swizzles[2], swizzles[3]); + if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) { + return; + } + if (ir->condition) { - emit_bool_to_cond_code(ir->condition); + emit_bool_to_cond_code(ir->condition, &predicate); } for (i = 0; i < type_size(ir->lhs->type); i++) { - vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src); - - if (ir->condition) - inst->predicate = BRW_PREDICATE_NORMAL; + vec4_instruction *inst = emit(MOV(dst, src)); + inst->predicate = predicate; dst.reg_offset++; src.reg_offset++; @@ -1440,39 +1721,64 @@ vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir) if (ir->type->is_matrix()) { for (int i = 0; i < ir->type->matrix_columns; i++) { + float *vec = &ir->value.f[i * ir->type->vector_elements]; + for (int j = 0; j < ir->type->vector_elements; j++) { dst->writemask = 1 << j; dst->type = BRW_REGISTER_TYPE_F; - emit(BRW_OPCODE_MOV, *dst, - src_reg(ir->value.f[i * ir->type->vector_elements + j])); + emit(MOV(*dst, src_reg(vec[j]))); } dst->reg_offset++; } return; } + int remaining_writemask = (1 << ir->type->vector_elements) - 1; + for (int i = 0; i < ir->type->vector_elements; i++) { + if (!(remaining_writemask & (1 << i))) + continue; + dst->writemask = 1 << i; dst->type = brw_type_for_base_type(ir->type); + /* Find other components that match the one we're about to + * write. Emits fewer instructions for things like vec4(0.5, + * 1.5, 1.5, 1.5). + */ + for (int j = i + 1; j < ir->type->vector_elements; j++) { + if (ir->type->base_type == GLSL_TYPE_BOOL) { + if (ir->value.b[i] == ir->value.b[j]) + dst->writemask |= (1 << j); + } else { + /* u, i, and f storage all line up, so no need for a + * switch case for comparing each type. 
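+	  * (Note, not part of the original patch: for the
+	  * vec4(0.5, 1.5, 1.5, 1.5) example above, this folds the three
+	  * matching channels together, so only "MOV dst.x, 0.5F" and
+	  * "MOV dst.yzw, 1.5F" are emitted; a standalone sketch of this
+	  * loop appears at the end of this page.)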
+ */ + if (ir->value.u[i] == ir->value.u[j]) + dst->writemask |= (1 << j); + } + } + switch (ir->type->base_type) { case GLSL_TYPE_FLOAT: - emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.f[i])); + emit(MOV(*dst, src_reg(ir->value.f[i]))); break; case GLSL_TYPE_INT: - emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.i[i])); + emit(MOV(*dst, src_reg(ir->value.i[i]))); break; case GLSL_TYPE_UINT: - emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.u[i])); + emit(MOV(*dst, src_reg(ir->value.u[i]))); break; case GLSL_TYPE_BOOL: - emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.b[i])); + emit(MOV(*dst, src_reg(ir->value.b[i]))); break; default: assert(!"Non-float/uint/int/bool constant"); break; } + + remaining_writemask &= ~dst->writemask; } dst->reg_offset++; } @@ -1495,7 +1801,178 @@ vec4_visitor::visit(ir_call *ir) void vec4_visitor::visit(ir_texture *ir) { - assert(!"not reached"); + int sampler = _mesa_get_sampler_uniform_value(ir->sampler, prog, &vp->Base); + sampler = vp->Base.SamplerUnits[sampler]; + + /* Should be lowered by do_lower_texture_projection */ + assert(!ir->projector); + + vec4_instruction *inst = NULL; + switch (ir->op) { + case ir_tex: + case ir_txl: + inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXL); + break; + case ir_txd: + inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXD); + break; + case ir_txf: + inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXF); + break; + case ir_txs: + inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXS); + break; + case ir_txb: + assert(!"TXB is not valid for vertex shaders."); + } + + /* Texel offsets go in the message header; Gen4 also requires headers. */ + inst->header_present = ir->offset || intel->gen < 5; + inst->base_mrf = 2; + inst->mlen = inst->header_present + 1; /* always at least one */ + inst->sampler = sampler; + inst->dst = dst_reg(this, ir->type); + inst->shadow_compare = ir->shadow_comparitor != NULL; + + if (ir->offset != NULL) + inst->texture_offset = brw_texture_offset(ir->offset->as_constant()); + + /* MRF for the first parameter */ + int param_base = inst->base_mrf + inst->header_present; + + if (ir->op == ir_txs) { + ir->lod_info.lod->accept(this); + int writemask = intel->gen == 4 ? 
WRITEMASK_W : WRITEMASK_X; + emit(MOV(dst_reg(MRF, param_base, ir->lod_info.lod->type, writemask), + this->result)); + } else { + int i, coord_mask = 0, zero_mask = 0; + /* Load the coordinate */ + /* FINISHME: gl_clamp_mask and saturate */ + for (i = 0; i < ir->coordinate->type->vector_elements; i++) + coord_mask |= (1 << i); + for (; i < 4; i++) + zero_mask |= (1 << i); + + ir->coordinate->accept(this); + emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, coord_mask), + this->result)); + emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask), + src_reg(0))); + /* Load the shadow comparitor */ + if (ir->shadow_comparitor) { + ir->shadow_comparitor->accept(this); + emit(MOV(dst_reg(MRF, param_base + 1, ir->shadow_comparitor->type, + WRITEMASK_X), + this->result)); + inst->mlen++; + } + + /* Load the LOD info */ + if (ir->op == ir_txl) { + int mrf, writemask; + if (intel->gen >= 5) { + mrf = param_base + 1; + if (ir->shadow_comparitor) { + writemask = WRITEMASK_Y; + /* mlen already incremented */ + } else { + writemask = WRITEMASK_X; + inst->mlen++; + } + } else /* intel->gen == 4 */ { + mrf = param_base; + writemask = WRITEMASK_Z; + } + ir->lod_info.lod->accept(this); + emit(MOV(dst_reg(MRF, mrf, ir->lod_info.lod->type, writemask), + this->result)); + } else if (ir->op == ir_txf) { + ir->lod_info.lod->accept(this); + emit(MOV(dst_reg(MRF, param_base, ir->lod_info.lod->type, WRITEMASK_W), + this->result)); + } else if (ir->op == ir_txd) { + const glsl_type *type = ir->lod_info.grad.dPdx->type; + + ir->lod_info.grad.dPdx->accept(this); + src_reg dPdx = this->result; + ir->lod_info.grad.dPdy->accept(this); + src_reg dPdy = this->result; + + if (intel->gen >= 5) { + dPdx.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y); + dPdy.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y); + emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XZ), dPdx)); + emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_YW), dPdy)); + inst->mlen++; + + if (ir->type->vector_elements == 3) { + dPdx.swizzle = BRW_SWIZZLE_ZZZZ; + dPdy.swizzle = BRW_SWIZZLE_ZZZZ; + emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_X), dPdx)); + emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_Y), dPdy)); + inst->mlen++; + } + } else /* intel->gen == 4 */ { + emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XYZ), dPdx)); + emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_XYZ), dPdy)); + inst->mlen += 2; + } + } + } + + emit(inst); + + swizzle_result(ir, src_reg(inst->dst), sampler); +} + +void +vec4_visitor::swizzle_result(ir_texture *ir, src_reg orig_val, int sampler) +{ + this->result = orig_val; + + int s = c->key.tex.swizzles[sampler]; + + if (ir->op == ir_txs || ir->type == glsl_type::float_type + || s == SWIZZLE_NOOP) + return; + + int zero_mask = 0, one_mask = 0, copy_mask = 0; + int swizzle[4]; + + for (int i = 0; i < 4; i++) { + switch (GET_SWZ(s, i)) { + case SWIZZLE_ZERO: + zero_mask |= (1 << i); + break; + case SWIZZLE_ONE: + one_mask |= (1 << i); + break; + default: + copy_mask |= (1 << i); + swizzle[i] = GET_SWZ(s, i); + break; + } + } + + this->result = src_reg(this, ir->type); + dst_reg swizzled_result(this->result); + + if (copy_mask) { + orig_val.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); + swizzled_result.writemask = copy_mask; + emit(MOV(swizzled_result, orig_val)); + } + + if (zero_mask) { + swizzled_result.writemask = zero_mask; + emit(MOV(swizzled_result, src_reg(0.0f))); + } + + if (one_mask) { + 
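+      /* (Note, not part of the original patch: channels the swizzle maps
+       * to SWIZZLE_ONE, e.g. the alpha channel of a LUMINANCE texture,
+       * are simply written as 1.0f here.)
+       */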
swizzled_result.writemask = one_mask; + emit(MOV(swizzled_result, src_reg(1.0f))); + } } void @@ -1521,9 +1998,9 @@ vec4_visitor::visit(ir_if *ir) if (intel->gen == 6) { emit_if_gen6(ir); } else { - emit_bool_to_cond_code(ir->condition); - vec4_instruction *inst = emit(BRW_OPCODE_IF); - inst->predicate = BRW_PREDICATE_NORMAL; + uint32_t predicate; + emit_bool_to_cond_code(ir->condition, &predicate); + emit(IF(predicate)); } visit_instructions(&ir->then_instructions); @@ -1539,14 +2016,15 @@ vec4_visitor::visit(ir_if *ir) emit(BRW_OPCODE_ENDIF); } -int -vec4_visitor::emit_vue_header_gen4(int header_mrf) +void +vec4_visitor::emit_ndc_computation() { /* Get the position */ src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]); /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */ dst_reg ndc = dst_reg(this, glsl_type::vec4_type); + output_reg[BRW_VERT_RESULT_NDC] = ndc; current_annotation = "NDC"; dst_reg ndc_w = ndc; @@ -1558,32 +2036,39 @@ vec4_visitor::emit_vue_header_gen4(int header_mrf) dst_reg ndc_xyz = ndc; ndc_xyz.writemask = WRITEMASK_XYZ; - emit(BRW_OPCODE_MUL, ndc_xyz, pos, src_reg(ndc_w)); + emit(MUL(ndc_xyz, pos, src_reg(ndc_w))); +} - if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) || - c->key.nr_userclip || brw->has_negative_rhw_bug) { +void +vec4_visitor::emit_psiz_and_flags(struct brw_reg reg) +{ + if (intel->gen < 6 && + ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) || + c->key.userclip_active || brw->has_negative_rhw_bug)) { dst_reg header1 = dst_reg(this, glsl_type::uvec4_type); + dst_reg header1_w = header1; + header1_w.writemask = WRITEMASK_W; GLuint i; - emit(BRW_OPCODE_MOV, header1, 0u); + emit(MOV(header1, 0u)); if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { - assert(!"finishme: psiz"); - src_reg psiz; + src_reg psiz = src_reg(output_reg[VERT_RESULT_PSIZ]); - header1.writemask = WRITEMASK_W; - emit(BRW_OPCODE_MUL, header1, psiz, 1u << 11); - emit(BRW_OPCODE_AND, header1, src_reg(header1), 0x7ff << 8); + current_annotation = "Point size"; + emit(MUL(header1_w, psiz, src_reg((float)(1 << 11)))); + emit(AND(header1_w, src_reg(header1_w), 0x7ff << 8)); } - for (i = 0; i < c->key.nr_userclip; i++) { + current_annotation = "Clipping flags"; + for (i = 0; i < c->key.nr_userclip_plane_consts; i++) { vec4_instruction *inst; - inst = emit(BRW_OPCODE_DP4, dst_reg(brw_null_reg()), - pos, src_reg(c->userplane[i])); + inst = emit(DP4(dst_null_f(), src_reg(output_reg[VERT_RESULT_HPOS]), + src_reg(this->userplane[i]))); inst->conditional_mod = BRW_CONDITIONAL_L; - emit(BRW_OPCODE_OR, header1, src_reg(header1), 1u << i); + inst = emit(OR(header1_w, src_reg(header1_w), 1u << i)); inst->predicate = BRW_PREDICATE_NORMAL; } @@ -1602,104 +2087,120 @@ vec4_visitor::emit_vue_header_gen4(int header_mrf) brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_L, - brw_swizzle1(ndc, 3), + brw_swizzle1(output_reg[BRW_VERT_RESULT_NDC], 3), brw_imm_f(0)); brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6)); - brw_MOV(p, ndc, brw_imm_f(0)); + brw_MOV(p, output_reg[BRW_VERT_RESULT_NDC], brw_imm_f(0)); brw_set_predicate_control(p, BRW_PREDICATE_NONE); #endif } - header1.writemask = WRITEMASK_XYZW; - emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(header1)); - } else { - emit(BRW_OPCODE_MOV, retype(brw_message_reg(header_mrf++), - BRW_REGISTER_TYPE_UD), 0u); - } - - if (intel->gen == 5) { - /* There are 20 DWs (D0-D19) in VUE header on Ironlake: - * dword 0-3 (m1) of the header is indices, point width, clip 
flags. - * dword 4-7 (m2) is the ndc position (set above) - * dword 8-11 (m3) of the vertex header is the 4D space position - * dword 12-19 (m4,m5) of the vertex header is the user clip distance. - * m6 is a pad so that the vertex element data is aligned - * m7 is the first vertex data we fill. - */ - current_annotation = "NDC"; - emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc)); - - current_annotation = "gl_Position"; - emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos); - - /* user clip distance. */ - header_mrf += 2; - - /* Pad so that vertex element data is aligned. */ - header_mrf++; + emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), src_reg(header1))); + } else if (intel->gen < 6) { + emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), 0u)); } else { - /* There are 8 dwords in VUE header pre-Ironlake: - * dword 0-3 (m1) is indices, point width, clip flags. - * dword 4-7 (m2) is ndc position (set above) - * - * dword 8-11 (m3) is the first vertex data. - */ - current_annotation = "NDC"; - emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc)); - - current_annotation = "gl_Position"; - emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos); + emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), src_reg(0))); + if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { + emit(MOV(brw_writemask(reg, WRITEMASK_W), + src_reg(output_reg[VERT_RESULT_PSIZ]))); + } } - - return header_mrf; } -int -vec4_visitor::emit_vue_header_gen6(int header_mrf) +void +vec4_visitor::emit_clip_distances(struct brw_reg reg, int offset) { - struct brw_reg reg; + if (intel->gen < 6) { + /* Clip distance slots are set aside in gen5, but they are not used. It + * is not clear whether we actually need to set aside space for them, + * but the performance cost is negligible. + */ + return; + } - /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge: - * dword 0-3 (m2) of the header is indices, point width, clip flags. - * dword 4-7 (m3) is the 4D space position - * dword 8-15 (m4,m5) of the vertex header is the user clip distance if - * enabled. + /* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables): * - * m4 or 6 is the first vertex element data we fill. + * "If a linked set of shaders forming the vertex stage contains no + * static write to gl_ClipVertex or gl_ClipDistance, but the + * application has requested clipping against user clip planes through + * the API, then the coordinate written to gl_Position is used for + * comparison against the user clip planes." + * + * This function is only called if the shader didn't write to + * gl_ClipDistance. Accordingly, we use gl_ClipVertex to perform clipping + * if the user wrote to it; otherwise we use gl_Position. 
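+    *
+    * (Note, not part of the original patch: each enabled plane then
+    * contributes dot(plane, clip_vertex) through the DP4s emitted below,
+    * one clip distance per flag channel.)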
*/ + gl_vert_result clip_vertex = VERT_RESULT_CLIP_VERTEX; + if (!(c->prog_data.outputs_written + & BITFIELD64_BIT(VERT_RESULT_CLIP_VERTEX))) { + clip_vertex = VERT_RESULT_HPOS; + } - current_annotation = "indices, point width, clip flags"; - reg = brw_message_reg(header_mrf++); - emit(BRW_OPCODE_MOV, retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)); - if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { - emit(BRW_OPCODE_MOV, brw_writemask(reg, WRITEMASK_W), - src_reg(output_reg[VERT_RESULT_PSIZ])); - } - - current_annotation = "gl_Position"; - emit(BRW_OPCODE_MOV, - brw_message_reg(header_mrf++), src_reg(output_reg[VERT_RESULT_HPOS])); - - current_annotation = "user clip distances"; - if (c->key.nr_userclip) { - for (int i = 0; i < c->key.nr_userclip; i++) { - struct brw_reg m; - if (i < 4) - m = brw_message_reg(header_mrf); - else - m = brw_message_reg(header_mrf + 1); - - emit(BRW_OPCODE_DP4, - dst_reg(brw_writemask(m, 1 << (i & 3))), - src_reg(c->userplane[i])); - } - header_mrf += 2; + for (int i = 0; i + offset < c->key.nr_userclip_plane_consts && i < 4; + ++i) { + emit(DP4(dst_reg(brw_writemask(reg, 1 << i)), + src_reg(output_reg[clip_vertex]), + src_reg(this->userplane[i + offset]))); } +} - current_annotation = NULL; +void +vec4_visitor::emit_generic_urb_slot(dst_reg reg, int vert_result) +{ + assert (vert_result < VERT_RESULT_MAX); + reg.type = output_reg[vert_result].type; + current_annotation = output_reg_annotation[vert_result]; + /* Copy the register, saturating if necessary */ + vec4_instruction *inst = emit(MOV(reg, + src_reg(output_reg[vert_result]))); + if ((vert_result == VERT_RESULT_COL0 || + vert_result == VERT_RESULT_COL1 || + vert_result == VERT_RESULT_BFC0 || + vert_result == VERT_RESULT_BFC1) && + c->key.clamp_vertex_color) { + inst->saturate = true; + } +} - return header_mrf; +void +vec4_visitor::emit_urb_slot(int mrf, int vert_result) +{ + struct brw_reg hw_reg = brw_message_reg(mrf); + dst_reg reg = dst_reg(MRF, mrf); + reg.type = BRW_REGISTER_TYPE_F; + + switch (vert_result) { + case VERT_RESULT_PSIZ: + /* PSIZ is always in slot 0, and is coupled with other flags. */ + current_annotation = "indices, point width, clip flags"; + emit_psiz_and_flags(hw_reg); + break; + case BRW_VERT_RESULT_NDC: + current_annotation = "NDC"; + emit(MOV(reg, src_reg(output_reg[BRW_VERT_RESULT_NDC]))); + break; + case BRW_VERT_RESULT_HPOS_DUPLICATE: + case VERT_RESULT_HPOS: + current_annotation = "gl_Position"; + emit(MOV(reg, src_reg(output_reg[VERT_RESULT_HPOS]))); + break; + case VERT_RESULT_CLIP_DIST0: + case VERT_RESULT_CLIP_DIST1: + if (this->c->key.uses_clip_distance) { + emit_generic_urb_slot(reg, vert_result); + } else { + current_annotation = "user clip distances"; + emit_clip_distances(hw_reg, (vert_result - VERT_RESULT_CLIP_DIST0) * 4); + } + break; + case BRW_VERT_RESULT_PAD: + /* No need to write to this slot */ + break; + default: + emit_generic_urb_slot(reg, vert_result); + break; + } } static int @@ -1737,14 +2238,18 @@ vec4_visitor::emit_urb_writes() */ int base_mrf = 1; int mrf = base_mrf; - int urb_entry_size; - uint64_t outputs_remaining = c->prog_data.outputs_written; /* In the process of generating our URB write message contents, we * may need to unspill a register or load from an array. Those * reads would use MRFs 14-15. */ int max_usable_mrf = 13; + /* The following assertion verifies that max_usable_mrf causes an + * even-numbered amount of URB write data, which will meet gen6's + * requirements for length alignment. 
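+    * (Note, not part of the original patch: with base_mrf == 1 and
+    * max_usable_mrf == 13 the difference is 12, so a maximal first URB
+    * write stays even-length.)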
+ */ + assert ((max_usable_mrf - base_mrf) % 2 == 0); + /* FINISHME: edgeflag */ /* First mrf is the g0-based message header containing URB handles and such, @@ -1752,63 +2257,41 @@ vec4_visitor::emit_urb_writes() */ mrf++; - if (intel->gen >= 6) { - mrf = emit_vue_header_gen6(mrf); - } else { - mrf = emit_vue_header_gen4(mrf); + if (intel->gen < 6) { + emit_ndc_computation(); } /* Set up the VUE data for the first URB write */ - int attr; - for (attr = 0; attr < VERT_RESULT_MAX; attr++) { - if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr))) - continue; - - outputs_remaining &= ~BITFIELD64_BIT(attr); - - /* This is set up in the VUE header. */ - if (attr == VERT_RESULT_HPOS) - continue; - - /* This is loaded into the VUE header, and thus doesn't occupy - * an attribute slot. - */ - if (attr == VERT_RESULT_PSIZ) - continue; - - emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr])); + int slot; + for (slot = 0; slot < c->prog_data.vue_map.num_slots; ++slot) { + emit_urb_slot(mrf++, c->prog_data.vue_map.slot_to_vert_result[slot]); - /* If this was MRF 15, we can't fit anything more into this URB - * WRITE. Note that base_mrf of 1 means that MRF 15 is an - * even-numbered amount of URB write data, which will meet - * gen6's requirements for length alignment. + /* If this was max_usable_mrf, we can't fit anything more into this URB + * WRITE. */ if (mrf > max_usable_mrf) { - attr++; + slot++; break; } } + current_annotation = "URB write"; vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE); inst->base_mrf = base_mrf; inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf); - inst->eot = !outputs_remaining; - - urb_entry_size = mrf - base_mrf; + inst->eot = (slot >= c->prog_data.vue_map.num_slots); /* Optional second URB write */ - if (outputs_remaining) { + if (!inst->eot) { mrf = base_mrf + 1; - for (; attr < VERT_RESULT_MAX; attr++) { - if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr))) - continue; - + for (; slot < c->prog_data.vue_map.num_slots; ++slot) { assert(mrf < max_usable_mrf); - emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr])); + emit_urb_slot(mrf++, c->prog_data.vue_map.slot_to_vert_result[slot]); } + current_annotation = "URB write"; inst = emit(VS_OPCODE_URB_WRITE); inst->base_mrf = base_mrf; inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf); @@ -1819,14 +2302,7 @@ vec4_visitor::emit_urb_writes() * those, since we're doing interleaved writes. */ inst->offset = (max_usable_mrf - base_mrf) / 2; - - urb_entry_size += mrf - base_mrf; } - - if (intel->gen == 6) - c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 8) / 8; - else - c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4; } src_reg @@ -1847,21 +2323,35 @@ vec4_visitor::get_scratch_offset(vec4_instruction *inst, if (reladdr) { src_reg index = src_reg(this, glsl_type::int_type); - vec4_instruction *add = emit(BRW_OPCODE_ADD, - dst_reg(index), - *reladdr, - src_reg(reg_offset)); - /* Move our new instruction from the tail to its correct place. 
@@ -1847,21 +2323,35 @@ vec4_visitor::get_scratch_offset(vec4_instruction *inst,
 
    if (reladdr) {
       src_reg index = src_reg(this, glsl_type::int_type);
 
-      vec4_instruction *add = emit(BRW_OPCODE_ADD,
-                                   dst_reg(index),
-                                   *reladdr,
-                                   src_reg(reg_offset));
 
-      /* Move our new instruction from the tail to its correct place.
-       */
-      add->remove();
-      inst->insert_before(add);
+      emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
+      emit_before(inst, MUL(dst_reg(index),
+                            index, src_reg(message_header_scale)));
 
-      vec4_instruction *mul = emit(BRW_OPCODE_MUL, dst_reg(index),
-                                   index, src_reg(message_header_scale));
-      mul->remove();
-      inst->insert_before(mul);
+      return index;
+   } else {
+      return src_reg(reg_offset * message_header_scale);
+   }
+}
+
+src_reg
+vec4_visitor::get_pull_constant_offset(vec4_instruction *inst,
+                                       src_reg *reladdr, int reg_offset)
+{
+   if (reladdr) {
+      src_reg index = src_reg(this, glsl_type::int_type);
+
+      emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
+
+      /* Pre-gen6, the message header uses byte offsets instead of vec4
+       * (16-byte) offset units.
+       */
+      if (intel->gen < 6) {
+         emit_before(inst, MUL(dst_reg(index), index, src_reg(16)));
+      }
 
       return index;
    } else {
+      int message_header_scale = intel->gen < 6 ? 16 : 1;
       return src_reg(reg_offset * message_header_scale);
    }
 }
@@ -1878,14 +2368,7 @@ vec4_visitor::emit_scratch_read(vec4_instruction *inst,
    int reg_offset = base_offset + orig_src.reg_offset;
    src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset);
 
-   vec4_instruction *scratch_read_inst = emit(VS_OPCODE_SCRATCH_READ,
-                                              temp, index);
-
-   scratch_read_inst->base_mrf = 14;
-   scratch_read_inst->mlen = 1;
-   /* Move our instruction from the tail to its correct place. */
-   scratch_read_inst->remove();
-   inst->insert_before(scratch_read_inst);
+   emit_before(inst, SCRATCH_READ(temp, index));
 }
 
 /**
@@ -1902,14 +2385,11 @@
    dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
                                        orig_dst.writemask));
 
-   vec4_instruction *scratch_write_inst = emit(VS_OPCODE_SCRATCH_WRITE,
-                                               dst, temp, index);
-   scratch_write_inst->base_mrf = 13;
-   scratch_write_inst->mlen = 2;
-   scratch_write_inst->predicate = inst->predicate;
-   /* Move our instruction from the tail to its correct place. */
-   scratch_write_inst->remove();
-   inst->insert_after(scratch_write_inst);
+   vec4_instruction *write = SCRATCH_WRITE(dst, temp, index);
+   write->predicate = inst->predicate;
+   write->ir = inst->ir;
+   write->annotation = inst->annotation;
+   inst->insert_after(write);
 }
 
 /**
@@ -1991,6 +2471,110 @@ vec4_visitor::move_grf_array_access_to_scratch()
    }
 }
 
+/**
+ * Emits an instruction before @inst to load the value named by @orig_src
+ * from the pull constant buffer (surface) at @base_offset to @temp.
+ */
+void
+vec4_visitor::emit_pull_constant_load(vec4_instruction *inst,
+                                      dst_reg temp, src_reg orig_src,
+                                      int base_offset)
+{
+   int reg_offset = base_offset + orig_src.reg_offset;
+   src_reg index = get_pull_constant_offset(inst, orig_src.reladdr, reg_offset);
+   vec4_instruction *load;
+
+   load = new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
+                                        temp, index);
+   load->base_mrf = 14;
+   load->mlen = 1;
+   emit_before(inst, load);
+}
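One subtlety worth calling out in get_pull_constant_offset() is that the unit of the offset changes across generations. A small sketch of the constant-offset case -- `gen` stands in for intel->gen, and this models only the scaling, not the message setup:

/* Pre-gen6 constant loads address the buffer in bytes, so a vec4 at
 * reg_offset N lives at byte N*16; gen6+ messages take the offset in
 * vec4 (16-byte) units directly. */
static int pull_constant_offset(int gen, int reg_offset)
{
   int message_header_scale = gen < 6 ? 16 : 1;
   return reg_offset * message_header_scale;
}

/* e.g. reg_offset 3 becomes 48 on gen4/5 but stays 3 on gen6+; the
 * reladdr path above applies the same factor with an emitted MUL. */
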
+/**
+ * Implements array access of uniforms by inserting a
+ * PULL_CONSTANT_LOAD instruction.
+ *
+ * Unlike temporary GRF array access (where we don't support it due to
+ * the difficulty of doing relative addressing on instruction
+ * destinations), we could potentially do array access of uniforms
+ * that were loaded in GRF space as push constants.  In real-world
+ * usage we've seen, though, the arrays being used are always larger
+ * than we could load as push constants, so just always move all
+ * uniform array access out to a pull constant buffer.
+ */
+void
+vec4_visitor::move_uniform_array_access_to_pull_constants()
+{
+   int pull_constant_loc[this->uniforms];
+
+   for (int i = 0; i < this->uniforms; i++) {
+      pull_constant_loc[i] = -1;
+   }
+
+   /* Walk through and find array access of uniforms.  Put a copy of that
+    * uniform in the pull constant buffer.
+    *
+    * Note that we don't move constant-indexed accesses to arrays.  No
+    * testing has been done of the performance impact of this choice.
+    */
+   foreach_list_safe(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+
+      for (int i = 0 ; i < 3; i++) {
+         if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
+            continue;
+
+         int uniform = inst->src[i].reg;
+
+         /* If this array isn't already present in the pull constant buffer,
+          * add it.
+          */
+         if (pull_constant_loc[uniform] == -1) {
+            const float **values = &prog_data->param[uniform * 4];
+
+            pull_constant_loc[uniform] = prog_data->nr_pull_params / 4;
+
+            for (int j = 0; j < uniform_size[uniform] * 4; j++) {
+               prog_data->pull_param[prog_data->nr_pull_params++] = values[j];
+            }
+         }
+
+         /* Set up the annotation tracking for new generated instructions. */
+         base_ir = inst->ir;
+         current_annotation = inst->annotation;
+
+         dst_reg temp = dst_reg(this, glsl_type::vec4_type);
+
+         emit_pull_constant_load(inst, temp, inst->src[i],
+                                 pull_constant_loc[uniform]);
+
+         inst->src[i].file = temp.file;
+         inst->src[i].reg = temp.reg;
+         inst->src[i].reg_offset = temp.reg_offset;
+         inst->src[i].reladdr = NULL;
+      }
+   }
+
+   /* Now there are no accesses of the UNIFORM file with a reladdr, so
+    * no need to track them as larger-than-vec4 objects.  This will be
+    * relied on in cutting out unused uniform vectors from push
+    * constants.
+    */
+   split_uniform_registers();
+}
+
+void
+vec4_visitor::resolve_ud_negate(src_reg *reg)
+{
+   if (reg->type != BRW_REGISTER_TYPE_UD ||
+       !reg->negate)
+      return;
+
+   src_reg temp = src_reg(this, glsl_type::uvec4_type);
+   emit(BRW_OPCODE_MOV, dst_reg(temp), *reg);
+   *reg = temp;
+}
 
 vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
                            struct gl_shader_program *prog,
@@ -2011,26 +2595,31 @@ vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
    this->current_annotation = NULL;
 
    this->c = c;
-   this->vp = brw->vertex_program; /* FINISHME: change for precompile */
+   this->vp = (struct gl_vertex_program *)
+      prog->_LinkedShaders[MESA_SHADER_VERTEX]->Program;
    this->prog_data = &c->prog_data;
 
    this->variable_ht = hash_table_ctor(0,
                                        hash_table_pointer_hash,
                                        hash_table_pointer_compare);
 
+   this->virtual_grf_def = NULL;
+   this->virtual_grf_use = NULL;
    this->virtual_grf_sizes = NULL;
    this->virtual_grf_count = 0;
+   this->virtual_grf_reg_map = NULL;
+   this->virtual_grf_reg_count = 0;
    this->virtual_grf_array_size = 0;
+   this->live_intervals_valid = false;
 
-   this->uniforms = 0;
+   this->max_grf = intel->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
 
-   this->variable_ht = hash_table_ctor(0,
-                                       hash_table_pointer_hash,
-                                       hash_table_pointer_compare);
+   this->uniforms = 0;
 }
 
 vec4_visitor::~vec4_visitor()
 {
+   ralloc_free(this->mem_ctx);
    hash_table_dtor(this->variable_ht);
 }
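To make the relocation bookkeeping in move_uniform_array_access_to_pull_constants() concrete, here is a standalone model of just the pull_constant_loc logic -- the Uniform struct and the vectors are stand-ins for the driver's prog_data->param / pull_param arrays, not its actual types:

#include <vector>

/* Each uniform vec4 array accessed through a reladdr is appended to
 * the pull-constant buffer exactly once; pull_constant_loc[] remembers
 * its location in vec4 units, or -1 if it was never copied. */
struct Uniform { int size_vec4; const float *values; };

static std::vector<int>
relocate_indirect_uniforms(const std::vector<Uniform> &uniforms,
                           const std::vector<int> &indirectly_accessed,
                           std::vector<const float *> &pull_params)
{
   std::vector<int> pull_constant_loc(uniforms.size(), -1);

   for (int u : indirectly_accessed) {
      if (pull_constant_loc[u] != -1)
         continue;  /* already copied to the pull buffer */

      pull_constant_loc[u] = (int) pull_params.size() / 4;
      for (int j = 0; j < uniforms[u].size_vec4 * 4; j++)
         pull_params.push_back(&uniforms[u].values[j]);
   }
   return pull_constant_loc;
}

After this pass every indirect UNIFORM access reads from a freshly loaded temporary instead, which is why split_uniform_registers() can then treat all remaining uniforms as plain vec4s.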