X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_fs_visitor.cpp;h=c89f4d6a05ddc21081b2b8407de047e4affc2799;hb=cf40ebacb113a370c1b2445e881f8dc440a7d8f3;hp=b0fed9eecdac43b8630dc89fcda0068ab02d48f5;hpb=eef710fc53113a5b3d6bbf7d9a20f63d7add7911;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index b0fed9eecda..c89f4d6a05d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -212,7 +212,8 @@ fs_visitor::visit(ir_dereference_array *ir) } void -fs_visitor::emit_lrp(fs_reg dst, fs_reg x, fs_reg y, fs_reg a) +fs_visitor::emit_lrp(const fs_reg &dst, const fs_reg &x, const fs_reg &y, + const fs_reg &a) { if (brw->gen < 6 || !x.is_valid_3src() || @@ -225,8 +226,9 @@ fs_visitor::emit_lrp(fs_reg dst, fs_reg x, fs_reg y, fs_reg a) emit(MUL(y_times_a, y, a)); - a.negate = !a.negate; - emit(ADD(one_minus_a, a, fs_reg(1.0f))); + fs_reg negative_a = a; + negative_a.negate = !a.negate; + emit(ADD(one_minus_a, negative_a, fs_reg(1.0f))); emit(MUL(x_times_one_minus_a, x, one_minus_a)); emit(ADD(dst, x_times_one_minus_a, y_times_a)); @@ -239,8 +241,8 @@ fs_visitor::emit_lrp(fs_reg dst, fs_reg x, fs_reg y, fs_reg a) } void -fs_visitor::emit_minmax(uint32_t conditionalmod, fs_reg dst, - fs_reg src0, fs_reg src1) +fs_visitor::emit_minmax(uint32_t conditionalmod, const fs_reg &dst, + const fs_reg &src0, const fs_reg &src1) { fs_inst *inst; @@ -347,8 +349,8 @@ fs_visitor::visit(ir_expression *ir) ir->operands[operand]->accept(this); if (this->result.file == BAD_FILE) { fail("Failed to get tree for expression operand:\n"); - ir->operands[operand]->print(); - printf("\n"); + ir->operands[operand]->fprint(stderr); + fprintf(stderr, "\n"); } assert(this->result.is_valid_3src()); op[operand] = this->result; @@ -456,25 +458,34 @@ fs_visitor::visit(ir_expression *ir) * of one of the operands (src0 on gen6, src1 on gen7). The * MACH accumulates in the contribution of the upper 16 bits * of that operand. - * - * FINISHME: Emit just the MUL if we know an operand is small - * enough. - */ - if (brw->gen >= 7 && dispatch_width == 16) - fail("SIMD16 explicit accumulator operands unsupported\n"); - - struct brw_reg acc = retype(brw_acc_reg(), this->result.type); - - emit(MUL(acc, op[0], op[1])); - emit(MACH(reg_null_d, op[0], op[1])); - emit(MOV(this->result, fs_reg(acc))); + */ + if (ir->operands[0]->is_uint16_constant()) { + if (brw->gen < 7) + emit(MUL(this->result, op[0], op[1])); + else + emit(MUL(this->result, op[1], op[0])); + } else if (ir->operands[1]->is_uint16_constant()) { + if (brw->gen < 7) + emit(MUL(this->result, op[1], op[0])); + else + emit(MUL(this->result, op[0], op[1])); + } else { + if (brw->gen >= 7) + no16("SIMD16 explicit accumulator operands unsupported\n"); + + struct brw_reg acc = retype(brw_acc_reg(), this->result.type); + + emit(MUL(acc, op[0], op[1])); + emit(MACH(reg_null_d, op[0], op[1])); + emit(MOV(this->result, fs_reg(acc))); + } } else { emit(MUL(this->result, op[0], op[1])); } break; case ir_binop_imul_high: { - if (brw->gen >= 7 && dispatch_width == 16) - fail("SIMD16 explicit accumulator operands unsupported\n"); + if (brw->gen >= 7) + no16("SIMD16 explicit accumulator operands unsupported\n"); struct brw_reg acc = retype(brw_acc_reg(), this->result.type); @@ -488,8 +499,8 @@ fs_visitor::visit(ir_expression *ir) emit_math(SHADER_OPCODE_INT_QUOTIENT, this->result, op[0], op[1]); break; case ir_binop_carry: { - if (brw->gen >= 7 && dispatch_width == 16) - fail("SIMD16 explicit accumulator operands unsupported\n"); + if (brw->gen >= 7) + no16("SIMD16 explicit accumulator operands unsupported\n"); struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD); @@ -498,8 +509,8 @@ fs_visitor::visit(ir_expression *ir) break; } case ir_binop_borrow: { - if (brw->gen >= 7 && dispatch_width == 16) - fail("SIMD16 explicit accumulator operands unsupported\n"); + if (brw->gen >= 7) + no16("SIMD16 explicit accumulator operands unsupported\n"); struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD); @@ -741,8 +752,8 @@ fs_visitor::visit(ir_expression *ir) packed_consts.type = result.type; fs_reg const_offset_reg = fs_reg(const_offset->value.u[0] & ~15); - emit(fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, - packed_consts, surf_index, const_offset_reg)); + emit(new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, + packed_consts, surf_index, const_offset_reg)); for (int i = 0; i < ir->type->vector_elements; i++) { packed_consts.set_smear(const_offset->value.u[0] % 16 / 4 + i); @@ -1288,8 +1299,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, next.reg_offset++; break; case ir_txd: { - if (dispatch_width == 16) - fail("Gen7 does not support sample_d/sample_d_c in SIMD16 mode."); + no16("Gen7 does not support sample_d/sample_d_c in SIMD16 mode."); /* Load dPdx and the coordinate together: * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z @@ -1362,8 +1372,8 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, break; case ir_tg4: if (has_nonconstant_offset) { - if (ir->shadow_comparitor && dispatch_width == 16) - fail("Gen7 does not support gather4_po_c in SIMD16 mode."); + if (ir->shadow_comparitor) + no16("Gen7 does not support gather4_po_c in SIMD16 mode."); /* More crazy intermixing */ ir->offset->accept(this); @@ -1462,8 +1472,8 @@ fs_visitor::rescale_texcoord(ir_texture *ir, fs_reg coordinate, 0 }; + no16("rectangle scale uniform setup not supported on SIMD16\n"); if (dispatch_width == 16) { - fail("rectangle scale uniform setup not supported on SIMD16\n"); return coordinate; } @@ -2181,8 +2191,8 @@ fs_visitor::try_replace_with_sel() void fs_visitor::visit(ir_if *ir) { - if (brw->gen < 6 && dispatch_width == 16) { - fail("Can't support (non-uniform) control flow on SIMD16\n"); + if (brw->gen < 6) { + no16("Can't support (non-uniform) control flow on SIMD16\n"); } /* Don't point the annotation at the if statement, because then it plus @@ -2224,8 +2234,8 @@ fs_visitor::visit(ir_if *ir) void fs_visitor::visit(ir_loop *ir) { - if (brw->gen < 6 && dispatch_width == 16) { - fail("Can't support (non-uniform) control flow on SIMD16\n"); + if (brw->gen < 6) { + no16("Can't support (non-uniform) control flow on SIMD16\n"); } this->base_ir = NULL; @@ -2397,9 +2407,10 @@ fs_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index, } /* Emit the instruction. */ - fs_inst inst(SHADER_OPCODE_UNTYPED_ATOMIC, dst, atomic_op, surf_index); - inst.base_mrf = 0; - inst.mlen = mlen; + fs_inst *inst = new(mem_ctx) fs_inst(SHADER_OPCODE_UNTYPED_ATOMIC, dst, + atomic_op, surf_index); + inst->base_mrf = 0; + inst->mlen = mlen; emit(inst); } @@ -2430,21 +2441,13 @@ fs_visitor::emit_untyped_surface_read(unsigned surf_index, fs_reg dst, mlen += operand_len; /* Emit the instruction. */ - fs_inst inst(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst, surf_index); - inst.base_mrf = 0; - inst.mlen = mlen; + fs_inst *inst = new(mem_ctx) + fs_inst(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst, surf_index); + inst->base_mrf = 0; + inst->mlen = mlen; emit(inst); } -fs_inst * -fs_visitor::emit(fs_inst inst) -{ - fs_inst *list_inst = new(mem_ctx) fs_inst; - *list_inst = inst; - emit(list_inst); - return list_inst; -} - fs_inst * fs_visitor::emit(fs_inst *inst) { @@ -2730,9 +2733,10 @@ fs_visitor::emit_fb_writes() bool do_dual_src = this->dual_src_output.file != BAD_FILE; bool src0_alpha_to_render_target = false; - if (dispatch_width == 16 && do_dual_src) { - fail("GL_ARB_blend_func_extended not yet supported in SIMD16."); - do_dual_src = false; + if (do_dual_src) { + no16("GL_ARB_blend_func_extended not yet supported in SIMD16."); + if (dispatch_width == 16) + do_dual_src = false; } /* From the Sandy Bridge PRM, volume 4, page 198: @@ -2783,13 +2787,13 @@ fs_visitor::emit_fb_writes() nr += reg_width; if (c->source_depth_to_render_target) { - if (brw->gen == 6 && dispatch_width == 16) { + if (brw->gen == 6) { /* For outputting oDepth on gen6, SIMD8 writes have to be * used. This would require SIMD8 moves of each half to * message regs, kind of like pre-gen5 SIMD16 FB writes. * Just bail on doing so for now. */ - fail("Missing support for simd16 depth writes on gen6\n"); + no16("Missing support for simd16 depth writes on gen6\n"); } if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { @@ -2947,23 +2951,16 @@ fs_visitor::fs_visitor(struct brw_context *brw, struct gl_shader_program *shader_prog, struct gl_fragment_program *fp, unsigned dispatch_width) - : dispatch_width(dispatch_width) + : backend_visitor(brw, shader_prog, &fp->Base, &c->prog_data.base, + MESA_SHADER_FRAGMENT), + dispatch_width(dispatch_width) { this->c = c; - this->brw = brw; this->fp = fp; - this->prog = &fp->Base; - this->shader_prog = shader_prog; - this->prog = &fp->Base; - this->stage_prog_data = &c->prog_data.base; - this->ctx = &brw->ctx; this->mem_ctx = ralloc_context(NULL); - if (shader_prog) - shader = (struct brw_shader *) - shader_prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; - else - shader = NULL; this->failed = false; + this->simd16_unsupported = false; + this->no16_msg = NULL; this->variable_ht = hash_table_ctor(0, hash_table_pointer_hash, hash_table_pointer_compare); @@ -2985,14 +2982,15 @@ fs_visitor::fs_visitor(struct brw_context *brw, this->regs_live_at_ip = NULL; this->uniforms = 0; - this->params_remap = NULL; - this->nr_params_remap = 0; + this->pull_constant_loc = NULL; + this->push_constant_loc = NULL; this->force_uncompressed_stack = 0; this->spilled_any_registers = false; - memset(&this->param_size, 0, sizeof(this->param_size)); + if (dispatch_width == 8) + this->param_size = rzalloc_array(mem_ctx, int, stage_prog_data->nr_params); } fs_visitor::~fs_visitor()