X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_fs_visitor.cpp;h=c89f4d6a05ddc21081b2b8407de047e4affc2799;hb=cf40ebacb113a370c1b2445e881f8dc440a7d8f3;hp=7088502340befbe30f3f854450c84f3afa73a3f6;hpb=542f2e47f2f22522b963a7ab1f8b485d1c9985ba;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 7088502340b..c89f4d6a05d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -458,25 +458,34 @@ fs_visitor::visit(ir_expression *ir) * of one of the operands (src0 on gen6, src1 on gen7). The * MACH accumulates in the contribution of the upper 16 bits * of that operand. - * - * FINISHME: Emit just the MUL if we know an operand is small - * enough. - */ - if (brw->gen >= 7 && dispatch_width == 16) - fail("SIMD16 explicit accumulator operands unsupported\n"); - - struct brw_reg acc = retype(brw_acc_reg(), this->result.type); - - emit(MUL(acc, op[0], op[1])); - emit(MACH(reg_null_d, op[0], op[1])); - emit(MOV(this->result, fs_reg(acc))); + */ + if (ir->operands[0]->is_uint16_constant()) { + if (brw->gen < 7) + emit(MUL(this->result, op[0], op[1])); + else + emit(MUL(this->result, op[1], op[0])); + } else if (ir->operands[1]->is_uint16_constant()) { + if (brw->gen < 7) + emit(MUL(this->result, op[1], op[0])); + else + emit(MUL(this->result, op[0], op[1])); + } else { + if (brw->gen >= 7) + no16("SIMD16 explicit accumulator operands unsupported\n"); + + struct brw_reg acc = retype(brw_acc_reg(), this->result.type); + + emit(MUL(acc, op[0], op[1])); + emit(MACH(reg_null_d, op[0], op[1])); + emit(MOV(this->result, fs_reg(acc))); + } } else { emit(MUL(this->result, op[0], op[1])); } break; case ir_binop_imul_high: { - if (brw->gen >= 7 && dispatch_width == 16) - fail("SIMD16 explicit accumulator operands unsupported\n"); + if (brw->gen >= 7) + no16("SIMD16 explicit accumulator operands unsupported\n"); struct brw_reg acc = retype(brw_acc_reg(), this->result.type); @@ -490,8 +499,8 @@ fs_visitor::visit(ir_expression *ir) emit_math(SHADER_OPCODE_INT_QUOTIENT, this->result, op[0], op[1]); break; case ir_binop_carry: { - if (brw->gen >= 7 && dispatch_width == 16) - fail("SIMD16 explicit accumulator operands unsupported\n"); + if (brw->gen >= 7) + no16("SIMD16 explicit accumulator operands unsupported\n"); struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD); @@ -500,8 +509,8 @@ fs_visitor::visit(ir_expression *ir) break; } case ir_binop_borrow: { - if (brw->gen >= 7 && dispatch_width == 16) - fail("SIMD16 explicit accumulator operands unsupported\n"); + if (brw->gen >= 7) + no16("SIMD16 explicit accumulator operands unsupported\n"); struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD); @@ -1290,8 +1299,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, next.reg_offset++; break; case ir_txd: { - if (dispatch_width == 16) - fail("Gen7 does not support sample_d/sample_d_c in SIMD16 mode."); + no16("Gen7 does not support sample_d/sample_d_c in SIMD16 mode."); /* Load dPdx and the coordinate together: * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z @@ -1364,8 +1372,8 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, break; case ir_tg4: if (has_nonconstant_offset) { - if (ir->shadow_comparitor && dispatch_width == 16) - fail("Gen7 does not support gather4_po_c in SIMD16 mode."); + if (ir->shadow_comparitor) + no16("Gen7 does not support gather4_po_c in SIMD16 mode."); /* More crazy intermixing */ ir->offset->accept(this); @@ -1464,8 +1472,8 @@ fs_visitor::rescale_texcoord(ir_texture *ir, fs_reg coordinate, 0 }; + no16("rectangle scale uniform setup not supported on SIMD16\n"); if (dispatch_width == 16) { - fail("rectangle scale uniform setup not supported on SIMD16\n"); return coordinate; } @@ -2183,8 +2191,8 @@ fs_visitor::try_replace_with_sel() void fs_visitor::visit(ir_if *ir) { - if (brw->gen < 6 && dispatch_width == 16) { - fail("Can't support (non-uniform) control flow on SIMD16\n"); + if (brw->gen < 6) { + no16("Can't support (non-uniform) control flow on SIMD16\n"); } /* Don't point the annotation at the if statement, because then it plus @@ -2226,8 +2234,8 @@ fs_visitor::visit(ir_if *ir) void fs_visitor::visit(ir_loop *ir) { - if (brw->gen < 6 && dispatch_width == 16) { - fail("Can't support (non-uniform) control flow on SIMD16\n"); + if (brw->gen < 6) { + no16("Can't support (non-uniform) control flow on SIMD16\n"); } this->base_ir = NULL; @@ -2725,9 +2733,10 @@ fs_visitor::emit_fb_writes() bool do_dual_src = this->dual_src_output.file != BAD_FILE; bool src0_alpha_to_render_target = false; - if (dispatch_width == 16 && do_dual_src) { - fail("GL_ARB_blend_func_extended not yet supported in SIMD16."); - do_dual_src = false; + if (do_dual_src) { + no16("GL_ARB_blend_func_extended not yet supported in SIMD16."); + if (dispatch_width == 16) + do_dual_src = false; } /* From the Sandy Bridge PRM, volume 4, page 198: @@ -2778,13 +2787,13 @@ fs_visitor::emit_fb_writes() nr += reg_width; if (c->source_depth_to_render_target) { - if (brw->gen == 6 && dispatch_width == 16) { + if (brw->gen == 6) { /* For outputting oDepth on gen6, SIMD8 writes have to be * used. This would require SIMD8 moves of each half to * message regs, kind of like pre-gen5 SIMD16 FB writes. * Just bail on doing so for now. */ - fail("Missing support for simd16 depth writes on gen6\n"); + no16("Missing support for simd16 depth writes on gen6\n"); } if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { @@ -2950,6 +2959,8 @@ fs_visitor::fs_visitor(struct brw_context *brw, this->fp = fp; this->mem_ctx = ralloc_context(NULL); this->failed = false; + this->simd16_unsupported = false; + this->no16_msg = NULL; this->variable_ht = hash_table_ctor(0, hash_table_pointer_hash, hash_table_pointer_compare); @@ -2978,7 +2989,8 @@ fs_visitor::fs_visitor(struct brw_context *brw, this->spilled_any_registers = false; - this->param_size = rzalloc_array(mem_ctx, int, stage_prog_data->nr_params); + if (dispatch_width == 8) + this->param_size = rzalloc_array(mem_ctx, int, stage_prog_data->nr_params); } fs_visitor::~fs_visitor()