From 29362875f2613ad87abe7725ce3c56c36d16cf9b Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 25 Apr 2012 13:58:07 -0700 Subject: [PATCH] i965/gen6+: Add support for GL_ARB_blend_func_extended. v2: Add support for gen6, and don't turn it on if blending is disabled (fixes GPU hang), and note it in docs/GL3.txt Reviewed-by: Kenneth Graunke --- docs/GL3.txt | 2 +- src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 1 + src/mesa/drivers/dri/i965/brw_context.c | 1 + src/mesa/drivers/dri/i965/brw_context.h | 1 + src/mesa/drivers/dri/i965/brw_eu.h | 1 + src/mesa/drivers/dri/i965/brw_eu_emit.c | 8 +-- src/mesa/drivers/dri/i965/brw_fs.h | 1 + src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 9 +++ src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 58 ++++++++++++++++++- src/mesa/drivers/dri/i965/brw_util.c | 10 ++++ src/mesa/drivers/dri/i965/brw_wm_emit.c | 7 +++ src/mesa/drivers/dri/i965/gen6_wm_state.c | 7 +++ src/mesa/drivers/dri/i965/gen7_wm_state.c | 15 ++++- src/mesa/drivers/dri/intel/intel_extensions.c | 1 + 14 files changed, 113 insertions(+), 9 deletions(-) diff --git a/docs/GL3.txt b/docs/GL3.txt index 5fc6c69ba4b..d6dc822e50a 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -73,7 +73,7 @@ GLX_ARB_create_context_profile DONE GL 3.3: GLSL 3.30 not started -GL_ARB_blend_func_extended DONE (r600, softpipe) +GL_ARB_blend_func_extended DONE (i965, r600, softpipe) GL_ARB_explicit_attrib_location DONE (i915, i965, r300, r600, swrast) GL_ARB_occlusion_query2 DONE (r300, r600, swrast) GL_ARB_sampler_objects DONE (i965, r300, r600) diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp index ad31f4090e0..97928351add 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp @@ -980,6 +980,7 @@ brw_blorp_blit_program::render_target_write() 16 /* dispatch_width */, base_mrf /* msg_reg_nr */, mrf_rt_write /* src0 */, + BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, 
BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX, mrf_offset /* msg_length. TODO: Should be smaller for non-RGBA formats. */, 0 /* response_length */, diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 65de260fdd1..26f0d54466c 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -108,6 +108,7 @@ brwCreateContext(int api, TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline; + ctx->Const.MaxDualSourceDrawBuffers = 1; ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS; ctx->Const.MaxTextureImageUnits = BRW_MAX_TEX_UNIT; ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */ diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 251893f8a04..5baffabae86 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -309,6 +309,7 @@ struct brw_wm_prog_data { GLuint nr_params; /**< number of float params/constants */ GLuint nr_pull_params; bool error; + bool dual_src_blend; int dispatch_width; uint32_t prog_offset_16; diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index e052ee081ac..f25b09d896d 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -950,6 +950,7 @@ void brw_fb_WRITE(struct brw_compile *p, int dispatch_width, GLuint msg_reg_nr, struct brw_reg src0, + GLuint msg_control, GLuint binding_table_index, GLuint msg_length, GLuint response_length, diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 179b59ac6fb..8de872efcae 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -2220,6 +2220,7 @@ void brw_fb_WRITE(struct brw_compile *p, int dispatch_width, GLuint msg_reg_nr, struct brw_reg src0, + GLuint msg_control, GLuint binding_table_index, GLuint msg_length, GLuint response_length, @@ -2228,7 +2229,7 @@ void 
brw_fb_WRITE(struct brw_compile *p, { struct intel_context *intel = &p->brw->intel; struct brw_instruction *insn; - GLuint msg_control, msg_type; + GLuint msg_type; struct brw_reg dest; if (dispatch_width == 16) @@ -2256,11 +2257,6 @@ void brw_fb_WRITE(struct brw_compile *p, msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; } - if (dispatch_width == 16) - msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE; - else - msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01; - brw_set_dest(p, insn, dest); brw_set_src0(p, insn, src0); brw_set_dp_write_message(p, diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index d7fd9a44348..9d1746cecc8 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -610,6 +610,7 @@ public: struct hash_table *variable_ht; ir_variable *frag_depth; fs_reg outputs[BRW_MAX_DRAW_BUFFERS]; + fs_reg dual_src_output; int first_non_payload_grf; int max_grf; int urb_setup[FRAG_ATTRIB_MAX]; diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index a4b71de49a0..522123c1dd8 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -42,6 +42,7 @@ fs_visitor::generate_fb_write(fs_inst *inst) { bool eot = inst->eot; struct brw_reg implied_header; + uint32_t msg_control; /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied * move, here's g1. 
@@ -78,12 +79,20 @@ fs_visitor::generate_fb_write(fs_inst *inst) implied_header = brw_null_reg(); } + if (this->dual_src_output.file != BAD_FILE) + msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01; + else if (c->dispatch_width == 16) + msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE; + else + msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01; + brw_pop_insn_state(p); brw_fb_WRITE(p, c->dispatch_width, inst->base_mrf, implied_header, + msg_control, inst->target, inst->mlen, 0, diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index cba016517a4..275a1f4ef83 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -72,7 +72,11 @@ fs_visitor::visit(ir_variable *ir) } else if (ir->mode == ir_var_out) { reg = new(this->mem_ctx) fs_reg(this, ir->type); - if (ir->location == FRAG_RESULT_COLOR) { + if (ir->index > 0) { + assert(ir->location == FRAG_RESULT_DATA0); + assert(ir->index == 1); + this->dual_src_output = *reg; + } else if (ir->location == FRAG_RESULT_COLOR) { /* Writing gl_FragColor outputs to all color regions. */ for (unsigned int i = 0; i < MAX2(c->key.nr_color_regions, 1); i++) { this->outputs[i] = *reg; @@ -2037,9 +2041,23 @@ fs_visitor::emit_fb_writes() int base_mrf = 1; int nr = base_mrf; int reg_width = c->dispatch_width / 8; + bool do_dual_src = this->dual_src_output.file != BAD_FILE; + if (c->dispatch_width == 16 && do_dual_src) { + fail("GL_ARB_blend_func_extended not yet supported in 16-wide."); + do_dual_src = false; + } + + /* From the Sandy Bridge PRM, volume 4, page 198: + * + * "Dispatched Pixel Enables. One bit per pixel indicating + * which pixels were originally enabled when the thread was + * dispatched. This field is only required for the end-of- + * thread message and on all dual-source messages." 
+ */ if (intel->gen >= 6 && !this->kill_emitted && + !do_dual_src && c->key.nr_color_regions == 1) { header_present = false; } @@ -2059,6 +2077,8 @@ fs_visitor::emit_fb_writes() /* Reserve space for color. It'll be filled in per MRT below. */ int color_mrf = nr; nr += 4 * reg_width; + if (do_dual_src) + nr += 4; if (c->source_depth_to_render_target) { if (intel->gen == 6 && c->dispatch_width == 16) { @@ -2090,6 +2110,42 @@ fs_visitor::emit_fb_writes() nr += reg_width; } + if (do_dual_src) { + fs_reg src0 = this->outputs[0]; + fs_reg src1 = this->dual_src_output; + + this->current_annotation = ralloc_asprintf(this->mem_ctx, + "FB write src0"); + for (int i = 0; i < 4; i++) { + fs_inst *inst = emit(BRW_OPCODE_MOV, + fs_reg(MRF, color_mrf + i, src0.type), + src0); + src0.reg_offset++; + inst->saturate = c->key.clamp_fragment_color; + } + + this->current_annotation = ralloc_asprintf(this->mem_ctx, + "FB write src1"); + for (int i = 0; i < 4; i++) { + fs_inst *inst = emit(BRW_OPCODE_MOV, + fs_reg(MRF, color_mrf + 4 + i, src1.type), + src1); + src1.reg_offset++; + inst->saturate = c->key.clamp_fragment_color; + } + + fs_inst *inst = emit(FS_OPCODE_FB_WRITE); + inst->target = 0; + inst->base_mrf = base_mrf; + inst->mlen = nr - base_mrf; + inst->eot = true; + inst->header_present = header_present; + + c->prog_data.dual_src_blend = true; + this->current_annotation = NULL; + return; + } + for (int target = 0; target < c->key.nr_color_regions; target++) { this->current_annotation = ralloc_asprintf(this->mem_ctx, "FB write target %d", diff --git a/src/mesa/drivers/dri/i965/brw_util.c b/src/mesa/drivers/dri/i965/brw_util.c index d28d9abcb33..5441c5e5bc5 100644 --- a/src/mesa/drivers/dri/i965/brw_util.c +++ b/src/mesa/drivers/dri/i965/brw_util.c @@ -89,6 +89,16 @@ GLuint brw_translate_blend_factor( GLenum factor ) return BRW_BLENDFACTOR_CONST_ALPHA; case GL_ONE_MINUS_CONSTANT_ALPHA: return BRW_BLENDFACTOR_INV_CONST_ALPHA; + + case GL_SRC1_COLOR: + return 
BRW_BLENDFACTOR_SRC1_COLOR; + case GL_SRC1_ALPHA: + return BRW_BLENDFACTOR_SRC1_ALPHA; + case GL_ONE_MINUS_SRC1_COLOR: + return BRW_BLENDFACTOR_INV_SRC1_COLOR; + case GL_ONE_MINUS_SRC1_ALPHA: + return BRW_BLENDFACTOR_INV_SRC1_ALPHA; + default: assert(0); return BRW_BLENDFACTOR_ZERO; diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index 2647a386f4f..e27ff3528a8 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -1331,6 +1331,7 @@ static void fire_fb_write( struct brw_wm_compile *c, { struct brw_compile *p = &c->func; struct intel_context *intel = &p->brw->intel; + uint32_t msg_control; /* Pass through control information: * @@ -1348,12 +1349,18 @@ static void fire_fb_write( struct brw_wm_compile *c, brw_pop_insn_state(p); } + if (c->dispatch_width == 16) + msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE; + else + msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01; + /* Send framebuffer write message: */ /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */ brw_fb_WRITE(p, c->dispatch_width, base_reg, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), + msg_control, target, nr, 0, diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index 28b3c2989c3..cba2a5741ff 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -162,6 +162,13 @@ upload_wm_state(struct brw_context *brw) dw5 |= GEN6_WM_16_DISPATCH_ENABLE; } + /* CACHE_NEW_WM_PROG | _NEW_COLOR */ + if (brw->wm.prog_data->dual_src_blend && + (ctx->Color.BlendEnabled & 1) && + ctx->Color.Blend[0]._UsesDualSrc) { + dw5 |= GEN6_WM_DUAL_SOURCE_BLEND_ENABLE; + } + /* _NEW_LINE */ if (ctx->Line.StippleFlag) dw5 |= GEN6_WM_LINE_STIPPLE_ENABLE; diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index 
2a0462f3fc9..f46e3f26ceb 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -109,6 +109,7 @@ static void upload_ps_state(struct brw_context *brw) { struct intel_context *intel = &brw->intel; + struct gl_context *ctx = &intel->ctx; uint32_t dw2, dw4, dw5; const int max_threads_shift = brw->intel.is_haswell ? HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT; @@ -176,6 +177,17 @@ upload_ps_state(struct brw_context *brw) if (brw->wm.prog_data->nr_params > 0) dw4 |= GEN7_PS_PUSH_CONSTANT_ENABLE; + /* CACHE_NEW_WM_PROG | _NEW_COLOR + * + * The hardware wedges if you have this bit set but don't turn on any dual + * source blend factors. + */ + if (brw->wm.prog_data->dual_src_blend && + (ctx->Color.BlendEnabled & 1) && + ctx->Color.Blend[0]._UsesDualSrc) { + dw4 |= GEN7_PS_DUAL_SOURCE_BLEND_ENABLE; + } + /* BRW_NEW_FRAGMENT_PROGRAM */ if (brw->fragment_program->Base.InputsRead != 0) dw4 |= GEN7_PS_ATTRIBUTE_ENABLE; @@ -213,7 +225,8 @@ upload_ps_state(struct brw_context *brw) const struct brw_tracked_state gen7_ps_state = { .dirty = { - .mesa = _NEW_PROGRAM_CONSTANTS, + .mesa = (_NEW_PROGRAM_CONSTANTS | + _NEW_COLOR), .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_PS_BINDING_TABLE | BRW_NEW_BATCH), diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c index d4713c98589..24778a474ce 100644 --- a/src/mesa/drivers/dri/intel/intel_extensions.c +++ b/src/mesa/drivers/dri/intel/intel_extensions.c @@ -101,6 +101,7 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.EXT_transform_feedback = true; if (intel->gen >= 6) { + ctx->Extensions.ARB_blend_func_extended = true; ctx->Extensions.ARB_draw_buffers_blend = true; } -- 2.30.2