i965/gen6+: Add support for GL_ARB_blend_func_extended.
author Eric Anholt <eric@anholt.net>
Wed, 25 Apr 2012 20:58:07 +0000 (13:58 -0700)
committer Eric Anholt <eric@anholt.net>
Wed, 23 May 2012 17:46:15 +0000 (10:46 -0700)
v2: Add support for gen6, don't turn it on if blending is disabled
    (fixes GPU hang), and note it in docs/GL3.txt.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
14 files changed:
docs/GL3.txt
src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
src/mesa/drivers/dri/i965/brw_context.c
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_eu.h
src/mesa/drivers/dri/i965/brw_eu_emit.c
src/mesa/drivers/dri/i965/brw_fs.h
src/mesa/drivers/dri/i965/brw_fs_emit.cpp
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
src/mesa/drivers/dri/i965/brw_util.c
src/mesa/drivers/dri/i965/brw_wm_emit.c
src/mesa/drivers/dri/i965/gen6_wm_state.c
src/mesa/drivers/dri/i965/gen7_wm_state.c
src/mesa/drivers/dri/intel/intel_extensions.c

index 5fc6c69ba4b95d02d805d23a09ce7b5be976257d..d6dc822e50a1932796dde1b5958ba3a71f3c1154 100644 (file)
@@ -73,7 +73,7 @@ GLX_ARB_create_context_profile                        DONE
 GL 3.3:
 
 GLSL 3.30                                             not started
-GL_ARB_blend_func_extended                            DONE (r600, softpipe)
+GL_ARB_blend_func_extended                            DONE (i965, r600, softpipe)
 GL_ARB_explicit_attrib_location                       DONE (i915, i965, r300, r600, swrast)
 GL_ARB_occlusion_query2                               DONE (r300, r600, swrast)
 GL_ARB_sampler_objects                                DONE (i965, r300, r600)
index ad31f4090e03a37761a84b255209885d7725edf5..97928351add934171853ac9d7204676110beb747 100644 (file)
@@ -980,6 +980,7 @@ brw_blorp_blit_program::render_target_write()
                 16 /* dispatch_width */,
                 base_mrf /* msg_reg_nr */,
                 mrf_rt_write /* src0 */,
+                BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE,
                 BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX,
                 mrf_offset /* msg_length.  TODO: Should be smaller for non-RGBA formats. */,
                 0 /* response_length */,
index 65de260fdd1931c2fa57f8ebf74f3333f25dd1b0..26f0d54466cab06f8345365bd28dd73d5f3f3dcd 100644 (file)
@@ -108,6 +108,7 @@ brwCreateContext(int api,
 
    TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
 
+   ctx->Const.MaxDualSourceDrawBuffers = 1;
    ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
    ctx->Const.MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
    ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
index 251893f8a04c7484a064d0f3a28a84ff7a80c9f3..5baffabae8671c6a624b6d6248f5bde88c1265fe 100644 (file)
@@ -309,6 +309,7 @@ struct brw_wm_prog_data {
    GLuint nr_params;       /**< number of float params/constants */
    GLuint nr_pull_params;
    bool error;
+   bool dual_src_blend;
    int dispatch_width;
    uint32_t prog_offset_16;
 
index e052ee081acba8b12f5fe94a78332ec5e54e205e..f25b09d896df03e866164ecb2b1d72f69ebe5195 100644 (file)
@@ -950,6 +950,7 @@ void brw_fb_WRITE(struct brw_compile *p,
                  int dispatch_width,
                   GLuint msg_reg_nr,
                   struct brw_reg src0,
+                  GLuint msg_control,
                   GLuint binding_table_index,
                   GLuint msg_length,
                   GLuint response_length,
index 179b59ac6fb3c32984cd24ba8cb0e8598dc1e3a8..8de872efcae8b9dd1dd4094d323da1d61dee9982 100644 (file)
@@ -2220,6 +2220,7 @@ void brw_fb_WRITE(struct brw_compile *p,
                  int dispatch_width,
                   GLuint msg_reg_nr,
                   struct brw_reg src0,
+                  GLuint msg_control,
                   GLuint binding_table_index,
                   GLuint msg_length,
                   GLuint response_length,
@@ -2228,7 +2229,7 @@ void brw_fb_WRITE(struct brw_compile *p,
 {
    struct intel_context *intel = &p->brw->intel;
    struct brw_instruction *insn;
-   GLuint msg_control, msg_type;
+   GLuint msg_type;
    struct brw_reg dest;
 
    if (dispatch_width == 16)
@@ -2256,11 +2257,6 @@ void brw_fb_WRITE(struct brw_compile *p,
       msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
    }
 
-   if (dispatch_width == 16)
-      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
-   else
-      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
-
    brw_set_dest(p, insn, dest);
    brw_set_src0(p, insn, src0);
    brw_set_dp_write_message(p,
index d7fd9a44348ca0393a2e0046a2bef8d23330236e..9d1746cecc894786bc70910c9a32c2a1d7800ce1 100644 (file)
@@ -610,6 +610,7 @@ public:
    struct hash_table *variable_ht;
    ir_variable *frag_depth;
    fs_reg outputs[BRW_MAX_DRAW_BUFFERS];
+   fs_reg dual_src_output;
    int first_non_payload_grf;
    int max_grf;
    int urb_setup[FRAG_ATTRIB_MAX];
index a4b71de49a08a5d562097f44f058af32117fadfb..522123c1dd8643548c56b18ae3d27dc3c2f7eb8b 100644 (file)
@@ -42,6 +42,7 @@ fs_visitor::generate_fb_write(fs_inst *inst)
 {
    bool eot = inst->eot;
    struct brw_reg implied_header;
+   uint32_t msg_control;
 
    /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
     * move, here's g1.
@@ -78,12 +79,20 @@ fs_visitor::generate_fb_write(fs_inst *inst)
       implied_header = brw_null_reg();
    }
 
+   if (this->dual_src_output.file != BAD_FILE)
+      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;
+   else if (c->dispatch_width == 16)
+      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
+   else
+      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
+
    brw_pop_insn_state(p);
 
    brw_fb_WRITE(p,
                c->dispatch_width,
                inst->base_mrf,
                implied_header,
+               msg_control,
                inst->target,
                inst->mlen,
                0,
index cba016517a4bf9f9812dbbc14989ea9c5198976f..275a1f4ef83930603cd8da724f068be72ebf5512 100644 (file)
@@ -72,7 +72,11 @@ fs_visitor::visit(ir_variable *ir)
    } else if (ir->mode == ir_var_out) {
       reg = new(this->mem_ctx) fs_reg(this, ir->type);
 
-      if (ir->location == FRAG_RESULT_COLOR) {
+      if (ir->index > 0) {
+        assert(ir->location == FRAG_RESULT_DATA0);
+        assert(ir->index == 1);
+        this->dual_src_output = *reg;
+      } else if (ir->location == FRAG_RESULT_COLOR) {
         /* Writing gl_FragColor outputs to all color regions. */
         for (unsigned int i = 0; i < MAX2(c->key.nr_color_regions, 1); i++) {
            this->outputs[i] = *reg;
@@ -2037,9 +2041,23 @@ fs_visitor::emit_fb_writes()
    int base_mrf = 1;
    int nr = base_mrf;
    int reg_width = c->dispatch_width / 8;
+   bool do_dual_src = this->dual_src_output.file != BAD_FILE;
 
+   if (c->dispatch_width == 16 && do_dual_src) {
+      fail("GL_ARB_blend_func_extended not yet supported in 16-wide.");
+      do_dual_src = false;
+   }
+
+   /* From the Sandy Bridge PRM, volume 4, page 198:
+    *
+    *     "Dispatched Pixel Enables. One bit per pixel indicating
+    *      which pixels were originally enabled when the thread was
+    *      dispatched. This field is only required for the end-of-
+    *      thread message and on all dual-source messages."
+    */
    if (intel->gen >= 6 &&
        !this->kill_emitted &&
+       !do_dual_src &&
        c->key.nr_color_regions == 1) {
       header_present = false;
    }
@@ -2059,6 +2077,8 @@ fs_visitor::emit_fb_writes()
    /* Reserve space for color. It'll be filled in per MRT below. */
    int color_mrf = nr;
    nr += 4 * reg_width;
+   if (do_dual_src)
+      nr += 4;
 
    if (c->source_depth_to_render_target) {
       if (intel->gen == 6 && c->dispatch_width == 16) {
@@ -2090,6 +2110,42 @@ fs_visitor::emit_fb_writes()
       nr += reg_width;
    }
 
+   if (do_dual_src) {
+      fs_reg src0 = this->outputs[0];
+      fs_reg src1 = this->dual_src_output;
+
+      this->current_annotation = ralloc_asprintf(this->mem_ctx,
+                                                "FB write src0");
+      for (int i = 0; i < 4; i++) {
+        fs_inst *inst = emit(BRW_OPCODE_MOV,
+                             fs_reg(MRF, color_mrf + i, src0.type),
+                             src0);
+        src0.reg_offset++;
+        inst->saturate = c->key.clamp_fragment_color;
+      }
+
+      this->current_annotation = ralloc_asprintf(this->mem_ctx,
+                                                "FB write src1");
+      for (int i = 0; i < 4; i++) {
+        fs_inst *inst = emit(BRW_OPCODE_MOV,
+                             fs_reg(MRF, color_mrf + 4 + i, src1.type),
+                             src1);
+        src1.reg_offset++;
+        inst->saturate = c->key.clamp_fragment_color;
+      }
+
+      fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
+      inst->target = 0;
+      inst->base_mrf = base_mrf;
+      inst->mlen = nr - base_mrf;
+      inst->eot = true;
+      inst->header_present = header_present;
+
+      c->prog_data.dual_src_blend = true;
+      this->current_annotation = NULL;
+      return;
+   }
+
    for (int target = 0; target < c->key.nr_color_regions; target++) {
       this->current_annotation = ralloc_asprintf(this->mem_ctx,
                                                 "FB write target %d",
index d28d9abcb3362af0f4afc64b73624705c656ed6d..5441c5e5bc5c69674ec6bc18aee85acfaa6633da 100644 (file)
@@ -89,6 +89,16 @@ GLuint brw_translate_blend_factor( GLenum factor )
       return BRW_BLENDFACTOR_CONST_ALPHA; 
    case GL_ONE_MINUS_CONSTANT_ALPHA:
       return BRW_BLENDFACTOR_INV_CONST_ALPHA;
+
+   case GL_SRC1_COLOR:
+      return BRW_BLENDFACTOR_SRC1_COLOR;
+   case GL_SRC1_ALPHA:
+      return BRW_BLENDFACTOR_SRC1_ALPHA;
+   case GL_ONE_MINUS_SRC1_COLOR:
+      return BRW_BLENDFACTOR_INV_SRC1_COLOR;
+   case GL_ONE_MINUS_SRC1_ALPHA:
+      return BRW_BLENDFACTOR_INV_SRC1_ALPHA;
+
    default:
       assert(0);
       return BRW_BLENDFACTOR_ZERO;
index 2647a386f4f70e94d0fae8280ffa6b4ff8512626..e27ff3528a89f3d524666d08d6ff7aae64634c66 100644 (file)
@@ -1331,6 +1331,7 @@ static void fire_fb_write( struct brw_wm_compile *c,
 {
    struct brw_compile *p = &c->func;
    struct intel_context *intel = &p->brw->intel;
+   uint32_t msg_control;
 
    /* Pass through control information:
     * 
@@ -1348,12 +1349,18 @@ static void fire_fb_write( struct brw_wm_compile *c,
       brw_pop_insn_state(p);
    }
 
+   if (c->dispatch_width == 16)
+      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
+   else
+      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
+
    /* Send framebuffer write message: */
 /*  send (16) null.0<1>:uw m0               r0.0<8;8,1>:uw   0x85a04000:ud    { Align1 EOT } */
    brw_fb_WRITE(p,
                c->dispatch_width,
                base_reg,
                retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
+               msg_control,
                target,         
                nr,
                0, 
index 28b3c2989c3b35cb611caa57d396acf82e0a83fa..cba2a5741ff369ca500af9c523e37edef4b92e56 100644 (file)
@@ -162,6 +162,13 @@ upload_wm_state(struct brw_context *brw)
       dw5 |= GEN6_WM_16_DISPATCH_ENABLE;
    }
 
+   /* CACHE_NEW_WM_PROG | _NEW_COLOR */
+   if (brw->wm.prog_data->dual_src_blend &&
+       (ctx->Color.BlendEnabled & 1) &&
+       ctx->Color.Blend[0]._UsesDualSrc) {
+      dw5 |= GEN6_WM_DUAL_SOURCE_BLEND_ENABLE;
+   }
+
    /* _NEW_LINE */
    if (ctx->Line.StippleFlag)
       dw5 |= GEN6_WM_LINE_STIPPLE_ENABLE;
index 2a0462f3fc969b9482af9f206274f245adf9eade..f46e3f26cebd7b2dace584bd70e52225cd123cad 100644 (file)
@@ -109,6 +109,7 @@ static void
 upload_ps_state(struct brw_context *brw)
 {
    struct intel_context *intel = &brw->intel;
+   struct gl_context *ctx = &intel->ctx;
    uint32_t dw2, dw4, dw5;
    const int max_threads_shift = brw->intel.is_haswell ?
       HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT;
@@ -176,6 +177,17 @@ upload_ps_state(struct brw_context *brw)
    if (brw->wm.prog_data->nr_params > 0)
       dw4 |= GEN7_PS_PUSH_CONSTANT_ENABLE;
 
+   /* CACHE_NEW_WM_PROG | _NEW_COLOR
+    *
+    * The hardware wedges if you have this bit set but don't turn on any dual
+    * source blend factors.
+    */
+   if (brw->wm.prog_data->dual_src_blend &&
+       (ctx->Color.BlendEnabled & 1) &&
+       ctx->Color.Blend[0]._UsesDualSrc) {
+      dw4 |= GEN7_PS_DUAL_SOURCE_BLEND_ENABLE;
+   }
+
    /* BRW_NEW_FRAGMENT_PROGRAM */
    if (brw->fragment_program->Base.InputsRead != 0)
       dw4 |= GEN7_PS_ATTRIBUTE_ENABLE;
@@ -213,7 +225,8 @@ upload_ps_state(struct brw_context *brw)
 
 const struct brw_tracked_state gen7_ps_state = {
    .dirty = {
-      .mesa  = _NEW_PROGRAM_CONSTANTS,
+      .mesa  = (_NEW_PROGRAM_CONSTANTS |
+               _NEW_COLOR),
       .brw   = (BRW_NEW_FRAGMENT_PROGRAM |
                BRW_NEW_PS_BINDING_TABLE |
                BRW_NEW_BATCH),
index d4713c985898be95f612788edc8e5cbb5797ae02..24778a474ce415f12573cf269c9e1d7e3620b05c 100644 (file)
@@ -101,6 +101,7 @@ intelInitExtensions(struct gl_context *ctx)
       ctx->Extensions.EXT_transform_feedback = true;
 
    if (intel->gen >= 6) {
+      ctx->Extensions.ARB_blend_func_extended = true;
       ctx->Extensions.ARB_draw_buffers_blend = true;
    }