i965/fs: Add gen6 register spilling support.
authorEric Anholt <eric@anholt.net>
Fri, 15 Apr 2011 02:36:28 +0000 (19:36 -0700)
committerEric Anholt <eric@anholt.net>
Sun, 17 Apr 2011 17:26:09 +0000 (10:26 -0700)
Most of this is code movement to get the scratch space allocated in a
shared location.  Other than that, the only real changes are that the
old oword block messages now operate on oword-aligned areas (with new
messages for unaligned access, which we don't do), and that the
caching control is in the SFID part of the descriptor instead of
message control.

Fixes glsl-fs-convolution-1.

src/mesa/drivers/dri/i965/brw_eu_emit.c
src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
src/mesa/drivers/dri/i965/brw_wm.c
src/mesa/drivers/dri/i965/brw_wm_state.c
src/mesa/drivers/dri/i965/gen6_wm_state.c

index 57313a59c080a28321d2d6c856a6237a1f31cecb..2d654e71432d1f9377868eb5131aa6a516875e2b 100644 (file)
@@ -496,6 +496,8 @@ static void brw_set_dp_write_message( struct brw_context *brw,
        insn->bits3.dp_render_cache.response_length = response_length;
        insn->bits3.dp_render_cache.msg_length = msg_length;
        insn->bits3.dp_render_cache.end_of_thread = end_of_thread;
+
+       /* We always use the render cache for write messages */
        insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
        /* XXX really need below? */
        insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
@@ -539,6 +541,13 @@ brw_set_dp_read_message(struct brw_context *brw,
    brw_set_src1(insn, brw_imm_d(0));
 
    if (intel->gen >= 6) {
+       uint32_t target_function;
+
+       if (target_cache == BRW_DATAPORT_READ_TARGET_DATA_CACHE)
+         target_function = BRW_MESSAGE_TARGET_DATAPORT_READ; /* data cache */
+       else
+         target_function = BRW_MESSAGE_TARGET_DATAPORT_WRITE; /* render cache */
+
        insn->bits3.dp_render_cache.binding_table_index = binding_table_index;
        insn->bits3.dp_render_cache.msg_control = msg_control;
        insn->bits3.dp_render_cache.pixel_scoreboard_clear = 0;
@@ -548,9 +557,9 @@ brw_set_dp_read_message(struct brw_context *brw,
        insn->bits3.dp_render_cache.response_length = response_length;
        insn->bits3.dp_render_cache.msg_length = msg_length;
        insn->bits3.dp_render_cache.end_of_thread = 0;
-       insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_READ;
+       insn->header.destreg__conditionalmod = target_function;
        /* XXX really need below? */
-       insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
+       insn->bits2.send_gen5.sfid = target_function;
        insn->bits2.send_gen5.end_of_thread = 0;
    } else if (intel->gen == 5) {
        insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
@@ -1486,9 +1495,12 @@ void brw_oword_block_write_scratch(struct brw_compile *p,
                                   GLuint offset)
 {
    struct intel_context *intel = &p->brw->intel;
-   uint32_t msg_control;
+   uint32_t msg_control, msg_type;
    int mlen;
 
+   if (intel->gen >= 6)
+      offset /= 16;
+
    mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
 
    if (num_regs == 1) {
@@ -1554,13 +1566,22 @@ void brw_oword_block_write_scratch(struct brw_compile *p,
       }
 
       brw_set_dest(p, insn, dest);
-      brw_set_src0(insn, brw_null_reg());
+      if (intel->gen >= 6) {
+        brw_set_src0(insn, mrf);
+      } else {
+        brw_set_src0(insn, brw_null_reg());
+      }
+
+      if (intel->gen >= 6)
+        msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
+      else
+        msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
 
       brw_set_dp_write_message(p->brw,
                               insn,
                               255, /* binding table index (255=stateless) */
                               msg_control,
-                              BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
+                              msg_type,
                               mlen,
                               GL_TRUE, /* header_present */
                               0, /* pixel scoreboard */
@@ -1585,9 +1606,13 @@ brw_oword_block_read_scratch(struct brw_compile *p,
                             int num_regs,
                             GLuint offset)
 {
+   struct intel_context *intel = &p->brw->intel;
    uint32_t msg_control;
    int rlen;
 
+   if (intel->gen >= 6)
+      offset /= 16;
+
    mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
    dest = retype(dest, BRW_REGISTER_TYPE_UW);
 
@@ -1624,14 +1649,18 @@ brw_oword_block_read_scratch(struct brw_compile *p,
       insn->header.destreg__conditionalmod = mrf.nr;
 
       brw_set_dest(p, insn, dest);     /* UW? */
-      brw_set_src0(insn, brw_null_reg());
+      if (intel->gen >= 6) {
+        brw_set_src0(insn, mrf);
+      } else {
+        brw_set_src0(insn, brw_null_reg());
+      }
 
       brw_set_dp_read_message(p->brw,
                              insn,
                              255, /* binding table index (255=stateless) */
                              msg_control,
                              BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
-                             1, /* target cache (render/scratch) */
+                             BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
                              1, /* msg_length */
                              rlen);
    }
@@ -1839,7 +1868,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p,
                           bind_table_index,
                           BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
                           msg_type,
-                          0, /* source cache = data cache */
+                          BRW_DATAPORT_READ_TARGET_DATA_CACHE,
                           2, /* msg_length */
                           1); /* response_length */
 }
index 479a91436a781e493fcde37e75f3c6131124855a..67f29ce1816c6c5eeabfa133f8f221b14c251b65 100644 (file)
@@ -228,8 +228,6 @@ fs_visitor::assign_regs()
 
       if (reg == -1) {
         fail("no register to spill\n");
-      } else if (intel->gen >= 6) {
-        fail("no spilling support on gen6 yet\n");
       } else {
         spill_reg(reg);
       }
index 2dd28fd1c581cb6f87f332856c8e6385837d7a95..ab731a807a7e2f5ccad8af046bf79e8060e88ca2 100644 (file)
@@ -185,6 +185,7 @@ static void do_wm_prog( struct brw_context *brw,
                        struct brw_fragment_program *fp, 
                        struct brw_wm_prog_key *key)
 {
+   struct intel_context *intel = &brw->intel;
    struct brw_wm_compile *c;
    const GLuint *program;
    GLuint program_size;
@@ -238,12 +239,26 @@ static void do_wm_prog( struct brw_context *brw,
 
    /* Scratch space is used for register spilling */
    if (c->last_scratch) {
+      uint32_t total_scratch;
+
       /* Per-thread scratch space is power-of-two sized. */
       for (c->prog_data.total_scratch = 1024;
           c->prog_data.total_scratch <= c->last_scratch;
           c->prog_data.total_scratch *= 2) {
         /* empty */
       }
+      total_scratch = c->prog_data.total_scratch * brw->wm_max_threads;
+
+      if (brw->wm.scratch_bo && total_scratch > brw->wm.scratch_bo->size) {
+        drm_intel_bo_unreference(brw->wm.scratch_bo);
+        brw->wm.scratch_bo = NULL;
+      }
+      if (brw->wm.scratch_bo == NULL) {
+        brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr,
+                                                "wm scratch",
+                                                total_scratch,
+                                                4096);
+      }
    }
    else {
       c->prog_data.total_scratch = 0;
index 5b5afc4626b5b02c7f665bfdd6d761f27ef9e522..be4b260a5ff881c0de884d3edb58b996ca37d797 100644 (file)
@@ -278,30 +278,10 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
 
 static void upload_wm_unit( struct brw_context *brw )
 {
-   struct intel_context *intel = &brw->intel;
    struct brw_wm_unit_key key;
    drm_intel_bo *reloc_bufs[3];
    wm_unit_populate_key(brw, &key);
 
-   /* Allocate the necessary scratch space if we haven't already.  Don't
-    * bother reducing the allocation later, since we use scratch so
-    * rarely.
-    */
-   if (key.total_scratch) {
-      GLuint total = key.total_scratch * brw->wm_max_threads;
-
-      if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size) {
-        drm_intel_bo_unreference(brw->wm.scratch_bo);
-        brw->wm.scratch_bo = NULL;
-      }
-      if (brw->wm.scratch_bo == NULL) {
-        brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr,
-                                                "wm scratch",
-                                                total,
-                                                4096);
-      }
-   }
-
    reloc_bufs[0] = brw->wm.prog_bo;
    reloc_bufs[1] = brw->wm.scratch_bo;
    reloc_bufs[2] = brw->wm.sampler_bo;
index f4f04750aeb5f86e7af4dbc818fa258cc59bbd29..8215cb15a9c22dbc7707fff986bec3089385fb71 100644 (file)
@@ -184,7 +184,12 @@ upload_wm_state(struct brw_context *brw)
    OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));
    OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BATCH(dw2);
-   OUT_BATCH(0); /* scratch space base offset */
+   if (brw->wm.prog_data->total_scratch) {
+      OUT_RELOC(brw->wm.scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+               ffs(brw->wm.prog_data->total_scratch) - 11);
+   } else {
+      OUT_BATCH(0);
+   }
    OUT_BATCH(dw4);
    OUT_BATCH(dw5);
    OUT_BATCH(dw6);