i965/fs: Move the computation of register block count from unit to compile.
author Eric Anholt <eric@anholt.net>
Tue, 17 May 2011 15:55:11 +0000 (08:55 -0700)
committer Eric Anholt <eric@anholt.net>
Fri, 27 May 2011 15:19:27 +0000 (08:19 -0700)
No net change in overall code size, but the code size of the unit
update is down 0.8% pre-gen6.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_wm.c
src/mesa/drivers/dri/i965/brw_wm_state.c

index 1d2ef066db2f6dd8f83e513d99c33471f8d5ad4d..621b6f8990b2661ffac0e772a45cd3ab0f35be81 100644 (file)
@@ -231,8 +231,8 @@ struct brw_wm_prog_data {
 
    GLuint first_curbe_grf;
    GLuint first_curbe_grf_16;
-   GLuint total_grf;
-   GLuint total_grf_16;
+   GLuint reg_blocks;
+   GLuint reg_blocks_16;
    GLuint total_scratch;
 
    GLuint nr_params;       /**< number of float params/constants */
@@ -863,6 +863,17 @@ float convert_param(enum param_conversion conversion, float param)
    }
 }
 
+/**
+ * Pre-gen6 unit state takes a thread's GRF usage as a count of
+ * 16-register blocks, minus one.  Round reg_count up accordingly.
+ */
+static inline int
+brw_register_blocks(int reg_count)
+{
+   const int aligned_regs = ALIGN(reg_count, 16);
+   return aligned_regs / 16 - 1;
+}
+
 GLboolean brw_do_cubemap_normalize(struct exec_list *instructions);
 
 #endif
index 1cee37cfdb23dd2f7bbd65c40a55c10f8f7da457..49a4c4dcb04818ecfde5c336086a50d57baccf5e 100644 (file)
@@ -4170,9 +4170,9 @@ fs_visitor::run()
    generate_code();
 
    if (c->dispatch_width == 8) {
-      c->prog_data.total_grf = grf_used;
+      c->prog_data.reg_blocks = brw_register_blocks(grf_used);
    } else {
-      c->prog_data.total_grf_16 = grf_used;
+      c->prog_data.reg_blocks_16 = brw_register_blocks(grf_used);
       c->prog_data.prog_offset_16 = prog_offset_16;
 
       /* Make sure we didn't try to sneak in an extra uniform */
index 578890872f07d329dfde940e099e9de46ae24122..0f17c50c4f9a65d257e7a8407e3dd2567079f2e9 100644 (file)
@@ -115,7 +115,7 @@ brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
    brw_wm_pass2(c);
 
    /* how many general-purpose registers are used */
-   c->prog_data.total_grf = c->max_wm_grf;
+   c->prog_data.reg_blocks = brw_register_blocks(c->max_wm_grf);
 
    /* Emit GEN4 code.
     */
index a356711470a6d5bb7ac81a975ace6cf7a010e48b..ef98f8126dc7401134febbf518c434cbeaa839b0 100644 (file)
@@ -91,8 +91,8 @@ brw_prepare_wm_unit(struct brw_context *brw)
    }
 
    /* CACHE_NEW_WM_PROG */
-   wm->thread0.grf_reg_count = ALIGN(brw->wm.prog_data->total_grf, 16) / 16 - 1;
-   wm->wm9.grf_reg_count_2 = ALIGN(brw->wm.prog_data->total_grf_16, 16) / 16 - 1;
+   wm->thread0.grf_reg_count = brw->wm.prog_data->reg_blocks;
+   wm->wm9.grf_reg_count_2 = brw->wm.prog_data->reg_blocks_16;
    wm->thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */
    /* reloc */
    wm->wm9.kernel_start_pointer_2 = (brw->wm.prog_bo->offset +