i965/vec4: Allow for dispatch_grf_start_reg to vary.
author Paul Berry <stereotype441@gmail.com>
Sat, 13 Jul 2013 14:09:54 +0000 (07:09 -0700)
committer Paul Berry <stereotype441@gmail.com>
Fri, 23 Aug 2013 18:02:47 +0000 (11:02 -0700)
Both 3DSTATE_VS and 3DSTATE_GS have a dispatch_grf_start_reg control,
which determines the register where the hardware delivers data sourced
from the URB (push constants followed by per-vertex input data).

For vertex shaders, we always set dispatch_grf_start_reg to 1, since
R1 is always the first register available for push constants in vertex
shaders.

For geometry shaders, we'll need the flexibility to set
dispatch_grf_start_reg to different values depending on the behaviour
of the geometry shader; if it accesses gl_PrimitiveIDIn, we'll need to
set it to 2 to allow the primitive ID to be delivered to the thread in
R1.

This patch eliminates the assumption that dispatch_grf_start_reg is
always 1.  In vec4_visitor, we record the regnum that was passed to
vec4_visitor::setup_uniforms() in prog_data for later use.  In
vec4_generator, we consult this value when converting an abstract
UNIFORM register to a concrete hardware register.  And in the code
that emits 3DSTATE_VS, we set dispatch_grf_start_reg based on the
value recorded in prog_data.

This will allow us to set dispatch_grf_start_reg to the appropriate
value when compiling geometry shaders.  Vertex shaders will continue
to always use a dispatch_grf_start_reg of 1.

v2: Make dispatch_grf_start_reg "unsigned" rather than "GLuint".

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Matt Turner <mattst88@gmail.com>
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_vec4.cpp
src/mesa/drivers/dri/i965/brw_vec4.h
src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
src/mesa/drivers/dri/i965/brw_vs_state.c
src/mesa/drivers/dri/i965/gen6_vs_state.c
src/mesa/drivers/dri/i965/gen7_vs_state.c

index dae3219a09a5f689e92c19bca1108ded643a3511..3f6cc6e6b65f93626c35c5dfef455bb83a4b0955 100644 (file)
@@ -480,6 +480,12 @@ struct brw_gs_prog_data {
 struct brw_vec4_prog_data {
    struct brw_vue_map vue_map;
 
+   /**
+    * Register where the thread expects to find input data from the URB
+    * (typically uniforms, followed by per-vertex inputs).
+    */
+   unsigned dispatch_grf_start_reg;
+
    GLuint curb_read_length;
    GLuint urb_read_length;
    GLuint total_grf;
index 36527cd7972bdb7cf9bf911aba8fd890f789c3ab..bfef8e0829bd6eb59c7ce21679d7cbb8c71d596b 100644 (file)
@@ -1260,6 +1260,8 @@ vec4_vs_visitor::setup_attributes(int payload_reg)
 int
 vec4_visitor::setup_uniforms(int reg)
 {
+   prog_data->dispatch_grf_start_reg = reg;
+
    /* The pre-gen6 VS requires that some push constants get loaded no
     * matter what, or the GPU would hang.
     */
@@ -1280,7 +1282,7 @@ vec4_visitor::setup_uniforms(int reg)
 
    prog_data->nr_params = this->uniforms * 4;
 
-   prog_data->curb_read_length = reg - 1;
+   prog_data->curb_read_length = reg - prog_data->dispatch_grf_start_reg;
 
    return reg;
 }
index 512b6b3089420c681359cc84754c0137530a9b22..587cb45c60c11be3cf24b427fa09f1b0b2ce7b0f 100644 (file)
@@ -206,7 +206,7 @@ public:
                    src_reg src2 = src_reg());
 
    struct brw_reg get_dst(void);
-   struct brw_reg get_src(int i);
+   struct brw_reg get_src(const struct brw_vec4_prog_data *prog_data, int i);
 
    dst_reg dst;
    src_reg src[3];
index ce9bcd08d387deb07ca6b100c181d867a0732c74..53b4bf2b30779704a69f6d48adbc164f5812668d 100644 (file)
@@ -66,7 +66,7 @@ vec4_instruction::get_dst(void)
 }
 
 struct brw_reg
-vec4_instruction::get_src(int i)
+vec4_instruction::get_src(const struct brw_vec4_prog_data *prog_data, int i)
 {
    struct brw_reg brw_reg;
 
@@ -100,7 +100,8 @@ vec4_instruction::get_src(int i)
       break;
 
    case UNIFORM:
-      brw_reg = stride(brw_vec4_grf(1 + (src[i].reg + src[i].reg_offset) / 2,
+      brw_reg = stride(brw_vec4_grf(prog_data->dispatch_grf_start_reg +
+                                    (src[i].reg + src[i].reg_offset) / 2,
                                    ((src[i].reg + src[i].reg_offset) % 2) * 4),
                       0, 4, 1);
       brw_reg = retype(brw_reg, src[i].type);
@@ -946,7 +947,7 @@ vec4_generator::generate_code(exec_list *instructions)
       }
 
       for (unsigned int i = 0; i < 3; i++) {
-        src[i] = inst->get_src(i);
+        src[i] = inst->get_src(this->prog_data, i);
       }
       dst = inst->get_dst();
 
index a8729df336cb16e2190115f7038deadab5eec98e..e5421f1c3a9e11115624bbe07c1775a02ee6aec7 100644 (file)
@@ -92,7 +92,8 @@ brw_upload_vs_unit(struct brw_context *brw)
    vs->thread3.urb_entry_read_length = brw->vs.prog_data->base.urb_read_length;
    vs->thread3.const_urb_entry_read_length
       = brw->vs.prog_data->base.curb_read_length;
-   vs->thread3.dispatch_grf_start_reg = 1;
+   vs->thread3.dispatch_grf_start_reg =
+      brw->vs.prog_data->base.dispatch_grf_start_reg;
    vs->thread3.urb_entry_read_offset = 0;
 
    /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM, BRW_NEW_VERTEX_PROGRAM */
index 4af7cda180a710fd83e17d56cdc67e777f064a12..c5f2fd03a504847d16f45b623b4d8b406ce12156 100644 (file)
@@ -159,7 +159,8 @@ upload_vs_state(struct brw_context *brw)
       OUT_BATCH(0);
    }
 
-   OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) |
+   OUT_BATCH((brw->vs.prog_data->base.dispatch_grf_start_reg <<
+              GEN6_VS_DISPATCH_START_GRF_SHIFT) |
             (brw->vs.prog_data->base.urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
             (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
 
index 64a19fa9d7323d455315381021e7056307dd962f..ce584f675f6e33ca7a8e773c266c568478bb1aff 100644 (file)
@@ -97,7 +97,8 @@ upload_vs_state(struct brw_context *brw)
       OUT_BATCH(0);
    }
 
-   OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) |
+   OUT_BATCH((brw->vs.prog_data->base.dispatch_grf_start_reg <<
+              GEN6_VS_DISPATCH_START_GRF_SHIFT) |
             (brw->vs.prog_data->base.urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
             (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));