i965: Move down genX_upload_sbe in profiles.
authorMathias Fröhlich <mathias.froehlich@web.de>
Tue, 11 Dec 2018 17:45:43 +0000 (18:45 +0100)
committerMarge Bot <eric+marge@anholt.net>
Tue, 10 Mar 2020 14:28:36 +0000 (14:28 +0000)
Avoid looping over all VARYING_SLOT_MAX urb_setup array
entries from genX_upload_sbe. Prepare an array indirection
to the active entries of urb_setup already in the compile
step. On upload only walk the active arrays.

v2: Use uint8_t to store the attribute numbers.
v3: Change loop to build up the array indirection.
v4: Rebase.
v5: Style fix.

Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Signed-off-by: Mathias Fröhlich <Mathias.Froehlich@web.de>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/308>

src/intel/compiler/brw_compiler.h
src/intel/compiler/brw_fs.cpp
src/intel/compiler/brw_fs.h
src/intel/compiler/brw_fs_visitor.cpp
src/mesa/drivers/dri/i965/genX_state_upload.c

index 1b34ed2d6bbcb7a90c4245f77a79016c89610136..517afe96788c0617966d430f1c4cf08ed1e003da 100644 (file)
@@ -788,6 +788,14 @@ struct brw_wm_prog_data {
     * For varying slots that are not used by the FS, the value is -1.
     */
    int urb_setup[VARYING_SLOT_MAX];
+
+   /**
+    * Cache structure into the urb_setup array above that contains the
+    * attribute numbers of active varyings out of urb_setup.
+    * The actual count is stored in urb_setup_attribs_count.
+    */
+   uint8_t urb_setup_attribs[VARYING_SLOT_MAX];
+   uint8_t urb_setup_attribs_count;
 };
 
 /** Returns the SIMD width corresponding to a given KSP index
index f284a2b6644fc8d41a951038c705644d87fe447f..b2a563fd94ecf7e54b45e83c788f3872aed57b33 100644 (file)
@@ -1641,6 +1641,26 @@ fs_visitor::assign_curb_setup()
    this->first_non_payload_grf = payload.num_regs + prog_data->curb_read_length;
 }
 
+/*
+ * Build up an array of indices into the urb_setup array that
+ * references the active entries of the urb_setup array.
+ * Used to accelerate walking the active entries of the urb_setup array
+ * on each upload.
+ */
+void
+brw_compute_urb_setup_index(struct brw_wm_prog_data *wm_prog_data)
+{
+   /* Make sure uint8_t is sufficient */
+   STATIC_ASSERT(VARYING_SLOT_MAX <= 0xff);
+   uint8_t index = 0;
+   for (uint8_t attr = 0; attr < VARYING_SLOT_MAX; attr++) {
+      if (wm_prog_data->urb_setup[attr] >= 0) {
+         wm_prog_data->urb_setup_attribs[index++] = attr;
+      }
+   }
+   wm_prog_data->urb_setup_attribs_count = index;
+}
+
 static void
 calculate_urb_setup(const struct gen_device_info *devinfo,
                     const struct brw_wm_prog_key *key,
@@ -1728,6 +1748,8 @@ calculate_urb_setup(const struct gen_device_info *devinfo,
    }
 
    prog_data->num_varying_inputs = urb_next;
+
+   brw_compute_urb_setup_index(prog_data);
 }
 
 void
@@ -8164,6 +8186,8 @@ gen9_ps_header_only_workaround(struct brw_wm_prog_data *wm_prog_data)
 
    wm_prog_data->urb_setup[VARYING_SLOT_LAYER] = 0;
    wm_prog_data->num_varying_inputs = 1;
+
+   brw_compute_urb_setup_index(wm_prog_data);
 }
 
 bool
index e0717eebb0e3c06aff0440db55da8fa3c0139667..c09c4eb87590906919fe36942ee0a45a5f26047d 100644 (file)
@@ -654,5 +654,6 @@ enum brw_barycentric_mode brw_barycentric_mode(enum glsl_interp_mode mode,
 uint32_t brw_fb_write_msg_control(const fs_inst *inst,
                                   const struct brw_wm_prog_data *prog_data);
 
+void brw_compute_urb_setup_index(struct brw_wm_prog_data *wm_prog_data);
 
 #endif /* BRW_FS_H */
index fdaabf43c483f7ec1424c0ec2eeee0a6fd2c9047..e276227f0acf9b5b5b13e25d99f916b912239cb6 100644 (file)
@@ -122,6 +122,7 @@ fs_visitor::emit_dummy_fs()
    wm_prog_data->num_varying_inputs = devinfo->gen < 6 ? 1 : 0;
    memset(wm_prog_data->urb_setup, -1,
           sizeof(wm_prog_data->urb_setup[0]) * VARYING_SLOT_MAX);
+   brw_compute_urb_setup_index(wm_prog_data);
 
    /* We don't have any uniforms. */
    stage_prog_data->nr_params = 0;
index 635d314ef2b1a19659b9f72de840fa7fbb881f78..0bd124011f494c283e9a581f4efcb57a008ec1d3 100644 (file)
@@ -1099,11 +1099,11 @@ genX(calculate_attr_overrides)(const struct brw_context *brw,
     */
    bool drawing_points = brw_is_drawing_points(brw);
 
-   for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) {
+   for (uint8_t idx = 0; idx < wm_prog_data->urb_setup_attribs_count; idx++) {
+      uint8_t attr = wm_prog_data->urb_setup_attribs[idx];
       int input_index = wm_prog_data->urb_setup[attr];
 
-      if (input_index < 0)
-         continue;
+      assert(0 <= input_index);
 
       /* _NEW_POINT */
       bool point_sprite = false;