radeonsi: always prefer SWITCH_ON_EOP(0) on CIK
author: Marek Olšák <marek.olsak@amd.com>
Wed, 6 Aug 2014 01:18:06 +0000 (03:18 +0200)
committer: Marek Olšák <marek.olsak@amd.com>
Sat, 9 Aug 2014 21:41:15 +0000 (23:41 +0200)
The code is rewritten to take known constraints into account, while always
using SWITCH_ON_EOP(0) by default.

This should improve performance for multi-SE parts in theory.

A debug option, switch_on_eop, is also added for easier debugging. (If there
are hangs, enable the option. If the hangs go away, you have found the problem.)

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
v2: fix a typo, set max_se for evergreen GPUs according to the kernel driver

src/gallium/drivers/radeon/r600_pipe_common.c
src/gallium/drivers/radeon/r600_pipe_common.h
src/gallium/drivers/radeonsi/si_state_draw.c
src/gallium/winsys/radeon/drm/radeon_drm_winsys.c

index 3476021138ac84e9058224a120421ddbfaf22822..eb44d7255ba95abc97c6f1dc41fd80a507e9dd4c 100644 (file)
@@ -239,7 +239,6 @@ static const struct debug_named_value common_debug_options[] = {
        { "vm", DBG_VM, "Print virtual addresses when creating resources" },
        { "trace_cs", DBG_TRACE_CS, "Trace cs and write rlockup_<csid>.c file with faulty cs" },
 
-
        /* shaders */
        { "fs", DBG_FS, "Print fetch shaders" },
        { "vs", DBG_VS, "Print vertex shaders" },
@@ -254,6 +253,7 @@ static const struct debug_named_value common_debug_options[] = {
        { "noinvalrange", DBG_NO_DISCARD_RANGE, "Disable handling of INVALIDATE_RANGE map flags" },
        { "no2d", DBG_NO_2D_TILING, "Disable 2D tiling" },
        { "notiling", DBG_NO_TILING, "Disable tiling" },
+       { "switch_on_eop", DBG_SWITCH_ON_EOP, "Program WD/IA to switch on end-of-packet." },
 
        DEBUG_NAMED_VALUE_END /* must be last */
 };
index dcec2bb3b4b43ed1942232c26315b57e2ae4a8e7..ac69d5b8746795675a449cc91f1d37175cf1746a 100644 (file)
@@ -93,6 +93,7 @@
 #define DBG_NO_DISCARD_RANGE   (1 << 12)
 #define DBG_NO_2D_TILING       (1 << 13)
 #define DBG_NO_TILING          (1 << 14)
+#define DBG_SWITCH_ON_EOP      (1 << 15)
 /* The maximum allowed bit is 15. */
 
 #define R600_MAP_BUFFER_ALIGNMENT 64
index 4e808a3eedf7d57958373cb7d05a1af289967242..ae839ba1642a8eb6e9df3b49b9c44c482878456b 100644 (file)
@@ -401,25 +401,40 @@ static bool si_update_draw_info_state(struct si_context *sctx,
 
        if (sctx->b.chip_class >= CIK) {
                struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
-               bool wd_switch_on_eop = prim == V_008958_DI_PT_POLYGON ||
-                                       prim == V_008958_DI_PT_LINELOOP ||
-                                       prim == V_008958_DI_PT_TRIFAN ||
-                                       prim == V_008958_DI_PT_TRISTRIP_ADJ ||
-                                       info->primitive_restart ||
-                                       (rs ? rs->line_stipple_enable : false);
-               /* If the WD switch is false, the IA switch must be false too. */
-               bool ia_switch_on_eop = wd_switch_on_eop;
                unsigned primgroup_size = 64;
 
+               /* SWITCH_ON_EOP(0) is always preferable. */
+               bool wd_switch_on_eop = false;
+               bool ia_switch_on_eop = false;
+
+               /* WD_SWITCH_ON_EOP has no effect on GPUs with less than
+                * 4 shader engines. Set 1 to pass the assertion below.
+                * The other cases are hardware requirements. */
+               if (sctx->b.screen->info.max_se < 4 ||
+                   prim == V_008958_DI_PT_POLYGON ||
+                   prim == V_008958_DI_PT_LINELOOP ||
+                   prim == V_008958_DI_PT_TRIFAN ||
+                   prim == V_008958_DI_PT_TRISTRIP_ADJ ||
+                   info->primitive_restart)
+                       wd_switch_on_eop = true;
+
                /* Hawaii hangs if instancing is enabled and WD_SWITCH_ON_EOP is 0.
                 * We don't know that for indirect drawing, so treat it as
                 * always problematic. */
                if (sctx->b.family == CHIP_HAWAII &&
-                   (info->indirect || info->instance_count > 1)) {
+                   (info->indirect || info->instance_count > 1))
                        wd_switch_on_eop = true;
+
+               /* This is a hardware requirement. */
+               if ((rs && rs->line_stipple_enable) ||
+                   (sctx->b.screen->debug_flags & DBG_SWITCH_ON_EOP)) {
                        ia_switch_on_eop = true;
+                       wd_switch_on_eop = true;
                }
 
+               /* If the WD switch is false, the IA switch must be false too. */
+               assert(wd_switch_on_eop || !ia_switch_on_eop);
+
                si_pm4_set_reg(pm4, R_028B74_VGT_DISPATCH_DRAW_INDEX,
                               ib->index_size == 4 ? 0xFC000000 : 0xFC00);
 
index 910d06b5dba5eda2914d7204376a2b8a5181b46b..21567bbcf018cec78523d616a8bd4bb9a2cccab9 100644 (file)
@@ -392,6 +392,26 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
     radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SE, NULL,
                          &ws->info.max_se);
 
+    if (!ws->info.max_se) {
+        switch (ws->info.family) {
+        default:
+            ws->info.max_se = 1;
+            break;
+        case CHIP_CYPRESS:
+        case CHIP_HEMLOCK:
+        case CHIP_BARTS:
+        case CHIP_CAYMAN:
+        case CHIP_TAHITI:
+        case CHIP_PITCAIRN:
+        case CHIP_BONAIRE:
+            ws->info.max_se = 2;
+            break;
+        case CHIP_HAWAII:
+            ws->info.max_se = 4;
+            break;
+        }
+    }
+
     radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SH_PER_SE, NULL,
                          &ws->info.max_sh_per_se);