amd/common/gfx10: pad shader buffers for instruction prefetch
author Nicolai Hähnle <nicolai.haehnle@amd.com>
Thu, 29 Nov 2018 23:37:07 +0000 (00:37 +0100)
committer Marek Olšák <marek.olsak@amd.com>
Wed, 3 Jul 2019 19:51:12 +0000 (15:51 -0400)
Acked-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
src/amd/common/ac_rtld.c

index ebf64d9165885ec332c64e68e2dc7dadc90e14e2..55315913234fefae2f922ef2e25ad0f0d4db5ae0 100644 (file)
@@ -438,6 +438,25 @@ bool ac_rtld_open(struct ac_rtld_binary *binary,
 
        binary->rx_size += rx_size;
 
+       if (i.info->chip_class >= GFX10) {
+               /* In gfx10, the SQ fetches up to 3 cache lines of 16 dwords
+                * ahead of the PC, configurable by SH_MEM_CONFIG and
+                * S_INST_PREFETCH. This can cause two issues:
+                *
+                * (1) Crossing a page boundary to an unmapped page. The logic
+                *     does not distinguish between a required fetch and a "mere"
+                *     prefetch and will fault.
+                *
+                * (2) Prefetching instructions that will be changed for a
+                *     different shader.
+                *
+                * (2) is not currently an issue because we flush the I$ at IB
+                * boundaries, but (1) needs to be addressed. Due to buffer
+                * suballocation, we just play it safe.
+                */
+               binary->rx_size = align(binary->rx_size + 3 * 64, 64);
+       }
+
        return true;
 
 #undef report_if