intel/compiler: do not copy-propagate strided regions to ddx/ddy arguments
authorIago Toral Quiroga <itoral@igalia.com>
Mon, 28 May 2018 11:03:24 +0000 (13:03 +0200)
committerIago Toral Quiroga <itoral@igalia.com>
Wed, 12 Dec 2018 07:09:45 +0000 (08:09 +0100)
The implementation of these opcodes in the generator assumes that their
arguments are packed, and it generates register regions based on that
assumption.

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
src/intel/compiler/brw_fs_copy_propagation.cpp

index ab34b63748e6cfa523b1e2fb4dcaf533ceddf3ed..a8ec1c346305b412ec45d572a79b1242710b7edf 100644 (file)
@@ -361,6 +361,20 @@ can_take_stride(fs_inst *inst, unsigned arg, unsigned stride,
    return true;
 }
 
+static bool
+instruction_requires_packed_data(fs_inst *inst)
+{
+   switch (inst->opcode) {
+   case FS_OPCODE_DDX_FINE:
+   case FS_OPCODE_DDX_COARSE:
+   case FS_OPCODE_DDY_FINE:
+   case FS_OPCODE_DDY_COARSE:
+      return true;
+   default:
+      return false;
+   }
+}
+
 bool
 fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
 {
@@ -407,6 +421,13 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
        inst->opcode == SHADER_OPCODE_GEN4_SCRATCH_WRITE)
       return false;
 
+   /* Some instructions implemented in the generator backend, such as
+    * derivatives, assume that their operands are packed so we can't
+    * generally propagate strided regions to them.
+    */
+   if (instruction_requires_packed_data(inst) && entry->src.stride > 1)
+      return false;
+
    /* Bail if the result of composing both strides would exceed the
     * hardware limit.
     */