r600: implement callstack workaround for evergreen.

author Dave Airlie <airlied@redhat.com>

Fri, 9 Mar 2018 06:03:53 +0000 (16:03 +1000)

committer Dave Airlie <airlied@redhat.com>

Mon, 12 Mar 2018 01:11:44 +0000 (11:11 +1000)
author Dave Airlie <airlied@redhat.com>
Fri, 9 Mar 2018 06:03:53 +0000 (16:03 +1000)
committer Dave Airlie <airlied@redhat.com>
Mon, 12 Mar 2018 01:11:44 +0000 (11:11 +1000)
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c

index 4b44f6614190b2062dfa087599150d0ea1b75aeb..6b5c42f86d9fbe26ecfd851288816b8ffd64752e 100644 (file)
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -377,7 +377,7 @@ struct r600_shader_tgsi_instruction {
  static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so, int stream, bool ind);
  static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
  static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
-static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason);
+static inline int callstack_push(struct r600_shader_ctx *ctx, unsigned reason);
  static void fc_pushlevel(struct r600_shader_ctx *ctx, int type);
  static int tgsi_else(struct r600_shader_ctx *ctx);
  static int tgsi_endif(struct r600_shader_ctx *ctx);
@@ -393,6 +393,15 @@ static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
  static int do_lds_fetch_values(struct r600_shader_ctx *ctx, unsigned temp_reg,
                                unsigned dst_reg, unsigned mask);
  
+static bool ctx_needs_stack_workaround_8xx(struct r600_shader_ctx *ctx)
+{
+       /* false for HEMLOCK, CYPRESS and JUNIPER; true for any other family */
+       const bool exempt = ctx->bc->family == CHIP_HEMLOCK ||
+                           ctx->bc->family == CHIP_CYPRESS ||
+                           ctx->bc->family == CHIP_JUNIPER;
+       return !exempt;
+}
+
  static int tgsi_last_instruction(unsigned writemask)
  {
         int i, lasti = 0;
@@ -10168,7 +10177,7 @@ static int pops(struct r600_shader_ctx *ctx, int pops)
         return 0;
  }
  
-static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx,
+static inline int callstack_update_max_depth(struct r600_shader_ctx *ctx,
                                                unsigned reason)
  {
         struct r600_stack_info *stack = &ctx->bc->stack;
@@ -10186,7 +10195,7 @@ static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx,
                 /* pre-r8xx: if any non-WQM PUSH instruction is invoked, 2 elements on
                  * the stack must be reserved to hold the current active/continue
                  * masks */
-               if (reason == FC_PUSH_VPM) {
+               if (reason == FC_PUSH_VPM || stack->push > 0) {
                         elements += 2;
                 }
                 break;
@@ -10212,7 +10221,7 @@ static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx,
                  *    NOTE: it seems we also need to reserve additional element in some
                  *    other cases, e.g. when we have 4 levels of PUSH_VPM in the shader,
                  *    then STACK_SIZE should be 2 instead of 1 */
-               if (reason == FC_PUSH_VPM) {
+               if (reason == FC_PUSH_VPM || stack->push > 0) {
                         elements += 1;
                 }
                 break;
@@ -10231,6 +10240,7 @@ static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx,
  
         if (entries > stack->max_entries)
                 stack->max_entries = entries;
+       return elements;
  }
  
  static inline void callstack_pop(struct r600_shader_ctx *ctx, unsigned reason)
@@ -10254,7 +10264,7 @@ static inline void callstack_pop(struct r600_shader_ctx *ctx, unsigned reason)
         }
  }
  
-static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason)
+static inline int callstack_push(struct r600_shader_ctx *ctx, unsigned reason)
  {
         switch (reason) {
         case FC_PUSH_VPM:
@@ -10262,6 +10272,7 @@ static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason)
                 break;
         case FC_PUSH_WQM:
                 ++ctx->bc->stack.push_wqm;
+               break;
         case FC_LOOP:
                 ++ctx->bc->stack.loop;
                 break;
@@ -10269,7 +10280,7 @@ static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason)
                 assert(0);
         }
  
-       callstack_update_max_depth(ctx, reason);
+       return callstack_update_max_depth(ctx, reason);
  }
  
  static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
@@ -10353,12 +10364,25 @@ static int emit_if(struct r600_shader_ctx *ctx, int opcode,
                    struct r600_bytecode_alu_src *src)
  {
         int alu_type = CF_OP_ALU_PUSH_BEFORE;
+       bool needs_workaround = false;
+       int elems = callstack_push(ctx, FC_PUSH_VPM);
+
+       if (ctx->bc->chip_class == CAYMAN && ctx->bc->stack.loop > 1)
+               needs_workaround = true;
+
+       if (ctx->bc->chip_class == EVERGREEN && ctx_needs_stack_workaround_8xx(ctx)) {
+               unsigned dmod1 = (elems - 1) % ctx->bc->stack.entry_size;
+               unsigned dmod2 = (elems) % ctx->bc->stack.entry_size;
+
+               if (elems && (!dmod1 || !dmod2))
+                       needs_workaround = true;
+       }
  
         /* There is a hardware bug on Cayman where a BREAK/CONTINUE followed by
          * LOOP_STARTxxx for nested loops may put the branch stack into a state
          * such that ALU_PUSH_BEFORE doesn't work as expected. Workaround this
          * by replacing the ALU_PUSH_BEFORE with a PUSH + ALU */
-       if (ctx->bc->chip_class == CAYMAN && ctx->bc->stack.loop > 1) {
+       if (needs_workaround) {
                 r600_bytecode_add_cfinst(ctx->bc, CF_OP_PUSH);
                 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
                 alu_type = CF_OP_ALU;
@@ -10370,7 +10394,6 @@ static int emit_if(struct r600_shader_ctx *ctx, int opcode,
  
         fc_pushlevel(ctx, FC_IF);
  
-       callstack_push(ctx, FC_PUSH_VPM);
         return 0;
  }
author	Dave Airlie <airlied@redhat.com>
	Fri, 9 Mar 2018 06:03:53 +0000 (16:03 +1000)
committer	Dave Airlie <airlied@redhat.com>
	Mon, 12 Mar 2018 01:11:44 +0000 (11:11 +1000)