info->num_rings[RING_VCN_ENC] = util_bitcount(vcn_enc.available_rings);
        info->num_rings[RING_VCN_JPEG] = util_bitcount(vcn_jpeg.available_rings);
 
+       /* This is the "align_mask" from the kernel, taking the maximum across all IP versions. */
+       info->ib_pad_dw_mask[RING_GFX] = 0xff;
+       info->ib_pad_dw_mask[RING_COMPUTE] = 0xff;
+       info->ib_pad_dw_mask[RING_DMA] = 0xf;
+       info->ib_pad_dw_mask[RING_UVD] = 0xf;
+       info->ib_pad_dw_mask[RING_VCE] = 0x3f;
+       info->ib_pad_dw_mask[RING_UVD_ENC] = 0x3f;
+       info->ib_pad_dw_mask[RING_VCN_DEC] = 0xf;
+       info->ib_pad_dw_mask[RING_VCN_ENC] = 0x3f;
+       info->ib_pad_dw_mask[RING_VCN_JPEG] = 0xf;
+
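
For clarity (illustrative, not part of the patch): each mask is the ring's required IB size alignment in dwords minus 1, so padding appends NOPs until (cdw & mask) == 0. A minimal sketch of that relationship, using a hypothetical helper name:

#include <stdint.h>

/* Number of NOP dwords needed to bring cdw to the next aligned boundary,
 * e.g. cdw = 0x123 with mask 0xff needs 0xdd NOPs to reach 0x200. */
static uint32_t pad_dwords_needed(uint32_t cdw, uint32_t pad_dw_mask)
{
   return (0u - cdw) & pad_dw_mask;
}
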
        /* The mere presence of CLEAR_STATE in the IB causes random GPU hangs
         * on GFX6. Some CLEAR_STATE cause asic hang on radeon kernel, etc.
         * SPI_VS_OUT_CONFIG. So only enable GFX7 CLEAR_STATE on amdgpu kernel.
         */
        /* GFX10 and maybe GFX9 need this alignment for cache coherency. */
        if (info->chip_class >= GFX9)
                ib_align = MAX2(ib_align, info->tcc_cache_line_size);
-       assert(ib_align);
+       /* The kernel pads gfx and compute IBs to 256 dwords (1024 bytes) since:
+        *   66f3b2d527154bd258a57c8815004b5964aa1cf5
+        * Do the same here; ib_alignment is in bytes.
+        */
+       ib_align = MAX2(ib_align, 1024);
        info->ib_alignment = ib_align;
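
For reference, the pad masks above are in dwords while ib_alignment is in bytes: with the gfx/compute mask of 0xff, a fully padded IB is a multiple of 256 dwords = 1024 bytes, which is why ib_alignment is raised to 1024. A minimal sketch of rounding an IB size up to that alignment (hypothetical helper, assuming a power-of-two alignment):

#include <stdint.h>

/* Round a size in bytes up to ib_alignment (1024 here). */
static uint32_t align_ib_size(uint32_t size_bytes, uint32_t ib_alignment)
{
   return (size_bytes + ib_alignment - 1) & ~(ib_alignment - 1);
}

/* e.g. align_ib_size(900, 1024) == 1024, align_ib_size(1025, 1024) == 2048 */
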
 
         if ((info->drm_minor >= 31 &&
 
    /* This space was originally reserved. */
    rcs->current.max_dw += cs_epilog_dw;
 
-   /* Pad with NOPs and add INDIRECT_BUFFER packet */
-   while ((rcs->current.cdw & 7) != 4)
+   /* Pad with NOPs but leave 4 dwords for INDIRECT_BUFFER. */
+   uint32_t ib_pad_dw_mask = cs->ctx->ws->info.ib_pad_dw_mask[cs->ring_type];
+   while ((rcs->current.cdw & ib_pad_dw_mask) != ib_pad_dw_mask - 3)
       radeon_emit(rcs, PKT3_NOP_PAD);
 
    radeon_emit(rcs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
    radeon_emit(rcs, va);
    radeon_emit(rcs, va >> 32);
    new_ptr_ib_size = &rcs->current.buf[rcs->current.cdw++];
+   assert((rcs->current.cdw & ib_pad_dw_mask) == 0);
 
-   assert((rcs->current.cdw & 7) == 0);
    assert(rcs->current.cdw <= rcs->current.max_dw);
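
A small illustrative check (hypothetical code, not part of the patch) of the chaining arithmetic above: padding stops 4 dwords short of the alignment boundary, so the INDIRECT_BUFFER packet ends the IB exactly on it.

#include <assert.h>
#include <stdint.h>

static void check_chain_padding(void)
{
   const uint32_t mask = 0xff;        /* RING_GFX / RING_COMPUTE */
   uint32_t cdw = 0x1f0;              /* arbitrary example IB size */

   while ((cdw & mask) != mask - 3)   /* pad with NOPs */
      cdw++;
   cdw += 4;                          /* INDIRECT_BUFFER: header, va lo/hi, ib_size */

   assert((cdw & mask) == 0);         /* 0x1f0 -> 0x1fc -> 0x200 */
}
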
    struct amdgpu_cs *cs = amdgpu_cs(rcs);
    struct amdgpu_winsys *ws = cs->ctx->ws;
    int error_code = 0;
+   uint32_t ib_pad_dw_mask = ws->info.ib_pad_dw_mask[cs->ring_type];
 
    rcs->current.max_dw += amdgpu_cs_epilog_dws(cs);
 
+   /* Pad the IB according to the mask. */
    switch (cs->ring_type) {
    case RING_DMA:
-      /* pad DMA ring to 8 DWs */
       if (ws->info.chip_class <= GFX6) {
-         while (rcs->current.cdw & 7)
+         while (rcs->current.cdw & ib_pad_dw_mask)
             radeon_emit(rcs, 0xf0000000); /* NOP packet */
+      } else {
+         while (rcs->current.cdw & ib_pad_dw_mask)
+            radeon_emit(rcs, 0x00000000); /* NOP packet */
       }
       break;
    case RING_GFX:
    case RING_COMPUTE:
-      /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements */
       if (ws->info.gfx_ib_pad_with_type2) {
-         while (rcs->current.cdw & 7)
+         while (rcs->current.cdw & ib_pad_dw_mask)
             radeon_emit(rcs, PKT2_NOP_PAD);
       } else {
-         while (rcs->current.cdw & 7)
+         while (rcs->current.cdw & ib_pad_dw_mask)
             radeon_emit(rcs, PKT3_NOP_PAD);
       }
       if (cs->ring_type == RING_GFX)
 
       /* Also pad secondary IBs. */
       if (cs->compute_ib.ib_mapped) {
-         while (cs->compute_ib.base.current.cdw & 7)
+         while (cs->compute_ib.base.current.cdw & ib_pad_dw_mask)
             radeon_emit(&cs->compute_ib.base, PKT3_NOP_PAD);
       }
       break;
    case RING_UVD:
    case RING_UVD_ENC:
-      while (rcs->current.cdw & 15)
+      while (rcs->current.cdw & ib_pad_dw_mask)
          radeon_emit(rcs, 0x80000000); /* type2 nop packet */
       break;
    case RING_VCN_JPEG:
       if (rcs->current.cdw % 2)
          assert(0);
-      while (rcs->current.cdw & 15) {
+      while (rcs->current.cdw & ib_pad_dw_mask) {
          radeon_emit(rcs, 0x60000000); /* nop packet */
          radeon_emit(rcs, 0x00000000);
       }
       break;
    case RING_VCN_DEC:
-      while (rcs->current.cdw & 15)
+      while (rcs->current.cdw & ib_pad_dw_mask)
          radeon_emit(rcs, 0x81ff); /* nop packet */
       break;
    default: