From c7680625c361a13d6a18d0b339b3a8e269962a2c Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 20 Jun 2020 00:24:23 -0400 Subject: [PATCH] ac,winsys/amdgpu: align IBs the same as the kernel Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/amd/common/ac_gpu_info.c | 17 +++++++++++++- src/amd/common/ac_gpu_info.h | 1 + src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 27 ++++++++++++++--------- 3 files changed, 33 insertions(+), 12 deletions(-) diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index dbf5c930f46..cbc6ed6faa4 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -561,6 +561,17 @@ bool ac_query_gpu_info(int fd, void *dev_p, info->num_rings[RING_VCN_ENC] = util_bitcount(vcn_enc.available_rings); info->num_rings[RING_VCN_JPEG] = util_bitcount(vcn_jpeg.available_rings); + /* This is "align_mask" copied from the kernel, maximums of all IP versions. */ + info->ib_pad_dw_mask[RING_GFX] = 0xff; + info->ib_pad_dw_mask[RING_COMPUTE] = 0xff; + info->ib_pad_dw_mask[RING_DMA] = 0xf; + info->ib_pad_dw_mask[RING_UVD] = 0xf; + info->ib_pad_dw_mask[RING_VCE] = 0x3f; + info->ib_pad_dw_mask[RING_UVD_ENC] = 0x3f; + info->ib_pad_dw_mask[RING_VCN_DEC] = 0xf; + info->ib_pad_dw_mask[RING_VCN_ENC] = 0x3f; + info->ib_pad_dw_mask[RING_VCN_JPEG] = 0xf; + /* The mere presence of CLEAR_STATE in the IB causes random GPU hangs * on GFX6. Some CLEAR_STATE cause asic hang on radeon kernel, etc. * SPI_VS_OUT_CONFIG. So only enable GFX7 CLEAR_STATE on amdgpu kernel. @@ -682,7 +693,11 @@ bool ac_query_gpu_info(int fd, void *dev_p, /* GFX10 and maybe GFX9 need this alignment for cache coherency. */ if (info->chip_class >= GFX9) ib_align = MAX2(ib_align, info->tcc_cache_line_size); - assert(ib_align); + /* The kernel pads gfx and compute IBs to 256 dwords since: + * 66f3b2d527154bd258a57c8815004b5964aa1cf5 + * Do the same. + */ + ib_align = MAX2(ib_align, 1024); info->ib_alignment = ib_align; if ((info->drm_minor >= 31 && diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h index 3f33ec5ae32..6022a199065 100644 --- a/src/amd/common/ac_gpu_info.h +++ b/src/amd/common/ac_gpu_info.h @@ -59,6 +59,7 @@ struct radeon_info { /* Features. */ bool has_graphics; /* false if the chip is compute-only */ uint32_t num_rings[NUM_RING_TYPES]; + uint32_t ib_pad_dw_mask[NUM_RING_TYPES]; bool has_clear_state; bool has_distributed_tess; bool has_dcc_constant_encode; diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index 80eb0f6b7f0..05e77b03325 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -1097,14 +1097,16 @@ static bool amdgpu_cs_check_space(struct radeon_cmdbuf *rcs, unsigned dw, /* This space was originally reserved. */ rcs->current.max_dw += cs_epilog_dw; - /* Pad with NOPs and add INDIRECT_BUFFER packet */ - while ((rcs->current.cdw & 7) != 4) + /* Pad with NOPs but leave 4 dwords for INDIRECT_BUFFER. */ + uint32_t ib_pad_dw_mask = cs->ctx->ws->info.ib_pad_dw_mask[cs->ring_type]; + while ((rcs->current.cdw & ib_pad_dw_mask) != ib_pad_dw_mask - 3) radeon_emit(rcs, PKT3_NOP_PAD); radeon_emit(rcs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0)); radeon_emit(rcs, va); radeon_emit(rcs, va >> 32); new_ptr_ib_size = &rcs->current.buf[rcs->current.cdw++]; + assert((rcs->current.cdw & ib_pad_dw_mask) == 0); assert((rcs->current.cdw & 7) == 0); assert(rcs->current.cdw <= rcs->current.max_dw); @@ -1664,25 +1666,28 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs, struct amdgpu_cs *cs = amdgpu_cs(rcs); struct amdgpu_winsys *ws = cs->ctx->ws; int error_code = 0; + uint32_t ib_pad_dw_mask = ws->info.ib_pad_dw_mask[cs->ring_type]; rcs->current.max_dw += amdgpu_cs_epilog_dws(cs); + /* Pad the IB according to the mask. */ switch (cs->ring_type) { case RING_DMA: - /* pad DMA ring to 8 DWs */ if (ws->info.chip_class <= GFX6) { - while (rcs->current.cdw & 7) + while (rcs->current.cdw & ib_pad_dw_mask) radeon_emit(rcs, 0xf0000000); /* NOP packet */ + } else { + while (rcs->current.cdw & ib_pad_dw_mask) + radeon_emit(rcs, 0x00000000); /* NOP packet */ } break; case RING_GFX: case RING_COMPUTE: - /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements */ if (ws->info.gfx_ib_pad_with_type2) { - while (rcs->current.cdw & 7) + while (rcs->current.cdw & ib_pad_dw_mask) radeon_emit(rcs, PKT2_NOP_PAD); } else { - while (rcs->current.cdw & 7) + while (rcs->current.cdw & ib_pad_dw_mask) radeon_emit(rcs, PKT3_NOP_PAD); } if (cs->ring_type == RING_GFX) @@ -1690,25 +1695,25 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs, /* Also pad secondary IBs. */ if (cs->compute_ib.ib_mapped) { - while (cs->compute_ib.base.current.cdw & 7) + while (cs->compute_ib.base.current.cdw & ib_pad_dw_mask) radeon_emit(&cs->compute_ib.base, PKT3_NOP_PAD); } break; case RING_UVD: case RING_UVD_ENC: - while (rcs->current.cdw & 15) + while (rcs->current.cdw & ib_pad_dw_mask) radeon_emit(rcs, 0x80000000); /* type2 nop packet */ break; case RING_VCN_JPEG: if (rcs->current.cdw % 2) assert(0); - while (rcs->current.cdw & 15) { + while (rcs->current.cdw & ib_pad_dw_mask) { radeon_emit(rcs, 0x60000000); /* nop packet */ radeon_emit(rcs, 0x00000000); } break; case RING_VCN_DEC: - while (rcs->current.cdw & 15) + while (rcs->current.cdw & ib_pad_dw_mask) radeon_emit(rcs, 0x81ff); /* nop packet */ break; default: -- 2.30.2