From 7015d2c249e1f7814bf5681ccd049e49e4d6495c Mon Sep 17 00:00:00 2001 From: =?utf8?q?Daniel=20Sch=C3=BCrmann?= Date: Mon, 20 Jul 2020 12:07:55 +0200 Subject: [PATCH] aco: fix scratch loads which cross element_size boundaries Previously, we've set element_size == 16 which causes loads from packed vec3 arrays to cross the boundary and return wrong data. This patch sets element_size = 4 and splits loads into single channel. Fixes all of dEQP-VK.subgroups.ballot_broadcast.* Cc: 20.1 Reviewed-by: Rhys Perry Part-of: --- .gitlab-ci/deqp-radv-fiji-aco-fails.txt | 75 ------------------- .gitlab-ci/deqp-radv-pitcairn-aco-fails.txt | 75 ------------------- .gitlab-ci/deqp-radv-polaris10-aco-fails.txt | 75 ------------------- .../compiler/aco_instruction_selection.cpp | 25 ++++--- 4 files changed, 14 insertions(+), 236 deletions(-) diff --git a/.gitlab-ci/deqp-radv-fiji-aco-fails.txt b/.gitlab-ci/deqp-radv-fiji-aco-fails.txt index 486716cc515..08aca7dd40c 100644 --- a/.gitlab-ci/deqp-radv-fiji-aco-fails.txt +++ b/.gitlab-ci/deqp-radv-fiji-aco-fails.txt @@ -27,78 +27,3 @@ dEQP-VK.rasterization.interpolation.basic.non_strict_lines_wide dEQP-VK.rasterization.interpolation.projected.lines_wide dEQP-VK.rasterization.interpolation.projected.non_strict_line_strip_wide dEQP-VK.rasterization.interpolation.projected.non_strict_lines_wide -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_bvec3 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_bvec3_requiredsubgroupsize64 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_dvec3 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_dvec3_requiredsubgroupsize64 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_i16vec3 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_i16vec3_requiredsubgroupsize64 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_i64vec3 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_i64vec3_requiredsubgroupsize64 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_i8vec3 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_ivec3 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_ivec3_requiredsubgroupsize64 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_u16vec3 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_u16vec3_requiredsubgroupsize64 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_u64vec3 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_u64vec3_requiredsubgroupsize64 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_u8vec3 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_uvec3 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_uvec3_requiredsubgroupsize64 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_vec3 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_vec3_requiredsubgroupsize64 -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_bvec3geometry -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_bvec3tess_control -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_bvec3tess_eval -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_bvec3vertex -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_dvec3geometry -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_dvec3tess_control -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_dvec3tess_eval -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_dvec3vertex -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_i16vec3geometry -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_i16vec3tess_control -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_i16vec3tess_eval -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_i16vec3vertex -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_i64vec3geometry -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_i64vec3tess_control -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_i64vec3tess_eval -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_i64vec3vertex -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_i8vec3geometry -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_i8vec3tess_control -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_i8vec3tess_eval -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_i8vec3vertex -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_ivec3geometry -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_ivec3tess_control -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_ivec3tess_eval -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_ivec3vertex -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_u16vec3geometry -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_u16vec3tess_control -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_u16vec3tess_eval -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_u16vec3vertex -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_u64vec3geometry -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_u64vec3tess_control -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_u64vec3tess_eval -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_u64vec3vertex -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_u8vec3geometry -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_u8vec3tess_control -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_u8vec3tess_eval -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_u8vec3vertex -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_uvec3geometry -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_uvec3tess_control -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_uvec3tess_eval -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_uvec3vertex -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_vec3geometry -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_vec3tess_control -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_vec3tess_eval -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_vec3vertex -dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcast_bvec3 -dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcast_dvec3 -dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcast_i16vec3 -dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcast_i64vec3 -dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcast_i8vec3 -dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcast_ivec3 -dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcast_u16vec3 -dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcast_u64vec3 -dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcast_u8vec3 -dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcast_uvec3 -dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcast_vec3 diff --git a/.gitlab-ci/deqp-radv-pitcairn-aco-fails.txt b/.gitlab-ci/deqp-radv-pitcairn-aco-fails.txt index 5ec617dcae7..c874e0da0f1 100644 --- a/.gitlab-ci/deqp-radv-pitcairn-aco-fails.txt +++ b/.gitlab-ci/deqp-radv-pitcairn-aco-fails.txt @@ -9,78 +9,3 @@ dEQP-VK.rasterization.interpolation.basic.non_strict_lines_wide dEQP-VK.rasterization.interpolation.projected.lines_wide dEQP-VK.rasterization.interpolation.projected.non_strict_line_strip_wide dEQP-VK.rasterization.interpolation.projected.non_strict_lines_wide -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_bvec3 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_bvec3_requiredsubgroupsize64 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_dvec3 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_dvec3_requiredsubgroupsize64 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_i16vec3 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_i16vec3_requiredsubgroupsize64 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_i64vec3 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_i64vec3_requiredsubgroupsize64 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_i8vec3 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_ivec3 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_ivec3_requiredsubgroupsize64 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_u16vec3 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_u16vec3_requiredsubgroupsize64 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_u64vec3 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_u64vec3_requiredsubgroupsize64 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_u8vec3 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_uvec3 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_uvec3_requiredsubgroupsize64 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_vec3 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_vec3_requiredsubgroupsize64 -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_bvec3geometry -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_bvec3tess_control -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_bvec3tess_eval -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_bvec3vertex -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_dvec3geometry -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_dvec3tess_control -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_dvec3tess_eval -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_dvec3vertex -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_i16vec3geometry -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_i16vec3tess_control -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_i16vec3tess_eval -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_i16vec3vertex -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_i64vec3geometry -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_i64vec3tess_control -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_i64vec3tess_eval -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_i64vec3vertex -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_i8vec3geometry -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_i8vec3tess_control -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_i8vec3tess_eval -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_i8vec3vertex -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_ivec3geometry -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_ivec3tess_control -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_ivec3tess_eval -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_ivec3vertex -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_u16vec3geometry -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_u16vec3tess_control -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_u16vec3tess_eval -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_u16vec3vertex -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_u64vec3geometry -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_u64vec3tess_control -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_u64vec3tess_eval -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_u64vec3vertex -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_u8vec3geometry -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_u8vec3tess_control -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_u8vec3tess_eval -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_u8vec3vertex -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_uvec3geometry -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_uvec3tess_control -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_uvec3tess_eval -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_uvec3vertex -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_vec3geometry -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_vec3tess_control -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_vec3tess_eval -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_vec3vertex -dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcast_bvec3 -dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcast_dvec3 -dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcast_i16vec3 -dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcast_i64vec3 -dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcast_i8vec3 -dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcast_ivec3 -dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcast_u16vec3 -dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcast_u64vec3 -dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcast_u8vec3 -dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcast_uvec3 -dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcast_vec3 diff --git a/.gitlab-ci/deqp-radv-polaris10-aco-fails.txt b/.gitlab-ci/deqp-radv-polaris10-aco-fails.txt index b921be647c0..3c810f18c74 100644 --- a/.gitlab-ci/deqp-radv-polaris10-aco-fails.txt +++ b/.gitlab-ci/deqp-radv-polaris10-aco-fails.txt @@ -7,78 +7,3 @@ dEQP-VK.rasterization.interpolation.basic.non_strict_lines_wide dEQP-VK.rasterization.interpolation.projected.lines_wide dEQP-VK.rasterization.interpolation.projected.non_strict_line_strip_wide dEQP-VK.rasterization.interpolation.projected.non_strict_lines_wide -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_bvec3 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_bvec3_requiredsubgroupsize64 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_dvec3 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_dvec3_requiredsubgroupsize64 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_i16vec3 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_i16vec3_requiredsubgroupsize64 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_i64vec3 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_i64vec3_requiredsubgroupsize64 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_i8vec3 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_ivec3 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_ivec3_requiredsubgroupsize64 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_u16vec3 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_u16vec3_requiredsubgroupsize64 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_u64vec3 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_u64vec3_requiredsubgroupsize64 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_u8vec3 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_uvec3 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_uvec3_requiredsubgroupsize64 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_vec3 -dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_vec3_requiredsubgroupsize64 -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_bvec3geometry -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_bvec3tess_control -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_bvec3tess_eval -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_bvec3vertex -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_dvec3geometry -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_dvec3tess_control -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_dvec3tess_eval -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_dvec3vertex -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_i16vec3geometry -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_i16vec3tess_control -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_i16vec3tess_eval -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_i16vec3vertex -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_i64vec3geometry -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_i64vec3tess_control -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_i64vec3tess_eval -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_i64vec3vertex -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_i8vec3geometry -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_i8vec3tess_control -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_i8vec3tess_eval -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_i8vec3vertex -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_ivec3geometry -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_ivec3tess_control -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_ivec3tess_eval -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_ivec3vertex -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_u16vec3geometry -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_u16vec3tess_control -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_u16vec3tess_eval -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_u16vec3vertex -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_u64vec3geometry -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_u64vec3tess_control -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_u64vec3tess_eval -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_u64vec3vertex -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_u8vec3geometry -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_u8vec3tess_control -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_u8vec3tess_eval -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_u8vec3vertex -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_uvec3geometry -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_uvec3tess_control -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_uvec3tess_eval -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_uvec3vertex -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_vec3geometry -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_vec3tess_control -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_vec3tess_eval -dEQP-VK.subgroups.ballot_broadcast.framebuffer.subgroupbroadcast_vec3vertex -dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcast_bvec3 -dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcast_dvec3 -dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcast_i16vec3 -dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcast_i64vec3 -dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcast_i8vec3 -dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcast_ivec3 -dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcast_u16vec3 -dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcast_u64vec3 -dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcast_u8vec3 -dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcast_uvec3 -dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcast_vec3 diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 3145f719b46..0bc0a5c5433 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -3231,7 +3231,9 @@ void emit_load(isel_context *ctx, Builder& bld, const LoadEmitInfo *info) /* align offset down if needed */ Operand aligned_offset = offset; + unsigned align = align_offset ? 1 << (ffs(align_offset) - 1) : align_mul; if (need_to_align_offset) { + align = 4; Temp offset_tmp = offset.isTemp() ? offset.getTemp() : Temp(); if (offset.isConstant()) { aligned_offset = Operand(offset.constantValue() & 0xfffffffcu); @@ -3251,7 +3253,6 @@ void emit_load(isel_context *ctx, Builder& bld, const LoadEmitInfo *info) Temp aligned_offset_tmp = aligned_offset.isTemp() ? aligned_offset.getTemp() : bld.copy(bld.def(s1), aligned_offset); - unsigned align = align_offset ? 1 << (ffs(align_offset) - 1) : align_mul; Temp val = callback(bld, info, aligned_offset_tmp, bytes_needed, align, reduced_const_offset, byte_align ? Temp() : info->dst); @@ -3313,7 +3314,7 @@ void emit_load(isel_context *ctx, Builder& bld, const LoadEmitInfo *info) if (num_tmps > 1) { aco_ptr vec{create_instruction( aco_opcode::p_create_vector, Format::PSEUDO, num_tmps, 1)}; - for (unsigned i = 0; i < num_vals; i++) + for (unsigned i = 0; i < num_tmps; i++) vec->operands[i] = Operand(tmp[i]); tmp[0] = bld.tmp(RegClass::get(reg_type, tmp_size)); vec->definitions[0] = Definition(tmp[0]); @@ -3512,10 +3513,10 @@ Temp mubuf_load_callback(Builder& bld, const LoadEmitInfo *info, unsigned bytes_size = 0; aco_opcode op; - if (bytes_needed == 1) { + if (bytes_needed == 1 || align_ % 2) { bytes_size = 1; op = aco_opcode::buffer_load_ubyte; - } else if (bytes_needed == 2) { + } else if (bytes_needed == 2 || align_ % 4) { bytes_size = 2; op = aco_opcode::buffer_load_ushort; } else if (bytes_needed <= 4) { @@ -3542,7 +3543,7 @@ Temp mubuf_load_callback(Builder& bld, const LoadEmitInfo *info, mubuf->can_reorder = info->can_reorder; mubuf->offset = const_offset; mubuf->swizzled = info->swizzle_component_size != 0; - RegClass rc = RegClass::get(RegType::vgpr, align(bytes_size, 4)); + RegClass rc = RegClass::get(RegType::vgpr, bytes_size); Temp val = dst_hint.id() && rc == dst_hint.regClass() ? dst_hint : bld.tmp(rc); mubuf->definitions[0] = Definition(val); bld.insert(std::move(mubuf)); @@ -3551,6 +3552,7 @@ Temp mubuf_load_callback(Builder& bld, const LoadEmitInfo *info, } static auto emit_mubuf_load = emit_load; +static auto emit_scratch_load = emit_load; Temp get_gfx6_global_rsrc(Builder& bld, Temp addr) { @@ -6849,7 +6851,7 @@ Temp get_scratch_resource(isel_context *ctx) scratch_addr = bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), scratch_addr, Operand(0u)); uint32_t rsrc_conf = S_008F0C_ADD_TID_ENABLE(1) | - S_008F0C_INDEX_STRIDE(ctx->program->wave_size == 64 ? 3 : 2);; + S_008F0C_INDEX_STRIDE(ctx->program->wave_size == 64 ? 3 : 2); if (ctx->program->chip_class >= GFX10) { rsrc_conf |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) | @@ -6860,9 +6862,9 @@ Temp get_scratch_resource(isel_context *ctx) S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); } - /* older generations need element size = 16 bytes. element size removed in GFX9 */ + /* older generations need element size = 4 bytes. element size removed in GFX9 */ if (ctx->program->chip_class <= GFX8) - rsrc_conf |= S_008F0C_ELEMENT_SIZE(3); + rsrc_conf |= S_008F0C_ELEMENT_SIZE(1); return bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), scratch_addr, Operand(-1u), Operand(rsrc_conf)); } @@ -6877,10 +6879,10 @@ void visit_load_scratch(isel_context *ctx, nir_intrinsic_instr *instr) { instr->dest.ssa.bit_size / 8u, rsrc}; info.align_mul = nir_intrinsic_align_mul(instr); info.align_offset = nir_intrinsic_align_offset(instr); - info.swizzle_component_size = 16; + info.swizzle_component_size = ctx->program->chip_class <= GFX8 ? 4 : 0; info.can_reorder = false; info.soffset = ctx->program->scratch_offset; - emit_mubuf_load(ctx, bld, &info); + emit_scratch_load(ctx, bld, &info); } void visit_store_scratch(isel_context *ctx, nir_intrinsic_instr *instr) { @@ -6895,8 +6897,9 @@ void visit_store_scratch(isel_context *ctx, nir_intrinsic_instr *instr) { unsigned write_count = 0; Temp write_datas[32]; unsigned offsets[32]; + unsigned swizzle_component_size = ctx->program->chip_class <= GFX8 ? 4 : 16; split_buffer_store(ctx, instr, false, RegType::vgpr, data, writemask, - 16, &write_count, write_datas, offsets); + swizzle_component_size, &write_count, write_datas, offsets); for (unsigned i = 0; i < write_count; i++) { aco_opcode op = get_buffer_store_op(false, write_datas[i].bytes()); -- 2.30.2