ac/llvm: load 1 byte at a time if unaligned on gfx10

[mesa.git] / src / amd / llvm / ac_llvm_build.c
diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c

index 7fa2e38361e4730df9c9dcbbc73e02d0d423fe03..77681834ffae7c31889110bc5b80118cf90d4e83 100644 (file)
--- a/src/amd/llvm/ac_llvm_build.c
+++ b/src/amd/llvm/ac_llvm_build.c
@@ -1315,7 +1315,8 @@ ac_build_buffer_load_common(struct ac_llvm_context *ctx,
         char name[256], type_name[8];
  
         /* D16 is only supported on gfx8+ */
-       assert((channel_type != ctx->f16 && channel_type != ctx->i16) ||
+       assert(!use_format ||
+              (channel_type != ctx->f16 && channel_type != ctx->i16) ||
                ctx->chip_class >= GFX8);
  
         LLVMTypeRef type = func > 1 ? LLVMVectorType(channel_type, func) : channel_type;
@@ -1650,7 +1651,7 @@ ac_build_opencoded_load_format(struct ac_llvm_context *ctx,
         }
  
         int log_recombine = 0;
-       if (ctx->chip_class == GFX6 && !known_aligned) {
+       if ((ctx->chip_class == GFX6 || ctx->chip_class == GFX10) && !known_aligned) {
                 /* Avoid alignment restrictions by loading one byte at a time. */
                 load_num_channels <<= load_log_size;
                 log_recombine = load_log_size;
@@ -2055,6 +2056,8 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
  
         if (result_type == ctx->f16)
                 val = LLVMBuildZExt(ctx->builder, val, ctx->i32, "");
+       else if (result_type == ctx->v2f16)
+               val = LLVMBuildBitCast(ctx->builder, val, ctx->i32, "");
  
         for (unsigned i = 0; i < 4; ++i) {
                 tl_lanes[i] = i & mask;