disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags);
- if (device->rad_info.chip_class < GFX8 || !device->use_llvm)
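+ /* Only pre-GFX8 hardware remains non-conformant here; the ACO
+ * (non-LLVM) compiler path has presumably passed the Vulkan CTS on
+ * GFX8+.
+ */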
+ if (device->rad_info.chip_class < GFX8)
fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
radv_get_driver_uuid(&device->driver_uuid);
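+ /* VkConformanceVersion reports the Vulkan CTS version the driver has
+ * passed, presumably CTS 1.2.3.x here.
+ */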
p->conformanceVersion = (VkConformanceVersion) {
.major = 1,
.minor = 2,
- .subminor = 0,
+ .subminor = 3,
.patch = 0,
};
goto fail;
}
+ if (getenv("RADV_TRAP_HANDLER")) {
+ /* TODO: Add support for more hardware. */
+ assert(device->physical_device->rad_info.chip_class == GFX8);
+
+ fprintf(stderr, "**********************************************************************\n");
+ fprintf(stderr, "* WARNING: RADV_TRAP_HANDLER is experimental and only for debugging! *\n");
+ fprintf(stderr, "**********************************************************************\n");
+
+ /* To get the disassembly of the faulty shaders, we have to
+ * keep some shader info around.
+ */
+ keep_shader_info = true;
+
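+ /* radv_trap_handler_init() is assumed to compile the trap handler
+ * shader and to allocate the TMA buffer (device->tma_bo) used to
+ * save wave state.
+ */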
+ if (!radv_trap_handler_init(device))
+ goto fail;
+ }
+
device->keep_shader_info = keep_shader_info;
result = radv_device_init_meta(device);
if (result != VK_SUCCESS)
radv_thread_trace_finish(device);
+ radv_trap_handler_finish(device);
+
if (device->trace_bo)
device->ws->buffer_destroy(device->trace_bo);
VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
+ radv_trap_handler_finish(device);
+
radv_destroy_shader_slabs(device);
pthread_cond_destroy(&device->timeline_cond);
}
}
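+/* Program the trap handler base address (TBA) and trap memory address
+ * (TMA) registers so that trapping waves jump into the trap handler
+ * shader.
+ */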
+static void
+radv_emit_trap_handler(struct radv_queue *queue,
+ struct radeon_cmdbuf *cs,
+ struct radeon_winsys_bo *tma_bo)
+{
+ struct radv_device *device = queue->device;
+ struct radeon_winsys_bo *tba_bo;
+ uint64_t tba_va, tma_va;
+
+ if (!device->trap_handler_shader || !tma_bo)
+ return;
+
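+ /* TBA points at the trap handler shader code; TMA points at a buffer
+ * made available to the handler, which it is expected to use for
+ * saving wave state.
+ */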
+ tba_bo = device->trap_handler_shader->bo;
+
+ tba_va = radv_buffer_get_va(tba_bo) + device->trap_handler_shader->bo_offset;
+ tma_va = radv_buffer_get_va(tma_bo);
+
+ radv_cs_add_buffer(queue->device->ws, cs, tba_bo);
+ radv_cs_add_buffer(queue->device->ws, cs, tma_bo);
+
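+ /* The registers take 256-byte aligned addresses: each *_LO holds
+ * va >> 8 and each *_HI the remaining high bits (va >> 40).
+ */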
+ if (queue->queue_family_index == RADV_QUEUE_GENERAL) {
+ uint32_t regs[] = {R_00B000_SPI_SHADER_TBA_LO_PS,
+ R_00B100_SPI_SHADER_TBA_LO_VS,
+ R_00B200_SPI_SHADER_TBA_LO_GS,
+ R_00B300_SPI_SHADER_TBA_LO_ES,
+ R_00B400_SPI_SHADER_TBA_LO_HS,
+ R_00B500_SPI_SHADER_TBA_LO_LS};
+
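+ /* The GFX queue has one TBA/TMA register set per hardware shader
+ * stage; TBA_LO/TBA_HI/TMA_LO/TMA_HI are consecutive, so one 4-dword
+ * sequence programs a whole stage.
+ */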
+ for (unsigned i = 0; i < ARRAY_SIZE(regs); ++i) {
+ radeon_set_sh_reg_seq(cs, regs[i], 4);
+ radeon_emit(cs, tba_va >> 8);
+ radeon_emit(cs, tba_va >> 40);
+ radeon_emit(cs, tma_va >> 8);
+ radeon_emit(cs, tma_va >> 40);
+ }
+ } else {
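+ /* Compute queues expose a single TBA/TMA register set. */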
+ radeon_set_sh_reg_seq(cs, R_00B838_COMPUTE_TBA_LO, 4);
+ radeon_emit(cs, tba_va >> 8);
+ radeon_emit(cs, tba_va >> 40);
+ radeon_emit(cs, tma_va >> 8);
+ radeon_emit(cs, tma_va >> 40);
+ }
+}
+
static void
radv_init_graphics_state(struct radeon_cmdbuf *cs, struct radv_queue *queue)
{
compute_scratch_waves, compute_scratch_bo);
radv_emit_graphics_scratch(queue, cs, scratch_size_per_wave,
scratch_waves, scratch_bo);
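+ /* Program the trap handler addresses in the preamble so they are set
+ * before any user shaders execute.
+ */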
+ radv_emit_trap_handler(queue, cs, queue->device->tma_bo);
if (gds_bo)
radv_cs_add_buffer(queue->device->ws, cs, gds_bo);
if (queue->device->trace_bo) {
radv_check_gpu_hangs(queue, cs_array[j]);
}
+
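+ /* Check whether a wave hit the trap handler during this submission
+ * and report the faulty shaders if so.
+ */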
+ if (queue->device->tma_bo) {
+ radv_check_trap_handler(queue);
+ }
}
free(cs_array);