winsys/radeon: fold cs_set_flush_callback into cs_create

[mesa.git] / src / gallium / drivers / radeonsi / si_compute.c
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c

index b53b1f65211cce3df0bea90bdde5c3a320a3cc2f..c0637f6f7ece7a950556cb03004812f8bc669ad1 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -1,8 +1,33 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
  #include "util/u_memory.h"
  
  #include "../radeon/r600_cs.h"
  #include "si_pipe.h"
  #include "si_shader.h"
+#include "sid.h"
  
  #include "radeon_llvm_util.h"
  
@@ -23,11 +48,11 @@ struct si_pipe_compute {
         LLVMContextRef llvm_ctx;
  };
  
-static void *radeonsi_create_compute_state(
+static void *si_create_compute_state(
         struct pipe_context *ctx,
         const struct pipe_compute_state *cso)
  {
-       struct si_context *rctx = (struct si_context *)ctx;
+       struct si_context *sctx = (struct si_context *)ctx;
         struct si_pipe_compute *program =
                                         CALLOC_STRUCT(si_pipe_compute);
         const struct pipe_llvm_program_header *header;
@@ -39,7 +64,7 @@ static void *radeonsi_create_compute_state(
         header = cso->prog;
         code = cso->prog + sizeof(struct pipe_llvm_program_header);
  
-       program->ctx = rctx;
+       program->ctx = sctx;
         program->local_size = cso->req_local_mem;
         program->private_size = cso->req_private_mem;
         program->input_size = cso->req_input_mem;
@@ -51,27 +76,27 @@ static void *radeonsi_create_compute_state(
         for (i = 0; i < program->num_kernels; i++) {
                 LLVMModuleRef mod = radeon_llvm_get_kernel_module(program->llvm_ctx, i,
                                                         code, header->num_bytes);
-               si_compile_llvm(rctx, &program->kernels[i], mod);
+               si_compile_llvm(sctx, &program->kernels[i], mod);
                 LLVMDisposeModule(mod);
         }
  
         return program;
  }
  
-static void radeonsi_bind_compute_state(struct pipe_context *ctx, void *state)
+static void si_bind_compute_state(struct pipe_context *ctx, void *state)
  {
-       struct si_context *rctx = (struct si_context*)ctx;
-       rctx->cs_shader_state.program = (struct si_pipe_compute*)state;
+       struct si_context *sctx = (struct si_context*)ctx;
+       sctx->cs_shader_state.program = (struct si_pipe_compute*)state;
  }
  
-static void radeonsi_set_global_binding(
+static void si_set_global_binding(
         struct pipe_context *ctx, unsigned first, unsigned n,
         struct pipe_resource **resources,
         uint32_t **handles)
  {
         unsigned i;
-       struct si_context *rctx = (struct si_context*)ctx;
-       struct si_pipe_compute *program = rctx->cs_shader_state.program;
+       struct si_context *sctx = (struct si_context*)ctx;
+       struct si_pipe_compute *program = sctx->cs_shader_state.program;
  
         if (!resources) {
                 for (i = first; i < first + n; i++) {
@@ -82,19 +107,23 @@ static void radeonsi_set_global_binding(
  
         for (i = first; i < first + n; i++) {
                 uint64_t va;
+               uint32_t offset;
                 program->global_buffers[i] = resources[i];
                 va = r600_resource_va(ctx->screen, resources[i]);
+               offset = util_le32_to_cpu(*handles[i]);
+               va += offset;
+               va = util_cpu_to_le64(va);
                 memcpy(handles[i], &va, sizeof(va));
         }
  }
  
-static void radeonsi_launch_grid(
+static void si_launch_grid(
                 struct pipe_context *ctx,
                 const uint *block_layout, const uint *grid_layout,
                 uint32_t pc, const void *input)
  {
-       struct si_context *rctx = (struct si_context*)ctx;
-       struct si_pipe_compute *program = rctx->cs_shader_state.program;
+       struct si_context *sctx = (struct si_context*)ctx;
+       struct si_pipe_compute *program = sctx->cs_shader_state.program;
         struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
         struct r600_resource *kernel_args_buffer = NULL;
         unsigned kernel_args_size;
@@ -134,13 +163,13 @@ static void radeonsi_launch_grid(
  
         memcpy(kernel_args + (num_work_size_bytes / 4), input, program->input_size);
  
-       r600_upload_const_buffer(rctx, &kernel_args_buffer, (uint8_t*)kernel_args,
+       si_upload_const_buffer(sctx, &kernel_args_buffer, (uint8_t*)kernel_args,
                                         kernel_args_size, &kernel_args_offset);
         kernel_args_va = r600_resource_va(ctx->screen,
                                 (struct pipe_resource*)kernel_args_buffer);
         kernel_args_va += kernel_args_offset;
  
-       si_pm4_add_bo(pm4, kernel_args_buffer, RADEON_USAGE_READ);
+       si_pm4_add_bo(pm4, kernel_args_buffer, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
  
         si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0, kernel_args_va);
         si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 4, S_008F04_BASE_ADDRESS_HI (kernel_args_va >> 32) | S_008F04_STRIDE(0));
@@ -163,7 +192,7 @@ static void radeonsi_launch_grid(
                 if (!buffer) {
                         continue;
                 }
-               si_pm4_add_bo(pm4, buffer, RADEON_USAGE_READWRITE);
+               si_pm4_add_bo(pm4, buffer, RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RESOURCE_RW);
         }
  
         /* This register has been moved to R_00CD20_COMPUTE_MAX_WAVE_ID
@@ -171,7 +200,7 @@ static void radeonsi_launch_grid(
          * kernel if we want to use something other than the default value,
          * which is now 0x22f.
          */
-       if (rctx->b.chip_class <= SI) {
+       if (sctx->b.chip_class <= SI) {
                 /* XXX: This should be:
                  * (number of compute units) * 4 * (waves per simd) - 1 */
  
@@ -180,7 +209,7 @@ static void radeonsi_launch_grid(
         }
  
         shader_va = r600_resource_va(ctx->screen, (void *)shader->bo);
-       si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ);
+       si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
         si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, (shader_va >> 8) & 0xffffffff);
         si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, shader_va >> 40);
  
@@ -204,7 +233,7 @@ static void radeonsi_launch_grid(
          * the shader and 4 bytes allocated by the state tracker, then
          * we will set LDS_SIZE to 512 bytes rather than 256.
          */
-       if (rctx->b.chip_class <= SI) {
+       if (sctx->b.chip_class <= SI) {
                 lds_blocks += align(program->local_size, 256) >> 8;
         } else {
                 lds_blocks += align(program->local_size, 512) >> 9;
@@ -250,12 +279,12 @@ static void radeonsi_launch_grid(
         si_pm4_inval_shader_cache(pm4);
         si_cmd_surface_sync(pm4, pm4->cp_coher_cntl);
  
-       si_pm4_emit(rctx, pm4);
+       si_pm4_emit(sctx, pm4);
  
  #if 0
-       fprintf(stderr, "cdw: %i\n", rctx->cs->cdw);
-       for (i = 0; i < rctx->cs->cdw; i++) {
-               fprintf(stderr, "%4i : 0x%08X\n", i, rctx->cs->buf[i]);
+       fprintf(stderr, "cdw: %i\n", sctx->cs->cdw);
+       for (i = 0; i < sctx->cs->cdw; i++) {
+               fprintf(stderr, "%4i : 0x%08X\n", i, sctx->cs->buf[i]);
         }
  #endif
  
@@ -272,6 +301,12 @@ static void si_delete_compute_state(struct pipe_context *ctx, void* state){
         }
  
         if (program->kernels) {
+               for (int i = 0; i < program->num_kernels; i++){
+                       if (program->kernels[i].bo){
+                               si_pipe_shader_destroy(ctx, &program->kernels[i]);
+                       }
+               }
+               
                 FREE(program->kernels);
         }
  
@@ -287,13 +322,13 @@ static void si_set_compute_resources(struct pipe_context * ctx_,
                 unsigned start, unsigned count,
                 struct pipe_surface ** surfaces) { }
  
-void si_init_compute_functions(struct si_context *rctx)
+void si_init_compute_functions(struct si_context *sctx)
  {
-       rctx->b.b.create_compute_state = radeonsi_create_compute_state;
-       rctx->b.b.delete_compute_state = si_delete_compute_state;
-       rctx->b.b.bind_compute_state = radeonsi_bind_compute_state;
+       sctx->b.b.create_compute_state = si_create_compute_state;
+       sctx->b.b.delete_compute_state = si_delete_compute_state;
+       sctx->b.b.bind_compute_state = si_bind_compute_state;
  /*      ctx->context.create_sampler_view = evergreen_compute_create_sampler_view; */
-       rctx->b.b.set_compute_resources = si_set_compute_resources;
-       rctx->b.b.set_global_binding = radeonsi_set_global_binding;
-       rctx->b.b.launch_grid = radeonsi_launch_grid;
+       sctx->b.b.set_compute_resources = si_set_compute_resources;
+       sctx->b.b.set_global_binding = si_set_global_binding;
+       sctx->b.b.launch_grid = si_launch_grid;
  }