llvmpipe: fix race between draw and setting fragment shader.

[mesa.git] / src / gallium / drivers / llvmpipe / lp_state_cs.c
diff --git a/src/gallium/drivers/llvmpipe/lp_state_cs.c b/src/gallium/drivers/llvmpipe/lp_state_cs.c

index f4f021742d69aa288afb2ac7571a6b90afcdaf89..38210d444f55013671969a2a843227608a1518bd 100644 (file)
--- a/src/gallium/drivers/llvmpipe/lp_state_cs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_cs.c
@@ -46,11 +46,12 @@
  #include "lp_cs_tpool.h"
  #include "state_tracker/sw_winsys.h"
  #include "nir/nir_to_tgsi_info.h"
-
+#include "nir_serialize.h"
  struct lp_cs_job_info {
     unsigned grid_size[3];
     unsigned block_size[3];
     unsigned req_local_mem;
+   unsigned work_dim; /* set from info->work_dim in llvmpipe_launch_grid; cs_exec_fn passes it to the JIT function */
     struct lp_cs_exec *current;
  };
  
@@ -62,14 +63,14 @@ generate_compute(struct llvmpipe_context *lp,
     struct gallivm_state *gallivm = variant->gallivm;
     const struct lp_compute_shader_variant_key *key = &variant->key;
     char func_name[64], func_name_coro[64];
-   LLVMTypeRef arg_types[13];
+   LLVMTypeRef arg_types[17];
     LLVMTypeRef func_type, coro_func_type;
     LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
     LLVMValueRef context_ptr;
     LLVMValueRef x_size_arg, y_size_arg, z_size_arg;
     LLVMValueRef grid_x_arg, grid_y_arg, grid_z_arg;
     LLVMValueRef grid_size_x_arg, grid_size_y_arg, grid_size_z_arg;
-   LLVMValueRef thread_data_ptr;
+   LLVMValueRef work_dim_arg, thread_data_ptr;
     LLVMBasicBlockRef block;
     LLVMBuilderRef builder;
     struct lp_build_sampler_soa *sampler;
@@ -107,11 +108,15 @@ generate_compute(struct llvmpipe_context *lp,
     arg_types[7] = int32_type;                          /* grid_size_x */
     arg_types[8] = int32_type;                          /* grid_size_y */
     arg_types[9] = int32_type;                          /* grid_size_z */
-   arg_types[10] = variant->jit_cs_thread_data_ptr_type;  /* per thread data */
-   arg_types[11] = int32_type;
-   arg_types[12] = int32_type;
+   arg_types[10] = int32_type;                         /* work dim */
+   arg_types[11] = variant->jit_cs_thread_data_ptr_type;  /* per thread data */
+   arg_types[12] = int32_type;                         /* coro only - num X loops */
+   arg_types[13] = int32_type;                         /* coro only - partials */
+   arg_types[14] = int32_type;                         /* coro block_x_size */
+   arg_types[15] = int32_type;                         /* coro block_y_size */
+   arg_types[16] = int32_type;                         /* coro block_z_size */
     func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context),
-                                arg_types, ARRAY_SIZE(arg_types) - 2, 0);
+                                arg_types, ARRAY_SIZE(arg_types) - 5, 0);
  
     coro_func_type = LLVMFunctionType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0),
                                       arg_types, ARRAY_SIZE(arg_types), 0);
@@ -141,7 +146,8 @@ generate_compute(struct llvmpipe_context *lp,
     grid_size_x_arg = LLVMGetParam(function, 7);
     grid_size_y_arg = LLVMGetParam(function, 8);
     grid_size_z_arg = LLVMGetParam(function, 9);
-   thread_data_ptr  = LLVMGetParam(function, 10);
+   work_dim_arg = LLVMGetParam(function, 10);
+   thread_data_ptr  = LLVMGetParam(function, 11);
  
     lp_build_name(context_ptr, "context");
     lp_build_name(x_size_arg, "x_size");
@@ -153,6 +159,7 @@ generate_compute(struct llvmpipe_context *lp,
     lp_build_name(grid_size_x_arg, "grid_size_x");
     lp_build_name(grid_size_y_arg, "grid_size_y");
     lp_build_name(grid_size_z_arg, "grid_size_z");
+   lp_build_name(work_dim_arg, "work_dim");
     lp_build_name(thread_data_ptr, "thread_data");
  
     block = LLVMAppendBasicBlockInContext(gallivm->context, function, "entry");
@@ -193,7 +200,7 @@ generate_compute(struct llvmpipe_context *lp,
     lp_build_loop_begin(&loop_state[0], gallivm,
                         lp_build_const_int32(gallivm, 0)); /* x loop */
     {
-      LLVMValueRef args[13];
+      LLVMValueRef args[17];
        args[0] = context_ptr;
        args[1] = loop_state[0].counter;
        args[2] = loop_state[1].counter;
@@ -204,9 +211,13 @@ generate_compute(struct llvmpipe_context *lp,
        args[7] = grid_size_x_arg;
        args[8] = grid_size_y_arg;
        args[9] = grid_size_z_arg;
-      args[10] = thread_data_ptr;
-      args[11] = num_x_loop;
-      args[12] = partials;
+      args[10] = work_dim_arg;
+      args[11] = thread_data_ptr;
+      args[12] = num_x_loop;
+      args[13] = partials;
+      args[14] = x_size_arg;
+      args[15] = y_size_arg;
+      args[16] = z_size_arg;
  
        /* idx = (z * (size_x * size_y) + y * size_x + x */
        LLVMValueRef coro_hdl_idx = LLVMBuildMul(gallivm->builder, loop_state[2].counter,
@@ -226,7 +237,7 @@ generate_compute(struct llvmpipe_context *lp,
                                         lp_build_const_int32(gallivm, 0), "");
        /* first time here - call the coroutine function entry point */
        lp_build_if(&ifstate, gallivm, cmp);
-      LLVMValueRef coro_ret = LLVMBuildCall(gallivm->builder, coro, args, 13, "");
+      LLVMValueRef coro_ret = LLVMBuildCall(gallivm->builder, coro, args, 17, "");
        LLVMBuildStore(gallivm->builder, coro_ret, coro_entry);
        lp_build_else(&ifstate);
        /* subsequent calls for this invocation - check if done. */
@@ -258,6 +269,7 @@ generate_compute(struct llvmpipe_context *lp,
     LLVMBuildRetVoid(builder);
  
     /* This is stage (b) - generate the compute shader code inside the coroutine. */
+   LLVMValueRef block_x_size_arg, block_y_size_arg, block_z_size_arg;
     context_ptr  = LLVMGetParam(coro, 0);
     x_size_arg = LLVMGetParam(coro, 1);
     y_size_arg = LLVMGetParam(coro, 2);
@@ -268,9 +280,13 @@ generate_compute(struct llvmpipe_context *lp,
     grid_size_x_arg = LLVMGetParam(coro, 7);
     grid_size_y_arg = LLVMGetParam(coro, 8);
     grid_size_z_arg = LLVMGetParam(coro, 9);
-   thread_data_ptr  = LLVMGetParam(coro, 10);
-   num_x_loop = LLVMGetParam(coro, 11);
-   partials = LLVMGetParam(coro, 12);
+   work_dim_arg = LLVMGetParam(coro, 10);
+   thread_data_ptr  = LLVMGetParam(coro, 11);
+   num_x_loop = LLVMGetParam(coro, 12);
+   partials = LLVMGetParam(coro, 13);
+   block_x_size_arg = LLVMGetParam(coro, 14);
+   block_y_size_arg = LLVMGetParam(coro, 15);
+   block_z_size_arg = LLVMGetParam(coro, 16);
     block = LLVMAppendBasicBlockInContext(gallivm->context, coro, "entry");
     LLVMPositionBuilderAtEnd(builder, block);
     {
@@ -320,6 +336,13 @@ generate_compute(struct llvmpipe_context *lp,
        for (i = 0; i < 3; i++)
           system_values.grid_size = LLVMBuildInsertElement(builder, system_values.grid_size, gstids[i], lp_build_const_int32(gallivm, i), "");
  
+      system_values.work_dim = work_dim_arg;
+
+      LLVMValueRef bsize[3] = { block_x_size_arg, block_y_size_arg, block_z_size_arg };
+      system_values.block_size = LLVMGetUndef(LLVMVectorType(int32_type, 3));
+      for (i = 0; i < 3; i++)
+         system_values.block_size = LLVMBuildInsertElement(builder, system_values.block_size, bsize[i], lp_build_const_int32(gallivm, i), "");
+
        LLVMValueRef last_x_loop = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, x_size_arg, LLVMBuildSub(gallivm->builder, num_x_loop, lp_build_const_int32(gallivm, 1), ""), "");
        LLVMValueRef use_partial_mask = LLVMBuildAnd(gallivm->builder, last_x_loop, has_partials, "");
        struct lp_build_if_state if_state;
@@ -403,15 +426,26 @@ llvmpipe_create_compute_state(struct pipe_context *pipe,
        return NULL;
  
     shader->base.type = templ->ir_type;
-   if (templ->ir_type == PIPE_SHADER_IR_TGSI) {
+   if (templ->ir_type == PIPE_SHADER_IR_NIR_SERIALIZED) {
+      struct blob_reader reader;
+      const struct pipe_binary_program_header *hdr = templ->prog;
+
+      blob_reader_init(&reader, hdr->blob, hdr->num_bytes);
+      shader->base.ir.nir = nir_deserialize(NULL, pipe->screen->get_compiler_options(pipe->screen, PIPE_SHADER_IR_NIR, PIPE_SHADER_COMPUTE), &reader);
+      shader->base.type = PIPE_SHADER_IR_NIR;
+
+      pipe->screen->finalize_nir(pipe->screen, shader->base.ir.nir, false);
+   } else if (templ->ir_type == PIPE_SHADER_IR_NIR)
+      shader->base.ir.nir = (struct nir_shader *)templ->prog;
+
+   if (shader->base.type == PIPE_SHADER_IR_TGSI) {
        /* get/save the summary info for this shader */
        lp_build_tgsi_info(templ->prog, &shader->info);
  
        /* we need to keep a local copy of the tokens */
        shader->base.tokens = tgsi_dup_tokens(templ->prog);
     } else {
-      shader->base.ir.nir = (struct nir_shader *)templ->prog;
-      nir_tgsi_scan_shader(templ->prog, &shader->info.base, false);
+      nir_tgsi_scan_shader(shader->base.ir.nir, &shader->info.base, false);
     }
  
     shader->req_local_mem = templ->req_local_mem;
@@ -476,6 +510,12 @@ llvmpipe_delete_compute_state(struct pipe_context *pipe,
     struct lp_compute_shader *shader = cs;
     struct lp_cs_variant_list_item *li;
  
+   if (llvmpipe->cs == cs)
+      llvmpipe->cs = NULL;
+   for (unsigned i = 0; i < shader->max_global_buffers; i++)
+      pipe_resource_reference(&shader->global_buffers[i], NULL);
+   FREE(shader->global_buffers);
+
     /* Delete all the variants */
     li = first_elem(&shader->variants);
     while(!at_end(&shader->variants, li)) {
@@ -483,6 +523,8 @@ llvmpipe_delete_compute_state(struct pipe_context *pipe,
        llvmpipe_remove_cs_shader_variant(llvmpipe, li->base);
        li = next;
     }
+   if (shader->base.ir.nir)
+      ralloc_free(shader->base.ir.nir);
     tgsi_free_tokens(shader->base.tokens);
     FREE(shader);
  }
@@ -820,6 +862,8 @@ lp_csctx_set_sampler_views(struct lp_cs_context *csctx,
                 jit_tex->mip_offsets[0] = 0;
                 jit_tex->row_stride[0] = 0;
                 jit_tex->img_stride[0] = 0;
+               jit_tex->num_samples = 0;
+               jit_tex->sample_stride = 0;
              }
              else {
                 jit_tex->width = res->width0;
@@ -827,6 +871,8 @@ lp_csctx_set_sampler_views(struct lp_cs_context *csctx,
                 jit_tex->depth = res->depth0;
                 jit_tex->first_level = first_level;
                 jit_tex->last_level = last_level;
+               jit_tex->num_samples = res->nr_samples;
+               jit_tex->sample_stride = 0;
  
                 if (llvmpipe_resource_is_texture(res)) {
                    for (j = first_level; j <= last_level; j++) {
@@ -834,6 +880,7 @@ lp_csctx_set_sampler_views(struct lp_cs_context *csctx,
                       jit_tex->row_stride[j] = lp_tex->row_stride[j];
                       jit_tex->img_stride[j] = lp_tex->img_stride[j];
                    }
+                  jit_tex->sample_stride = lp_tex->sample_stride;
  
                    if (res->target == PIPE_TEXTURE_1D_ARRAY ||
                        res->target == PIPE_TEXTURE_2D_ARRAY ||
@@ -893,6 +940,8 @@ lp_csctx_set_sampler_views(struct lp_cs_context *csctx,
              jit_tex->height = res->height0;
              jit_tex->depth = res->depth0;
              jit_tex->first_level = jit_tex->last_level = 0;
+            jit_tex->num_samples = res->nr_samples;
+            jit_tex->sample_stride = 0;
              assert(jit_tex->base);
           }
        }
@@ -1002,6 +1051,7 @@ lp_csctx_set_cs_images(struct lp_cs_context *csctx,
           jit_image->width = res->width0;
           jit_image->height = res->height0;
           jit_image->depth = res->depth0;
+         jit_image->num_samples = res->nr_samples;
  
           if (llvmpipe_resource_is_texture(res)) {
              uint32_t mip_offset = lp_res->mip_offsets[image->u.tex.level];
@@ -1027,6 +1077,7 @@ lp_csctx_set_cs_images(struct lp_cs_context *csctx,
  
              jit_image->row_stride = lp_res->row_stride[image->u.tex.level];
              jit_image->img_stride = lp_res->img_stride[image->u.tex.level];
+            jit_image->sample_stride = lp_res->sample_stride;
              jit_image->base = (uint8_t *)jit_image->base + mip_offset;
           } else {
              unsigned view_blocksize = util_format_get_blocksize(image->format);
@@ -1162,7 +1213,7 @@ cs_exec_fn(void *init_data, int iter_idx, struct lp_cs_local_mem *lmem)
     variant->jit_function(&job_info->current->jit_context,
                           job_info->block_size[0], job_info->block_size[1], job_info->block_size[2],
                           grid_x, grid_y, grid_z,
-                         job_info->grid_size[0], job_info->grid_size[1], job_info->grid_size[2],
+                         job_info->grid_size[0], job_info->grid_size[1], job_info->grid_size[2], job_info->work_dim,
                           &thread_data);
  }
  
@@ -1210,6 +1261,7 @@ static void llvmpipe_launch_grid(struct pipe_context *pipe,
     job_info.block_size[0] = info->block[0];
     job_info.block_size[1] = info->block[1];
     job_info.block_size[2] = info->block[2];
+   job_info.work_dim = info->work_dim;
     job_info.req_local_mem = llvmpipe->cs->req_local_mem;
     job_info.current = &llvmpipe->csctx->cs.current;
  
@@ -1225,12 +1277,62 @@ static void llvmpipe_launch_grid(struct pipe_context *pipe,
     llvmpipe->pipeline_statistics.cs_invocations += num_tasks * info->block[0] * info->block[1] * info->block[2];
  }
  
+/* Stub installed as the context's set_compute_resources hook:
+ * it accepts its arguments and returns without reading any of them. */
+static void
+llvmpipe_set_compute_resources(struct pipe_context *pipe, unsigned start,
+                               unsigned count, struct pipe_surface **resources)
+{
+   /* Nothing to do. */
+}
+
+/**
+ * Bind a range of global buffers to the currently bound compute shader and
+ * turn each handle into a CPU address.
+ *
+ * \param pipe       context whose currently bound compute shader receives
+ *                   the bindings
+ * \param first      index of the first binding slot to update
+ * \param count      number of consecutive slots to update
+ * \param resources  buffers to bind, or NULL to release the references held
+ *                   in slots [first, first + count)
+ * \param handles    on entry *handles[i] is read as a 32-bit offset; on
+ *                   return sizeof(uintptr_t) bytes at handles[i] are
+ *                   overwritten with the address of that offset inside the
+ *                   resource's data, so each handle must have room for a
+ *                   uintptr_t
+ *
+ * The call is a no-op when no compute shader is bound or when the binding
+ * table cannot be grown.
+ */
+static void
+llvmpipe_set_global_binding(struct pipe_context *pipe,
+                            unsigned first, unsigned count,
+                            struct pipe_resource **resources,
+                            uint32_t **handles)
+{
+   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+   struct lp_compute_shader *cs = llvmpipe->cs;
+   unsigned i;
+
+   /* llvmpipe->cs is reset to NULL when the bound shader is deleted. */
+   if (!cs)
+      return;
+
+   if (first + count > cs->max_global_buffers) {
+      unsigned old_max = cs->max_global_buffers;
+      unsigned new_max = first + count;
+      void *grown;
+
+      /* Grow into a temporary and commit the new size only on success, so a
+       * failed realloc leaves the old table and its count intact;
+       * llvmpipe_delete_compute_state walks max_global_buffers entries.
+       */
+      grown = realloc(cs->global_buffers,
+                      new_max * sizeof(cs->global_buffers[0]));
+      if (!grown)
+         return;
+
+      cs->global_buffers = grown;
+      cs->max_global_buffers = new_max;
+
+      /* New slots start out unbound. */
+      memset(&cs->global_buffers[old_max], 0,
+             (new_max - old_max) * sizeof(cs->global_buffers[0]));
+   }
+
+   if (!resources) {
+      for (i = 0; i < count; i++)
+         pipe_resource_reference(&cs->global_buffers[first + i], NULL);
+      return;
+   }
+
+   for (i = 0; i < count; i++) {
+      struct llvmpipe_resource *lp_res;
+      uintptr_t va;
+      uint32_t offset;
+
+      pipe_resource_reference(&cs->global_buffers[first + i], resources[i]);
+
+      /* A NULL entry only clears the slot; there is no storage to point at. */
+      if (!resources[i])
+         continue;
+
+      lp_res = llvmpipe_resource(resources[i]);
+      offset = *handles[i];
+      va = (uintptr_t)((char *)lp_res->data + offset);
+      /* memcpy: the handle is declared uint32_t * but receives a uintptr_t. */
+      memcpy(handles[i], &va, sizeof(va));
+   }
+}
+
  void
  llvmpipe_init_compute_funcs(struct llvmpipe_context *llvmpipe)
  {
     llvmpipe->pipe.create_compute_state = llvmpipe_create_compute_state;
     llvmpipe->pipe.bind_compute_state = llvmpipe_bind_compute_state;
     llvmpipe->pipe.delete_compute_state = llvmpipe_delete_compute_state;
+   llvmpipe->pipe.set_compute_resources = llvmpipe_set_compute_resources; /* empty stub in this file */
+   llvmpipe->pipe.set_global_binding = llvmpipe_set_global_binding; /* binds global buffers and rewrites handles to addresses */
     llvmpipe->pipe.launch_grid = llvmpipe_launch_grid;
  }