llvmpipe: fix race between draw and setting fragment shader.
[mesa.git] / src / gallium / drivers / llvmpipe / lp_state_cs.c
index f4f021742d69aa288afb2ac7571a6b90afcdaf89..38210d444f55013671969a2a843227608a1518bd 100644 (file)
 #include "lp_cs_tpool.h"
 #include "state_tracker/sw_winsys.h"
 #include "nir/nir_to_tgsi_info.h"
-
+#include "nir_serialize.h"
 struct lp_cs_job_info {
    unsigned grid_size[3];
    unsigned block_size[3];
    unsigned req_local_mem;
+   unsigned work_dim; /* copied from info->work_dim in llvmpipe_launch_grid; cs_exec_fn forwards it to the JIT function */
    struct lp_cs_exec *current;
 };
 
@@ -62,14 +63,14 @@ generate_compute(struct llvmpipe_context *lp,
    struct gallivm_state *gallivm = variant->gallivm;
    const struct lp_compute_shader_variant_key *key = &variant->key;
    char func_name[64], func_name_coro[64];
-   LLVMTypeRef arg_types[13];
+   LLVMTypeRef arg_types[17];
    LLVMTypeRef func_type, coro_func_type;
    LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
    LLVMValueRef context_ptr;
    LLVMValueRef x_size_arg, y_size_arg, z_size_arg;
    LLVMValueRef grid_x_arg, grid_y_arg, grid_z_arg;
    LLVMValueRef grid_size_x_arg, grid_size_y_arg, grid_size_z_arg;
-   LLVMValueRef thread_data_ptr;
+   LLVMValueRef work_dim_arg, thread_data_ptr;
    LLVMBasicBlockRef block;
    LLVMBuilderRef builder;
    struct lp_build_sampler_soa *sampler;
@@ -107,11 +108,15 @@ generate_compute(struct llvmpipe_context *lp,
    arg_types[7] = int32_type;                          /* grid_size_x */
    arg_types[8] = int32_type;                          /* grid_size_y */
    arg_types[9] = int32_type;                          /* grid_size_z */
-   arg_types[10] = variant->jit_cs_thread_data_ptr_type;  /* per thread data */
-   arg_types[11] = int32_type;
-   arg_types[12] = int32_type;
+   arg_types[10] = int32_type;                         /* work dim */
+   arg_types[11] = variant->jit_cs_thread_data_ptr_type;  /* per thread data */
+   arg_types[12] = int32_type;                         /* coro only - num X loops */
+   arg_types[13] = int32_type;                         /* coro only - partials */
+   arg_types[14] = int32_type;                         /* coro block_x_size */
+   arg_types[15] = int32_type;                         /* coro block_y_size */
+   arg_types[16] = int32_type;                         /* coro block_z_size */
    func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context),
-                                arg_types, ARRAY_SIZE(arg_types) - 2, 0);
+                                arg_types, ARRAY_SIZE(arg_types) - 5, 0);
 
    coro_func_type = LLVMFunctionType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0),
                                      arg_types, ARRAY_SIZE(arg_types), 0);
@@ -141,7 +146,8 @@ generate_compute(struct llvmpipe_context *lp,
    grid_size_x_arg = LLVMGetParam(function, 7);
    grid_size_y_arg = LLVMGetParam(function, 8);
    grid_size_z_arg = LLVMGetParam(function, 9);
-   thread_data_ptr  = LLVMGetParam(function, 10);
+   work_dim_arg = LLVMGetParam(function, 10);
+   thread_data_ptr  = LLVMGetParam(function, 11);
 
    lp_build_name(context_ptr, "context");
    lp_build_name(x_size_arg, "x_size");
@@ -153,6 +159,7 @@ generate_compute(struct llvmpipe_context *lp,
    lp_build_name(grid_size_x_arg, "grid_size_x");
    lp_build_name(grid_size_y_arg, "grid_size_y");
    lp_build_name(grid_size_z_arg, "grid_size_z");
+   lp_build_name(work_dim_arg, "work_dim");
    lp_build_name(thread_data_ptr, "thread_data");
 
    block = LLVMAppendBasicBlockInContext(gallivm->context, function, "entry");
@@ -193,7 +200,7 @@ generate_compute(struct llvmpipe_context *lp,
    lp_build_loop_begin(&loop_state[0], gallivm,
                        lp_build_const_int32(gallivm, 0)); /* x loop */
    {
-      LLVMValueRef args[13];
+      LLVMValueRef args[17];
       args[0] = context_ptr;
       args[1] = loop_state[0].counter;
       args[2] = loop_state[1].counter;
@@ -204,9 +211,13 @@ generate_compute(struct llvmpipe_context *lp,
       args[7] = grid_size_x_arg;
       args[8] = grid_size_y_arg;
       args[9] = grid_size_z_arg;
-      args[10] = thread_data_ptr;
-      args[11] = num_x_loop;
-      args[12] = partials;
+      args[10] = work_dim_arg;
+      args[11] = thread_data_ptr;
+      args[12] = num_x_loop;
+      args[13] = partials;
+      args[14] = x_size_arg;
+      args[15] = y_size_arg;
+      args[16] = z_size_arg;
 
       /* idx = (z * (size_x * size_y) + y * size_x + x */
       LLVMValueRef coro_hdl_idx = LLVMBuildMul(gallivm->builder, loop_state[2].counter,
@@ -226,7 +237,7 @@ generate_compute(struct llvmpipe_context *lp,
                                        lp_build_const_int32(gallivm, 0), "");
       /* first time here - call the coroutine function entry point */
       lp_build_if(&ifstate, gallivm, cmp);
-      LLVMValueRef coro_ret = LLVMBuildCall(gallivm->builder, coro, args, 13, "");
+      LLVMValueRef coro_ret = LLVMBuildCall(gallivm->builder, coro, args, 17, "");
       LLVMBuildStore(gallivm->builder, coro_ret, coro_entry);
       lp_build_else(&ifstate);
       /* subsequent calls for this invocation - check if done. */
@@ -258,6 +269,7 @@ generate_compute(struct llvmpipe_context *lp,
    LLVMBuildRetVoid(builder);
 
    /* This is stage (b) - generate the compute shader code inside the coroutine. */
+   LLVMValueRef block_x_size_arg, block_y_size_arg, block_z_size_arg;
    context_ptr  = LLVMGetParam(coro, 0);
    x_size_arg = LLVMGetParam(coro, 1);
    y_size_arg = LLVMGetParam(coro, 2);
@@ -268,9 +280,13 @@ generate_compute(struct llvmpipe_context *lp,
    grid_size_x_arg = LLVMGetParam(coro, 7);
    grid_size_y_arg = LLVMGetParam(coro, 8);
    grid_size_z_arg = LLVMGetParam(coro, 9);
-   thread_data_ptr  = LLVMGetParam(coro, 10);
-   num_x_loop = LLVMGetParam(coro, 11);
-   partials = LLVMGetParam(coro, 12);
+   work_dim_arg = LLVMGetParam(coro, 10);
+   thread_data_ptr  = LLVMGetParam(coro, 11);
+   num_x_loop = LLVMGetParam(coro, 12);
+   partials = LLVMGetParam(coro, 13);
+   block_x_size_arg = LLVMGetParam(coro, 14);
+   block_y_size_arg = LLVMGetParam(coro, 15);
+   block_z_size_arg = LLVMGetParam(coro, 16);
    block = LLVMAppendBasicBlockInContext(gallivm->context, coro, "entry");
    LLVMPositionBuilderAtEnd(builder, block);
    {
@@ -320,6 +336,13 @@ generate_compute(struct llvmpipe_context *lp,
       for (i = 0; i < 3; i++)
          system_values.grid_size = LLVMBuildInsertElement(builder, system_values.grid_size, gstids[i], lp_build_const_int32(gallivm, i), "");
 
+      system_values.work_dim = work_dim_arg;
+
+      LLVMValueRef bsize[3] = { block_x_size_arg, block_y_size_arg, block_z_size_arg };
+      system_values.block_size = LLVMGetUndef(LLVMVectorType(int32_type, 3));
+      for (i = 0; i < 3; i++)
+         system_values.block_size = LLVMBuildInsertElement(builder, system_values.block_size, bsize[i], lp_build_const_int32(gallivm, i), "");
+
       LLVMValueRef last_x_loop = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, x_size_arg, LLVMBuildSub(gallivm->builder, num_x_loop, lp_build_const_int32(gallivm, 1), ""), "");
       LLVMValueRef use_partial_mask = LLVMBuildAnd(gallivm->builder, last_x_loop, has_partials, "");
       struct lp_build_if_state if_state;
@@ -403,15 +426,26 @@ llvmpipe_create_compute_state(struct pipe_context *pipe,
       return NULL;
 
    shader->base.type = templ->ir_type;
-   if (templ->ir_type == PIPE_SHADER_IR_TGSI) {
+   if (templ->ir_type == PIPE_SHADER_IR_NIR_SERIALIZED) {
+      struct blob_reader reader;
+      const struct pipe_binary_program_header *hdr = templ->prog;
+
+      blob_reader_init(&reader, hdr->blob, hdr->num_bytes);
+      shader->base.ir.nir = nir_deserialize(NULL, pipe->screen->get_compiler_options(pipe->screen, PIPE_SHADER_IR_NIR, PIPE_SHADER_COMPUTE), &reader);
+      shader->base.type = PIPE_SHADER_IR_NIR;
+
+      pipe->screen->finalize_nir(pipe->screen, shader->base.ir.nir, false);
+   } else if (templ->ir_type == PIPE_SHADER_IR_NIR)
+      shader->base.ir.nir = (struct nir_shader *)templ->prog;
+
+   if (shader->base.type == PIPE_SHADER_IR_TGSI) {
       /* get/save the summary info for this shader */
       lp_build_tgsi_info(templ->prog, &shader->info);
 
       /* we need to keep a local copy of the tokens */
       shader->base.tokens = tgsi_dup_tokens(templ->prog);
    } else {
-      shader->base.ir.nir = (struct nir_shader *)templ->prog;
-      nir_tgsi_scan_shader(templ->prog, &shader->info.base, false);
+      nir_tgsi_scan_shader(shader->base.ir.nir, &shader->info.base, false);
    }
 
    shader->req_local_mem = templ->req_local_mem;
@@ -476,6 +510,12 @@ llvmpipe_delete_compute_state(struct pipe_context *pipe,
    struct lp_compute_shader *shader = cs;
    struct lp_cs_variant_list_item *li;
 
+   if (llvmpipe->cs == cs)
+      llvmpipe->cs = NULL;
+   for (unsigned i = 0; i < shader->max_global_buffers; i++)
+      pipe_resource_reference(&shader->global_buffers[i], NULL);
+   FREE(shader->global_buffers);
+
    /* Delete all the variants */
    li = first_elem(&shader->variants);
    while(!at_end(&shader->variants, li)) {
@@ -483,6 +523,8 @@ llvmpipe_delete_compute_state(struct pipe_context *pipe,
       llvmpipe_remove_cs_shader_variant(llvmpipe, li->base);
       li = next;
    }
+   if (shader->base.ir.nir)
+      ralloc_free(shader->base.ir.nir);
    tgsi_free_tokens(shader->base.tokens);
    FREE(shader);
 }
@@ -820,6 +862,8 @@ lp_csctx_set_sampler_views(struct lp_cs_context *csctx,
                jit_tex->mip_offsets[0] = 0;
                jit_tex->row_stride[0] = 0;
                jit_tex->img_stride[0] = 0;
+               jit_tex->num_samples = 0;
+               jit_tex->sample_stride = 0;
             }
             else {
                jit_tex->width = res->width0;
@@ -827,6 +871,8 @@ lp_csctx_set_sampler_views(struct lp_cs_context *csctx,
                jit_tex->depth = res->depth0;
                jit_tex->first_level = first_level;
                jit_tex->last_level = last_level;
+               jit_tex->num_samples = res->nr_samples;
+               jit_tex->sample_stride = 0;
 
                if (llvmpipe_resource_is_texture(res)) {
                   for (j = first_level; j <= last_level; j++) {
@@ -834,6 +880,7 @@ lp_csctx_set_sampler_views(struct lp_cs_context *csctx,
                      jit_tex->row_stride[j] = lp_tex->row_stride[j];
                      jit_tex->img_stride[j] = lp_tex->img_stride[j];
                   }
+                  jit_tex->sample_stride = lp_tex->sample_stride;
 
                   if (res->target == PIPE_TEXTURE_1D_ARRAY ||
                       res->target == PIPE_TEXTURE_2D_ARRAY ||
@@ -893,6 +940,8 @@ lp_csctx_set_sampler_views(struct lp_cs_context *csctx,
             jit_tex->height = res->height0;
             jit_tex->depth = res->depth0;
             jit_tex->first_level = jit_tex->last_level = 0;
+            jit_tex->num_samples = res->nr_samples;
+            jit_tex->sample_stride = 0;
             assert(jit_tex->base);
          }
       }
@@ -1002,6 +1051,7 @@ lp_csctx_set_cs_images(struct lp_cs_context *csctx,
          jit_image->width = res->width0;
          jit_image->height = res->height0;
          jit_image->depth = res->depth0;
+         jit_image->num_samples = res->nr_samples;
 
          if (llvmpipe_resource_is_texture(res)) {
             uint32_t mip_offset = lp_res->mip_offsets[image->u.tex.level];
@@ -1027,6 +1077,7 @@ lp_csctx_set_cs_images(struct lp_cs_context *csctx,
 
             jit_image->row_stride = lp_res->row_stride[image->u.tex.level];
             jit_image->img_stride = lp_res->img_stride[image->u.tex.level];
+            jit_image->sample_stride = lp_res->sample_stride;
             jit_image->base = (uint8_t *)jit_image->base + mip_offset;
          } else {
             unsigned view_blocksize = util_format_get_blocksize(image->format);
@@ -1162,7 +1213,7 @@ cs_exec_fn(void *init_data, int iter_idx, struct lp_cs_local_mem *lmem)
    variant->jit_function(&job_info->current->jit_context,
                          job_info->block_size[0], job_info->block_size[1], job_info->block_size[2],
                          grid_x, grid_y, grid_z,
-                         job_info->grid_size[0], job_info->grid_size[1], job_info->grid_size[2],
+                         job_info->grid_size[0], job_info->grid_size[1], job_info->grid_size[2], job_info->work_dim,
                          &thread_data);
 }
 
@@ -1210,6 +1261,7 @@ static void llvmpipe_launch_grid(struct pipe_context *pipe,
    job_info.block_size[0] = info->block[0];
    job_info.block_size[1] = info->block[1];
    job_info.block_size[2] = info->block[2];
+   job_info.work_dim = info->work_dim;
    job_info.req_local_mem = llvmpipe->cs->req_local_mem;
    job_info.current = &llvmpipe->csctx->cs.current;
 
@@ -1225,12 +1277,62 @@ static void llvmpipe_launch_grid(struct pipe_context *pipe,
    llvmpipe->pipeline_statistics.cs_invocations += num_tasks * info->block[0] * info->block[1] * info->block[2];
 }
 
+static void
+llvmpipe_set_compute_resources(struct pipe_context *pipe,
+                               unsigned start, unsigned count,
+                               struct pipe_surface **resources)
+{
+   /* Intentionally empty: all arguments are ignored and no state is recorded. */
+   /* NOTE(review): presumably installed only so the hook is non-NULL - confirm against callers. */
+}
+
+/**
+ * Bind or unbind a range of global buffers on the currently bound compute
+ * shader and patch the caller's handles with CPU addresses.
+ *
+ * \param pipe       context; the bindings are stored on llvmpipe->cs
+ * \param first      index of the first slot to update
+ * \param count      number of consecutive slots to update
+ * \param resources  array of \p count resources to bind, or NULL to release
+ *                   the references held in the addressed slots
+ * \param handles    one pointer per resource; the 32-bit value it points at
+ *                   is read as an offset, then sizeof(uintptr_t) bytes holding
+ *                   (resource data + offset) are written back through it
+ *
+ * If the slot array cannot be grown the call returns without changing any
+ * binding; the previous array and its size stay valid.
+ */
+static void
+llvmpipe_set_global_binding(struct pipe_context *pipe,
+                            unsigned first, unsigned count,
+                            struct pipe_resource **resources,
+                            uint32_t **handles)
+{
+   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+   struct lp_compute_shader *cs = llvmpipe->cs;
+   unsigned i;
+
+   /* The slot array lives on the bound shader; without one there is nowhere
+    * to store the bindings. */
+   if (!cs)
+      return;
+
+   if (first + count > cs->max_global_buffers) {
+      unsigned old_max = cs->max_global_buffers;
+      unsigned new_max = first + count;
+      void *grown;
+
+      /* Keep the old pointer and size until realloc has succeeded, so a
+       * failure neither leaks the array nor leaves max_global_buffers
+       * describing slots that do not exist. */
+      grown = realloc(cs->global_buffers,
+                      new_max * sizeof(cs->global_buffers[0]));
+      if (!grown)
+         return;
+
+      cs->global_buffers = grown;
+      cs->max_global_buffers = new_max;
+
+      /* New slots start out unbound. */
+      memset(&cs->global_buffers[old_max], 0,
+             (new_max - old_max) * sizeof(cs->global_buffers[0]));
+   }
+
+   if (!resources) {
+      for (i = 0; i < count; i++)
+         pipe_resource_reference(&cs->global_buffers[first + i], NULL);
+      return;
+   }
+
+   for (i = 0; i < count; i++) {
+      struct llvmpipe_resource *lp_res;
+      uintptr_t va;
+      uint32_t offset;
+
+      pipe_resource_reference(&cs->global_buffers[first + i], resources[i]);
+
+      /* A NULL entry only clears the slot; there is no address to report. */
+      if (!resources[i])
+         continue;
+
+      lp_res = llvmpipe_resource(resources[i]);
+      offset = *handles[i];
+      va = (uintptr_t)((char *)lp_res->data + offset);
+      /* memcpy because the handle storage is typed as uint32_t but receives
+       * a pointer-sized value. */
+      memcpy(handles[i], &va, sizeof(va));
+   }
+}
+
 void
 llvmpipe_init_compute_funcs(struct llvmpipe_context *llvmpipe)
 {
    llvmpipe->pipe.create_compute_state = llvmpipe_create_compute_state;
    llvmpipe->pipe.bind_compute_state = llvmpipe_bind_compute_state;
    llvmpipe->pipe.delete_compute_state = llvmpipe_delete_compute_state;
+   llvmpipe->pipe.set_compute_resources = llvmpipe_set_compute_resources; /* empty stub in this file */
+   llvmpipe->pipe.set_global_binding = llvmpipe_set_global_binding; /* binds/unbinds global buffers on llvmpipe->cs */
    llvmpipe->pipe.launch_grid = llvmpipe_launch_grid;
 }