llvmpipe: pass number of samplers into llvm sampler code.
[mesa.git] / src / gallium / drivers / llvmpipe / lp_state_fs.c
index f09c49a1ff37fd9acbe9195ccb2aeab2b63ae5cc..1d57f2553d24c49df93f9ac2947fece784828e5e 100644 (file)
 #include "lp_rast.h"
 #include "nir/nir_to_tgsi_info.h"
 
+#include "lp_screen.h"
+#include "compiler/nir/nir_serialize.h"
+#include "util/mesa-sha1.h"
 /** Fragment shader number (for debugging) */
 static unsigned fs_no = 0;
 
@@ -318,6 +321,33 @@ lp_build_sample_alpha_to_coverage(struct gallivm_state *gallivm,
       s_mask = LLVMBuildAnd(builder, s_mask, test, "");
       LLVMBuildStore(builder, s_mask, s_mask_ptr);
    }
+};
+
+struct lp_build_fs_llvm_iface { /* state fs_interp needs, wrapped around the generic lp_build_fs_iface */
+   struct lp_build_fs_iface base; /* keep first: fs_interp casts a pointer to this back to the whole struct */
+   struct lp_build_interp_soa_context *interp; /* handed to lp_build_interp_soa by fs_interp */
+   struct lp_build_for_loop_state *loop_state; /* its counter is passed to lp_build_interp_soa */
+   LLVMValueRef mask_store; /* forwarded unchanged to lp_build_interp_soa */
+};
+/* Interpolation callback (installed as base.interp_fn): returns lp_build_interp_soa's result for one attrib/chan. */
+static LLVMValueRef fs_interp(const struct lp_build_fs_iface *iface,
+                              struct lp_build_context *bld,
+                              unsigned attrib, unsigned chan,
+                              bool centroid, bool sample, /* select the interpolation location, see below */
+                              LLVMValueRef attrib_indir,
+                              LLVMValueRef offsets[2]) /* attrib_indir and offsets are forwarded unchanged */
+{ /* iface must be the .base member of a struct lp_build_fs_llvm_iface (see the cast below) */
+   struct lp_build_fs_llvm_iface *fs_iface = (struct lp_build_fs_llvm_iface *)iface;
+   struct lp_build_interp_soa_context *interp = fs_iface->interp;
+   unsigned loc = TGSI_INTERPOLATE_LOC_CENTER; /* used when neither flag is set */
+   if (centroid)
+      loc = TGSI_INTERPOLATE_LOC_CENTROID;
+   if (sample) /* checked last, so sample overrides centroid when both are set */
+      loc = TGSI_INTERPOLATE_LOC_SAMPLE;
+   /* Delegate to lp_build_interp_soa using the loop counter and mask store captured in the iface. */
+   return lp_build_interp_soa(interp, bld->gallivm, fs_iface->loop_state->counter,
+                              fs_iface->mask_store,
+                              attrib, chan, loc, attrib_indir, offsets);
 }
 
 /**
@@ -444,12 +474,17 @@ generate_fs_loop(struct gallivm_state *gallivm,
 
    memset(outputs, 0, sizeof outputs);
 
+   /* Allocate color storage for each fragment sample */
+   LLVMValueRef color_store_size = num_loop;
+   if (key->min_samples > 1)
+      color_store_size = LLVMBuildMul(builder, num_loop, lp_build_const_int32(gallivm, key->min_samples), "");
+
    for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
       for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
          out_color[cbuf][chan] = lp_build_array_alloca(gallivm,
                                                        lp_build_vec_type(gallivm,
                                                                          type),
-                                                       num_loop, "color");
+                                                       color_store_size, "color");
       }
    }
    if (dual_source_blend) {
@@ -458,7 +493,7 @@ generate_fs_loop(struct gallivm_state *gallivm,
          out_color[1][chan] = lp_build_array_alloca(gallivm,
                                                     lp_build_vec_type(gallivm,
                                                                       type),
-                                                    num_loop, "color1");
+                                                    color_store_size, "color1");
       }
    }
 
@@ -468,7 +503,9 @@ generate_fs_loop(struct gallivm_state *gallivm,
                            num_loop,
                            lp_build_const_int32(gallivm, 1));
 
+   LLVMValueRef sample_mask_in;
    if (key->multisample) {
+      sample_mask_in = lp_build_const_int_vec(gallivm, type, 0);
       /* create shader execution mask by combining all sample masks. */
       for (unsigned s = 0; s < key->coverage_samples; s++) {
          LLVMValueRef s_mask_idx = LLVMBuildMul(builder, num_loop, lp_build_const_int32(gallivm, s), "");
@@ -478,11 +515,18 @@ generate_fs_loop(struct gallivm_state *gallivm,
             mask_val = s_mask;
          else
             mask_val = LLVMBuildOr(builder, s_mask, mask_val, "");
+
+         LLVMValueRef mask_in = LLVMBuildAnd(builder, s_mask, lp_build_const_int_vec(gallivm, type, (1 << s)), "");
+         sample_mask_in = LLVMBuildOr(builder, sample_mask_in, mask_in, "");
       }
    } else {
+      sample_mask_in = lp_build_const_int_vec(gallivm, type, 1);
       mask_ptr = LLVMBuildGEP(builder, mask_store,
                               &loop_state.counter, 1, "mask_ptr");
       mask_val = LLVMBuildLoad(builder, mask_ptr, "");
+
+      LLVMValueRef mask_in = LLVMBuildAnd(builder, mask_val, lp_build_const_int_vec(gallivm, type, 1), "");
+      sample_mask_in = LLVMBuildOr(builder, sample_mask_in, mask_in, "");
    }
 
    /* 'mask' will control execution based on quad's pixel alive/killed state */
@@ -615,23 +659,55 @@ generate_fs_loop(struct gallivm_state *gallivm,
       tmp_s_mask_or = LLVMBuildLoad(builder, s_mask_or, "");
       lp_build_mask_update(&mask, tmp_s_mask_or);
 
-      /* for multisample Z needs to be re interpolated at pixel center */
-      lp_build_interp_soa_update_pos_dyn(interp, gallivm, loop_state.counter, NULL);
+      if (key->min_samples == 1) {
+         /* for multisample Z needs to be re interpolated at pixel center */
+         lp_build_interp_soa_update_pos_dyn(interp, gallivm, loop_state.counter, NULL);
+         lp_build_mask_update(&mask, tmp_s_mask_or);
+      }
    }
 
    LLVMValueRef out_sample_mask_storage = NULL;
    if (shader->info.base.writes_samplemask) {
       out_sample_mask_storage = lp_build_alloca(gallivm, int_vec_type, "write_mask");
+      if (key->min_samples > 1)
+         LLVMBuildStore(builder, LLVMConstNull(int_vec_type), out_sample_mask_storage);
    }
+
+   if (key->multisample && key->min_samples > 1) {
+      lp_build_for_loop_begin(&sample_loop_state, gallivm,
+                              lp_build_const_int32(gallivm, 0),
+                              LLVMIntULT,
+                              lp_build_const_int32(gallivm, key->min_samples),
+                              lp_build_const_int32(gallivm, 1));
+
+      LLVMValueRef s_mask_idx = LLVMBuildMul(builder, sample_loop_state.counter, num_loop, "");
+      s_mask_idx = LLVMBuildAdd(builder, s_mask_idx, loop_state.counter, "");
+      s_mask_ptr = LLVMBuildGEP(builder, mask_store, &s_mask_idx, 1, "");
+      s_mask = LLVMBuildLoad(builder, s_mask_ptr, "");
+      lp_build_mask_force(&mask, s_mask);
+      lp_build_interp_soa_update_pos_dyn(interp, gallivm, loop_state.counter, sample_loop_state.counter);
+      system_values.sample_id = sample_loop_state.counter;
+   } else
+      system_values.sample_id = lp_build_const_int32(gallivm, 0);
+
+   system_values.sample_mask_in = sample_mask_in;
    system_values.sample_pos = sample_pos_array;
 
-   lp_build_interp_soa_update_inputs_dyn(interp, gallivm, loop_state.counter, mask_store, NULL);
+   lp_build_interp_soa_update_inputs_dyn(interp, gallivm, loop_state.counter, mask_store, sample_loop_state.counter);
+
+   struct lp_build_fs_llvm_iface fs_iface = {
+     .base.interp_fn = fs_interp,
+     .interp = interp,
+     .loop_state = &loop_state,
+     .mask_store = mask_store,
+   };
 
    struct lp_build_tgsi_params params;
    memset(&params, 0, sizeof(params));
 
    params.type = type;
    params.mask = &mask;
+   params.fs_iface = &fs_iface.base;
    params.consts_ptr = consts_ptr;
    params.const_sizes_ptr = num_consts_ptr;
    params.system_values = &system_values;
@@ -714,18 +790,51 @@ generate_fs_loop(struct gallivm_state *gallivm,
 
       assert(smaski >= 0);
       output_smask = LLVMBuildLoad(builder, outputs[smaski][0], "smask");
-      /*
-       * Pixel is alive according to the first sample in the mask.
-       */
       output_smask = LLVMBuildBitCast(builder, output_smask, smask_bld.vec_type, "");
-      if (!key->multisample) {
-         output_smask = lp_build_and(&smask_bld, output_smask, smask_bld.one);
-         output_smask = lp_build_cmp(&smask_bld, PIPE_FUNC_NOTEQUAL, output_smask, smask_bld.zero);
-         lp_build_mask_update(&mask, output_smask);
+
+      if (key->min_samples > 1) {
+         /* only the bit corresponding to this sample is to be used. */
+         LLVMValueRef tmp_mask = LLVMBuildLoad(builder, out_sample_mask_storage, "tmp_mask");
+         LLVMValueRef out_smask_idx = LLVMBuildShl(builder, lp_build_const_int32(gallivm, 1), sample_loop_state.counter, "");
+         LLVMValueRef smask_bit = LLVMBuildAnd(builder, output_smask, lp_build_broadcast(gallivm, int_vec_type, out_smask_idx), "");
+         output_smask = LLVMBuildOr(builder, tmp_mask, smask_bit, "");
       }
+
       LLVMBuildStore(builder, output_smask, out_sample_mask_storage);
    }
 
+   /* Color write - per fragment sample */
+   for (attrib = 0; attrib < shader->info.base.num_outputs; ++attrib)
+   {
+      unsigned cbuf = shader->info.base.output_semantic_index[attrib];
+      if ((shader->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR) &&
+           ((cbuf < key->nr_cbufs) || (cbuf == 1 && dual_source_blend)))
+      {
+         for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
+            if(outputs[attrib][chan]) {
+               /* XXX: just initialize outputs to point at colors[] and
+                * skip this.
+                */
+               LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
+               LLVMValueRef color_ptr;
+               LLVMValueRef color_idx = loop_state.counter;
+               if (key->min_samples > 1)
+                  color_idx = LLVMBuildAdd(builder, color_idx,
+                                           LLVMBuildMul(builder, sample_loop_state.counter, num_loop, ""), "");
+               color_ptr = LLVMBuildGEP(builder, out_color[cbuf][chan],
+                                        &color_idx, 1, "");
+               lp_build_name(out, "color%u.%c", attrib, "rgba"[chan]);
+               LLVMBuildStore(builder, out, color_ptr);
+            }
+         }
+      }
+   }
+
+   if (key->multisample && key->min_samples > 1) {
+      LLVMBuildStore(builder, lp_build_mask_value(&mask), s_mask_ptr);
+      lp_build_for_loop_end(&sample_loop_state);
+   }
+
    if (key->multisample) {
       /* execute depth test for each sample */
       lp_build_for_loop_begin(&sample_loop_state, gallivm,
@@ -740,7 +849,8 @@ generate_fs_loop(struct gallivm_state *gallivm,
 
       /* combine the execution mask post fragment shader with the coverage mask. */
       s_mask = LLVMBuildLoad(builder, s_mask_ptr, "");
-      s_mask = LLVMBuildAnd(builder, s_mask, lp_build_mask_value(&mask), "");
+      if (key->min_samples == 1)
+         s_mask = LLVMBuildAnd(builder, s_mask, lp_build_mask_value(&mask), "");
 
       /* if the shader writes sample mask use that */
       if (shader->info.base.writes_samplemask) {
@@ -835,40 +945,18 @@ generate_fs_loop(struct gallivm_state *gallivm,
                                             z_value, s_value);
    }
 
-   if (key->multisample) {
-      /* store the sample mask for this loop */
-      LLVMBuildStore(builder, s_mask, s_mask_ptr);
-      lp_build_for_loop_end(&sample_loop_state);
-   }
-
-   /* Color write  */
-   for (attrib = 0; attrib < shader->info.base.num_outputs; ++attrib)
-   {
-      unsigned cbuf = shader->info.base.output_semantic_index[attrib];
-      if ((shader->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR) &&
-           ((cbuf < key->nr_cbufs) || (cbuf == 1 && dual_source_blend)))
-      {
-         for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
-            if(outputs[attrib][chan]) {
-               /* XXX: just initialize outputs to point at colors[] and
-                * skip this.
-                */
-               LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
-               LLVMValueRef color_ptr;
-               color_ptr = LLVMBuildGEP(builder, out_color[cbuf][chan],
-                                        &loop_state.counter, 1, "");
-               lp_build_name(out, "color%u.%c", attrib, "rgba"[chan]);
-               LLVMBuildStore(builder, out, color_ptr);
-            }
-         }
-      }
-   }
-
    if (key->occlusion_count) {
       LLVMValueRef counter = lp_jit_thread_data_counter(gallivm, thread_data_ptr);
       lp_build_name(counter, "counter");
+
       lp_build_occlusion_count(gallivm, type,
-                               lp_build_mask_value(&mask), counter);
+                               key->multisample ? s_mask : lp_build_mask_value(&mask), counter);
+   }
+
+   if (key->multisample) {
+      /* store the sample mask for this loop */
+      LLVMBuildStore(builder, s_mask, s_mask_ptr);
+      lp_build_for_loop_end(&sample_loop_state);
    }
 
    mask_val = lp_build_mask_end(&mask);
@@ -2676,7 +2764,7 @@ generate_fragment(struct llvmpipe_context *lp,
    struct lp_build_image_soa *image;
    struct lp_build_interp_soa_context interp;
    LLVMValueRef fs_mask[(16 / 4) * LP_MAX_SAMPLES];
-   LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS][16 / 4];
+   LLVMValueRef fs_out_color[LP_MAX_SAMPLES][PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS][16 / 4];
    LLVMValueRef function;
    LLVMValueRef facing;
    unsigned num_fs;
@@ -2731,8 +2819,8 @@ generate_fragment(struct llvmpipe_context *lp,
 
    blend_vec_type = lp_build_vec_type(gallivm, blend_type);
 
-   snprintf(func_name, sizeof(func_name), "fs%u_variant%u_%s",
-            shader->no, variant->no, partial_mask ? "partial" : "whole");
+   snprintf(func_name, sizeof(func_name), "fs_variant_%s",
+            partial_mask ? "partial" : "whole");
 
    arg_types[0] = variant->jit_context_ptr_type;       /* context */
    arg_types[1] = int32_type;                          /* x */
@@ -2765,6 +2853,9 @@ generate_fragment(struct llvmpipe_context *lp,
       if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
          lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
 
+   if (variant->gallivm->cache->data_size)
+      return;
+
    context_ptr  = LLVMGetParam(function, 0);
    x            = LLVMGetParam(function, 1);
    y            = LLVMGetParam(function, 2);
@@ -2825,7 +2916,7 @@ generate_fragment(struct llvmpipe_context *lp,
    }
 
    /* code generated texture sampling */
-   sampler = lp_llvm_sampler_soa_create(key->samplers);
+   sampler = lp_llvm_sampler_soa_create(key->samplers, key->nr_samplers);
    image = lp_llvm_image_soa_create(lp_fs_variant_key_images(key));
 
    num_fs = 16 / fs_type.length; /* number of loops per 4x4 stamp */
@@ -2841,7 +2932,7 @@ generate_fragment(struct llvmpipe_context *lp,
                                                       num_loop_samp, "mask_store");
 
       LLVMTypeRef flt_type = LLVMFloatTypeInContext(gallivm->context);
-      LLVMValueRef glob_sample_pos = LLVMAddGlobal(gallivm->module, flt_type, "");
+      LLVMValueRef glob_sample_pos = LLVMAddGlobal(gallivm->module, LLVMArrayType(flt_type, key->coverage_samples * 2), "");
       LLVMValueRef sample_pos_array;
 
       if (key->multisample && key->coverage_samples == 4) {
@@ -2941,7 +3032,6 @@ generate_fragment(struct llvmpipe_context *lp,
                        thread_data_ptr);
 
       for (i = 0; i < num_fs; i++) {
-         LLVMValueRef indexi = lp_build_const_int32(gallivm, i);
          LLVMValueRef ptr;
          for (unsigned s = 0; s < key->coverage_samples; s++) {
             int idx = (i + (s * num_fs));
@@ -2951,22 +3041,26 @@ generate_fragment(struct llvmpipe_context *lp,
             fs_mask[idx] = LLVMBuildLoad(builder, ptr, "smask");
          }
 
-         /* This is fucked up need to reorganize things */
-         for (cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
-            for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
-               ptr = LLVMBuildGEP(builder,
-                                  color_store[cbuf * !cbuf0_write_all][chan],
-                                  &indexi, 1, "");
-               fs_out_color[cbuf][chan][i] = ptr;
+         for (unsigned s = 0; s < key->min_samples; s++) {
+            /* This is fucked up need to reorganize things */
+            int idx = s * num_fs + i;
+            LLVMValueRef sindexi = lp_build_const_int32(gallivm, idx);
+            for (cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
+               for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
+                  ptr = LLVMBuildGEP(builder,
+                                     color_store[cbuf * !cbuf0_write_all][chan],
+                                     &sindexi, 1, "");
+                  fs_out_color[s][cbuf][chan][i] = ptr;
+               }
             }
-         }
-         if (dual_source_blend) {
-            /* only support one dual source blend target hence always use output 1 */
-            for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
-               ptr = LLVMBuildGEP(builder,
-                                  color_store[1][chan],
-                                  &indexi, 1, "");
-               fs_out_color[1][chan][i] = ptr;
+            if (dual_source_blend) {
+               /* only support one dual source blend target hence always use output 1 */
+               for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
+                  ptr = LLVMBuildGEP(builder,
+                                     color_store[1][chan],
+                                     &sindexi, 1, "");
+                  fs_out_color[s][1][chan][i] = ptr;
+               }
             }
          }
       }
@@ -3004,6 +3098,7 @@ generate_fragment(struct llvmpipe_context *lp,
 
          for (unsigned s = 0; s < key->cbuf_nr_samples[cbuf]; s++) {
             unsigned mask_idx = num_fs * (key->multisample ? s : 0);
+            unsigned out_idx = key->min_samples == 1 ? 0 : s;
             LLVMValueRef out_ptr = color_ptr;;
 
             if (key->multisample) {
@@ -3016,7 +3111,7 @@ generate_fragment(struct llvmpipe_context *lp,
 
             generate_unswizzled_blend(gallivm, cbuf, variant,
                                       key->cbuf_format[cbuf],
-                                      num_fs, fs_type, &fs_mask[mask_idx], fs_out_color,
+                                      num_fs, fs_type, &fs_mask[mask_idx], fs_out_color[out_idx],
                                       context_ptr, out_ptr, stride,
                                       partial_mask, do_branch);
          }
@@ -3042,6 +3137,7 @@ dump_fs_variant_key(struct lp_fragment_shader_variant_key *key)
    if (key->multisample) {
       debug_printf("multisample = 1\n");
       debug_printf("coverage samples = %d\n", key->coverage_samples);
+      debug_printf("min samples = %d\n", key->min_samples);
    }
    for (i = 0; i < key->nr_cbufs; ++i) {
       debug_printf("cbuf_format[%u] = %s\n", i, util_format_name(key->cbuf_format[i]));
@@ -3157,6 +3253,27 @@ lp_debug_fs_variant(struct lp_fragment_shader_variant *variant)
    debug_printf("\n");
 }
 
+static void
+lp_fs_get_ir_cache_key(struct lp_fragment_shader_variant *variant, /* reads variant->shader and variant->key */
+                            unsigned char ir_sha1_cache_key[20]) /* out: written by _mesa_sha1_final */
+{ /* Computes the cache key: SHA-1 over the variant key bytes followed by the serialized NIR. */
+   struct blob blob = { 0 };
+   unsigned ir_size;
+   void *ir_binary;
+   /* Serialize the shader's NIR; NOTE(review): meaning of the 'true' argument is not visible here - confirm against nir_serialize. */
+   blob_init(&blob);
+   nir_serialize(&blob, variant->shader->base.ir.nir, true);
+   ir_binary = blob.data;
+   ir_size = blob.size;
+   /* Hash variant_key_size bytes of the variant key first, then the serialized IR. */
+   struct mesa_sha1 ctx;
+   _mesa_sha1_init(&ctx);
+   _mesa_sha1_update(&ctx, &variant->key, variant->shader->variant_key_size);
+   _mesa_sha1_update(&ctx, ir_binary, ir_size);
+   _mesa_sha1_final(&ctx, ir_sha1_cache_key);
+   /* ir_binary points at blob.data, so the blob is only finished after hashing is complete. */
+   blob_finish(&blob);
+}
 
 /**
  * Generate a new fragment shader variant from the shader code and
@@ -3167,11 +3284,14 @@ generate_variant(struct llvmpipe_context *lp,
                  struct lp_fragment_shader *shader,
                  const struct lp_fragment_shader_variant_key *key)
 {
+   struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
    struct lp_fragment_shader_variant *variant;
    const struct util_format_description *cbuf0_format_desc = NULL;
    boolean fullcolormask;
    char module_name[64];
-
+   unsigned char ir_sha1_cache_key[20];
+   struct lp_cached_code cached = { 0 };
+   bool needs_caching = false;
    variant = MALLOC(sizeof *variant + shader->variant_key_size - sizeof variant->key);
    if (!variant)
       return NULL;
@@ -3180,18 +3300,27 @@ generate_variant(struct llvmpipe_context *lp,
    snprintf(module_name, sizeof(module_name), "fs%u_variant%u",
             shader->no, shader->variants_created);
 
-   variant->gallivm = gallivm_create(module_name, lp->context);
+   variant->shader = shader;
+   memcpy(&variant->key, key, shader->variant_key_size);
+
+   if (shader->base.ir.nir) {
+      lp_fs_get_ir_cache_key(variant, ir_sha1_cache_key);
+
+      lp_disk_cache_find_shader(screen, &cached, ir_sha1_cache_key);
+      if (!cached.data_size)
+         needs_caching = true;
+   }
+   variant->gallivm = gallivm_create(module_name, lp->context, &cached);
    if (!variant->gallivm) {
       FREE(variant);
       return NULL;
    }
 
-   variant->shader = shader;
    variant->list_item_global.base = variant;
    variant->list_item_local.base = variant;
    variant->no = shader->variants_created++;
 
-   memcpy(&variant->key, key, shader->variant_key_size);
+
 
    /*
     * Determine whether we are touching all channels in the color buffer.
@@ -3208,6 +3337,7 @@ generate_variant(struct llvmpipe_context *lp,
          fullcolormask &&
          !key->stencil[0].enabled &&
          !key->alpha.enabled &&
+         !key->multisample &&
          !key->blend.alpha_to_coverage &&
          !key->depth.enabled &&
          !shader->info.base.uses_kill &&
@@ -3252,6 +3382,10 @@ generate_variant(struct llvmpipe_context *lp,
       variant->jit_function[RAST_WHOLE] = variant->jit_function[RAST_EDGE_TEST];
    }
 
+   if (needs_caching) {
+      lp_disk_cache_insert_shader(screen, &cached, ir_sha1_cache_key);
+   }
+
    gallivm_free_ir(variant->gallivm);
 
    return variant;
@@ -3666,8 +3800,11 @@ make_variant_key(struct llvmpipe_context *lp,
    }
 
    key->coverage_samples = 1;
-   if (key->multisample)
+   key->min_samples = 1;
+   if (key->multisample) {
       key->coverage_samples = util_framebuffer_get_num_samples(&lp->framebuffer);
+      key->min_samples = lp->min_samples == 1 ? 1 : key->coverage_samples;
+   }
    key->nr_cbufs = lp->framebuffer.nr_cbufs;
 
    if (!key->blend.independent_blend_enable) {