llvmpipe: don't allow branch to end for early Z with multisample
[mesa.git] / src / gallium / drivers / llvmpipe / lp_state_fs.c
index 101877ded3471446fc0d7b4a77b8caadae7e7b6c..8c3fc05fa0449b126240d0e124abf29d1ea30182 100644 (file)
@@ -62,7 +62,7 @@
 #include "util/u_inlines.h"
 #include "util/u_memory.h"
 #include "util/u_pointer.h"
-#include "util/u_format.h"
+#include "util/format/u_format.h"
 #include "util/u_dump.h"
 #include "util/u_string.h"
 #include "util/simple_list.h"
@@ -80,6 +80,7 @@
 #include "gallivm/lp_bld_intr.h"
 #include "gallivm/lp_bld_logic.h"
 #include "gallivm/lp_bld_tgsi.h"
+#include "gallivm/lp_bld_nir.h"
 #include "gallivm/lp_bld_swizzle.h"
 #include "gallivm/lp_bld_flow.h"
 #include "gallivm/lp_bld_debug.h"
 #include "lp_flush.h"
 #include "lp_state_fs.h"
 #include "lp_rast.h"
-
+#include "nir/nir_to_tgsi_info.h"
 
 /** Fragment shader number (for debugging) */
 static unsigned fs_no = 0;
@@ -122,7 +123,8 @@ static LLVMValueRef
 generate_quad_mask(struct gallivm_state *gallivm,
                    struct lp_type fs_type,
                    unsigned first_quad,
-                   LLVMValueRef mask_input) /* int32 */
+                   unsigned sample,
+                   LLVMValueRef mask_input) /* int64 */
 {
    LLVMBuilderRef builder = gallivm->builder;
    struct lp_type mask_type;
@@ -161,6 +163,11 @@ generate_quad_mask(struct gallivm_state *gallivm,
       shift = 0;
    }
 
+   mask_input = LLVMBuildLShr(builder, mask_input, lp_build_const_int64(gallivm, 16 * sample), "");
+   mask_input = LLVMBuildTrunc(builder, mask_input,
+                               i32t, "");
+   mask_input = LLVMBuildAnd(builder, mask_input, lp_build_const_int32(gallivm, 0xffff), "");
+
    mask_input = LLVMBuildLShr(builder,
                               mask_input,
                               LLVMConstInt(i32t, shift, 0),
@@ -286,6 +293,32 @@ lp_build_depth_clamp(struct gallivm_state *gallivm,
    return lp_build_clamp(&f32_bld, z, min_depth, max_depth);
 }
 
+static void
+lp_build_sample_alpha_to_coverage(struct gallivm_state *gallivm,
+                                  struct lp_type type,
+                                  unsigned coverage_samples,
+                                  LLVMValueRef num_loop,
+                                  LLVMValueRef loop_counter,
+                                  LLVMValueRef coverage_mask_store,
+                                  LLVMValueRef alpha)
+{ /* Multisample alpha-to-coverage: for every sample s, emit IR that ANDs the mask stored in coverage_mask_store with the result of (alpha > s / coverage_samples). */
+   struct lp_build_context bld;
+   LLVMBuilderRef builder = gallivm->builder;
+   float step = 1.0 / coverage_samples; /* spacing between the per-sample alpha thresholds */
+
+   lp_build_context_init(&bld, gallivm, type);
+   for (unsigned s = 0; s < coverage_samples; s++) { /* unrolled at code-generation time: one compare and one mask update per sample */
+      LLVMValueRef alpha_ref_value = lp_build_const_vec(gallivm, type, step * s); /* threshold for sample s (0 for the first sample) */
+      LLVMValueRef test = lp_build_cmp(&bld, PIPE_FUNC_GREATER, alpha, alpha_ref_value); /* alpha > threshold */
+
+      LLVMValueRef s_mask_idx = LLVMBuildMul(builder, lp_build_const_int32(gallivm, s), num_loop, ""); /* mask slot index = s * num_loop + loop_counter */
+      s_mask_idx = LLVMBuildAdd(builder, s_mask_idx, loop_counter, "");
+      LLVMValueRef s_mask_ptr = LLVMBuildGEP(builder, coverage_mask_store, &s_mask_idx, 1, "");
+      LLVMValueRef s_mask = LLVMBuildLoad(builder, s_mask_ptr, "");
+      s_mask = LLVMBuildAnd(builder, s_mask, test, ""); /* NOTE(review): assumes lp_build_cmp yields a per-lane all-ones/zero integer mask -- confirm */
+      LLVMBuildStore(builder, s_mask, s_mask_ptr); /* write the updated mask back to the same slot */
+   }
+}
 
 /**
  * Generate the fragment shader, depth/stencil test, and alpha tests.
@@ -297,13 +330,16 @@ generate_fs_loop(struct gallivm_state *gallivm,
                  LLVMBuilderRef builder,
                  struct lp_type type,
                  LLVMValueRef context_ptr,
+                 LLVMValueRef sample_pos_array,
                  LLVMValueRef num_loop,
                  struct lp_build_interp_soa_context *interp,
                  const struct lp_build_sampler_soa *sampler,
+                 const struct lp_build_image_soa *image,
                  LLVMValueRef mask_store,
                  LLVMValueRef (*out_color)[4],
-                 LLVMValueRef depth_ptr,
+                 LLVMValueRef depth_base_ptr,
                  LLVMValueRef depth_stride,
+                 LLVMValueRef depth_sample_stride,
                  LLVMValueRef facing,
                  LLVMValueRef thread_data_ptr)
 {
@@ -311,15 +347,17 @@ generate_fs_loop(struct gallivm_state *gallivm,
    const struct tgsi_token *tokens = shader->base.tokens;
    struct lp_type int_type = lp_int_type(type);
    LLVMTypeRef vec_type, int_vec_type;
-   LLVMValueRef mask_ptr, mask_val;
+   LLVMValueRef mask_ptr = NULL, mask_val = NULL;
    LLVMValueRef consts_ptr, num_consts_ptr;
    LLVMValueRef ssbo_ptr, num_ssbo_ptr;
    LLVMValueRef z;
    LLVMValueRef z_value, s_value;
    LLVMValueRef z_fb, s_fb;
+   LLVMValueRef depth_ptr;
    LLVMValueRef stencil_refs[2];
    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
-   struct lp_build_for_loop_state loop_state;
+   LLVMValueRef zs_samples = lp_build_const_int32(gallivm, key->zsbuf_nr_samples);
+   struct lp_build_for_loop_state loop_state, sample_loop_state;
    struct lp_build_mask_context mask;
    /*
     * TODO: figure out if simple_shader optimization is really worthwile to
@@ -340,14 +378,22 @@ generate_fs_loop(struct gallivm_state *gallivm,
 
    memset(&system_values, 0, sizeof(system_values));
 
+   /* truncate then sign extend. */
+   system_values.front_facing = LLVMBuildTrunc(gallivm->builder, facing, LLVMInt1TypeInContext(gallivm->context), "");
+   system_values.front_facing = LLVMBuildSExt(gallivm->builder, system_values.front_facing, LLVMInt32TypeInContext(gallivm->context), "");
+
    if (key->depth.enabled ||
        key->stencil[0].enabled) {
 
       zs_format_desc = util_format_description(key->zsbuf_format);
       assert(zs_format_desc);
 
-      if (!shader->info.base.writes_z && !shader->info.base.writes_stencil) {
-         if (key->alpha.enabled ||
+      if (shader->info.base.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL])
+         depth_mode = EARLY_DEPTH_TEST | EARLY_DEPTH_WRITE;
+      else if (!shader->info.base.writes_z && !shader->info.base.writes_stencil) {
+         if (shader->info.base.writes_memory)
+            depth_mode = LATE_DEPTH_TEST | LATE_DEPTH_WRITE;
+         else if (key->alpha.enabled ||
              key->blend.alpha_to_coverage ||
              shader->info.base.uses_kill ||
              shader->info.base.writes_samplemask) {
@@ -396,16 +442,6 @@ generate_fs_loop(struct gallivm_state *gallivm,
    ssbo_ptr = lp_jit_context_ssbos(gallivm, context_ptr);
    num_ssbo_ptr = lp_jit_context_num_ssbos(gallivm, context_ptr);
 
-   lp_build_for_loop_begin(&loop_state, gallivm,
-                           lp_build_const_int32(gallivm, 0),
-                           LLVMIntULT,
-                           num_loop,
-                           lp_build_const_int32(gallivm, 1));
-
-   mask_ptr = LLVMBuildGEP(builder, mask_store,
-                           &loop_state.counter, 1, "mask_ptr");
-   mask_val = LLVMBuildLoad(builder, mask_ptr, "");
-
    memset(outputs, 0, sizeof outputs);
 
    for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
@@ -426,6 +462,28 @@ generate_fs_loop(struct gallivm_state *gallivm,
       }
    }
 
+   lp_build_for_loop_begin(&loop_state, gallivm,
+                           lp_build_const_int32(gallivm, 0),
+                           LLVMIntULT,
+                           num_loop,
+                           lp_build_const_int32(gallivm, 1));
+
+   if (key->multisample) {
+      /* create shader execution mask by combining all sample masks. */
+      for (unsigned s = 0; s < key->coverage_samples; s++) {
+         LLVMValueRef s_mask_idx = LLVMBuildMul(builder, num_loop, lp_build_const_int32(gallivm, s), "");
+         s_mask_idx = LLVMBuildAdd(builder, s_mask_idx, loop_state.counter, "");
+         LLVMValueRef s_mask = lp_build_pointer_get(builder, mask_store, s_mask_idx);
+         if (s == 0)
+            mask_val = s_mask;
+         else
+            mask_val = LLVMBuildOr(builder, s_mask, mask_val, "");
+      }
+   } else {
+      mask_ptr = LLVMBuildGEP(builder, mask_store,
+                              &loop_state.counter, 1, "mask_ptr");
+      mask_val = LLVMBuildLoad(builder, mask_ptr, "");
+   }
 
    /* 'mask' will control execution based on quad's pixel alive/killed state */
    lp_build_mask_begin(&mask, gallivm, type, mask_val);
@@ -433,9 +491,62 @@ generate_fs_loop(struct gallivm_state *gallivm,
    if (!(depth_mode & EARLY_DEPTH_TEST) && !simple_shader)
       lp_build_mask_check(&mask);
 
-   lp_build_interp_soa_update_pos_dyn(interp, gallivm, loop_state.counter);
+   /* Create storage for recombining sample masks after early Z pass. */
+   LLVMValueRef s_mask_or = lp_build_alloca(gallivm, lp_build_int_vec_type(gallivm, type), "cov_mask_early_depth");
+   LLVMBuildStore(builder, LLVMConstNull(lp_build_int_vec_type(gallivm, type)), s_mask_or);
+
+   LLVMValueRef s_mask = NULL, s_mask_ptr = NULL;
+   LLVMValueRef z_sample_value_store = NULL, s_sample_value_store = NULL;
+   LLVMValueRef z_fb_store = NULL, s_fb_store = NULL;
+   LLVMTypeRef z_type = NULL, z_fb_type = NULL;
+
+   /* Run early depth once per sample */
+   if (key->multisample) {
+
+      if (zs_format_desc) {
+         struct lp_type zs_type = lp_depth_type(zs_format_desc, type.length);
+         struct lp_type z_type = zs_type;
+         struct lp_type s_type = zs_type;
+         if (zs_format_desc->block.bits < type.width)
+            z_type.width = type.width;
+         else if (zs_format_desc->block.bits > 32) {
+            z_type.width = z_type.width / 2;
+            s_type.width = s_type.width / 2;
+            s_type.floating = 0;
+         }
+         z_sample_value_store = lp_build_array_alloca(gallivm, lp_build_int_vec_type(gallivm, type),
+                                                      zs_samples, "z_sample_store");
+         s_sample_value_store = lp_build_array_alloca(gallivm, lp_build_int_vec_type(gallivm, type),
+                                                      zs_samples, "s_sample_store");
+         z_fb_store = lp_build_array_alloca(gallivm, lp_build_vec_type(gallivm, z_type),
+                                            zs_samples, "z_fb_store");
+         s_fb_store = lp_build_array_alloca(gallivm, lp_build_vec_type(gallivm, s_type),
+                                            zs_samples, "s_fb_store");
+      }
+      lp_build_for_loop_begin(&sample_loop_state, gallivm,
+                              lp_build_const_int32(gallivm, 0),
+                              LLVMIntULT, lp_build_const_int32(gallivm, key->coverage_samples),
+                              lp_build_const_int32(gallivm, 1));
+
+      LLVMValueRef s_mask_idx = LLVMBuildMul(builder, sample_loop_state.counter, num_loop, "");
+      s_mask_idx = LLVMBuildAdd(builder, s_mask_idx, loop_state.counter, "");
+      s_mask_ptr = LLVMBuildGEP(builder, mask_store, &s_mask_idx, 1, "");
+
+      s_mask = LLVMBuildLoad(builder, s_mask_ptr, "");
+      s_mask = LLVMBuildAnd(builder, s_mask, mask_val, "");
+   }
+
+
+   /* For multisample, Z needs to be interpolated at the sample points for depth testing. */
+   lp_build_interp_soa_update_pos_dyn(interp, gallivm, loop_state.counter, key->multisample ? sample_loop_state.counter : NULL);
    z = interp->pos[2];
 
+   depth_ptr = depth_base_ptr;
+   if (key->multisample) {
+      LLVMValueRef sample_offset = LLVMBuildMul(builder, sample_loop_state.counter, depth_sample_stride, "");
+      depth_ptr = LLVMBuildGEP(builder, depth_ptr, &sample_offset, 1, "");
+   }
+
    if (depth_mode & EARLY_DEPTH_TEST) {
       /*
        * Clamp according to ARB_depth_clamp semantics.
@@ -453,12 +564,13 @@ generate_fs_loop(struct gallivm_state *gallivm,
                                   key->stencil,
                                   type,
                                   zs_format_desc,
-                                  &mask,
+                                  key->multisample ? NULL : &mask,
+                                  &s_mask,
                                   stencil_refs,
                                   z, z_fb, s_fb,
                                   facing,
                                   &z_value, &s_value,
-                                  !simple_shader);
+                                  !simple_shader && !key->multisample);
 
       if (depth_mode & EARLY_DEPTH_WRITE) {
          lp_build_depth_stencil_write_swizzled(gallivm, type,
@@ -472,11 +584,48 @@ generate_fs_loop(struct gallivm_state *gallivm,
        * stencil test otherwise new stencil values may not get written if all
        * fragments got killed by depth/stencil test.
        */
-      if (!simple_shader && key->stencil[0].enabled)
+      if (!simple_shader && key->stencil[0].enabled && !key->multisample)
          lp_build_mask_check(&mask);
+
+      if (key->multisample) {
+         z_fb_type = LLVMTypeOf(z_fb);
+         z_type = LLVMTypeOf(z_value);
+         lp_build_pointer_set(builder, z_sample_value_store, sample_loop_state.counter, LLVMBuildBitCast(builder, z_value, lp_build_int_vec_type(gallivm, type), ""));
+         lp_build_pointer_set(builder, s_sample_value_store, sample_loop_state.counter, LLVMBuildBitCast(builder, s_value, lp_build_int_vec_type(gallivm, type), ""));
+         lp_build_pointer_set(builder, z_fb_store, sample_loop_state.counter, z_fb);
+         lp_build_pointer_set(builder, s_fb_store, sample_loop_state.counter, s_fb);
+      }
    }
 
-   lp_build_interp_soa_update_inputs_dyn(interp, gallivm, loop_state.counter);
+   if (key->multisample) {
+      /*
+       * Store the post-early Z coverage mask.
+       * Recombine the resulting coverage masks post early Z into the fragment
+       * shader execution mask.
+       */
+      LLVMValueRef tmp_s_mask_or = LLVMBuildLoad(builder, s_mask_or, "");
+      tmp_s_mask_or = LLVMBuildOr(builder, tmp_s_mask_or, s_mask, "");
+      LLVMBuildStore(builder, tmp_s_mask_or, s_mask_or);
+
+      LLVMBuildStore(builder, s_mask, s_mask_ptr);
+
+      lp_build_for_loop_end(&sample_loop_state);
+
+      /* Recombine all the coverage masks into the shader exec mask. */
+      tmp_s_mask_or = LLVMBuildLoad(builder, s_mask_or, "");
+      lp_build_mask_update(&mask, tmp_s_mask_or);
+
+      /* For multisample, Z needs to be re-interpolated at the pixel center. */
+      lp_build_interp_soa_update_pos_dyn(interp, gallivm, loop_state.counter, NULL);
+   }
+
+   LLVMValueRef out_sample_mask_storage = NULL;
+   if (shader->info.base.writes_samplemask) {
+      out_sample_mask_storage = lp_build_alloca(gallivm, int_vec_type, "write_mask");
+   }
+   system_values.sample_pos = sample_pos_array;
+
+   lp_build_interp_soa_update_inputs_dyn(interp, gallivm, loop_state.counter, NULL, NULL);
 
    struct lp_build_tgsi_params params;
    memset(&params, 0, sizeof(params));
@@ -493,10 +642,15 @@ generate_fs_loop(struct gallivm_state *gallivm,
    params.info = &shader->info.base;
    params.ssbo_ptr = ssbo_ptr;
    params.ssbo_sizes_ptr = num_ssbo_ptr;
+   params.image = image;
 
    /* Build the actual shader */
-   lp_build_tgsi_soa(gallivm, tokens, &params,
-                     outputs);
+   if (shader->base.type == PIPE_SHADER_IR_TGSI)
+      lp_build_tgsi_soa(gallivm, tokens, &params,
+                        outputs);
+   else
+      lp_build_nir_soa(gallivm, shader->base.ir.nir, &params,
+                       outputs);
 
    /* Alpha test */
    if (key->alpha.enabled) {
@@ -529,29 +683,82 @@ generate_fs_loop(struct gallivm_state *gallivm,
       if (color0 != -1 && outputs[color0][3]) {
          LLVMValueRef alpha = LLVMBuildLoad(builder, outputs[color0][3], "alpha");
 
-         lp_build_alpha_to_coverage(gallivm, type,
-                                    &mask, alpha,
-                                    (depth_mode & LATE_DEPTH_TEST) != 0);
+         if (!key->multisample) {
+            lp_build_alpha_to_coverage(gallivm, type,
+                                       &mask, alpha,
+                                       (depth_mode & LATE_DEPTH_TEST) != 0);
+         } else {
+            lp_build_sample_alpha_to_coverage(gallivm, type, key->coverage_samples, num_loop,
+                                              loop_state.counter,
+                                              mask_store, alpha);
+         }
+      }
+   }
+   if (key->blend.alpha_to_one && key->multisample) {
+      for (attrib = 0; attrib < shader->info.base.num_outputs; ++attrib) {
+         unsigned cbuf = shader->info.base.output_semantic_index[attrib];
+         if ((shader->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR) &&
+             ((cbuf < key->nr_cbufs) || (cbuf == 1 && dual_source_blend)))
+            if (outputs[cbuf][3]) {
+               LLVMBuildStore(builder, lp_build_const_vec(gallivm, type, 1.0), outputs[cbuf][3]);
+            }
       }
    }
-
    if (shader->info.base.writes_samplemask) {
+      LLVMValueRef output_smask = NULL;
       int smaski = find_output_by_semantic(&shader->info.base,
                                            TGSI_SEMANTIC_SAMPLEMASK,
                                            0);
-      LLVMValueRef smask;
       struct lp_build_context smask_bld;
       lp_build_context_init(&smask_bld, gallivm, int_type);
 
       assert(smaski >= 0);
-      smask = LLVMBuildLoad(builder, outputs[smaski][0], "smask");
+      output_smask = LLVMBuildLoad(builder, outputs[smaski][0], "smask");
       /*
        * Pixel is alive according to the first sample in the mask.
        */
-      smask = LLVMBuildBitCast(builder, smask, smask_bld.vec_type, "");
-      smask = lp_build_and(&smask_bld, smask, smask_bld.one);
-      smask = lp_build_cmp(&smask_bld, PIPE_FUNC_NOTEQUAL, smask, smask_bld.zero);
-      lp_build_mask_update(&mask, smask);
+      output_smask = LLVMBuildBitCast(builder, output_smask, smask_bld.vec_type, "");
+      if (!key->multisample) {
+         output_smask = lp_build_and(&smask_bld, output_smask, smask_bld.one);
+         output_smask = lp_build_cmp(&smask_bld, PIPE_FUNC_NOTEQUAL, output_smask, smask_bld.zero);
+         lp_build_mask_update(&mask, output_smask);
+      }
+      LLVMBuildStore(builder, output_smask, out_sample_mask_storage);
+   }
+
+   if (key->multisample) {
+      /* execute depth test for each sample */
+      lp_build_for_loop_begin(&sample_loop_state, gallivm,
+                              lp_build_const_int32(gallivm, 0),
+                              LLVMIntULT, lp_build_const_int32(gallivm, key->coverage_samples),
+                              lp_build_const_int32(gallivm, 1));
+
+      /* load the per-sample coverage mask */
+      LLVMValueRef s_mask_idx = LLVMBuildMul(builder, sample_loop_state.counter, num_loop, "");
+      s_mask_idx = LLVMBuildAdd(builder, s_mask_idx, loop_state.counter, "");
+      s_mask_ptr = LLVMBuildGEP(builder, mask_store, &s_mask_idx, 1, "");
+
+      /* combine the execution mask post fragment shader with the coverage mask. */
+      s_mask = LLVMBuildLoad(builder, s_mask_ptr, "");
+      s_mask = LLVMBuildAnd(builder, s_mask, lp_build_mask_value(&mask), "");
+
+      /* if the shader writes sample mask use that */
+      if (shader->info.base.writes_samplemask) {
+         LLVMValueRef out_smask_idx = LLVMBuildShl(builder, lp_build_const_int32(gallivm, 1), sample_loop_state.counter, "");
+         out_smask_idx = lp_build_broadcast(gallivm, int_vec_type, out_smask_idx);
+         LLVMValueRef output_smask = LLVMBuildLoad(builder, out_sample_mask_storage, "");
+         LLVMValueRef smask_bit = LLVMBuildAnd(builder, output_smask, out_smask_idx, "");
+         LLVMValueRef cmp = LLVMBuildICmp(builder, LLVMIntNE, smask_bit, lp_build_const_int_vec(gallivm, int_type, 0), "");
+         smask_bit = LLVMBuildSExt(builder, cmp, int_vec_type, "");
+
+         s_mask = LLVMBuildAnd(builder, s_mask, smask_bit, "");
+      }
+   }
+
+   depth_ptr = depth_base_ptr;
+   if (key->multisample) {
+      LLVMValueRef sample_offset = LLVMBuildMul(builder, sample_loop_state.counter, depth_sample_stride, "");
+      depth_ptr = LLVMBuildGEP(builder, depth_ptr, &sample_offset, 1, "");
    }
 
    /* Late Z test */
@@ -592,7 +799,8 @@ generate_fs_loop(struct gallivm_state *gallivm,
                                   key->stencil,
                                   type,
                                   zs_format_desc,
-                                  &mask,
+                                  key->multisample ? NULL : &mask,
+                                  &s_mask,
                                   stencil_refs,
                                   z, z_fb, s_fb,
                                   facing,
@@ -614,13 +822,24 @@ generate_fs_loop(struct gallivm_state *gallivm,
        * depth value, update from zs_value with the new mask value and
        * write that out.
        */
+      if (key->multisample) {
+         z_value = LLVMBuildBitCast(builder, lp_build_pointer_get(builder, z_sample_value_store, sample_loop_state.counter), z_type, "");;
+         s_value = lp_build_pointer_get(builder, s_sample_value_store, sample_loop_state.counter);
+         z_fb = LLVMBuildBitCast(builder, lp_build_pointer_get(builder, z_fb_store, sample_loop_state.counter), z_fb_type, "");
+         s_fb = lp_build_pointer_get(builder, s_fb_store, sample_loop_state.counter);
+      }
       lp_build_depth_stencil_write_swizzled(gallivm, type,
                                             zs_format_desc, key->resource_1d,
-                                            &mask, z_fb, s_fb, loop_state.counter,
+                                            key->multisample ? s_mask : lp_build_mask_value(&mask), z_fb, s_fb, loop_state.counter,
                                             depth_ptr, depth_stride,
                                             z_value, s_value);
    }
 
+   if (key->multisample) {
+      /* store the sample mask for this loop */
+      LLVMBuildStore(builder, s_mask, s_mask_ptr);
+      lp_build_for_loop_end(&sample_loop_state);
+   }
 
    /* Color write  */
    for (attrib = 0; attrib < shader->info.base.num_outputs; ++attrib)
@@ -653,7 +872,8 @@ generate_fs_loop(struct gallivm_state *gallivm,
    }
 
    mask_val = lp_build_mask_end(&mask);
-   LLVMBuildStore(builder, mask_val, mask_ptr);
+   if (!key->multisample)
+      LLVMBuildStore(builder, mask_val, mask_ptr);
    lp_build_for_loop_end(&loop_state);
 }
 
@@ -1393,7 +1613,7 @@ convert_to_blend_type(struct gallivm_state *gallivm,
       for (j = 0; j < src_fmt->nr_channels; ++j) {
          unsigned mask = 0;
          unsigned sa = src_fmt->channel[j].shift;
-#ifdef PIPE_ARCH_LITTLE_ENDIAN
+#if UTIL_ARCH_LITTLE_ENDIAN
          unsigned from_lsb = j;
 #else
          unsigned from_lsb = src_fmt->nr_channels - j - 1;
@@ -1575,7 +1795,8 @@ convert_from_blend_type(struct gallivm_state *gallivm,
       for (j = 0; j < src_fmt->nr_channels; ++j) {
          unsigned mask = 0;
          unsigned sa = src_fmt->channel[j].shift;
-#ifdef PIPE_ARCH_LITTLE_ENDIAN
+         unsigned sz_a = src_fmt->channel[j].size;
+#if UTIL_ARCH_LITTLE_ENDIAN
          unsigned from_lsb = j;
 #else
          unsigned from_lsb = src_fmt->nr_channels - j - 1;
@@ -1603,6 +1824,10 @@ convert_from_blend_type(struct gallivm_state *gallivm,
          if (src_type.norm) {
             chans[j] = scale_bits(gallivm, blend_type.width,
                                   src_fmt->channel[j].size, chans[j], src_type);
+         } else if (!src_type.floating && sz_a < blend_type.width) {
+            LLVMValueRef mask_val = lp_build_const_int_vec(gallivm, src_type, (1UL << sz_a) - 1);
+            LLVMValueRef mask = LLVMBuildICmp(builder, LLVMIntUGT, chans[j], mask_val, "");
+            chans[j] = LLVMBuildSelect(builder, mask, mask_val, chans[j], "");
          }
 
          /* Insert bits */
@@ -2420,14 +2645,14 @@ generate_fragment(struct llvmpipe_context *lp,
                   unsigned partial_mask)
 {
    struct gallivm_state *gallivm = variant->gallivm;
-   const struct lp_fragment_shader_variant_key *key = &variant->key;
+   struct lp_fragment_shader_variant_key *key = &variant->key;
    struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS];
    char func_name[64];
    struct lp_type fs_type;
    struct lp_type blend_type;
    LLVMTypeRef fs_elem_type;
    LLVMTypeRef blend_vec_type;
-   LLVMTypeRef arg_types[13];
+   LLVMTypeRef arg_types[15];
    LLVMTypeRef func_type;
    LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
    LLVMTypeRef int8_type = LLVMInt8TypeInContext(gallivm->context);
@@ -2439,15 +2664,18 @@ generate_fragment(struct llvmpipe_context *lp,
    LLVMValueRef dady_ptr;
    LLVMValueRef color_ptr_ptr;
    LLVMValueRef stride_ptr;
+   LLVMValueRef color_sample_stride_ptr;
    LLVMValueRef depth_ptr;
    LLVMValueRef depth_stride;
+   LLVMValueRef depth_sample_stride;
    LLVMValueRef mask_input;
    LLVMValueRef thread_data_ptr;
    LLVMBasicBlockRef block;
    LLVMBuilderRef builder;
    struct lp_build_sampler_soa *sampler;
+   struct lp_build_image_soa *image;
    struct lp_build_interp_soa_context interp;
-   LLVMValueRef fs_mask[16 / 4];
+   LLVMValueRef fs_mask[(16 / 4) * LP_MAX_SAMPLES];
    LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS][16 / 4];
    LLVMValueRef function;
    LLVMValueRef facing;
@@ -2513,12 +2741,14 @@ generate_fragment(struct llvmpipe_context *lp,
    arg_types[4] = LLVMPointerType(fs_elem_type, 0);    /* a0 */
    arg_types[5] = LLVMPointerType(fs_elem_type, 0);    /* dadx */
    arg_types[6] = LLVMPointerType(fs_elem_type, 0);    /* dady */
-   arg_types[7] = LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0);  /* color */
+   arg_types[7] = LLVMPointerType(LLVMPointerType(int8_type, 0), 0);  /* color */
    arg_types[8] = LLVMPointerType(int8_type, 0);       /* depth */
-   arg_types[9] = int32_type;                          /* mask_input */
+   arg_types[9] = LLVMInt64TypeInContext(gallivm->context);  /* mask_input */
    arg_types[10] = variant->jit_thread_data_ptr_type;  /* per thread data */
    arg_types[11] = LLVMPointerType(int32_type, 0);     /* stride */
    arg_types[12] = int32_type;                         /* depth_stride */
+   arg_types[13] = LLVMPointerType(int32_type, 0);     /* color sample strides */
+   arg_types[14] = int32_type;                         /* depth sample stride */
 
    func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context),
                                 arg_types, ARRAY_SIZE(arg_types), 0);
@@ -2548,6 +2778,8 @@ generate_fragment(struct llvmpipe_context *lp,
    thread_data_ptr  = LLVMGetParam(function, 10);
    stride_ptr   = LLVMGetParam(function, 11);
    depth_stride = LLVMGetParam(function, 12);
+   color_sample_stride_ptr = LLVMGetParam(function, 13);
+   depth_sample_stride = LLVMGetParam(function, 14);
 
    lp_build_name(context_ptr, "context");
    lp_build_name(x, "x");
@@ -2561,6 +2793,8 @@ generate_fragment(struct llvmpipe_context *lp,
    lp_build_name(thread_data_ptr, "thread_data");
    lp_build_name(stride_ptr, "stride_ptr");
    lp_build_name(depth_stride, "depth_stride");
+   lp_build_name(color_sample_stride_ptr, "color_sample_stride_ptr");
+   lp_build_name(depth_sample_stride, "depth_sample_stride");
 
    /*
     * Function body
@@ -2591,7 +2825,8 @@ generate_fragment(struct llvmpipe_context *lp,
    }
 
    /* code generated texture sampling */
-   sampler = lp_llvm_sampler_soa_create(key->state);
+   sampler = lp_llvm_sampler_soa_create(key->samplers);
+   image = lp_llvm_image_soa_create(lp_fs_variant_key_images(key));
 
    num_fs = 16 / fs_type.length; /* number of loops per 4x4 stamp */
    /* for 1d resources only run "upper half" of stamp */
@@ -2601,8 +2836,29 @@ generate_fragment(struct llvmpipe_context *lp,
    {
       LLVMValueRef num_loop = lp_build_const_int32(gallivm, num_fs);
       LLVMTypeRef mask_type = lp_build_int_vec_type(gallivm, fs_type);
+      LLVMValueRef num_loop_samp = lp_build_const_int32(gallivm, num_fs * key->coverage_samples);
       LLVMValueRef mask_store = lp_build_array_alloca(gallivm, mask_type,
-                                                      num_loop, "mask_store");
+                                                      num_loop_samp, "mask_store");
+
+      LLVMTypeRef flt_type = LLVMFloatTypeInContext(gallivm->context);
+      LLVMValueRef glob_sample_pos = LLVMAddGlobal(gallivm->module, flt_type, "");
+      LLVMValueRef sample_pos_array;
+
+      if (key->multisample && key->coverage_samples == 4) {
+         LLVMValueRef sample_pos_arr[8];
+         for (unsigned i = 0; i < 4; i++) {
+            sample_pos_arr[i * 2] = LLVMConstReal(flt_type, lp_sample_pos_4x[i][0]);
+            sample_pos_arr[i * 2 + 1] = LLVMConstReal(flt_type, lp_sample_pos_4x[i][1]);
+         }
+         sample_pos_array = LLVMConstArray(LLVMFloatTypeInContext(gallivm->context), sample_pos_arr, 8);
+      } else {
+         LLVMValueRef sample_pos_arr[2];
+         sample_pos_arr[0] = LLVMConstReal(flt_type, 0.5);
+         sample_pos_arr[1] = LLVMConstReal(flt_type, 0.5);
+         sample_pos_array = LLVMConstArray(LLVMFloatTypeInContext(gallivm->context), sample_pos_arr, 2);
+      }
+      LLVMSetInitializer(glob_sample_pos, sample_pos_array);
+
       LLVMValueRef color_store[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS];
       boolean pixel_center_integer =
          shader->info.base.properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER];
@@ -2617,25 +2873,53 @@ generate_fragment(struct llvmpipe_context *lp,
                                shader->info.base.num_inputs,
                                inputs,
                                pixel_center_integer,
+                               key->coverage_samples, glob_sample_pos,
+                               num_loop,
                                key->depth_clamp,
                                builder, fs_type,
                                a0_ptr, dadx_ptr, dady_ptr,
                                x, y);
 
       for (i = 0; i < num_fs; i++) {
-         LLVMValueRef mask;
-         LLVMValueRef indexi = lp_build_const_int32(gallivm, i);
-         LLVMValueRef mask_ptr = LLVMBuildGEP(builder, mask_store,
-                                              &indexi, 1, "mask_ptr");
+         if (key->multisample) {
+            LLVMValueRef smask_val = LLVMBuildLoad(builder, lp_jit_context_sample_mask(gallivm, context_ptr), "");
 
-         if (partial_mask) {
-            mask = generate_quad_mask(gallivm, fs_type,
-                                      i*fs_type.length/4, mask_input);
-         }
-         else {
-            mask = lp_build_const_int_vec(gallivm, fs_type, ~0);
+            /*
+             * For multisampling, extract each per-sample mask from the incoming 64-bit mask
+             * and store it to the per-sample mask storage. The per-sample masks are later
+             * ORed together to form the fragment shader mask (sample shading is a TODO).
+             * The sample mask from the context state is also taken into account.
+             */
+            for (unsigned s = 0; s < key->coverage_samples; s++) {
+               LLVMValueRef sindexi = lp_build_const_int32(gallivm, i + (s * num_fs));
+               LLVMValueRef sample_mask_ptr = LLVMBuildGEP(builder, mask_store,
+                                                           &sindexi, 1, "sample_mask_ptr");
+               LLVMValueRef s_mask = generate_quad_mask(gallivm, fs_type,
+                                                        i*fs_type.length/4, s, mask_input);
+
+               LLVMValueRef smask_bit = LLVMBuildAnd(builder, smask_val, lp_build_const_int32(gallivm, (1 << s)), "");
+               LLVMValueRef cmp = LLVMBuildICmp(builder, LLVMIntNE, smask_bit, lp_build_const_int32(gallivm, 0), "");
+               smask_bit = LLVMBuildSExt(builder, cmp, int32_type, "");
+               smask_bit = lp_build_broadcast(gallivm, mask_type, smask_bit);
+
+               s_mask = LLVMBuildAnd(builder, s_mask, smask_bit, "");
+               LLVMBuildStore(builder, s_mask, sample_mask_ptr);
+            }
+         } else {
+            LLVMValueRef mask;
+            LLVMValueRef indexi = lp_build_const_int32(gallivm, i);
+            LLVMValueRef mask_ptr = LLVMBuildGEP(builder, mask_store,
+                                                 &indexi, 1, "mask_ptr");
+
+            if (partial_mask) {
+               mask = generate_quad_mask(gallivm, fs_type,
+                                         i*fs_type.length/4, 0, mask_input);
+            }
+            else {
+               mask = lp_build_const_int_vec(gallivm, fs_type, ~0);
+            }
+            LLVMBuildStore(builder, mask, mask_ptr);
          }
-         LLVMBuildStore(builder, mask, mask_ptr);
       }
 
       generate_fs_loop(gallivm,
@@ -2643,21 +2927,30 @@ generate_fragment(struct llvmpipe_context *lp,
                        builder,
                        fs_type,
                        context_ptr,
+                       glob_sample_pos,
                        num_loop,
                        &interp,
                        sampler,
+                       image,
                        mask_store, /* output */
                        color_store,
                        depth_ptr,
                        depth_stride,
+                       depth_sample_stride,
                        facing,
                        thread_data_ptr);
 
       for (i = 0; i < num_fs; i++) {
          LLVMValueRef indexi = lp_build_const_int32(gallivm, i);
-         LLVMValueRef ptr = LLVMBuildGEP(builder, mask_store,
-                                         &indexi, 1, "");
-         fs_mask[i] = LLVMBuildLoad(builder, ptr, "mask");
+         LLVMValueRef ptr;
+         for (unsigned s = 0; s < key->coverage_samples; s++) {
+            int idx = (i + (s * num_fs));
+            LLVMValueRef sindexi = lp_build_const_int32(gallivm, idx);
+            ptr = LLVMBuildGEP(builder, mask_store, &sindexi, 1, "");
+
+            fs_mask[idx] = LLVMBuildLoad(builder, ptr, "smask");
+         }
+
          /* This is fucked up need to reorganize things */
          for (cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
             for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
@@ -2680,13 +2973,14 @@ generate_fragment(struct llvmpipe_context *lp,
    }
 
    sampler->destroy(sampler);
-
+   image->destroy(image);
    /* Loop over color outputs / color buffers to do blending.
     */
    for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
       if (key->cbuf_format[cbuf] != PIPE_FORMAT_NONE) {
          LLVMValueRef color_ptr;
          LLVMValueRef stride;
+         LLVMValueRef sample_stride = NULL;
          LLVMValueRef index = lp_build_const_int32(gallivm, cbuf);
 
          boolean do_branch = ((key->depth.enabled
@@ -2699,17 +2993,33 @@ generate_fragment(struct llvmpipe_context *lp,
                                                 &index, 1, ""),
                                    "");
 
-         lp_build_name(color_ptr, "color_ptr%d", cbuf);
-
          stride = LLVMBuildLoad(builder,
                                 LLVMBuildGEP(builder, stride_ptr, &index, 1, ""),
                                 "");
 
-         generate_unswizzled_blend(gallivm, cbuf, variant,
-                                   key->cbuf_format[cbuf],
-                                   num_fs, fs_type, fs_mask, fs_out_color,
-                                   context_ptr, color_ptr, stride,
-                                   partial_mask, do_branch);
+         if (key->multisample)
+            sample_stride = LLVMBuildLoad(builder,
+                                          LLVMBuildGEP(builder, color_sample_stride_ptr,
+                                                       &index, 1, ""), "");
+
+         for (unsigned s = 0; s < key->cbuf_nr_samples[cbuf]; s++) {
+            unsigned mask_idx = num_fs * (key->multisample ? s : 0);
+            LLVMValueRef out_ptr = color_ptr;;
+
+            if (key->multisample) {
+               LLVMValueRef sample_offset = LLVMBuildMul(builder, sample_stride, lp_build_const_int32(gallivm, s), "");
+               out_ptr = LLVMBuildGEP(builder, out_ptr, &sample_offset, 1, "");
+            }
+            out_ptr = LLVMBuildBitCast(builder, out_ptr, LLVMPointerType(blend_vec_type, 0), "");
+
+            lp_build_name(out_ptr, "color_ptr%d", cbuf);
+
+            generate_unswizzled_blend(gallivm, cbuf, variant,
+                                      key->cbuf_format[cbuf],
+                                      num_fs, fs_type, &fs_mask[mask_idx], fs_out_color,
+                                      context_ptr, out_ptr, stride,
+                                      partial_mask, do_branch);
+         }
       }
    }
 
@@ -2720,7 +3030,7 @@ generate_fragment(struct llvmpipe_context *lp,
 
 
 static void
-dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key)
+dump_fs_variant_key(struct lp_fragment_shader_variant_key *key)
 {
    unsigned i;
 
@@ -2729,11 +3039,17 @@ dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key)
    if (key->flatshade) {
       debug_printf("flatshade = 1\n");
    }
+   if (key->multisample) {
+      debug_printf("multisample = 1\n");
+      debug_printf("coverage samples = %d\n", key->coverage_samples);
+   }
    for (i = 0; i < key->nr_cbufs; ++i) {
       debug_printf("cbuf_format[%u] = %s\n", i, util_format_name(key->cbuf_format[i]));
+      debug_printf("cbuf nr_samples[%u] = %d\n", i, key->cbuf_nr_samples[i]);
    }
    if (key->depth.enabled || key->stencil[0].enabled) {
       debug_printf("depth.format = %s\n", util_format_name(key->zsbuf_format));
+      debug_printf("depth nr_samples = %d\n", key->zsbuf_nr_samples);
    }
    if (key->depth.enabled) {
       debug_printf("depth.func = %s\n", util_str_func(key->depth.func, TRUE));
@@ -2775,7 +3091,7 @@ dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key)
       debug_printf("blend.alpha_to_coverage is enabled\n");
    }
    for (i = 0; i < key->nr_samplers; ++i) {
-      const struct lp_static_sampler_state *sampler = &key->state[i].sampler_state;
+      const struct lp_static_sampler_state *sampler = &key->samplers[i].sampler_state;
       debug_printf("sampler[%u] = \n", i);
       debug_printf("  .wrap = %s %s %s\n",
                    util_str_tex_wrap(sampler->wrap_s, TRUE),
@@ -2796,7 +3112,7 @@ dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key)
       debug_printf("  .apply_max_lod = %u\n", sampler->apply_max_lod);
    }
    for (i = 0; i < key->nr_sampler_views; ++i) {
-      const struct lp_static_texture_state *texture = &key->state[i].texture_state;
+      const struct lp_static_texture_state *texture = &key->samplers[i].texture_state;
       debug_printf("texture[%u] = \n", i);
       debug_printf("  .format = %s\n",
                    util_format_name(texture->format));
@@ -2809,15 +3125,33 @@ dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key)
                    texture->pot_height,
                    texture->pot_depth);
    }
+   struct lp_image_static_state *images = lp_fs_variant_key_images(key);
+   for (i = 0; i < key->nr_images; ++i) {
+      const struct lp_static_texture_state *image = &images[i].image_state;
+      debug_printf("image[%u] = \n", i);
+      debug_printf("  .format = %s\n",
+                   util_format_name(image->format));
+      debug_printf("  .target = %s\n",
+                   util_str_tex_target(image->target, TRUE));
+      debug_printf("  .level_zero_only = %u\n",
+                   image->level_zero_only);
+      debug_printf("  .pot = %u %u %u\n",
+                   image->pot_width,
+                   image->pot_height,
+                   image->pot_depth);
+   }
 }
 
 
 void
-lp_debug_fs_variant(const struct lp_fragment_shader_variant *variant)
+lp_debug_fs_variant(struct lp_fragment_shader_variant *variant)
 {
-   debug_printf("llvmpipe: Fragment shader #%u variant #%u:\n", 
+   debug_printf("llvmpipe: Fragment shader #%u variant #%u:\n",
                 variant->shader->no, variant->no);
-   tgsi_dump(variant->shader->base.tokens, 0);
+   if (variant->shader->base.type == PIPE_SHADER_IR_TGSI)
+      tgsi_dump(variant->shader->base.tokens, 0);
+   else
+      nir_print_shader(variant->shader->base.ir.nir, stderr);
    dump_fs_variant_key(&variant->key);
    debug_printf("variant->opaque = %u\n", variant->opaque);
    debug_printf("\n");
@@ -2838,10 +3172,11 @@ generate_variant(struct llvmpipe_context *lp,
    boolean fullcolormask;
    char module_name[64];
 
-   variant = CALLOC_STRUCT(lp_fragment_shader_variant);
+   variant = MALLOC(sizeof *variant + shader->variant_key_size - sizeof variant->key);
    if (!variant)
       return NULL;
 
+   memset(variant, 0, sizeof(*variant));
    snprintf(module_name, sizeof(module_name), "fs%u_variant%u",
             shader->no, shader->variants_created);
 
@@ -2931,6 +3266,7 @@ llvmpipe_create_fs_state(struct pipe_context *pipe,
    struct lp_fragment_shader *shader;
    int nr_samplers;
    int nr_sampler_views;
+   int nr_images;
    int i;
 
    shader = CALLOC_STRUCT(lp_fragment_shader);
@@ -2940,11 +3276,17 @@ llvmpipe_create_fs_state(struct pipe_context *pipe,
    shader->no = fs_no++;
    make_empty_list(&shader->variants);
 
-   /* get/save the summary info for this shader */
-   lp_build_tgsi_info(templ->tokens, &shader->info);
+   shader->base.type = templ->type;
+   if (templ->type == PIPE_SHADER_IR_TGSI) {
+      /* get/save the summary info for this shader */
+      lp_build_tgsi_info(templ->tokens, &shader->info);
 
-   /* we need to keep a local copy of the tokens */
-   shader->base.tokens = tgsi_dup_tokens(templ->tokens);
+      /* we need to keep a local copy of the tokens */
+      shader->base.tokens = tgsi_dup_tokens(templ->tokens);
+   } else {
+      shader->base.ir.nir = templ->ir.nir;
+      nir_tgsi_scan_shader(templ->ir.nir, &shader->info.base, true);
+   }
 
    shader->draw_data = draw_create_fragment_shader(llvmpipe->draw, templ);
    if (shader->draw_data == NULL) {
@@ -2955,13 +3297,13 @@ llvmpipe_create_fs_state(struct pipe_context *pipe,
 
    nr_samplers = shader->info.base.file_max[TGSI_FILE_SAMPLER] + 1;
    nr_sampler_views = shader->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
-
-   shader->variant_key_size = Offset(struct lp_fragment_shader_variant_key,
-                                     state[MAX2(nr_samplers, nr_sampler_views)]);
+   nr_images = shader->info.base.file_max[TGSI_FILE_IMAGE] + 1;
+   shader->variant_key_size = lp_fs_variant_key_size(MAX2(nr_samplers, nr_sampler_views), nr_images);
 
    for (i = 0; i < shader->info.base.num_inputs; i++) {
       shader->inputs[i].usage_mask = shader->info.base.input_usage_mask[i];
       shader->inputs[i].cyl_wrap = shader->info.base.input_cylindrical_wrap[i];
+      shader->inputs[i].location = shader->info.base.input_interpolate_loc[i];
 
       switch (shader->info.base.input_interpolate[i]) {
       case TGSI_INTERPOLATE_CONSTANT:
@@ -3023,14 +3365,14 @@ static void
 llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs)
 {
    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
-
-   if (llvmpipe->fs == fs)
+   struct lp_fragment_shader *lp_fs = (struct lp_fragment_shader *)fs;
+   if (llvmpipe->fs == lp_fs) /* rebinding the current shader is a no-op */
       return;
 
-   llvmpipe->fs = (struct lp_fragment_shader *) fs;
-
    draw_bind_fragment_shader(llvmpipe->draw,
-                             (llvmpipe->fs ? llvmpipe->fs->draw_data : NULL));
+                             (lp_fs ? lp_fs->draw_data : NULL));
+   /* llvmpipe->fs is switched only after the draw module has been rebound. */
+   llvmpipe->fs = lp_fs;
 
    llvmpipe->dirty |= LP_NEW_FS;
 }
@@ -3095,6 +3437,8 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs)
    /* Delete draw module's data */
    draw_delete_fragment_shader(llvmpipe->draw, shader->draw_data);
 
+   if (shader->base.ir.nir)
+      ralloc_free(shader->base.ir.nir);
    assert(shader->variants_cached == 0);
    FREE((void *) shader->base.tokens);
    FREE(shader);
@@ -3124,7 +3468,9 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe,
    }
 
    if (shader == PIPE_SHADER_VERTEX ||
-       shader == PIPE_SHADER_GEOMETRY) {
+       shader == PIPE_SHADER_GEOMETRY ||
+       shader == PIPE_SHADER_TESS_CTRL ||
+       shader == PIPE_SHADER_TESS_EVAL) {
       /* Pass the constants to the 'draw' module */
       const unsigned size = cb ? cb->buffer_size : 0;
       const ubyte *data;
@@ -3145,9 +3491,10 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe,
       draw_set_mapped_constant_buffer(llvmpipe->draw, shader,
                                       index, data, size);
    }
-   else {
+   else if (shader == PIPE_SHADER_COMPUTE)
+      llvmpipe->cs_dirty |= LP_CSNEW_CONSTANTS;
+   else
       llvmpipe->dirty |= LP_NEW_FS_CONSTANTS;
-   }
 
    if (cb && cb->user_buffer) {
       pipe_resource_reference(&constants, NULL);
@@ -3168,7 +3515,9 @@ llvmpipe_set_shader_buffers(struct pipe_context *pipe,
       util_copy_shader_buffer(&llvmpipe->ssbos[shader][i], buffer);
 
       if (shader == PIPE_SHADER_VERTEX ||
-          shader == PIPE_SHADER_GEOMETRY) {
+          shader == PIPE_SHADER_GEOMETRY ||
+          shader == PIPE_SHADER_TESS_CTRL ||
+          shader == PIPE_SHADER_TESS_EVAL) {
          const unsigned size = buffer ? buffer->buffer_size : 0;
          const ubyte *data = NULL;
          if (buffer && buffer->buffer)
@@ -3177,12 +3526,44 @@ llvmpipe_set_shader_buffers(struct pipe_context *pipe,
             data += buffer->buffer_offset;
          draw_set_mapped_shader_buffer(llvmpipe->draw, shader,
                                        i, data, size);
+      } else if (shader == PIPE_SHADER_COMPUTE) {
+        llvmpipe->cs_dirty |= LP_CSNEW_SSBOS;
       } else if (shader == PIPE_SHADER_FRAGMENT) {
          llvmpipe->dirty |= LP_NEW_FS_SSBOS;
       }
    }
 }
 
+static void
+llvmpipe_set_shader_images(struct pipe_context *pipe,
+                            enum pipe_shader_type shader, unsigned start_slot,
+                           unsigned count, const struct pipe_image_view *images)
+{
+   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+   unsigned i, idx;
+   /* Copy `count` image views into this stage's slots, starting at start_slot. */
+   draw_flush(llvmpipe->draw);
+   for (i = start_slot, idx = 0; i < start_slot + count; i++, idx++) {
+      const struct pipe_image_view *image = images ? &images[idx] : NULL;
+      /* A NULL `images` array passes a NULL view for every slot in the range. */
+      util_copy_image_view(&llvmpipe->images[shader][i], image);
+   }
+   /* NOTE(review): assigned unconditionally, so it can drop below an earlier, larger value -- confirm intended. */
+   llvmpipe->num_images[shader] = start_slot + count;
+   if (shader == PIPE_SHADER_VERTEX ||
+       shader == PIPE_SHADER_GEOMETRY ||
+       shader == PIPE_SHADER_TESS_CTRL ||
+       shader == PIPE_SHADER_TESS_EVAL) { /* stages whose images go to the draw module */
+      draw_set_images(llvmpipe->draw,
+                      shader,
+                      llvmpipe->images[shader],
+                      start_slot + count);
+   } else if (shader == PIPE_SHADER_COMPUTE) /* compute has its own dirty mask */
+      llvmpipe->cs_dirty |= LP_CSNEW_IMAGES;
+   else /* every remaining stage flags fragment-shader image state */
+      llvmpipe->dirty |= LP_NEW_FS_IMAGES;
+}
+
 /**
  * Return the blend factor equivalent to a destination alpha of one.
  */
@@ -3212,14 +3593,17 @@ force_dst_alpha_one(unsigned factor, boolean clamped_zero)
  * TODO: there is actually no reason to tie this to context state -- the
  * generated code could be cached globally in the screen.
  */
-static void
+static struct lp_fragment_shader_variant_key *
 make_variant_key(struct llvmpipe_context *lp,
                  struct lp_fragment_shader *shader,
-                 struct lp_fragment_shader_variant_key *key)
+                 char *store)
 {
    unsigned i;
+   struct lp_fragment_shader_variant_key *key;
+
+   key = (struct lp_fragment_shader_variant_key *)store;
 
-   memset(key, 0, shader->variant_key_size);
+   memset(key, 0, offsetof(struct lp_fragment_shader_variant_key, samplers[1]));
 
    if (lp->framebuffer.zsbuf) {
       enum pipe_format zsbuf_format = lp->framebuffer.zsbuf->format;
@@ -3239,6 +3623,7 @@ make_variant_key(struct llvmpipe_context *lp,
       if (llvmpipe_resource_is_1d(lp->framebuffer.zsbuf->texture)) {
          key->resource_1d = TRUE;
       }
+      key->zsbuf_nr_samples = util_res_sample_count(lp->framebuffer.zsbuf->texture);
    }
 
    /*
@@ -3271,7 +3656,8 @@ make_variant_key(struct llvmpipe_context *lp,
    /* alpha.ref_value is passed in jit_context */
 
    key->flatshade = lp->rasterizer->flatshade;
-   if (lp->active_occlusion_queries) {
+   key->multisample = lp->rasterizer->multisample;
+   if (lp->active_occlusion_queries && !lp->queries_disabled) {
       key->occlusion_count = TRUE;
    }
 
@@ -3279,6 +3665,9 @@ make_variant_key(struct llvmpipe_context *lp,
       memcpy(&key->blend, lp->blend, sizeof key->blend);
    }
 
+   key->coverage_samples = 1;
+   if (key->multisample)
+      key->coverage_samples = util_framebuffer_get_num_samples(&lp->framebuffer);
    key->nr_cbufs = lp->framebuffer.nr_cbufs;
 
    if (!key->blend.independent_blend_enable) {
@@ -3297,6 +3686,7 @@ make_variant_key(struct llvmpipe_context *lp,
          const struct util_format_description *format_desc;
 
          key->cbuf_format[i] = format;
+         key->cbuf_nr_samples[i] = util_res_sample_count(lp->framebuffer.cbufs[i]->texture);
 
          /*
           * Figure out if this is a 1d resource. Note that OpenGL allows crazy
@@ -3356,6 +3746,7 @@ make_variant_key(struct llvmpipe_context *lp,
       else {
          /* no color buffer for this fragment output */
          key->cbuf_format[i] = PIPE_FORMAT_NONE;
+         key->cbuf_nr_samples[i] = 0;
          blend_rt->colormask = 0x0;
          blend_rt->blend_enable = 0;
       }
@@ -3365,9 +3756,15 @@ make_variant_key(struct llvmpipe_context *lp,
     */
    key->nr_samplers = shader->info.base.file_max[TGSI_FILE_SAMPLER] + 1;
 
+   struct lp_sampler_static_state *fs_sampler;
+
+   fs_sampler = key->samplers;
+
+   memset(fs_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *fs_sampler);
+
    for(i = 0; i < key->nr_samplers; ++i) {
       if(shader->info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
-         lp_sampler_static_sampler_state(&key->state[i].sampler_state,
+         lp_sampler_static_sampler_state(&fs_sampler[i].sampler_state,
                                          lp->samplers[PIPE_SHADER_FRAGMENT][i]);
       }
    }
@@ -3386,7 +3783,7 @@ make_variant_key(struct llvmpipe_context *lp,
           * used views may be included in the shader key.
           */
          if(shader->info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1u << (i & 31))) {
-            lp_sampler_static_texture_state(&key->state[i].texture_state,
+            lp_sampler_static_texture_state(&fs_sampler[i].texture_state,
                                             lp->sampler_views[PIPE_SHADER_FRAGMENT][i]);
          }
       }
@@ -3395,11 +3792,22 @@ make_variant_key(struct llvmpipe_context *lp,
       key->nr_sampler_views = key->nr_samplers;
       for(i = 0; i < key->nr_sampler_views; ++i) {
          if(shader->info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
-            lp_sampler_static_texture_state(&key->state[i].texture_state,
+            lp_sampler_static_texture_state(&fs_sampler[i].texture_state,
                                             lp->sampler_views[PIPE_SHADER_FRAGMENT][i]);
          }
       }
    }
+
+   struct lp_image_static_state *lp_image;
+   lp_image = lp_fs_variant_key_images(key);
+   key->nr_images = shader->info.base.file_max[TGSI_FILE_IMAGE] + 1;
+   for (i = 0; i < key->nr_images; ++i) {
+      if (shader->info.base.file_mask[TGSI_FILE_IMAGE] & (1 << i)) {
+         lp_sampler_static_texture_state_image(&lp_image[i].image_state,
+                                               &lp->images[PIPE_SHADER_FRAGMENT][i]);
+      }
+   }
+   return key;
 }
 
 
@@ -3412,16 +3820,17 @@ void
 llvmpipe_update_fs(struct llvmpipe_context *lp)
 {
    struct lp_fragment_shader *shader = lp->fs;
-   struct lp_fragment_shader_variant_key key;
+   struct lp_fragment_shader_variant_key *key;
    struct lp_fragment_shader_variant *variant = NULL;
    struct lp_fs_variant_list_item *li;
+   char store[LP_FS_MAX_VARIANT_KEY_SIZE];
 
-   make_variant_key(lp, shader, &key);
+   key = make_variant_key(lp, shader, store);
 
    /* Search the variants for one which matches the key */
    li = first_elem(&shader->variants);
    while(!at_end(&shader->variants, li)) {
-      if(memcmp(&li->base->key, &key, shader->variant_key_size) == 0) {
+      if(memcmp(&li->base->key, key, shader->variant_key_size) == 0) {
          variant = li->base;
          break;
       }
@@ -3493,7 +3902,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp)
        * Generate the new variant.
        */
       t0 = os_time_get();
-      variant = generate_variant(lp, shader, &key);
+      variant = generate_variant(lp, shader, key);
       t1 = os_time_get();
       dt = t1 - t0;
       LP_COUNT_ADD(llvm_compile_time, dt);
@@ -3527,6 +3936,7 @@ llvmpipe_init_fs_funcs(struct llvmpipe_context *llvmpipe)
    llvmpipe->pipe.set_constant_buffer = llvmpipe_set_constant_buffer;
 
    llvmpipe->pipe.set_shader_buffers = llvmpipe_set_shader_buffers;
+   llvmpipe->pipe.set_shader_images = llvmpipe_set_shader_images;
 }