llvmpipe: pass number of samplers into llvm sampler code.
[mesa.git] / src / gallium / drivers / llvmpipe / lp_state_fs.c
index 9a43f01738ba303c22a234dd6f49cd325165d979..1d57f2553d24c49df93f9ac2947fece784828e5e 100644 (file)
 #include "util/u_inlines.h"
 #include "util/u_memory.h"
 #include "util/u_pointer.h"
-#include "util/u_format.h"
+#include "util/format/u_format.h"
 #include "util/u_dump.h"
 #include "util/u_string.h"
 #include "util/simple_list.h"
 #include "util/u_dual_blend.h"
-#include "os/os_time.h"
+#include "util/os_time.h"
 #include "pipe/p_shader_tokens.h"
 #include "draw/draw_context.h"
 #include "tgsi/tgsi_dump.h"
 #include "gallivm/lp_bld_intr.h"
 #include "gallivm/lp_bld_logic.h"
 #include "gallivm/lp_bld_tgsi.h"
+#include "gallivm/lp_bld_nir.h"
 #include "gallivm/lp_bld_swizzle.h"
 #include "gallivm/lp_bld_flow.h"
 #include "gallivm/lp_bld_debug.h"
 #include "gallivm/lp_bld_arit.h"
+#include "gallivm/lp_bld_bitarit.h"
 #include "gallivm/lp_bld_pack.h"
 #include "gallivm/lp_bld_format.h"
 #include "gallivm/lp_bld_quad.h"
 #include "lp_flush.h"
 #include "lp_state_fs.h"
 #include "lp_rast.h"
+#include "nir/nir_to_tgsi_info.h"
 
-
+#include "lp_screen.h"
+#include "compiler/nir/nir_serialize.h"
+#include "util/mesa-sha1.h"
 /** Fragment shader number (for debugging) */
 static unsigned fs_no = 0;
 
@@ -121,7 +126,8 @@ static LLVMValueRef
 generate_quad_mask(struct gallivm_state *gallivm,
                    struct lp_type fs_type,
                    unsigned first_quad,
-                   LLVMValueRef mask_input) /* int32 */
+                   unsigned sample,
+                   LLVMValueRef mask_input) /* int64 */
 {
    LLVMBuilderRef builder = gallivm->builder;
    struct lp_type mask_type;
@@ -160,6 +166,11 @@ generate_quad_mask(struct gallivm_state *gallivm,
       shift = 0;
    }
 
+   mask_input = LLVMBuildLShr(builder, mask_input, lp_build_const_int64(gallivm, 16 * sample), "");
+   mask_input = LLVMBuildTrunc(builder, mask_input,
+                               i32t, "");
+   mask_input = LLVMBuildAnd(builder, mask_input, lp_build_const_int32(gallivm, 0xffff), "");
+
    mask_input = LLVMBuildLShr(builder,
                               mask_input,
                               LLVMConstInt(i32t, shift, 0),
@@ -285,6 +296,59 @@ lp_build_depth_clamp(struct gallivm_state *gallivm,
    return lp_build_clamp(&f32_bld, z, min_depth, max_depth);
 }
 
+static void
+lp_build_sample_alpha_to_coverage(struct gallivm_state *gallivm,
+                                  struct lp_type type,
+                                  unsigned coverage_samples,
+                                  LLVMValueRef num_loop,
+                                  LLVMValueRef loop_counter,
+                                  LLVMValueRef coverage_mask_store,
+                                  LLVMValueRef alpha)
+{
+   struct lp_build_context bld;
+   LLVMBuilderRef builder = gallivm->builder;
+   float step = 1.0 / coverage_samples;
+
+   lp_build_context_init(&bld, gallivm, type);
+   for (unsigned s = 0; s < coverage_samples; s++) {
+      LLVMValueRef alpha_ref_value = lp_build_const_vec(gallivm, type, step * s);
+      LLVMValueRef test = lp_build_cmp(&bld, PIPE_FUNC_GREATER, alpha, alpha_ref_value);
+
+      LLVMValueRef s_mask_idx = LLVMBuildMul(builder, lp_build_const_int32(gallivm, s), num_loop, "");
+      s_mask_idx = LLVMBuildAdd(builder, s_mask_idx, loop_counter, "");
+      LLVMValueRef s_mask_ptr = LLVMBuildGEP(builder, coverage_mask_store, &s_mask_idx, 1, "");
+      LLVMValueRef s_mask = LLVMBuildLoad(builder, s_mask_ptr, "");
+      s_mask = LLVMBuildAnd(builder, s_mask, test, "");
+      LLVMBuildStore(builder, s_mask, s_mask_ptr);
+   }
+};
+
+struct lp_build_fs_llvm_iface {
+   struct lp_build_fs_iface base;
+   struct lp_build_interp_soa_context *interp;
+   struct lp_build_for_loop_state *loop_state;
+   LLVMValueRef mask_store;
+};
+
+static LLVMValueRef fs_interp(const struct lp_build_fs_iface *iface,
+                              struct lp_build_context *bld,
+                              unsigned attrib, unsigned chan,
+                              bool centroid, bool sample,
+                              LLVMValueRef attrib_indir,
+                              LLVMValueRef offsets[2])
+{
+   struct lp_build_fs_llvm_iface *fs_iface = (struct lp_build_fs_llvm_iface *)iface;
+   struct lp_build_interp_soa_context *interp = fs_iface->interp;
+   unsigned loc = TGSI_INTERPOLATE_LOC_CENTER;
+   if (centroid)
+      loc = TGSI_INTERPOLATE_LOC_CENTROID;
+   if (sample)
+      loc = TGSI_INTERPOLATE_LOC_SAMPLE;
+
+   return lp_build_interp_soa(interp, bld->gallivm, fs_iface->loop_state->counter,
+                              fs_iface->mask_store,
+                              attrib, chan, loc, attrib_indir, offsets);
+}
 
 /**
  * Generate the fragment shader, depth/stencil test, and alpha tests.
@@ -296,13 +360,16 @@ generate_fs_loop(struct gallivm_state *gallivm,
                  LLVMBuilderRef builder,
                  struct lp_type type,
                  LLVMValueRef context_ptr,
+                 LLVMValueRef sample_pos_array,
                  LLVMValueRef num_loop,
                  struct lp_build_interp_soa_context *interp,
-                 struct lp_build_sampler_soa *sampler,
+                 const struct lp_build_sampler_soa *sampler,
+                 const struct lp_build_image_soa *image,
                  LLVMValueRef mask_store,
                  LLVMValueRef (*out_color)[4],
-                 LLVMValueRef depth_ptr,
+                 LLVMValueRef depth_base_ptr,
                  LLVMValueRef depth_stride,
+                 LLVMValueRef depth_sample_stride,
                  LLVMValueRef facing,
                  LLVMValueRef thread_data_ptr)
 {
@@ -310,14 +377,17 @@ generate_fs_loop(struct gallivm_state *gallivm,
    const struct tgsi_token *tokens = shader->base.tokens;
    struct lp_type int_type = lp_int_type(type);
    LLVMTypeRef vec_type, int_vec_type;
-   LLVMValueRef mask_ptr, mask_val;
+   LLVMValueRef mask_ptr = NULL, mask_val = NULL;
    LLVMValueRef consts_ptr, num_consts_ptr;
+   LLVMValueRef ssbo_ptr, num_ssbo_ptr;
    LLVMValueRef z;
    LLVMValueRef z_value, s_value;
    LLVMValueRef z_fb, s_fb;
+   LLVMValueRef depth_ptr;
    LLVMValueRef stencil_refs[2];
    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
-   struct lp_build_for_loop_state loop_state;
+   LLVMValueRef zs_samples = lp_build_const_int32(gallivm, key->zsbuf_nr_samples);
+   struct lp_build_for_loop_state loop_state, sample_loop_state;
    struct lp_build_mask_context mask;
    /*
     * TODO: figure out if simple_shader optimization is really worthwile to
@@ -338,16 +408,25 @@ generate_fs_loop(struct gallivm_state *gallivm,
 
    memset(&system_values, 0, sizeof(system_values));
 
+   /* truncate then sign extend. */
+   system_values.front_facing = LLVMBuildTrunc(gallivm->builder, facing, LLVMInt1TypeInContext(gallivm->context), "");
+   system_values.front_facing = LLVMBuildSExt(gallivm->builder, system_values.front_facing, LLVMInt32TypeInContext(gallivm->context), "");
+
    if (key->depth.enabled ||
        key->stencil[0].enabled) {
 
       zs_format_desc = util_format_description(key->zsbuf_format);
       assert(zs_format_desc);
 
-      if (!shader->info.base.writes_z && !shader->info.base.writes_stencil) {
-         if (key->alpha.enabled ||
+      if (shader->info.base.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL])
+         depth_mode = EARLY_DEPTH_TEST | EARLY_DEPTH_WRITE;
+      else if (!shader->info.base.writes_z && !shader->info.base.writes_stencil) {
+         if (shader->info.base.writes_memory)
+            depth_mode = LATE_DEPTH_TEST | LATE_DEPTH_WRITE;
+         else if (key->alpha.enabled ||
              key->blend.alpha_to_coverage ||
-             shader->info.base.uses_kill) {
+             shader->info.base.uses_kill ||
+             shader->info.base.writes_samplemask) {
             /* With alpha test and kill, can do the depth test early
              * and hopefully eliminate some quads.  But need to do a
              * special deferred depth write once the final mask value
@@ -390,24 +469,22 @@ generate_fs_loop(struct gallivm_state *gallivm,
    consts_ptr = lp_jit_context_constants(gallivm, context_ptr);
    num_consts_ptr = lp_jit_context_num_constants(gallivm, context_ptr);
 
-   lp_build_for_loop_begin(&loop_state, gallivm,
-                           lp_build_const_int32(gallivm, 0),
-                           LLVMIntULT,
-                           num_loop,
-                           lp_build_const_int32(gallivm, 1));
-
-   mask_ptr = LLVMBuildGEP(builder, mask_store,
-                           &loop_state.counter, 1, "mask_ptr");
-   mask_val = LLVMBuildLoad(builder, mask_ptr, "");
+   ssbo_ptr = lp_jit_context_ssbos(gallivm, context_ptr);
+   num_ssbo_ptr = lp_jit_context_num_ssbos(gallivm, context_ptr);
 
    memset(outputs, 0, sizeof outputs);
 
+   /* Allocate color storage for each fragment sample */
+   LLVMValueRef color_store_size = num_loop;
+   if (key->min_samples > 1)
+      color_store_size = LLVMBuildMul(builder, num_loop, lp_build_const_int32(gallivm, key->min_samples), "");
+
    for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
       for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
          out_color[cbuf][chan] = lp_build_array_alloca(gallivm,
                                                        lp_build_vec_type(gallivm,
                                                                          type),
-                                                       num_loop, "color");
+                                                       color_store_size, "color");
       }
    }
    if (dual_source_blend) {
@@ -416,10 +493,41 @@ generate_fs_loop(struct gallivm_state *gallivm,
          out_color[1][chan] = lp_build_array_alloca(gallivm,
                                                     lp_build_vec_type(gallivm,
                                                                       type),
-                                                    num_loop, "color1");
+                                                    color_store_size, "color1");
       }
    }
 
+   lp_build_for_loop_begin(&loop_state, gallivm,
+                           lp_build_const_int32(gallivm, 0),
+                           LLVMIntULT,
+                           num_loop,
+                           lp_build_const_int32(gallivm, 1));
+
+   LLVMValueRef sample_mask_in;
+   if (key->multisample) {
+      sample_mask_in = lp_build_const_int_vec(gallivm, type, 0);
+      /* create shader execution mask by combining all sample masks. */
+      for (unsigned s = 0; s < key->coverage_samples; s++) {
+         LLVMValueRef s_mask_idx = LLVMBuildMul(builder, num_loop, lp_build_const_int32(gallivm, s), "");
+         s_mask_idx = LLVMBuildAdd(builder, s_mask_idx, loop_state.counter, "");
+         LLVMValueRef s_mask = lp_build_pointer_get(builder, mask_store, s_mask_idx);
+         if (s == 0)
+            mask_val = s_mask;
+         else
+            mask_val = LLVMBuildOr(builder, s_mask, mask_val, "");
+
+         LLVMValueRef mask_in = LLVMBuildAnd(builder, s_mask, lp_build_const_int_vec(gallivm, type, (1 << s)), "");
+         sample_mask_in = LLVMBuildOr(builder, sample_mask_in, mask_in, "");
+      }
+   } else {
+      sample_mask_in = lp_build_const_int_vec(gallivm, type, 1);
+      mask_ptr = LLVMBuildGEP(builder, mask_store,
+                              &loop_state.counter, 1, "mask_ptr");
+      mask_val = LLVMBuildLoad(builder, mask_ptr, "");
+
+      LLVMValueRef mask_in = LLVMBuildAnd(builder, mask_val, lp_build_const_int_vec(gallivm, type, 1), "");
+      sample_mask_in = LLVMBuildOr(builder, sample_mask_in, mask_in, "");
+   }
 
    /* 'mask' will control execution based on quad's pixel alive/killed state */
    lp_build_mask_begin(&mask, gallivm, type, mask_val);
@@ -427,9 +535,62 @@ generate_fs_loop(struct gallivm_state *gallivm,
    if (!(depth_mode & EARLY_DEPTH_TEST) && !simple_shader)
       lp_build_mask_check(&mask);
 
-   lp_build_interp_soa_update_pos_dyn(interp, gallivm, loop_state.counter);
+   /* Create storage for recombining sample masks after early Z pass. */
+   LLVMValueRef s_mask_or = lp_build_alloca(gallivm, lp_build_int_vec_type(gallivm, type), "cov_mask_early_depth");
+   LLVMBuildStore(builder, LLVMConstNull(lp_build_int_vec_type(gallivm, type)), s_mask_or);
+
+   LLVMValueRef s_mask = NULL, s_mask_ptr = NULL;
+   LLVMValueRef z_sample_value_store = NULL, s_sample_value_store = NULL;
+   LLVMValueRef z_fb_store = NULL, s_fb_store = NULL;
+   LLVMTypeRef z_type = NULL, z_fb_type = NULL;
+
+   /* Run early depth once per sample */
+   if (key->multisample) {
+
+      if (zs_format_desc) {
+         struct lp_type zs_type = lp_depth_type(zs_format_desc, type.length);
+         struct lp_type z_type = zs_type;
+         struct lp_type s_type = zs_type;
+         if (zs_format_desc->block.bits < type.width)
+            z_type.width = type.width;
+         else if (zs_format_desc->block.bits > 32) {
+            z_type.width = z_type.width / 2;
+            s_type.width = s_type.width / 2;
+            s_type.floating = 0;
+         }
+         z_sample_value_store = lp_build_array_alloca(gallivm, lp_build_int_vec_type(gallivm, type),
+                                                      zs_samples, "z_sample_store");
+         s_sample_value_store = lp_build_array_alloca(gallivm, lp_build_int_vec_type(gallivm, type),
+                                                      zs_samples, "s_sample_store");
+         z_fb_store = lp_build_array_alloca(gallivm, lp_build_vec_type(gallivm, z_type),
+                                            zs_samples, "z_fb_store");
+         s_fb_store = lp_build_array_alloca(gallivm, lp_build_vec_type(gallivm, s_type),
+                                            zs_samples, "s_fb_store");
+      }
+      lp_build_for_loop_begin(&sample_loop_state, gallivm,
+                              lp_build_const_int32(gallivm, 0),
+                              LLVMIntULT, lp_build_const_int32(gallivm, key->coverage_samples),
+                              lp_build_const_int32(gallivm, 1));
+
+      LLVMValueRef s_mask_idx = LLVMBuildMul(builder, sample_loop_state.counter, num_loop, "");
+      s_mask_idx = LLVMBuildAdd(builder, s_mask_idx, loop_state.counter, "");
+      s_mask_ptr = LLVMBuildGEP(builder, mask_store, &s_mask_idx, 1, "");
+
+      s_mask = LLVMBuildLoad(builder, s_mask_ptr, "");
+      s_mask = LLVMBuildAnd(builder, s_mask, mask_val, "");
+   }
+
+
+   /* for multisample Z needs to be interpolated at sample points for testing. */
+   lp_build_interp_soa_update_pos_dyn(interp, gallivm, loop_state.counter, key->multisample ? sample_loop_state.counter : NULL);
    z = interp->pos[2];
 
+   depth_ptr = depth_base_ptr;
+   if (key->multisample) {
+      LLVMValueRef sample_offset = LLVMBuildMul(builder, sample_loop_state.counter, depth_sample_stride, "");
+      depth_ptr = LLVMBuildGEP(builder, depth_ptr, &sample_offset, 1, "");
+   }
+
    if (depth_mode & EARLY_DEPTH_TEST) {
       /*
        * Clamp according to ARB_depth_clamp semantics.
@@ -447,12 +608,13 @@ generate_fs_loop(struct gallivm_state *gallivm,
                                   key->stencil,
                                   type,
                                   zs_format_desc,
-                                  &mask,
+                                  key->multisample ? NULL : &mask,
+                                  &s_mask,
                                   stencil_refs,
                                   z, z_fb, s_fb,
                                   facing,
                                   &z_value, &s_value,
-                                  !simple_shader);
+                                  !simple_shader && !key->multisample);
 
       if (depth_mode & EARLY_DEPTH_WRITE) {
          lp_build_depth_stencil_write_swizzled(gallivm, type,
@@ -466,18 +628,105 @@ generate_fs_loop(struct gallivm_state *gallivm,
        * stencil test otherwise new stencil values may not get written if all
        * fragments got killed by depth/stencil test.
        */
-      if (!simple_shader && key->stencil[0].enabled)
+      if (!simple_shader && key->stencil[0].enabled && !key->multisample)
          lp_build_mask_check(&mask);
+
+      if (key->multisample) {
+         z_fb_type = LLVMTypeOf(z_fb);
+         z_type = LLVMTypeOf(z_value);
+         lp_build_pointer_set(builder, z_sample_value_store, sample_loop_state.counter, LLVMBuildBitCast(builder, z_value, lp_build_int_vec_type(gallivm, type), ""));
+         lp_build_pointer_set(builder, s_sample_value_store, sample_loop_state.counter, LLVMBuildBitCast(builder, s_value, lp_build_int_vec_type(gallivm, type), ""));
+         lp_build_pointer_set(builder, z_fb_store, sample_loop_state.counter, z_fb);
+         lp_build_pointer_set(builder, s_fb_store, sample_loop_state.counter, s_fb);
+      }
    }
 
-   lp_build_interp_soa_update_inputs_dyn(interp, gallivm, loop_state.counter);
+   if (key->multisample) {
+      /*
+       * Store the post-early Z coverage mask.
+       * Recombine the resulting coverage masks post early Z into the fragment
+       * shader execution mask.
+       */
+      LLVMValueRef tmp_s_mask_or = LLVMBuildLoad(builder, s_mask_or, "");
+      tmp_s_mask_or = LLVMBuildOr(builder, tmp_s_mask_or, s_mask, "");
+      LLVMBuildStore(builder, tmp_s_mask_or, s_mask_or);
+
+      LLVMBuildStore(builder, s_mask, s_mask_ptr);
+
+      lp_build_for_loop_end(&sample_loop_state);
+
+      /* recombined all the coverage masks in the shader exec mask. */
+      tmp_s_mask_or = LLVMBuildLoad(builder, s_mask_or, "");
+      lp_build_mask_update(&mask, tmp_s_mask_or);
+
+      if (key->min_samples == 1) {
+         /* for multisample Z needs to be re interpolated at pixel center */
+         lp_build_interp_soa_update_pos_dyn(interp, gallivm, loop_state.counter, NULL);
+         lp_build_mask_update(&mask, tmp_s_mask_or);
+      }
+   }
+
+   LLVMValueRef out_sample_mask_storage = NULL;
+   if (shader->info.base.writes_samplemask) {
+      out_sample_mask_storage = lp_build_alloca(gallivm, int_vec_type, "write_mask");
+      if (key->min_samples > 1)
+         LLVMBuildStore(builder, LLVMConstNull(int_vec_type), out_sample_mask_storage);
+   }
+
+   if (key->multisample && key->min_samples > 1) {
+      lp_build_for_loop_begin(&sample_loop_state, gallivm,
+                              lp_build_const_int32(gallivm, 0),
+                              LLVMIntULT,
+                              lp_build_const_int32(gallivm, key->min_samples),
+                              lp_build_const_int32(gallivm, 1));
+
+      LLVMValueRef s_mask_idx = LLVMBuildMul(builder, sample_loop_state.counter, num_loop, "");
+      s_mask_idx = LLVMBuildAdd(builder, s_mask_idx, loop_state.counter, "");
+      s_mask_ptr = LLVMBuildGEP(builder, mask_store, &s_mask_idx, 1, "");
+      s_mask = LLVMBuildLoad(builder, s_mask_ptr, "");
+      lp_build_mask_force(&mask, s_mask);
+      lp_build_interp_soa_update_pos_dyn(interp, gallivm, loop_state.counter, sample_loop_state.counter);
+      system_values.sample_id = sample_loop_state.counter;
+   } else
+      system_values.sample_id = lp_build_const_int32(gallivm, 0);
+
+   system_values.sample_mask_in = sample_mask_in;
+   system_values.sample_pos = sample_pos_array;
+
+   lp_build_interp_soa_update_inputs_dyn(interp, gallivm, loop_state.counter, mask_store, sample_loop_state.counter);
+
+   struct lp_build_fs_llvm_iface fs_iface = {
+     .base.interp_fn = fs_interp,
+     .interp = interp,
+     .loop_state = &loop_state,
+     .mask_store = mask_store,
+   };
+
+   struct lp_build_tgsi_params params;
+   memset(&params, 0, sizeof(params));
+
+   params.type = type;
+   params.mask = &mask;
+   params.fs_iface = &fs_iface.base;
+   params.consts_ptr = consts_ptr;
+   params.const_sizes_ptr = num_consts_ptr;
+   params.system_values = &system_values;
+   params.inputs = interp->inputs;
+   params.context_ptr = context_ptr;
+   params.thread_data_ptr = thread_data_ptr;
+   params.sampler = sampler;
+   params.info = &shader->info.base;
+   params.ssbo_ptr = ssbo_ptr;
+   params.ssbo_sizes_ptr = num_ssbo_ptr;
+   params.image = image;
 
    /* Build the actual shader */
-   lp_build_tgsi_soa(gallivm, tokens, type, &mask,
-                     consts_ptr, num_consts_ptr, &system_values,
-                     interp->inputs,
-                     outputs, context_ptr, thread_data_ptr,
-                     sampler, &shader->info.base, NULL);
+   if (shader->base.type == PIPE_SHADER_IR_TGSI)
+      lp_build_tgsi_soa(gallivm, tokens, &params,
+                        outputs);
+   else
+      lp_build_nir_soa(gallivm, shader->base.ir.nir, &params,
+                       outputs);
 
    /* Alpha test */
    if (key->alpha.enabled) {
@@ -510,12 +759,118 @@ generate_fs_loop(struct gallivm_state *gallivm,
       if (color0 != -1 && outputs[color0][3]) {
          LLVMValueRef alpha = LLVMBuildLoad(builder, outputs[color0][3], "alpha");
 
-         lp_build_alpha_to_coverage(gallivm, type,
-                                    &mask, alpha,
-                                    (depth_mode & LATE_DEPTH_TEST) != 0);
+         if (!key->multisample) {
+            lp_build_alpha_to_coverage(gallivm, type,
+                                       &mask, alpha,
+                                       (depth_mode & LATE_DEPTH_TEST) != 0);
+         } else {
+            lp_build_sample_alpha_to_coverage(gallivm, type, key->coverage_samples, num_loop,
+                                              loop_state.counter,
+                                              mask_store, alpha);
+         }
+      }
+   }
+   if (key->blend.alpha_to_one && key->multisample) {
+      for (attrib = 0; attrib < shader->info.base.num_outputs; ++attrib) {
+         unsigned cbuf = shader->info.base.output_semantic_index[attrib];
+         if ((shader->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR) &&
+             ((cbuf < key->nr_cbufs) || (cbuf == 1 && dual_source_blend)))
+            if (outputs[cbuf][3]) {
+               LLVMBuildStore(builder, lp_build_const_vec(gallivm, type, 1.0), outputs[cbuf][3]);
+            }
+      }
+   }
+   if (shader->info.base.writes_samplemask) {
+      LLVMValueRef output_smask = NULL;
+      int smaski = find_output_by_semantic(&shader->info.base,
+                                           TGSI_SEMANTIC_SAMPLEMASK,
+                                           0);
+      struct lp_build_context smask_bld;
+      lp_build_context_init(&smask_bld, gallivm, int_type);
+
+      assert(smaski >= 0);
+      output_smask = LLVMBuildLoad(builder, outputs[smaski][0], "smask");
+      output_smask = LLVMBuildBitCast(builder, output_smask, smask_bld.vec_type, "");
+
+      if (key->min_samples > 1) {
+         /* only the bit corresponding to this sample is to be used. */
+         LLVMValueRef tmp_mask = LLVMBuildLoad(builder, out_sample_mask_storage, "tmp_mask");
+         LLVMValueRef out_smask_idx = LLVMBuildShl(builder, lp_build_const_int32(gallivm, 1), sample_loop_state.counter, "");
+         LLVMValueRef smask_bit = LLVMBuildAnd(builder, output_smask, lp_build_broadcast(gallivm, int_vec_type, out_smask_idx), "");
+         output_smask = LLVMBuildOr(builder, tmp_mask, smask_bit, "");
+      }
+
+      LLVMBuildStore(builder, output_smask, out_sample_mask_storage);
+   }
+
+   /* Color write - per fragment sample */
+   for (attrib = 0; attrib < shader->info.base.num_outputs; ++attrib)
+   {
+      unsigned cbuf = shader->info.base.output_semantic_index[attrib];
+      if ((shader->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR) &&
+           ((cbuf < key->nr_cbufs) || (cbuf == 1 && dual_source_blend)))
+      {
+         for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
+            if(outputs[attrib][chan]) {
+               /* XXX: just initialize outputs to point at colors[] and
+                * skip this.
+                */
+               LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
+               LLVMValueRef color_ptr;
+               LLVMValueRef color_idx = loop_state.counter;
+               if (key->min_samples > 1)
+                  color_idx = LLVMBuildAdd(builder, color_idx,
+                                           LLVMBuildMul(builder, sample_loop_state.counter, num_loop, ""), "");
+               color_ptr = LLVMBuildGEP(builder, out_color[cbuf][chan],
+                                        &color_idx, 1, "");
+               lp_build_name(out, "color%u.%c", attrib, "rgba"[chan]);
+               LLVMBuildStore(builder, out, color_ptr);
+            }
+         }
       }
    }
 
+   if (key->multisample && key->min_samples > 1) {
+      LLVMBuildStore(builder, lp_build_mask_value(&mask), s_mask_ptr);
+      lp_build_for_loop_end(&sample_loop_state);
+   }
+
+   if (key->multisample) {
+      /* execute depth test for each sample */
+      lp_build_for_loop_begin(&sample_loop_state, gallivm,
+                              lp_build_const_int32(gallivm, 0),
+                              LLVMIntULT, lp_build_const_int32(gallivm, key->coverage_samples),
+                              lp_build_const_int32(gallivm, 1));
+
+      /* load the per-sample coverage mask */
+      LLVMValueRef s_mask_idx = LLVMBuildMul(builder, sample_loop_state.counter, num_loop, "");
+      s_mask_idx = LLVMBuildAdd(builder, s_mask_idx, loop_state.counter, "");
+      s_mask_ptr = LLVMBuildGEP(builder, mask_store, &s_mask_idx, 1, "");
+
+      /* combine the execution mask post fragment shader with the coverage mask. */
+      s_mask = LLVMBuildLoad(builder, s_mask_ptr, "");
+      if (key->min_samples == 1)
+         s_mask = LLVMBuildAnd(builder, s_mask, lp_build_mask_value(&mask), "");
+
+      /* if the shader writes sample mask use that */
+      if (shader->info.base.writes_samplemask) {
+         LLVMValueRef out_smask_idx = LLVMBuildShl(builder, lp_build_const_int32(gallivm, 1), sample_loop_state.counter, "");
+         out_smask_idx = lp_build_broadcast(gallivm, int_vec_type, out_smask_idx);
+         LLVMValueRef output_smask = LLVMBuildLoad(builder, out_sample_mask_storage, "");
+         LLVMValueRef smask_bit = LLVMBuildAnd(builder, output_smask, out_smask_idx, "");
+         LLVMValueRef cmp = LLVMBuildICmp(builder, LLVMIntNE, smask_bit, lp_build_const_int_vec(gallivm, int_type, 0), "");
+         smask_bit = LLVMBuildSExt(builder, cmp, int_vec_type, "");
+
+         s_mask = LLVMBuildAnd(builder, s_mask, smask_bit, "");
+      }
+   }
+
+   depth_ptr = depth_base_ptr;
+   if (key->multisample) {
+      LLVMValueRef sample_offset = LLVMBuildMul(builder, sample_loop_state.counter, depth_sample_stride, "");
+      depth_ptr = LLVMBuildGEP(builder, depth_ptr, &sample_offset, 1, "");
+   }
+
    /* Late Z test */
    if (depth_mode & LATE_DEPTH_TEST) {
       int pos0 = find_output_by_semantic(&shader->info.base,
@@ -554,7 +909,8 @@ generate_fs_loop(struct gallivm_state *gallivm,
                                   key->stencil,
                                   type,
                                   zs_format_desc,
-                                  &mask,
+                                  key->multisample ? NULL : &mask,
+                                  &s_mask,
                                   stencil_refs,
                                   z, z_fb, s_fb,
                                   facing,
@@ -576,46 +932,36 @@ generate_fs_loop(struct gallivm_state *gallivm,
        * depth value, update from zs_value with the new mask value and
        * write that out.
        */
+      if (key->multisample) {
+         z_value = LLVMBuildBitCast(builder, lp_build_pointer_get(builder, z_sample_value_store, sample_loop_state.counter), z_type, "");;
+         s_value = lp_build_pointer_get(builder, s_sample_value_store, sample_loop_state.counter);
+         z_fb = LLVMBuildBitCast(builder, lp_build_pointer_get(builder, z_fb_store, sample_loop_state.counter), z_fb_type, "");
+         s_fb = lp_build_pointer_get(builder, s_fb_store, sample_loop_state.counter);
+      }
       lp_build_depth_stencil_write_swizzled(gallivm, type,
                                             zs_format_desc, key->resource_1d,
-                                            &mask, z_fb, s_fb, loop_state.counter,
+                                            key->multisample ? s_mask : lp_build_mask_value(&mask), z_fb, s_fb, loop_state.counter,
                                             depth_ptr, depth_stride,
                                             z_value, s_value);
    }
 
-
-   /* Color write  */
-   for (attrib = 0; attrib < shader->info.base.num_outputs; ++attrib)
-   {
-      unsigned cbuf = shader->info.base.output_semantic_index[attrib];
-      if ((shader->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR) &&
-           ((cbuf < key->nr_cbufs) || (cbuf == 1 && dual_source_blend)))
-      {
-         for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
-            if(outputs[attrib][chan]) {
-               /* XXX: just initialize outputs to point at colors[] and
-                * skip this.
-                */
-               LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
-               LLVMValueRef color_ptr;
-               color_ptr = LLVMBuildGEP(builder, out_color[cbuf][chan],
-                                        &loop_state.counter, 1, "");
-               lp_build_name(out, "color%u.%c", attrib, "rgba"[chan]);
-               LLVMBuildStore(builder, out, color_ptr);
-            }
-         }
-      }
-   }
-
    if (key->occlusion_count) {
       LLVMValueRef counter = lp_jit_thread_data_counter(gallivm, thread_data_ptr);
       lp_build_name(counter, "counter");
+
       lp_build_occlusion_count(gallivm, type,
-                               lp_build_mask_value(&mask), counter);
+                               key->multisample ? s_mask : lp_build_mask_value(&mask), counter);
+   }
+
+   if (key->multisample) {
+      /* store the sample mask for this loop */
+      LLVMBuildStore(builder, s_mask, s_mask_ptr);
+      lp_build_for_loop_end(&sample_loop_state);
    }
 
    mask_val = lp_build_mask_end(&mask);
-   LLVMBuildStore(builder, mask_val, mask_ptr);
+   if (!key->multisample)
+      LLVMBuildStore(builder, mask_val, mask_ptr);
    lp_build_for_loop_end(&loop_state);
 }
 
@@ -1355,7 +1701,7 @@ convert_to_blend_type(struct gallivm_state *gallivm,
       for (j = 0; j < src_fmt->nr_channels; ++j) {
          unsigned mask = 0;
          unsigned sa = src_fmt->channel[j].shift;
-#ifdef PIPE_ARCH_LITTLE_ENDIAN
+#if UTIL_ARCH_LITTLE_ENDIAN
          unsigned from_lsb = j;
 #else
          unsigned from_lsb = src_fmt->nr_channels - j - 1;
@@ -1537,7 +1883,8 @@ convert_from_blend_type(struct gallivm_state *gallivm,
       for (j = 0; j < src_fmt->nr_channels; ++j) {
          unsigned mask = 0;
          unsigned sa = src_fmt->channel[j].shift;
-#ifdef PIPE_ARCH_LITTLE_ENDIAN
+         unsigned sz_a = src_fmt->channel[j].size;
+#if UTIL_ARCH_LITTLE_ENDIAN
          unsigned from_lsb = j;
 #else
          unsigned from_lsb = src_fmt->nr_channels - j - 1;
@@ -1565,6 +1912,10 @@ convert_from_blend_type(struct gallivm_state *gallivm,
          if (src_type.norm) {
             chans[j] = scale_bits(gallivm, blend_type.width,
                                   src_fmt->channel[j].size, chans[j], src_type);
+         } else if (!src_type.floating && sz_a < blend_type.width) {
+            LLVMValueRef mask_val = lp_build_const_int_vec(gallivm, src_type, (1UL << sz_a) - 1);
+            LLVMValueRef mask = LLVMBuildICmp(builder, LLVMIntUGT, chans[j], mask_val, "");
+            chans[j] = LLVMBuildSelect(builder, mask, mask_val, chans[j], "");
          }
 
          /* Insert bits */
@@ -2219,7 +2570,7 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
 
    if (dst_count > src_count) {
       if ((dst_type.width == 8 || dst_type.width == 16) &&
-          util_is_power_of_two(dst_type.length) &&
+          util_is_power_of_two_or_zero(dst_type.length) &&
           dst_type.length * dst_type.width < 128) {
          /*
           * Never try to load values as 4xi8 which we will then
@@ -2382,14 +2733,14 @@ generate_fragment(struct llvmpipe_context *lp,
                   unsigned partial_mask)
 {
    struct gallivm_state *gallivm = variant->gallivm;
-   const struct lp_fragment_shader_variant_key *key = &variant->key;
+   struct lp_fragment_shader_variant_key *key = &variant->key;
    struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS];
    char func_name[64];
    struct lp_type fs_type;
    struct lp_type blend_type;
    LLVMTypeRef fs_elem_type;
    LLVMTypeRef blend_vec_type;
-   LLVMTypeRef arg_types[13];
+   LLVMTypeRef arg_types[15];
    LLVMTypeRef func_type;
    LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
    LLVMTypeRef int8_type = LLVMInt8TypeInContext(gallivm->context);
@@ -2401,16 +2752,19 @@ generate_fragment(struct llvmpipe_context *lp,
    LLVMValueRef dady_ptr;
    LLVMValueRef color_ptr_ptr;
    LLVMValueRef stride_ptr;
+   LLVMValueRef color_sample_stride_ptr;
    LLVMValueRef depth_ptr;
    LLVMValueRef depth_stride;
+   LLVMValueRef depth_sample_stride;
    LLVMValueRef mask_input;
    LLVMValueRef thread_data_ptr;
    LLVMBasicBlockRef block;
    LLVMBuilderRef builder;
    struct lp_build_sampler_soa *sampler;
+   struct lp_build_image_soa *image;
    struct lp_build_interp_soa_context interp;
-   LLVMValueRef fs_mask[16 / 4];
-   LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS][16 / 4];
+   LLVMValueRef fs_mask[(16 / 4) * LP_MAX_SAMPLES];
+   LLVMValueRef fs_out_color[LP_MAX_SAMPLES][PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS][16 / 4];
    LLVMValueRef function;
    LLVMValueRef facing;
    unsigned num_fs;
@@ -2465,8 +2819,8 @@ generate_fragment(struct llvmpipe_context *lp,
 
    blend_vec_type = lp_build_vec_type(gallivm, blend_type);
 
-   util_snprintf(func_name, sizeof(func_name), "fs%u_variant%u_%s",
-                 shader->no, variant->no, partial_mask ? "partial" : "whole");
+   snprintf(func_name, sizeof(func_name), "fs_variant_%s",
+            partial_mask ? "partial" : "whole");
 
    arg_types[0] = variant->jit_context_ptr_type;       /* context */
    arg_types[1] = int32_type;                          /* x */
@@ -2475,12 +2829,14 @@ generate_fragment(struct llvmpipe_context *lp,
    arg_types[4] = LLVMPointerType(fs_elem_type, 0);    /* a0 */
    arg_types[5] = LLVMPointerType(fs_elem_type, 0);    /* dadx */
    arg_types[6] = LLVMPointerType(fs_elem_type, 0);    /* dady */
-   arg_types[7] = LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0);  /* color */
+   arg_types[7] = LLVMPointerType(LLVMPointerType(int8_type, 0), 0);  /* color */
    arg_types[8] = LLVMPointerType(int8_type, 0);       /* depth */
-   arg_types[9] = int32_type;                          /* mask_input */
+   arg_types[9] = LLVMInt64TypeInContext(gallivm->context);  /* mask_input */
    arg_types[10] = variant->jit_thread_data_ptr_type;  /* per thread data */
    arg_types[11] = LLVMPointerType(int32_type, 0);     /* stride */
    arg_types[12] = int32_type;                         /* depth_stride */
+   arg_types[13] = LLVMPointerType(int32_type, 0);     /* color sample strides */
+   arg_types[14] = int32_type;                         /* depth sample stride */
 
    func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context),
                                 arg_types, ARRAY_SIZE(arg_types), 0);
@@ -2497,6 +2853,9 @@ generate_fragment(struct llvmpipe_context *lp,
       if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
          lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
 
+   if (variant->gallivm->cache->data_size)
+      return;
+
    context_ptr  = LLVMGetParam(function, 0);
    x            = LLVMGetParam(function, 1);
    y            = LLVMGetParam(function, 2);
@@ -2510,6 +2869,8 @@ generate_fragment(struct llvmpipe_context *lp,
    thread_data_ptr  = LLVMGetParam(function, 10);
    stride_ptr   = LLVMGetParam(function, 11);
    depth_stride = LLVMGetParam(function, 12);
+   color_sample_stride_ptr = LLVMGetParam(function, 13);
+   depth_sample_stride = LLVMGetParam(function, 14);
 
    lp_build_name(context_ptr, "context");
    lp_build_name(x, "x");
@@ -2523,6 +2884,8 @@ generate_fragment(struct llvmpipe_context *lp,
    lp_build_name(thread_data_ptr, "thread_data");
    lp_build_name(stride_ptr, "stride_ptr");
    lp_build_name(depth_stride, "depth_stride");
+   lp_build_name(color_sample_stride_ptr, "color_sample_stride_ptr");
+   lp_build_name(depth_sample_stride, "depth_sample_stride");
 
    /*
     * Function body
@@ -2533,8 +2896,28 @@ generate_fragment(struct llvmpipe_context *lp,
    assert(builder);
    LLVMPositionBuilderAtEnd(builder, block);
 
+   /*
+    * Must not count ps invocations if there's a null shader.
+    * (It would be ok to count with null shader if there's d/s tests,
+    * but only if there's d/s buffers too, which is different
+    * to implicit rasterization disable which must not depend
+    * on the d/s buffers.)
+    * Could use popcount on mask, but pixel accuracy is not required.
+    * Could disable if there's no stats query, but maybe not worth it.
+    */
+   if (shader->info.base.num_instructions > 1) {
+      LLVMValueRef invocs, val;
+      invocs = lp_jit_thread_data_invocations(gallivm, thread_data_ptr);
+      val = LLVMBuildLoad(builder, invocs, "");
+      val = LLVMBuildAdd(builder, val,
+                         LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), 1, 0),
+                         "invoc_count");
+      LLVMBuildStore(builder, val, invocs);
+   }
+
    /* code generated texture sampling */
-   sampler = lp_llvm_sampler_soa_create(key->state);
+   sampler = lp_llvm_sampler_soa_create(key->samplers, key->nr_samplers);
+   image = lp_llvm_image_soa_create(lp_fs_variant_key_images(key));
 
    num_fs = 16 / fs_type.length; /* number of loops per 4x4 stamp */
    /* for 1d resources only run "upper half" of stamp */
@@ -2544,8 +2927,29 @@ generate_fragment(struct llvmpipe_context *lp,
    {
       LLVMValueRef num_loop = lp_build_const_int32(gallivm, num_fs);
       LLVMTypeRef mask_type = lp_build_int_vec_type(gallivm, fs_type);
+      LLVMValueRef num_loop_samp = lp_build_const_int32(gallivm, num_fs * key->coverage_samples);
       LLVMValueRef mask_store = lp_build_array_alloca(gallivm, mask_type,
-                                                      num_loop, "mask_store");
+                                                      num_loop_samp, "mask_store");
+
+      LLVMTypeRef flt_type = LLVMFloatTypeInContext(gallivm->context);
+      LLVMValueRef glob_sample_pos = LLVMAddGlobal(gallivm->module, LLVMArrayType(flt_type, key->coverage_samples * 2), "");
+      LLVMValueRef sample_pos_array;
+
+      if (key->multisample && key->coverage_samples == 4) {
+         LLVMValueRef sample_pos_arr[8];
+         for (unsigned i = 0; i < 4; i++) {
+            sample_pos_arr[i * 2] = LLVMConstReal(flt_type, lp_sample_pos_4x[i][0]);
+            sample_pos_arr[i * 2 + 1] = LLVMConstReal(flt_type, lp_sample_pos_4x[i][1]);
+         }
+         sample_pos_array = LLVMConstArray(LLVMFloatTypeInContext(gallivm->context), sample_pos_arr, 8);
+      } else {
+         LLVMValueRef sample_pos_arr[2];
+         sample_pos_arr[0] = LLVMConstReal(flt_type, 0.5);
+         sample_pos_arr[1] = LLVMConstReal(flt_type, 0.5);
+         sample_pos_array = LLVMConstArray(LLVMFloatTypeInContext(gallivm->context), sample_pos_arr, 2);
+      }
+      LLVMSetInitializer(glob_sample_pos, sample_pos_array);
+
       LLVMValueRef color_store[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS];
       boolean pixel_center_integer =
          shader->info.base.properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER];
@@ -2560,25 +2964,53 @@ generate_fragment(struct llvmpipe_context *lp,
                                shader->info.base.num_inputs,
                                inputs,
                                pixel_center_integer,
+                               key->coverage_samples, glob_sample_pos,
+                               num_loop,
                                key->depth_clamp,
                                builder, fs_type,
                                a0_ptr, dadx_ptr, dady_ptr,
                                x, y);
 
       for (i = 0; i < num_fs; i++) {
-         LLVMValueRef mask;
-         LLVMValueRef indexi = lp_build_const_int32(gallivm, i);
-         LLVMValueRef mask_ptr = LLVMBuildGEP(builder, mask_store,
-                                              &indexi, 1, "mask_ptr");
-
-         if (partial_mask) {
-            mask = generate_quad_mask(gallivm, fs_type,
-                                      i*fs_type.length/4, mask_input);
-         }
-         else {
-            mask = lp_build_const_int_vec(gallivm, fs_type, ~0);
+         if (key->multisample) {
+            LLVMValueRef smask_val = LLVMBuildLoad(builder, lp_jit_context_sample_mask(gallivm, context_ptr), "");
+
+            /*
+             * For multisampling, extract the per-sample mask from the incoming 64-bit mask,
+             * store to the per sample mask storage. Or all of them together to generate
+             * the fragment shader mask. (sample shading TODO).
+             * Take the incoming state coverage mask into account.
+             */
+            for (unsigned s = 0; s < key->coverage_samples; s++) {
+               LLVMValueRef sindexi = lp_build_const_int32(gallivm, i + (s * num_fs));
+               LLVMValueRef sample_mask_ptr = LLVMBuildGEP(builder, mask_store,
+                                                           &sindexi, 1, "sample_mask_ptr");
+               LLVMValueRef s_mask = generate_quad_mask(gallivm, fs_type,
+                                                        i*fs_type.length/4, s, mask_input);
+
+               LLVMValueRef smask_bit = LLVMBuildAnd(builder, smask_val, lp_build_const_int32(gallivm, (1 << s)), "");
+               LLVMValueRef cmp = LLVMBuildICmp(builder, LLVMIntNE, smask_bit, lp_build_const_int32(gallivm, 0), "");
+               smask_bit = LLVMBuildSExt(builder, cmp, int32_type, "");
+               smask_bit = lp_build_broadcast(gallivm, mask_type, smask_bit);
+
+               s_mask = LLVMBuildAnd(builder, s_mask, smask_bit, "");
+               LLVMBuildStore(builder, s_mask, sample_mask_ptr);
+            }
+         } else {
+            LLVMValueRef mask;
+            LLVMValueRef indexi = lp_build_const_int32(gallivm, i);
+            LLVMValueRef mask_ptr = LLVMBuildGEP(builder, mask_store,
+                                                 &indexi, 1, "mask_ptr");
+
+            if (partial_mask) {
+               mask = generate_quad_mask(gallivm, fs_type,
+                                         i*fs_type.length/4, 0, mask_input);
+            }
+            else {
+               mask = lp_build_const_int_vec(gallivm, fs_type, ~0);
+            }
+            LLVMBuildStore(builder, mask, mask_ptr);
          }
-         LLVMBuildStore(builder, mask, mask_ptr);
       }
 
       generate_fs_loop(gallivm,
@@ -2586,50 +3018,63 @@ generate_fragment(struct llvmpipe_context *lp,
                        builder,
                        fs_type,
                        context_ptr,
+                       glob_sample_pos,
                        num_loop,
                        &interp,
                        sampler,
+                       image,
                        mask_store, /* output */
                        color_store,
                        depth_ptr,
                        depth_stride,
+                       depth_sample_stride,
                        facing,
                        thread_data_ptr);
 
       for (i = 0; i < num_fs; i++) {
-         LLVMValueRef indexi = lp_build_const_int32(gallivm, i);
-         LLVMValueRef ptr = LLVMBuildGEP(builder, mask_store,
-                                         &indexi, 1, "");
-         fs_mask[i] = LLVMBuildLoad(builder, ptr, "mask");
-         /* This is fucked up need to reorganize things */
-         for (cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
-            for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
-               ptr = LLVMBuildGEP(builder,
-                                  color_store[cbuf * !cbuf0_write_all][chan],
-                                  &indexi, 1, "");
-               fs_out_color[cbuf][chan][i] = ptr;
-            }
+         LLVMValueRef ptr;
+         for (unsigned s = 0; s < key->coverage_samples; s++) {
+            int idx = (i + (s * num_fs));
+            LLVMValueRef sindexi = lp_build_const_int32(gallivm, idx);
+            ptr = LLVMBuildGEP(builder, mask_store, &sindexi, 1, "");
+
+            fs_mask[idx] = LLVMBuildLoad(builder, ptr, "smask");
          }
-         if (dual_source_blend) {
-            /* only support one dual source blend target hence always use output 1 */
-            for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
-               ptr = LLVMBuildGEP(builder,
-                                  color_store[1][chan],
-                                  &indexi, 1, "");
-               fs_out_color[1][chan][i] = ptr;
+
+         for (unsigned s = 0; s < key->min_samples; s++) {
+            /* This is fucked up need to reorganize things */
+            int idx = s * num_fs + i;
+            LLVMValueRef sindexi = lp_build_const_int32(gallivm, idx);
+            for (cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
+               for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
+                  ptr = LLVMBuildGEP(builder,
+                                     color_store[cbuf * !cbuf0_write_all][chan],
+                                     &sindexi, 1, "");
+                  fs_out_color[s][cbuf][chan][i] = ptr;
+               }
+            }
+            if (dual_source_blend) {
+               /* only support one dual source blend target hence always use output 1 */
+               for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
+                  ptr = LLVMBuildGEP(builder,
+                                     color_store[1][chan],
+                                     &sindexi, 1, "");
+                  fs_out_color[s][1][chan][i] = ptr;
+               }
             }
          }
       }
    }
 
    sampler->destroy(sampler);
-
+   image->destroy(image);
    /* Loop over color outputs / color buffers to do blending.
     */
    for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
       if (key->cbuf_format[cbuf] != PIPE_FORMAT_NONE) {
          LLVMValueRef color_ptr;
          LLVMValueRef stride;
+         LLVMValueRef sample_stride = NULL;
          LLVMValueRef index = lp_build_const_int32(gallivm, cbuf);
 
          boolean do_branch = ((key->depth.enabled
@@ -2642,17 +3087,34 @@ generate_fragment(struct llvmpipe_context *lp,
                                                 &index, 1, ""),
                                    "");
 
-         lp_build_name(color_ptr, "color_ptr%d", cbuf);
-
          stride = LLVMBuildLoad(builder,
                                 LLVMBuildGEP(builder, stride_ptr, &index, 1, ""),
                                 "");
 
-         generate_unswizzled_blend(gallivm, cbuf, variant,
-                                   key->cbuf_format[cbuf],
-                                   num_fs, fs_type, fs_mask, fs_out_color,
-                                   context_ptr, color_ptr, stride,
-                                   partial_mask, do_branch);
+         if (key->multisample)
+            sample_stride = LLVMBuildLoad(builder,
+                                          LLVMBuildGEP(builder, color_sample_stride_ptr,
+                                                       &index, 1, ""), "");
+
+         for (unsigned s = 0; s < key->cbuf_nr_samples[cbuf]; s++) {
+            unsigned mask_idx = num_fs * (key->multisample ? s : 0);
+            unsigned out_idx = key->min_samples == 1 ? 0 : s;
+            LLVMValueRef out_ptr = color_ptr;;
+
+            if (key->multisample) {
+               LLVMValueRef sample_offset = LLVMBuildMul(builder, sample_stride, lp_build_const_int32(gallivm, s), "");
+               out_ptr = LLVMBuildGEP(builder, out_ptr, &sample_offset, 1, "");
+            }
+            out_ptr = LLVMBuildBitCast(builder, out_ptr, LLVMPointerType(blend_vec_type, 0), "");
+
+            lp_build_name(out_ptr, "color_ptr%d", cbuf);
+
+            generate_unswizzled_blend(gallivm, cbuf, variant,
+                                      key->cbuf_format[cbuf],
+                                      num_fs, fs_type, &fs_mask[mask_idx], fs_out_color[out_idx],
+                                      context_ptr, out_ptr, stride,
+                                      partial_mask, do_branch);
+         }
       }
    }
 
@@ -2663,7 +3125,7 @@ generate_fragment(struct llvmpipe_context *lp,
 
 
 static void
-dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key)
+dump_fs_variant_key(struct lp_fragment_shader_variant_key *key)
 {
    unsigned i;
 
@@ -2672,11 +3134,18 @@ dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key)
    if (key->flatshade) {
       debug_printf("flatshade = 1\n");
    }
+   if (key->multisample) {
+      debug_printf("multisample = 1\n");
+      debug_printf("coverage samples = %d\n", key->coverage_samples);
+      debug_printf("min samples = %d\n", key->min_samples);
+   }
    for (i = 0; i < key->nr_cbufs; ++i) {
       debug_printf("cbuf_format[%u] = %s\n", i, util_format_name(key->cbuf_format[i]));
+      debug_printf("cbuf nr_samples[%u] = %d\n", i, key->cbuf_nr_samples[i]);
    }
    if (key->depth.enabled || key->stencil[0].enabled) {
       debug_printf("depth.format = %s\n", util_format_name(key->zsbuf_format));
+      debug_printf("depth nr_samples = %d\n", key->zsbuf_nr_samples);
    }
    if (key->depth.enabled) {
       debug_printf("depth.func = %s\n", util_str_func(key->depth.func, TRUE));
@@ -2718,7 +3187,7 @@ dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key)
       debug_printf("blend.alpha_to_coverage is enabled\n");
    }
    for (i = 0; i < key->nr_samplers; ++i) {
-      const struct lp_static_sampler_state *sampler = &key->state[i].sampler_state;
+      const struct lp_static_sampler_state *sampler = &key->samplers[i].sampler_state;
       debug_printf("sampler[%u] = \n", i);
       debug_printf("  .wrap = %s %s %s\n",
                    util_str_tex_wrap(sampler->wrap_s, TRUE),
@@ -2739,7 +3208,7 @@ dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key)
       debug_printf("  .apply_max_lod = %u\n", sampler->apply_max_lod);
    }
    for (i = 0; i < key->nr_sampler_views; ++i) {
-      const struct lp_static_texture_state *texture = &key->state[i].texture_state;
+      const struct lp_static_texture_state *texture = &key->samplers[i].texture_state;
       debug_printf("texture[%u] = \n", i);
       debug_printf("  .format = %s\n",
                    util_format_name(texture->format));
@@ -2752,20 +3221,59 @@ dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key)
                    texture->pot_height,
                    texture->pot_depth);
    }
+   struct lp_image_static_state *images = lp_fs_variant_key_images(key);
+   for (i = 0; i < key->nr_images; ++i) {
+      const struct lp_static_texture_state *image = &images[i].image_state;
+      debug_printf("image[%u] = \n", i);
+      debug_printf("  .format = %s\n",
+                   util_format_name(image->format));
+      debug_printf("  .target = %s\n",
+                   util_str_tex_target(image->target, TRUE));
+      debug_printf("  .level_zero_only = %u\n",
+                   image->level_zero_only);
+      debug_printf("  .pot = %u %u %u\n",
+                   image->pot_width,
+                   image->pot_height,
+                   image->pot_depth);
+   }
 }
 
 
 void
-lp_debug_fs_variant(const struct lp_fragment_shader_variant *variant)
+lp_debug_fs_variant(struct lp_fragment_shader_variant *variant)
 {
-   debug_printf("llvmpipe: Fragment shader #%u variant #%u:\n", 
+   debug_printf("llvmpipe: Fragment shader #%u variant #%u:\n",
                 variant->shader->no, variant->no);
-   tgsi_dump(variant->shader->base.tokens, 0);
+   if (variant->shader->base.type == PIPE_SHADER_IR_TGSI)
+      tgsi_dump(variant->shader->base.tokens, 0);
+   else
+      nir_print_shader(variant->shader->base.ir.nir, stderr);
    dump_fs_variant_key(&variant->key);
    debug_printf("variant->opaque = %u\n", variant->opaque);
    debug_printf("\n");
 }
 
+static void
+lp_fs_get_ir_cache_key(struct lp_fragment_shader_variant *variant,
+                            unsigned char ir_sha1_cache_key[20])
+{
+   struct blob blob = { 0 };
+   unsigned ir_size;
+   void *ir_binary;
+
+   blob_init(&blob);
+   nir_serialize(&blob, variant->shader->base.ir.nir, true);
+   ir_binary = blob.data;
+   ir_size = blob.size;
+
+   struct mesa_sha1 ctx;
+   _mesa_sha1_init(&ctx);
+   _mesa_sha1_update(&ctx, &variant->key, variant->shader->variant_key_size);
+   _mesa_sha1_update(&ctx, ir_binary, ir_size);
+   _mesa_sha1_final(&ctx, ir_sha1_cache_key);
+
+   blob_finish(&blob);
+}
 
 /**
  * Generate a new fragment shader variant from the shader code and
@@ -2776,30 +3284,43 @@ generate_variant(struct llvmpipe_context *lp,
                  struct lp_fragment_shader *shader,
                  const struct lp_fragment_shader_variant_key *key)
 {
+   struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
    struct lp_fragment_shader_variant *variant;
-   const struct util_format_description *cbuf0_format_desc;
+   const struct util_format_description *cbuf0_format_desc = NULL;
    boolean fullcolormask;
    char module_name[64];
-
-   variant = CALLOC_STRUCT(lp_fragment_shader_variant);
+   unsigned char ir_sha1_cache_key[20];
+   struct lp_cached_code cached = { 0 };
+   bool needs_caching = false;
+   variant = MALLOC(sizeof *variant + shader->variant_key_size - sizeof variant->key);
    if (!variant)
       return NULL;
 
-   util_snprintf(module_name, sizeof(module_name), "fs%u_variant%u",
-                 shader->no, shader->variants_created);
+   memset(variant, 0, sizeof(*variant));
+   snprintf(module_name, sizeof(module_name), "fs%u_variant%u",
+            shader->no, shader->variants_created);
+
+   variant->shader = shader;
+   memcpy(&variant->key, key, shader->variant_key_size);
 
-   variant->gallivm = gallivm_create(module_name, lp->context);
+   if (shader->base.ir.nir) {
+      lp_fs_get_ir_cache_key(variant, ir_sha1_cache_key);
+
+      lp_disk_cache_find_shader(screen, &cached, ir_sha1_cache_key);
+      if (!cached.data_size)
+         needs_caching = true;
+   }
+   variant->gallivm = gallivm_create(module_name, lp->context, &cached);
    if (!variant->gallivm) {
       FREE(variant);
       return NULL;
    }
 
-   variant->shader = shader;
    variant->list_item_global.base = variant;
    variant->list_item_local.base = variant;
    variant->no = shader->variants_created++;
 
-   memcpy(&variant->key, key, shader->variant_key_size);
+
 
    /*
     * Determine whether we are touching all channels in the color buffer.
@@ -2816,18 +3337,13 @@ generate_variant(struct llvmpipe_context *lp,
          fullcolormask &&
          !key->stencil[0].enabled &&
          !key->alpha.enabled &&
+         !key->multisample &&
          !key->blend.alpha_to_coverage &&
          !key->depth.enabled &&
-         !shader->info.base.uses_kill
+         !shader->info.base.uses_kill &&
+         !shader->info.base.writes_samplemask
       ? TRUE : FALSE;
 
-   if ((shader->info.base.num_tokens <= 1) &&
-       !key->depth.enabled && !key->stencil[0].enabled) {
-      variant->ps_inv_multiplier = 0;
-   } else {
-      variant->ps_inv_multiplier = 1;
-   }
-
    if ((LP_DEBUG & DEBUG_FS) || (gallivm_debug & GALLIVM_DEBUG_IR)) {
       lp_debug_fs_variant(variant);
    }
@@ -2866,6 +3382,10 @@ generate_variant(struct llvmpipe_context *lp,
       variant->jit_function[RAST_WHOLE] = variant->jit_function[RAST_EDGE_TEST];
    }
 
+   if (needs_caching) {
+      lp_disk_cache_insert_shader(screen, &cached, ir_sha1_cache_key);
+   }
+
    gallivm_free_ir(variant->gallivm);
 
    return variant;
@@ -2880,6 +3400,7 @@ llvmpipe_create_fs_state(struct pipe_context *pipe,
    struct lp_fragment_shader *shader;
    int nr_samplers;
    int nr_sampler_views;
+   int nr_images;
    int i;
 
    shader = CALLOC_STRUCT(lp_fragment_shader);
@@ -2889,11 +3410,17 @@ llvmpipe_create_fs_state(struct pipe_context *pipe,
    shader->no = fs_no++;
    make_empty_list(&shader->variants);
 
-   /* get/save the summary info for this shader */
-   lp_build_tgsi_info(templ->tokens, &shader->info);
+   shader->base.type = templ->type;
+   if (templ->type == PIPE_SHADER_IR_TGSI) {
+      /* get/save the summary info for this shader */
+      lp_build_tgsi_info(templ->tokens, &shader->info);
 
-   /* we need to keep a local copy of the tokens */
-   shader->base.tokens = tgsi_dup_tokens(templ->tokens);
+      /* we need to keep a local copy of the tokens */
+      shader->base.tokens = tgsi_dup_tokens(templ->tokens);
+   } else {
+      shader->base.ir.nir = templ->ir.nir;
+      nir_tgsi_scan_shader(templ->ir.nir, &shader->info.base, true);
+   }
 
    shader->draw_data = draw_create_fragment_shader(llvmpipe->draw, templ);
    if (shader->draw_data == NULL) {
@@ -2904,13 +3431,13 @@ llvmpipe_create_fs_state(struct pipe_context *pipe,
 
    nr_samplers = shader->info.base.file_max[TGSI_FILE_SAMPLER] + 1;
    nr_sampler_views = shader->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
-
-   shader->variant_key_size = Offset(struct lp_fragment_shader_variant_key,
-                                     state[MAX2(nr_samplers, nr_sampler_views)]);
+   nr_images = shader->info.base.file_max[TGSI_FILE_IMAGE] + 1;
+   shader->variant_key_size = lp_fs_variant_key_size(MAX2(nr_samplers, nr_sampler_views), nr_images);
 
    for (i = 0; i < shader->info.base.num_inputs; i++) {
       shader->inputs[i].usage_mask = shader->info.base.input_usage_mask[i];
       shader->inputs[i].cyl_wrap = shader->info.base.input_cylindrical_wrap[i];
+      shader->inputs[i].location = shader->info.base.input_interpolate_loc[i];
 
       switch (shader->info.base.input_interpolate[i]) {
       case TGSI_INTERPOLATE_CONSTANT:
@@ -2972,14 +3499,14 @@ static void
 llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs)
 {
    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
-
-   if (llvmpipe->fs == fs)
+   struct lp_fragment_shader *lp_fs = (struct lp_fragment_shader *)fs;
+   if (llvmpipe->fs == lp_fs)
       return;
 
-   llvmpipe->fs = (struct lp_fragment_shader *) fs;
-
    draw_bind_fragment_shader(llvmpipe->draw,
-                             (llvmpipe->fs ? llvmpipe->fs->draw_data : NULL));
+                             (lp_fs ? lp_fs->draw_data : NULL));
+
+   llvmpipe->fs = lp_fs;
 
    llvmpipe->dirty |= LP_NEW_FS;
 }
@@ -2989,7 +3516,7 @@ llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs)
  * Remove shader variant from two lists: the shader's variant list
  * and the context's variant list.
  */
-void
+static void
 llvmpipe_remove_shader_variant(struct llvmpipe_context *lp,
                                struct lp_fragment_shader_variant *variant)
 {
@@ -3044,6 +3571,8 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs)
    /* Delete draw module's data */
    draw_delete_fragment_shader(llvmpipe->draw, shader->draw_data);
 
+   if (shader->base.ir.nir)
+      ralloc_free(shader->base.ir.nir);
    assert(shader->variants_cached == 0);
    FREE((void *) shader->base.tokens);
    FREE(shader);
@@ -3073,7 +3602,9 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe,
    }
 
    if (shader == PIPE_SHADER_VERTEX ||
-       shader == PIPE_SHADER_GEOMETRY) {
+       shader == PIPE_SHADER_GEOMETRY ||
+       shader == PIPE_SHADER_TESS_CTRL ||
+       shader == PIPE_SHADER_TESS_EVAL) {
       /* Pass the constants to the 'draw' module */
       const unsigned size = cb ? cb->buffer_size : 0;
       const ubyte *data;
@@ -3094,15 +3625,78 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe,
       draw_set_mapped_constant_buffer(llvmpipe->draw, shader,
                                       index, data, size);
    }
-   else {
+   else if (shader == PIPE_SHADER_COMPUTE)
+      llvmpipe->cs_dirty |= LP_CSNEW_CONSTANTS;
+   else
       llvmpipe->dirty |= LP_NEW_FS_CONSTANTS;
-   }
 
    if (cb && cb->user_buffer) {
       pipe_resource_reference(&constants, NULL);
    }
 }
 
+static void
+llvmpipe_set_shader_buffers(struct pipe_context *pipe,
+                            enum pipe_shader_type shader, unsigned start_slot,
+                            unsigned count, const struct pipe_shader_buffer *buffers,
+                            unsigned writable_bitmask)
+{
+   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+   unsigned i, idx;
+   for (i = start_slot, idx = 0; i < start_slot + count; i++, idx++) {
+      const struct pipe_shader_buffer *buffer = buffers ? &buffers[idx] : NULL;
+
+      util_copy_shader_buffer(&llvmpipe->ssbos[shader][i], buffer);
+
+      if (shader == PIPE_SHADER_VERTEX ||
+          shader == PIPE_SHADER_GEOMETRY ||
+          shader == PIPE_SHADER_TESS_CTRL ||
+          shader == PIPE_SHADER_TESS_EVAL) {
+         const unsigned size = buffer ? buffer->buffer_size : 0;
+         const ubyte *data = NULL;
+         if (buffer && buffer->buffer)
+            data = (ubyte *) llvmpipe_resource_data(buffer->buffer);
+         if (data)
+            data += buffer->buffer_offset;
+         draw_set_mapped_shader_buffer(llvmpipe->draw, shader,
+                                       i, data, size);
+      } else if (shader == PIPE_SHADER_COMPUTE) {
+        llvmpipe->cs_dirty |= LP_CSNEW_SSBOS;
+      } else if (shader == PIPE_SHADER_FRAGMENT) {
+         llvmpipe->dirty |= LP_NEW_FS_SSBOS;
+      }
+   }
+}
+
+static void
+llvmpipe_set_shader_images(struct pipe_context *pipe,
+                            enum pipe_shader_type shader, unsigned start_slot,
+                           unsigned count, const struct pipe_image_view *images)
+{
+   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+   unsigned i, idx;
+
+   draw_flush(llvmpipe->draw);
+   for (i = start_slot, idx = 0; i < start_slot + count; i++, idx++) {
+      const struct pipe_image_view *image = images ? &images[idx] : NULL;
+
+      util_copy_image_view(&llvmpipe->images[shader][i], image);
+   }
+
+   llvmpipe->num_images[shader] = start_slot + count;
+   if (shader == PIPE_SHADER_VERTEX ||
+       shader == PIPE_SHADER_GEOMETRY ||
+       shader == PIPE_SHADER_TESS_CTRL ||
+       shader == PIPE_SHADER_TESS_EVAL) {
+      draw_set_images(llvmpipe->draw,
+                      shader,
+                      llvmpipe->images[shader],
+                      start_slot + count);
+   } else if (shader == PIPE_SHADER_COMPUTE)
+      llvmpipe->cs_dirty |= LP_CSNEW_IMAGES;
+   else
+      llvmpipe->dirty |= LP_NEW_FS_IMAGES;
+}
 
 /**
  * Return the blend factor equivalent to a destination alpha of one.
@@ -3133,14 +3727,17 @@ force_dst_alpha_one(unsigned factor, boolean clamped_zero)
  * TODO: there is actually no reason to tie this to context state -- the
  * generated code could be cached globally in the screen.
  */
-static void
+static struct lp_fragment_shader_variant_key *
 make_variant_key(struct llvmpipe_context *lp,
                  struct lp_fragment_shader *shader,
-                 struct lp_fragment_shader_variant_key *key)
+                 char *store)
 {
    unsigned i;
+   struct lp_fragment_shader_variant_key *key;
+
+   key = (struct lp_fragment_shader_variant_key *)store;
 
-   memset(key, 0, shader->variant_key_size);
+   memset(key, 0, offsetof(struct lp_fragment_shader_variant_key, samplers[1]));
 
    if (lp->framebuffer.zsbuf) {
       enum pipe_format zsbuf_format = lp->framebuffer.zsbuf->format;
@@ -3160,6 +3757,7 @@ make_variant_key(struct llvmpipe_context *lp,
       if (llvmpipe_resource_is_1d(lp->framebuffer.zsbuf->texture)) {
          key->resource_1d = TRUE;
       }
+      key->zsbuf_nr_samples = util_res_sample_count(lp->framebuffer.zsbuf->texture);
    }
 
    /*
@@ -3178,7 +3776,7 @@ make_variant_key(struct llvmpipe_context *lp,
    if (lp->rasterizer->clip_halfz) {
       key->depth_clamp = 1;
    } else {
-      key->depth_clamp = (lp->rasterizer->depth_clip == 0) ? 1 : 0;
+      key->depth_clamp = (lp->rasterizer->depth_clip_near == 0) ? 1 : 0;
    }
 
    /* alpha test only applies if render buffer 0 is non-integer (or does not exist) */
@@ -3192,7 +3790,8 @@ make_variant_key(struct llvmpipe_context *lp,
    /* alpha.ref_value is passed in jit_context */
 
    key->flatshade = lp->rasterizer->flatshade;
-   if (lp->active_occlusion_queries) {
+   key->multisample = lp->rasterizer->multisample;
+   if (lp->active_occlusion_queries && !lp->queries_disabled) {
       key->occlusion_count = TRUE;
    }
 
@@ -3200,6 +3799,12 @@ make_variant_key(struct llvmpipe_context *lp,
       memcpy(&key->blend, lp->blend, sizeof key->blend);
    }
 
+   key->coverage_samples = 1;
+   key->min_samples = 1;
+   if (key->multisample) {
+      key->coverage_samples = util_framebuffer_get_num_samples(&lp->framebuffer);
+      key->min_samples = lp->min_samples == 1 ? 1 : key->coverage_samples;
+   }
    key->nr_cbufs = lp->framebuffer.nr_cbufs;
 
    if (!key->blend.independent_blend_enable) {
@@ -3218,6 +3823,7 @@ make_variant_key(struct llvmpipe_context *lp,
          const struct util_format_description *format_desc;
 
          key->cbuf_format[i] = format;
+         key->cbuf_nr_samples[i] = util_res_sample_count(lp->framebuffer.cbufs[i]->texture);
 
          /*
           * Figure out if this is a 1d resource. Note that OpenGL allows crazy
@@ -3277,6 +3883,7 @@ make_variant_key(struct llvmpipe_context *lp,
       else {
          /* no color buffer for this fragment output */
          key->cbuf_format[i] = PIPE_FORMAT_NONE;
+         key->cbuf_nr_samples[i] = 0;
          blend_rt->colormask = 0x0;
          blend_rt->blend_enable = 0;
       }
@@ -3286,9 +3893,15 @@ make_variant_key(struct llvmpipe_context *lp,
     */
    key->nr_samplers = shader->info.base.file_max[TGSI_FILE_SAMPLER] + 1;
 
+   struct lp_sampler_static_state *fs_sampler;
+
+   fs_sampler = key->samplers;
+
+   memset(fs_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *fs_sampler);
+
    for(i = 0; i < key->nr_samplers; ++i) {
       if(shader->info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
-         lp_sampler_static_sampler_state(&key->state[i].sampler_state,
+         lp_sampler_static_sampler_state(&fs_sampler[i].sampler_state,
                                          lp->samplers[PIPE_SHADER_FRAGMENT][i]);
       }
    }
@@ -3301,8 +3914,13 @@ make_variant_key(struct llvmpipe_context *lp,
    if (shader->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
       key->nr_sampler_views = shader->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
       for(i = 0; i < key->nr_sampler_views; ++i) {
-         if(shader->info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1 << i)) {
-            lp_sampler_static_texture_state(&key->state[i].texture_state,
+         /*
+          * Note sview may exceed what's representable by file_mask.
+          * This will still work, the only downside is that not actually
+          * used views may be included in the shader key.
+          */
+         if(shader->info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1u << (i & 31))) {
+            lp_sampler_static_texture_state(&fs_sampler[i].texture_state,
                                             lp->sampler_views[PIPE_SHADER_FRAGMENT][i]);
          }
       }
@@ -3311,11 +3929,22 @@ make_variant_key(struct llvmpipe_context *lp,
       key->nr_sampler_views = key->nr_samplers;
       for(i = 0; i < key->nr_sampler_views; ++i) {
          if(shader->info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
-            lp_sampler_static_texture_state(&key->state[i].texture_state,
+            lp_sampler_static_texture_state(&fs_sampler[i].texture_state,
                                             lp->sampler_views[PIPE_SHADER_FRAGMENT][i]);
          }
       }
    }
+
+   struct lp_image_static_state *lp_image;
+   lp_image = lp_fs_variant_key_images(key);
+   key->nr_images = shader->info.base.file_max[TGSI_FILE_IMAGE] + 1;
+   for (i = 0; i < key->nr_images; ++i) {
+      if (shader->info.base.file_mask[TGSI_FILE_IMAGE] & (1 << i)) {
+         lp_sampler_static_texture_state_image(&lp_image[i].image_state,
+                                               &lp->images[PIPE_SHADER_FRAGMENT][i]);
+      }
+   }
+   return key;
 }
 
 
@@ -3328,16 +3957,17 @@ void
 llvmpipe_update_fs(struct llvmpipe_context *lp)
 {
    struct lp_fragment_shader *shader = lp->fs;
-   struct lp_fragment_shader_variant_key key;
+   struct lp_fragment_shader_variant_key *key;
    struct lp_fragment_shader_variant *variant = NULL;
    struct lp_fs_variant_list_item *li;
+   char store[LP_FS_MAX_VARIANT_KEY_SIZE];
 
-   make_variant_key(lp, shader, &key);
+   key = make_variant_key(lp, shader, store);
 
    /* Search the variants for one which matches the key */
    li = first_elem(&shader->variants);
    while(!at_end(&shader->variants, li)) {
-      if(memcmp(&li->base->key, &key, shader->variant_key_size) == 0) {
+      if(memcmp(&li->base->key, key, shader->variant_key_size) == 0) {
          variant = li->base;
          break;
       }
@@ -3409,7 +4039,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp)
        * Generate the new variant.
        */
       t0 = os_time_get();
-      variant = generate_variant(lp, shader, &key);
+      variant = generate_variant(lp, shader, key);
       t1 = os_time_get();
       dt = t1 - t0;
       LP_COUNT_ADD(llvm_compile_time, dt);
@@ -3441,19 +4071,9 @@ llvmpipe_init_fs_funcs(struct llvmpipe_context *llvmpipe)
    llvmpipe->pipe.delete_fs_state = llvmpipe_delete_fs_state;
 
    llvmpipe->pipe.set_constant_buffer = llvmpipe_set_constant_buffer;
+
+   llvmpipe->pipe.set_shader_buffers = llvmpipe_set_shader_buffers;
+   llvmpipe->pipe.set_shader_images = llvmpipe_set_shader_images;
 }
 
-/*
- * Rasterization is disabled if there is no pixel shader and
- * both depth and stencil testing are disabled:
- * http://msdn.microsoft.com/en-us/library/windows/desktop/bb205125
- */
-boolean
-llvmpipe_rasterization_disabled(struct llvmpipe_context *lp)
-{
-   boolean null_fs = !lp->fs || lp->fs->info.base.num_tokens <= 1;
 
-   return (null_fs &&
-           !lp->depth_stencil->depth.enabled &&
-           !lp->depth_stencil->stencil[0].enabled);
-}