llvmpipe: Support Z16_UNORM as depth-stencil format.

[mesa.git] / src / gallium / drivers / llvmpipe / lp_state_fs.c
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c

index 83b902de959f831c462a324201ee63565fb10856..00f3b6990e3fd0c67299200935c560beac47d117 100644 (file)
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -335,7 +335,7 @@ generate_fs(struct gallivm_state *gallivm,
                                    !simple_shader);
  
        if (depth_mode & EARLY_DEPTH_WRITE) {
-         lp_build_depth_write(builder, zs_format_desc, depth_ptr, zs_value);
+         lp_build_depth_write(gallivm, type, zs_format_desc, depth_ptr, zs_value);
        }
     }
  
@@ -392,7 +392,7 @@ generate_fs(struct gallivm_state *gallivm,
                                    !simple_shader);
        /* Late Z write */
        if (depth_mode & LATE_DEPTH_WRITE) {
-         lp_build_depth_write(builder, zs_format_desc, depth_ptr, zs_value);
+         lp_build_depth_write(gallivm, type, zs_format_desc, depth_ptr, zs_value);
        }
     }
     else if ((depth_mode & EARLY_DEPTH_TEST) &&
@@ -574,7 +574,7 @@ generate_fs_loop(struct gallivm_state *gallivm,
                                    !simple_shader);
  
        if (depth_mode & EARLY_DEPTH_WRITE) {
-         lp_build_depth_write(builder, zs_format_desc, depth_ptr_i, zs_value);
+         lp_build_depth_write(gallivm, type, zs_format_desc, depth_ptr_i, zs_value);
        }
     }
  
@@ -631,7 +631,7 @@ generate_fs_loop(struct gallivm_state *gallivm,
                                    !simple_shader);
        /* Late Z write */
        if (depth_mode & LATE_DEPTH_WRITE) {
-         lp_build_depth_write(builder, zs_format_desc, depth_ptr_i, zs_value);
+         lp_build_depth_write(gallivm, type, zs_format_desc, depth_ptr_i, zs_value);
        }
     }
     else if ((depth_mode & EARLY_DEPTH_TEST) &&
@@ -1143,7 +1143,10 @@ convert_to_blend_type(struct gallivm_state *gallivm,
                                   "");
  
           /* Scale bits */
-         chans[j] = scale_bits(gallivm, src_fmt->channel[j].size, blend_type.width, chans[j], src_type);
+         if (src_type.norm) {
+            chans[j] = scale_bits(gallivm, src_fmt->channel[j].size,
+                                  blend_type.width, chans[j], src_type);
+         }
  
           /* Insert bits into correct position */
           chans[j] = LLVMBuildShl(builder,
@@ -1250,7 +1253,10 @@ convert_from_blend_type(struct gallivm_state *gallivm,
                                   "");
  
           /* Scale down bits */
-         chans[j] = scale_bits(gallivm, blend_type.width, src_fmt->channel[j].size, chans[j], src_type);
+         if (src_type.norm) {
+            chans[j] = scale_bits(gallivm, blend_type.width,
+                                  src_fmt->channel[j].size, chans[j], src_type);
+         }
  
           /* Insert bits */
           chans[j] = LLVMBuildShl(builder,
@@ -1438,6 +1444,25 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
        }
     }
  
+   if (util_format_is_pure_integer(out_format)) {
+      /*
+       * In this case fs_type was really ints or uints disguised as floats;
+       * fix that up now.
+       */
+      fs_type.floating = 0;
+      fs_type.sign = dst_type.sign;
+      for (i = 0; i < num_fs; ++i) {
+         for (j = 0; j < dst_channels; ++j) {
+            fs_src[i][j] = LLVMBuildBitCast(builder, fs_src[i][j],
+                                            lp_build_vec_type(gallivm, fs_type), "");
+         }
+         if (dst_channels == 3 && !has_alpha) {
+            fs_src[i][3] = LLVMBuildBitCast(builder, fs_src[i][3],
+                                            lp_build_vec_type(gallivm, fs_type), "");
+         }
+      }
+   }
+
  
     /*
      * Pixel twiddle from fragment shader order to memory order
@@ -1879,7 +1904,7 @@ generate_fragment(struct llvmpipe_context *lp,
     LLVMPositionBuilderAtEnd(builder, block);
  
     /* code generated texture sampling */
-   sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr);
+   sampler = lp_llvm_sampler_soa_create(key->state, context_ptr);
  
     zs_format_desc = util_format_description(key->zsbuf_format);
  
@@ -2088,32 +2113,39 @@ dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key)
     }
     debug_printf("blend.colormask = 0x%x\n", key->blend.rt[0].colormask);
     for (i = 0; i < key->nr_samplers; ++i) {
+      const struct lp_static_sampler_state *sampler = &key->state[i].sampler_state;
        debug_printf("sampler[%u] = \n", i);
-      debug_printf("  .format = %s\n",
-                   util_format_name(key->sampler[i].format));
-      debug_printf("  .target = %s\n",
-                   util_dump_tex_target(key->sampler[i].target, TRUE));
-      debug_printf("  .pot = %u %u %u\n",
-                   key->sampler[i].pot_width,
-                   key->sampler[i].pot_height,
-                   key->sampler[i].pot_depth);
        debug_printf("  .wrap = %s %s %s\n",
-                   util_dump_tex_wrap(key->sampler[i].wrap_s, TRUE),
-                   util_dump_tex_wrap(key->sampler[i].wrap_t, TRUE),
-                   util_dump_tex_wrap(key->sampler[i].wrap_r, TRUE));
+                   util_dump_tex_wrap(sampler->wrap_s, TRUE),
+                   util_dump_tex_wrap(sampler->wrap_t, TRUE),
+                   util_dump_tex_wrap(sampler->wrap_r, TRUE));
        debug_printf("  .min_img_filter = %s\n",
-                   util_dump_tex_filter(key->sampler[i].min_img_filter, TRUE));
+                   util_dump_tex_filter(sampler->min_img_filter, TRUE));
        debug_printf("  .min_mip_filter = %s\n",
-                   util_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE));
+                   util_dump_tex_mipfilter(sampler->min_mip_filter, TRUE));
        debug_printf("  .mag_img_filter = %s\n",
-                   util_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE));
-      if (key->sampler[i].compare_mode != PIPE_TEX_COMPARE_NONE)
-         debug_printf("  .compare_func = %s\n", util_dump_func(key->sampler[i].compare_func, TRUE));
-      debug_printf("  .normalized_coords = %u\n", key->sampler[i].normalized_coords);
-      debug_printf("  .min_max_lod_equal = %u\n", key->sampler[i].min_max_lod_equal);
-      debug_printf("  .lod_bias_non_zero = %u\n", key->sampler[i].lod_bias_non_zero);
-      debug_printf("  .apply_min_lod = %u\n", key->sampler[i].apply_min_lod);
-      debug_printf("  .apply_max_lod = %u\n", key->sampler[i].apply_max_lod);
+                   util_dump_tex_filter(sampler->mag_img_filter, TRUE));
+      if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE)
+         debug_printf("  .compare_func = %s\n", util_dump_func(sampler->compare_func, TRUE));
+      debug_printf("  .normalized_coords = %u\n", sampler->normalized_coords);
+      debug_printf("  .min_max_lod_equal = %u\n", sampler->min_max_lod_equal);
+      debug_printf("  .lod_bias_non_zero = %u\n", sampler->lod_bias_non_zero);
+      debug_printf("  .apply_min_lod = %u\n", sampler->apply_min_lod);
+      debug_printf("  .apply_max_lod = %u\n", sampler->apply_max_lod);
+   }
+   for (i = 0; i < key->nr_sampler_views; ++i) {
+      const struct lp_static_texture_state *texture = &key->state[i].texture_state;
+      debug_printf("texture[%u] = \n", i);
+      debug_printf("  .format = %s\n",
+                   util_format_name(texture->format));
+      debug_printf("  .target = %s\n",
+                   util_dump_tex_target(texture->target, TRUE));
+      debug_printf("  .level_zero_only = %u\n",
+                   texture->level_zero_only);
+      debug_printf("  .pot = %u %u %u\n",
+                   texture->pot_width,
+                   texture->pot_height,
+                   texture->pot_depth);
     }
  }
  
@@ -2226,6 +2258,7 @@ llvmpipe_create_fs_state(struct pipe_context *pipe,
     struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
     struct lp_fragment_shader *shader;
     int nr_samplers;
+   int nr_sampler_views;
     int i;
  
     shader = CALLOC_STRUCT(lp_fragment_shader);
@@ -2249,9 +2282,10 @@ llvmpipe_create_fs_state(struct pipe_context *pipe,
     }
  
     nr_samplers = shader->info.base.file_max[TGSI_FILE_SAMPLER] + 1;
+   nr_sampler_views = shader->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
  
     shader->variant_key_size = Offset(struct lp_fragment_shader_variant_key,
-                                    sampler[nr_samplers]);
+                                     state[MAX2(nr_samplers, nr_sampler_views)]);
  
     for (i = 0; i < shader->info.base.num_inputs; i++) {
        shader->inputs[i].usage_mask = shader->info.base.input_usage_mask[i];
@@ -2498,7 +2532,11 @@ make_variant_key(struct llvmpipe_context *lp,
        }
     }
  
-   key->alpha.enabled = lp->depth_stencil->alpha.enabled;
+   /* alpha test only applies if render buffer 0 is non-integer (or does not exist) */
+   if (!lp->framebuffer.nr_cbufs ||
+       !util_format_is_pure_integer(lp->framebuffer.cbufs[0]->format)) {
+      key->alpha.enabled = lp->depth_stencil->alpha.enabled;
+   }
     if(key->alpha.enabled)
        key->alpha.func = lp->depth_stencil->alpha.func;
     /* alpha.ref_value is passed in jit_context */
@@ -2538,6 +2576,13 @@ make_variant_key(struct llvmpipe_context *lp,
         */
        blend_rt->colormask &= util_format_colormask(format_desc);
  
+      /*
+       * Disable blend for integer formats.
+       */
+      if (util_format_is_pure_integer(format)) {
+         blend_rt->blend_enable = 0;
+      }
+
        /*
         * Our swizzled render tiles always have an alpha channel, but the linear
         * render target format often does not, so force here the dst alpha to be
@@ -2569,9 +2614,32 @@ make_variant_key(struct llvmpipe_context *lp,
  
     for(i = 0; i < key->nr_samplers; ++i) {
        if(shader->info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
-         lp_sampler_static_state(&key->sampler[i],
-                                lp->sampler_views[PIPE_SHADER_FRAGMENT][i],
-                                lp->samplers[PIPE_SHADER_FRAGMENT][i]);
+         lp_sampler_static_sampler_state(&key->state[i].sampler_state,
+                                         lp->samplers[PIPE_SHADER_FRAGMENT][i]);
+      }
+   }
+
+   /*
+    * XXX If TGSI_FILE_SAMPLER_VIEW exists assume all texture opcodes
+    * are dx10-style? Can't really have mixed opcodes, at least not
+    * if we want to skip the holes here (without rescanning tgsi).
+    */
+   if (shader->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
+      key->nr_sampler_views = shader->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
+      for(i = 0; i < key->nr_sampler_views; ++i) {
+         if(shader->info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1 << i)) {
+            lp_sampler_static_texture_state(&key->state[i].texture_state,
+                                            lp->sampler_views[PIPE_SHADER_FRAGMENT][i]);
+         }
+      }
+   }
+   else {
+      key->nr_sampler_views = key->nr_samplers;
+      for(i = 0; i < key->nr_sampler_views; ++i) {
+         if(shader->info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
+            lp_sampler_static_texture_state(&key->state[i].texture_state,
+                                            lp->sampler_views[PIPE_SHADER_FRAGMENT][i]);
+         }
        }
     }
  }