llvmpipe: implement scissor testing
[mesa.git] / src / gallium / drivers / llvmpipe / lp_state_fs.c
index 6816db4387ff2526f3d4520fb6f64573e7af1800..d12d3f6091aeb10ab56d6744daba541128766bc1 100644 (file)
@@ -204,21 +204,15 @@ generate_tri_edge_mask(LLVMBuilderRef builder,
                        LLVMValueRef step1_ptr,  /* ivec4 */
                        LLVMValueRef step2_ptr)  /* ivec4 */
 {
-   /*
-     c0_vec = splat(c0)
-     c1_vec = splat(c1)
-     c2_vec = splat(c2)
-     m0_vec = step0_ptr[i] > c0_vec
-     m1_vec = step1_ptr[i] > c1_vec
-     m2_vec = step2_ptr[i] > c2_vec
-     mask = m0_vec & m1_vec & m2_vec
-    */
-   struct lp_build_flow_context *flow;
+#define OPTIMIZE_IN_OUT_TEST 0
+#if OPTIMIZE_IN_OUT_TEST
    struct lp_build_if_state ifctx;
+   LLVMValueRef not_draw_all;
+#endif
+   struct lp_build_flow_context *flow;
    struct lp_type i32_type;
    LLVMTypeRef i32vec4_type, mask_type;
    LLVMValueRef c0_vec, c1_vec, c2_vec;
-   LLVMValueRef not_draw_all;
    LLVMValueRef in_out_mask;
 
    assert(i < 4);
@@ -237,15 +231,14 @@ generate_tri_edge_mask(LLVMBuilderRef builder,
 
    /*
     * Use a conditional here to do detailed pixel in/out testing.
-    * We only have to do this if c0 != {INT_MIN, INT_MIN, INT_MIN, INT_MIN}
+    * We only have to do this if c0 != INT_MIN.
     */
    flow = lp_build_flow_create(builder);
    lp_build_flow_scope_begin(flow);
 
    {
-#define OPTIMIZE_IN_OUT_TEST 1
 #if OPTIMIZE_IN_OUT_TEST
-
+      /* not_draw_all = (c0 != INT_MIN) */
       not_draw_all = LLVMBuildICmp(builder,
                                    LLVMIntNE,
                                    c0,
@@ -257,6 +250,7 @@ generate_tri_edge_mask(LLVMBuilderRef builder,
 
       lp_build_flow_scope_declare(flow, &in_out_mask);
 
+      /* if (not_draw_all) {... */
       lp_build_if(&ifctx, flow, builder, not_draw_all);
 #endif
       {
@@ -275,27 +269,24 @@ generate_tri_edge_mask(LLVMBuilderRef builder,
          lp_build_name(c1_vec, "edgeconst1vec");
          lp_build_name(c2_vec, "edgeconst2vec");
 
-
+         /* load step0_vec, step1_vec, step2_vec from memory */
          index = LLVMConstInt(LLVMInt32Type(), i, 0);
          step0_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step0_ptr, &index, 1, ""), "");
          step1_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step1_ptr, &index, 1, ""), "");
          step2_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step2_ptr, &index, 1, ""), "");
-
          lp_build_name(step0_vec, "step0vec");
          lp_build_name(step1_vec, "step1vec");
          lp_build_name(step2_vec, "step2vec");
 
+         /* m0_vec = step0_ptr[i] > c0_vec */
          m0_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step0_vec, c0_vec);
          m1_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step1_vec, c1_vec);
          m2_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step2_vec, c2_vec);
 
+         /* in_out_mask = m0_vec & m1_vec & m2_vec */
          m = LLVMBuildAnd(builder, m0_vec, m1_vec, "");
          in_out_mask = LLVMBuildAnd(builder, m, m2_vec, "");
          lp_build_name(in_out_mask, "inoutmaskvec");
-
-         /* This is the initial alive/dead pixel mask.  Additional bits will get cleared
-          * when the Z test fails, etc.
-          */
       }
 #if OPTIMIZE_IN_OUT_TEST
       lp_build_endif(&ifctx);
@@ -305,10 +296,59 @@ generate_tri_edge_mask(LLVMBuilderRef builder,
    lp_build_flow_scope_end(flow);
    lp_build_flow_destroy(flow);
 
+   /* This is the initial alive/dead pixel mask for a quad of four pixels.
+    * It's an int[4] vector with each word set to 0 or ~0.
+    * Words will get cleared when pixels fail the Z test, etc.
+    */
    *mask = in_out_mask;
 }
 
 
+static LLVMValueRef
+generate_scissor_test(LLVMBuilderRef builder,
+                      LLVMValueRef context_ptr,
+                      const struct lp_build_interp_soa_context *interp,
+                      struct lp_type type)
+{
+   LLVMTypeRef vec_type = lp_build_vec_type(type);
+   LLVMValueRef xpos = interp->pos[0], ypos = interp->pos[1];
+   LLVMValueRef xmin, ymin, xmax, ymax;
+   LLVMValueRef m0, m1, m2, m3, m;
+
+   /* xpos, ypos contain the window coords for the four pixels in the quad */
+   assert(xpos);
+   assert(ypos);
+
+   /* get the current scissor bounds, convert to vectors */
+   xmin = lp_jit_context_scissor_xmin_value(builder, context_ptr);
+   xmin = lp_build_broadcast(builder, vec_type, xmin);
+
+   ymin = lp_jit_context_scissor_ymin_value(builder, context_ptr);
+   ymin = lp_build_broadcast(builder, vec_type, ymin);
+
+   xmax = lp_jit_context_scissor_xmax_value(builder, context_ptr);
+   xmax = lp_build_broadcast(builder, vec_type, xmax);
+
+   ymax = lp_jit_context_scissor_ymax_value(builder, context_ptr);
+   ymax = lp_build_broadcast(builder, vec_type, ymax);
+
+   /* compare the fragment's position coordinates against the scissor bounds */
+   m0 = lp_build_compare(builder, type, PIPE_FUNC_GEQUAL, xpos, xmin);
+   m1 = lp_build_compare(builder, type, PIPE_FUNC_GEQUAL, ypos, ymin);
+   m2 = lp_build_compare(builder, type, PIPE_FUNC_LESS, xpos, xmax);
+   m3 = lp_build_compare(builder, type, PIPE_FUNC_LESS, ypos, ymax);
+
+   /* AND all the masks together */
+   m = LLVMBuildAnd(builder, m0, m1, "");
+   m = LLVMBuildAnd(builder, m, m2, "");
+   m = LLVMBuildAnd(builder, m, m3, "");
+
+   lp_build_name(m, "scissormask");
+
+   return m;
+}
+
+
 /**
  * Generate the fragment shader, depth/stencil test, and alpha tests.
  * \param i  which quad in the tile, in range [0,3]
@@ -377,6 +417,11 @@ generate_fs(struct llvmpipe_context *lp,
    /* 'mask' will control execution based on quad's pixel alive/killed state */
    lp_build_mask_begin(&mask, flow, type, *pmask);
 
+   if (key->scissor) {
+      LLVMValueRef smask =
+         generate_scissor_test(builder, context_ptr, interp, type);
+      lp_build_mask_update(&mask, smask);
+   }
 
    early_depth_test =
       key->depth.enabled &&
@@ -515,13 +560,13 @@ generate_blend(const struct pipe_blend_state *blend,
  * pixels at at time.  The block contains 2x2 quads.  Each quad contains
  * 2x2 pixels.
  */
-static struct lp_fragment_shader_variant *
+static void
 generate_fragment(struct llvmpipe_context *lp,
                   struct lp_fragment_shader *shader,
-                  const struct lp_fragment_shader_variant_key *key)
+                  struct lp_fragment_shader_variant *variant)
 {
    struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
-   struct lp_fragment_shader_variant *variant;
+   const struct lp_fragment_shader_variant_key *key = &variant->key;
    struct lp_type fs_type;
    struct lp_type blend_type;
    LLVMTypeRef fs_elem_type;
@@ -556,64 +601,6 @@ generate_fragment(struct llvmpipe_context *lp,
    unsigned chan;
    unsigned cbuf;
 
-   if (LP_DEBUG & DEBUG_JIT) {
-      tgsi_dump(shader->base.tokens, 0);
-      if(key->depth.enabled) {
-         debug_printf("depth.format = %s\n", pf_name(key->zsbuf_format));
-         debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE));
-         debug_printf("depth.writemask = %u\n", key->depth.writemask);
-      }
-      if(key->alpha.enabled) {
-         debug_printf("alpha.func = %s\n", debug_dump_func(key->alpha.func, TRUE));
-         debug_printf("alpha.ref_value = %f\n", key->alpha.ref_value);
-      }
-      if(key->blend.logicop_enable) {
-         debug_printf("blend.logicop_func = %u\n", key->blend.logicop_func);
-      }
-      else if(key->blend.blend_enable) {
-         debug_printf("blend.rgb_func = %s\n",   debug_dump_blend_func  (key->blend.rgb_func, TRUE));
-         debug_printf("rgb_src_factor = %s\n",   debug_dump_blend_factor(key->blend.rgb_src_factor, TRUE));
-         debug_printf("rgb_dst_factor = %s\n",   debug_dump_blend_factor(key->blend.rgb_dst_factor, TRUE));
-         debug_printf("alpha_func = %s\n",       debug_dump_blend_func  (key->blend.alpha_func, TRUE));
-         debug_printf("alpha_src_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_src_factor, TRUE));
-         debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE));
-      }
-      debug_printf("blend.colormask = 0x%x\n", key->blend.colormask);
-      for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) {
-         if(key->sampler[i].format) {
-            debug_printf("sampler[%u] = \n", i);
-            debug_printf("  .format = %s\n",
-                         pf_name(key->sampler[i].format));
-            debug_printf("  .target = %s\n",
-                         debug_dump_tex_target(key->sampler[i].target, TRUE));
-            debug_printf("  .pot = %u %u %u\n",
-                         key->sampler[i].pot_width,
-                         key->sampler[i].pot_height,
-                         key->sampler[i].pot_depth);
-            debug_printf("  .wrap = %s %s %s\n",
-                         debug_dump_tex_wrap(key->sampler[i].wrap_s, TRUE),
-                         debug_dump_tex_wrap(key->sampler[i].wrap_t, TRUE),
-                         debug_dump_tex_wrap(key->sampler[i].wrap_r, TRUE));
-            debug_printf("  .min_img_filter = %s\n",
-                         debug_dump_tex_filter(key->sampler[i].min_img_filter, TRUE));
-            debug_printf("  .min_mip_filter = %s\n",
-                         debug_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE));
-            debug_printf("  .mag_img_filter = %s\n",
-                         debug_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE));
-            if(key->sampler[i].compare_mode != PIPE_TEX_COMPARE_NONE)
-               debug_printf("  .compare_func = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE));
-            debug_printf("  .normalized_coords = %u\n", key->sampler[i].normalized_coords);
-            debug_printf("  .prefilter = %u\n", key->sampler[i].prefilter);
-         }
-      }
-   }
-
-   variant = CALLOC_STRUCT(lp_fragment_shader_variant);
-   if(!variant)
-      return NULL;
-
-   variant->shader = shader;
-   memcpy(&variant->key, key, sizeof *key);
 
    /* TODO: actually pick these based on the fs and color buffer
     * characteristics. */
@@ -828,6 +815,78 @@ generate_fragment(struct llvmpipe_context *lp,
 
    variant->next = shader->variants;
    shader->variants = variant;
+}
+
+
+static struct lp_fragment_shader_variant *
+generate_variant(struct llvmpipe_context *lp,
+                 struct lp_fragment_shader *shader,
+                 const struct lp_fragment_shader_variant_key *key)
+{
+   struct lp_fragment_shader_variant *variant;
+
+   if (LP_DEBUG & DEBUG_JIT) {
+      unsigned i;
+
+      tgsi_dump(shader->base.tokens, 0);
+      if(key->depth.enabled) {
+         debug_printf("depth.format = %s\n", pf_name(key->zsbuf_format));
+         debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE));
+         debug_printf("depth.writemask = %u\n", key->depth.writemask);
+      }
+      if(key->alpha.enabled) {
+         debug_printf("alpha.func = %s\n", debug_dump_func(key->alpha.func, TRUE));
+         debug_printf("alpha.ref_value = %f\n", key->alpha.ref_value);
+      }
+      if(key->blend.logicop_enable) {
+         debug_printf("blend.logicop_func = %u\n", key->blend.logicop_func);
+      }
+      else if(key->blend.blend_enable) {
+         debug_printf("blend.rgb_func = %s\n",   debug_dump_blend_func  (key->blend.rgb_func, TRUE));
+         debug_printf("rgb_src_factor = %s\n",   debug_dump_blend_factor(key->blend.rgb_src_factor, TRUE));
+         debug_printf("rgb_dst_factor = %s\n",   debug_dump_blend_factor(key->blend.rgb_dst_factor, TRUE));
+         debug_printf("alpha_func = %s\n",       debug_dump_blend_func  (key->blend.alpha_func, TRUE));
+         debug_printf("alpha_src_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_src_factor, TRUE));
+         debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE));
+      }
+      debug_printf("blend.colormask = 0x%x\n", key->blend.colormask);
+      for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) {
+         if(key->sampler[i].format) {
+            debug_printf("sampler[%u] = \n", i);
+            debug_printf("  .format = %s\n",
+                         pf_name(key->sampler[i].format));
+            debug_printf("  .target = %s\n",
+                         debug_dump_tex_target(key->sampler[i].target, TRUE));
+            debug_printf("  .pot = %u %u %u\n",
+                         key->sampler[i].pot_width,
+                         key->sampler[i].pot_height,
+                         key->sampler[i].pot_depth);
+            debug_printf("  .wrap = %s %s %s\n",
+                         debug_dump_tex_wrap(key->sampler[i].wrap_s, TRUE),
+                         debug_dump_tex_wrap(key->sampler[i].wrap_t, TRUE),
+                         debug_dump_tex_wrap(key->sampler[i].wrap_r, TRUE));
+            debug_printf("  .min_img_filter = %s\n",
+                         debug_dump_tex_filter(key->sampler[i].min_img_filter, TRUE));
+            debug_printf("  .min_mip_filter = %s\n",
+                         debug_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE));
+            debug_printf("  .mag_img_filter = %s\n",
+                         debug_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE));
+            if(key->sampler[i].compare_mode != PIPE_TEX_COMPARE_NONE)
+               debug_printf("  .compare_func = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE));
+            debug_printf("  .normalized_coords = %u\n", key->sampler[i].normalized_coords);
+            debug_printf("  .prefilter = %u\n", key->sampler[i].prefilter);
+         }
+      }
+   }
+
+   variant = CALLOC_STRUCT(lp_fragment_shader_variant);
+   if(!variant)
+      return NULL;
+
+   variant->shader = shader;
+   memcpy(&variant->key, key, sizeof *key);
+
+   generate_fragment(lp, shader, variant);
 
    return variant;
 }
@@ -959,6 +1018,7 @@ make_variant_key(struct llvmpipe_context *lp,
    /* alpha.ref_value is passed in jit_context */
 
    key->flatshade = lp->rasterizer->flatshade;
+   key->scissor = lp->rasterizer->scissor;
 
    if (lp->framebuffer.nr_cbufs) {
       memcpy(&key->blend, lp->blend, sizeof key->blend);
@@ -990,12 +1050,17 @@ make_variant_key(struct llvmpipe_context *lp,
 }
 
 
+/**
+ * Update fragment state.  This is called just prior to drawing
+ * something when some fragment-related state has changed.
+ */
 void 
 llvmpipe_update_fs(struct llvmpipe_context *lp)
 {
    struct lp_fragment_shader *shader = lp->fs;
    struct lp_fragment_shader_variant_key key;
    struct lp_fragment_shader_variant *variant;
+   boolean opaque;
 
    make_variant_key(lp, shader, &key);
 
@@ -1008,10 +1073,22 @@ llvmpipe_update_fs(struct llvmpipe_context *lp)
    }
 
    if(!variant)
-      variant = generate_fragment(lp, shader, &key);
+      variant = generate_variant(lp, shader, &key);
 
    shader->current = variant;
 
+   /* TODO: put this in the variant */
+   /* TODO: most of these can be relaxed, in particular the colormask */
+   opaque = !key.blend.logicop_enable &&
+            !key.blend.blend_enable &&
+            key.blend.colormask == 0xf &&
+            !key.alpha.enabled &&
+            !key.depth.enabled &&
+            !key.scissor &&
+            !shader->info.uses_kill
+            ? TRUE : FALSE;
+
    lp_setup_set_fs_function(lp->setup, 
-                            shader->current->jit_function);
+                            shader->current->jit_function,
+                            opaque);
 }