LLVMValueRef step1_ptr, /* ivec4 */
LLVMValueRef step2_ptr) /* ivec4 */
{
- /*
- c0_vec = splat(c0)
- c1_vec = splat(c1)
- c2_vec = splat(c2)
- m0_vec = step0_ptr[i] > c0_vec
- m1_vec = step1_ptr[i] > c1_vec
- m2_vec = step2_ptr[i] > c2_vec
- mask = m0_vec & m1_vec & m2_vec
- */
- struct lp_build_flow_context *flow;
+#define OPTIMIZE_IN_OUT_TEST 0
+#if OPTIMIZE_IN_OUT_TEST
struct lp_build_if_state ifctx;
+ LLVMValueRef not_draw_all;
+#endif
+ struct lp_build_flow_context *flow;
struct lp_type i32_type;
LLVMTypeRef i32vec4_type, mask_type;
LLVMValueRef c0_vec, c1_vec, c2_vec;
- LLVMValueRef not_draw_all;
LLVMValueRef in_out_mask;
assert(i < 4);
/*
* Use a conditional here to do detailed pixel in/out testing.
- * We only have to do this if c0 != {INT_MIN, INT_MIN, INT_MIN, INT_MIN}
+ * We only have to do this if c0 != INT_MIN.
*/
flow = lp_build_flow_create(builder);
lp_build_flow_scope_begin(flow);
{
-#define OPTIMIZE_IN_OUT_TEST 1
#if OPTIMIZE_IN_OUT_TEST
-
+ /* not_draw_all = (c0 != INT_MIN) */
not_draw_all = LLVMBuildICmp(builder,
LLVMIntNE,
c0,
lp_build_flow_scope_declare(flow, &in_out_mask);
+ /* if (not_draw_all) {... */
lp_build_if(&ifctx, flow, builder, not_draw_all);
#endif
{
lp_build_name(c1_vec, "edgeconst1vec");
lp_build_name(c2_vec, "edgeconst2vec");
-
+ /* load step0vec, step1, step2 vec from memory */
index = LLVMConstInt(LLVMInt32Type(), i, 0);
step0_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step0_ptr, &index, 1, ""), "");
step1_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step1_ptr, &index, 1, ""), "");
step2_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step2_ptr, &index, 1, ""), "");
-
lp_build_name(step0_vec, "step0vec");
lp_build_name(step1_vec, "step1vec");
lp_build_name(step2_vec, "step2vec");
+ /* m0_vec = step0_ptr[i] > c0_vec */
m0_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step0_vec, c0_vec);
m1_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step1_vec, c1_vec);
m2_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step2_vec, c2_vec);
+ /* in_out_mask = m0_vec & m1_vec & m2_vec */
m = LLVMBuildAnd(builder, m0_vec, m1_vec, "");
in_out_mask = LLVMBuildAnd(builder, m, m2_vec, "");
lp_build_name(in_out_mask, "inoutmaskvec");
-
- /* This is the initial alive/dead pixel mask. Additional bits will get cleared
- * when the Z test fails, etc.
- */
}
#if OPTIMIZE_IN_OUT_TEST
lp_build_endif(&ifctx);
lp_build_flow_scope_end(flow);
lp_build_flow_destroy(flow);
+ /* This is the initial alive/dead pixel mask for a quad of four pixels.
+ * It's an int[4] vector with each word set to 0 or ~0.
+ * Words will get cleared when pixels faile the Z test, etc.
+ */
*mask = in_out_mask;
}
+static LLVMValueRef
+generate_scissor_test(LLVMBuilderRef builder,
+ LLVMValueRef context_ptr,
+ const struct lp_build_interp_soa_context *interp,
+ struct lp_type type)
+{
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
+ LLVMValueRef xpos = interp->pos[0], ypos = interp->pos[1];
+ LLVMValueRef xmin, ymin, xmax, ymax;
+ LLVMValueRef m0, m1, m2, m3, m;
+
+ /* xpos, ypos contain the window coords for the four pixels in the quad */
+ assert(xpos);
+ assert(ypos);
+
+ /* get the current scissor bounds, convert to vectors */
+ xmin = lp_jit_context_scissor_xmin_value(builder, context_ptr);
+ xmin = lp_build_broadcast(builder, vec_type, xmin);
+
+ ymin = lp_jit_context_scissor_ymin_value(builder, context_ptr);
+ ymin = lp_build_broadcast(builder, vec_type, ymin);
+
+ xmax = lp_jit_context_scissor_xmax_value(builder, context_ptr);
+ xmax = lp_build_broadcast(builder, vec_type, xmax);
+
+ ymax = lp_jit_context_scissor_ymax_value(builder, context_ptr);
+ ymax = lp_build_broadcast(builder, vec_type, ymax);
+
+ /* compare the fragment's position coordinates against the scissor bounds */
+ m0 = lp_build_compare(builder, type, PIPE_FUNC_GEQUAL, xpos, xmin);
+ m1 = lp_build_compare(builder, type, PIPE_FUNC_GEQUAL, ypos, ymin);
+ m2 = lp_build_compare(builder, type, PIPE_FUNC_LESS, xpos, xmax);
+ m3 = lp_build_compare(builder, type, PIPE_FUNC_LESS, ypos, ymax);
+
+ /* AND all the masks together */
+ m = LLVMBuildAnd(builder, m0, m1, "");
+ m = LLVMBuildAnd(builder, m, m2, "");
+ m = LLVMBuildAnd(builder, m, m3, "");
+
+ lp_build_name(m, "scissormask");
+
+ return m;
+}
+
+
/**
* Generate the fragment shader, depth/stencil test, and alpha tests.
* \param i which quad in the tile, in range [0,3]
/* 'mask' will control execution based on quad's pixel alive/killed state */
lp_build_mask_begin(&mask, flow, type, *pmask);
+ if (key->scissor) {
+ LLVMValueRef smask =
+ generate_scissor_test(builder, context_ptr, interp, type);
+ lp_build_mask_update(&mask, smask);
+ }
early_depth_test =
key->depth.enabled &&
* pixels at at time. The block contains 2x2 quads. Each quad contains
* 2x2 pixels.
*/
-static struct lp_fragment_shader_variant *
+static void
generate_fragment(struct llvmpipe_context *lp,
struct lp_fragment_shader *shader,
- const struct lp_fragment_shader_variant_key *key)
+ struct lp_fragment_shader_variant *variant)
{
struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
- struct lp_fragment_shader_variant *variant;
+ const struct lp_fragment_shader_variant_key *key = &variant->key;
struct lp_type fs_type;
struct lp_type blend_type;
LLVMTypeRef fs_elem_type;
unsigned chan;
unsigned cbuf;
- if (LP_DEBUG & DEBUG_JIT) {
- tgsi_dump(shader->base.tokens, 0);
- if(key->depth.enabled) {
- debug_printf("depth.format = %s\n", pf_name(key->zsbuf_format));
- debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE));
- debug_printf("depth.writemask = %u\n", key->depth.writemask);
- }
- if(key->alpha.enabled) {
- debug_printf("alpha.func = %s\n", debug_dump_func(key->alpha.func, TRUE));
- debug_printf("alpha.ref_value = %f\n", key->alpha.ref_value);
- }
- if(key->blend.logicop_enable) {
- debug_printf("blend.logicop_func = %u\n", key->blend.logicop_func);
- }
- else if(key->blend.blend_enable) {
- debug_printf("blend.rgb_func = %s\n", debug_dump_blend_func (key->blend.rgb_func, TRUE));
- debug_printf("rgb_src_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_src_factor, TRUE));
- debug_printf("rgb_dst_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_dst_factor, TRUE));
- debug_printf("alpha_func = %s\n", debug_dump_blend_func (key->blend.alpha_func, TRUE));
- debug_printf("alpha_src_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_src_factor, TRUE));
- debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE));
- }
- debug_printf("blend.colormask = 0x%x\n", key->blend.colormask);
- for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) {
- if(key->sampler[i].format) {
- debug_printf("sampler[%u] = \n", i);
- debug_printf(" .format = %s\n",
- pf_name(key->sampler[i].format));
- debug_printf(" .target = %s\n",
- debug_dump_tex_target(key->sampler[i].target, TRUE));
- debug_printf(" .pot = %u %u %u\n",
- key->sampler[i].pot_width,
- key->sampler[i].pot_height,
- key->sampler[i].pot_depth);
- debug_printf(" .wrap = %s %s %s\n",
- debug_dump_tex_wrap(key->sampler[i].wrap_s, TRUE),
- debug_dump_tex_wrap(key->sampler[i].wrap_t, TRUE),
- debug_dump_tex_wrap(key->sampler[i].wrap_r, TRUE));
- debug_printf(" .min_img_filter = %s\n",
- debug_dump_tex_filter(key->sampler[i].min_img_filter, TRUE));
- debug_printf(" .min_mip_filter = %s\n",
- debug_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE));
- debug_printf(" .mag_img_filter = %s\n",
- debug_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE));
- if(key->sampler[i].compare_mode != PIPE_TEX_COMPARE_NONE)
- debug_printf(" .compare_func = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE));
- debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords);
- debug_printf(" .prefilter = %u\n", key->sampler[i].prefilter);
- }
- }
- }
-
- variant = CALLOC_STRUCT(lp_fragment_shader_variant);
- if(!variant)
- return NULL;
-
- variant->shader = shader;
- memcpy(&variant->key, key, sizeof *key);
/* TODO: actually pick these based on the fs and color buffer
* characteristics. */
variant->next = shader->variants;
shader->variants = variant;
+}
+
+
+static struct lp_fragment_shader_variant *
+generate_variant(struct llvmpipe_context *lp,
+ struct lp_fragment_shader *shader,
+ const struct lp_fragment_shader_variant_key *key)
+{
+ struct lp_fragment_shader_variant *variant;
+
+ if (LP_DEBUG & DEBUG_JIT) {
+ unsigned i;
+
+ tgsi_dump(shader->base.tokens, 0);
+ if(key->depth.enabled) {
+ debug_printf("depth.format = %s\n", pf_name(key->zsbuf_format));
+ debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE));
+ debug_printf("depth.writemask = %u\n", key->depth.writemask);
+ }
+ if(key->alpha.enabled) {
+ debug_printf("alpha.func = %s\n", debug_dump_func(key->alpha.func, TRUE));
+ debug_printf("alpha.ref_value = %f\n", key->alpha.ref_value);
+ }
+ if(key->blend.logicop_enable) {
+ debug_printf("blend.logicop_func = %u\n", key->blend.logicop_func);
+ }
+ else if(key->blend.blend_enable) {
+ debug_printf("blend.rgb_func = %s\n", debug_dump_blend_func (key->blend.rgb_func, TRUE));
+ debug_printf("rgb_src_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_src_factor, TRUE));
+ debug_printf("rgb_dst_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_dst_factor, TRUE));
+ debug_printf("alpha_func = %s\n", debug_dump_blend_func (key->blend.alpha_func, TRUE));
+ debug_printf("alpha_src_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_src_factor, TRUE));
+ debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE));
+ }
+ debug_printf("blend.colormask = 0x%x\n", key->blend.colormask);
+ for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) {
+ if(key->sampler[i].format) {
+ debug_printf("sampler[%u] = \n", i);
+ debug_printf(" .format = %s\n",
+ pf_name(key->sampler[i].format));
+ debug_printf(" .target = %s\n",
+ debug_dump_tex_target(key->sampler[i].target, TRUE));
+ debug_printf(" .pot = %u %u %u\n",
+ key->sampler[i].pot_width,
+ key->sampler[i].pot_height,
+ key->sampler[i].pot_depth);
+ debug_printf(" .wrap = %s %s %s\n",
+ debug_dump_tex_wrap(key->sampler[i].wrap_s, TRUE),
+ debug_dump_tex_wrap(key->sampler[i].wrap_t, TRUE),
+ debug_dump_tex_wrap(key->sampler[i].wrap_r, TRUE));
+ debug_printf(" .min_img_filter = %s\n",
+ debug_dump_tex_filter(key->sampler[i].min_img_filter, TRUE));
+ debug_printf(" .min_mip_filter = %s\n",
+ debug_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE));
+ debug_printf(" .mag_img_filter = %s\n",
+ debug_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE));
+ if(key->sampler[i].compare_mode != PIPE_TEX_COMPARE_NONE)
+ debug_printf(" .compare_func = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE));
+ debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords);
+ debug_printf(" .prefilter = %u\n", key->sampler[i].prefilter);
+ }
+ }
+ }
+
+ variant = CALLOC_STRUCT(lp_fragment_shader_variant);
+ if(!variant)
+ return NULL;
+
+ variant->shader = shader;
+ memcpy(&variant->key, key, sizeof *key);
+
+ generate_fragment(lp, shader, variant);
return variant;
}
/* alpha.ref_value is passed in jit_context */
key->flatshade = lp->rasterizer->flatshade;
+ key->scissor = lp->rasterizer->scissor;
if (lp->framebuffer.nr_cbufs) {
memcpy(&key->blend, lp->blend, sizeof key->blend);
}
+/**
+ * Update fragment state. This is called just prior to drawing
+ * something when some fragment-related state has changed.
+ */
void
llvmpipe_update_fs(struct llvmpipe_context *lp)
{
struct lp_fragment_shader *shader = lp->fs;
struct lp_fragment_shader_variant_key key;
struct lp_fragment_shader_variant *variant;
+ boolean opaque;
make_variant_key(lp, shader, &key);
}
if(!variant)
- variant = generate_fragment(lp, shader, &key);
+ variant = generate_variant(lp, shader, &key);
shader->current = variant;
+ /* TODO: put this in the variant */
+ /* TODO: most of these can be relaxed, in particular the colormask */
+ opaque = !key.blend.logicop_enable &&
+ !key.blend.blend_enable &&
+ key.blend.colormask == 0xf &&
+ !key.alpha.enabled &&
+ !key.depth.enabled &&
+ !key.scissor &&
+ !shader->info.uses_kill
+ ? TRUE : FALSE;
+
lp_setup_set_fs_function(lp->setup,
- shader->current->jit_function);
+ shader->current->jit_function,
+ opaque);
}