X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_wm.c;h=2c9a6a07fbc2ce687f417c9d00e838fe9e420db6;hb=005c8e01062e8e88a86904b955d5422742bd32e7;hp=c4b2157db553c7d43260a7f196826f7b57417a2b;hpb=1f32c665c8af0622e2bbf451edb999ffbcd7d0fe;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index c4b2157db55..1faf2eaa346 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -1,338 +1,533 @@ /* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - + * Copyright (C) Intel Corp. 2006. All Rights Reserved. + * Intel funded Tungsten Graphics to + * develop this 3D driver. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ #include "brw_context.h" #include "brw_wm.h" #include "brw_state.h" +#include "brw_shader.h" +#include "main/enums.h" #include "main/formats.h" +#include "main/fbobject.h" #include "main/samplerobj.h" +#include "main/framebuffer.h" +#include "program/prog_parameter.h" +#include "program/program.h" +#include "intel_mipmap_tree.h" + +#include "util/ralloc.h" -/** Return number of src args for given instruction */ -GLuint brw_wm_nr_args( GLuint opcode ) +/** + * Return a bitfield where bit n is set if barycentric interpolation mode n + * (see enum brw_wm_barycentric_interp_mode) is needed by the fragment shader. + */ +unsigned +brw_compute_barycentric_interp_modes(struct brw_context *brw, + bool shade_model_flat, + bool persample_shading, + const struct gl_fragment_program *fprog) { - switch (opcode) { - case WM_FRONTFACING: - case WM_PIXELXY: - return 0; - case WM_CINTERP: - case WM_WPOSXY: - case WM_DELTAXY: - return 1; - case WM_LINTERP: - case WM_PIXELW: - return 2; - case WM_FB_WRITE: - case WM_PINTERP: - return 3; - default: - assert(opcode < MAX_OPCODE); - return _mesa_num_inst_src_regs(opcode); + unsigned barycentric_interp_modes = 0; + int attr; + + /* Loop through all fragment shader inputs to figure out what interpolation + * modes are in use, and set the appropriate bits in + * barycentric_interp_modes. + */ + for (attr = 0; attr < VARYING_SLOT_MAX; ++attr) { + enum glsl_interp_qualifier interp_qualifier = + fprog->InterpQualifier[attr]; + bool is_centroid = (fprog->IsCentroid & BITFIELD64_BIT(attr)) && + !persample_shading; + bool is_sample = (fprog->IsSample & BITFIELD64_BIT(attr)) || + persample_shading; + bool is_gl_Color = attr == VARYING_SLOT_COL0 || attr == VARYING_SLOT_COL1; + + /* Ignore unused inputs. */ + if (!(fprog->Base.InputsRead & BITFIELD64_BIT(attr))) + continue; + + /* Ignore WPOS and FACE, because they don't require interpolation. */ + if (attr == VARYING_SLOT_POS || attr == VARYING_SLOT_FACE) + continue; + + /* Determine the set (or sets) of barycentric coordinates needed to + * interpolate this variable. Note that when + * brw->needs_unlit_centroid_workaround is set, centroid interpolation + * uses PIXEL interpolation for unlit pixels and CENTROID interpolation + * for lit pixels, so we need both sets of barycentric coordinates. + */ + if (interp_qualifier == INTERP_QUALIFIER_NOPERSPECTIVE) { + if (is_centroid) { + barycentric_interp_modes |= + 1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC; + } else if (is_sample) { + barycentric_interp_modes |= + 1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC; + } + if ((!is_centroid && !is_sample) || + brw->needs_unlit_centroid_workaround) { + barycentric_interp_modes |= + 1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC; + } + } else if (interp_qualifier == INTERP_QUALIFIER_SMOOTH || + (!(shade_model_flat && is_gl_Color) && + interp_qualifier == INTERP_QUALIFIER_NONE)) { + if (is_centroid) { + barycentric_interp_modes |= + 1 << BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC; + } else if (is_sample) { + barycentric_interp_modes |= + 1 << BRW_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC; + } + if ((!is_centroid && !is_sample) || + brw->needs_unlit_centroid_workaround) { + barycentric_interp_modes |= + 1 << BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC; + } + } } -} + return barycentric_interp_modes; +} -GLuint brw_wm_is_scalar_result( GLuint opcode ) +static uint8_t +computed_depth_mode(struct gl_fragment_program *fp) { - switch (opcode) { - case OPCODE_COS: - case OPCODE_EX2: - case OPCODE_LG2: - case OPCODE_POW: - case OPCODE_RCP: - case OPCODE_RSQ: - case OPCODE_SIN: - case OPCODE_DP2: - case OPCODE_DP3: - case OPCODE_DP4: - case OPCODE_DPH: - case OPCODE_DST: - return 1; - - default: - return 0; + if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { + switch (fp->FragDepthLayout) { + case FRAG_DEPTH_LAYOUT_NONE: + case FRAG_DEPTH_LAYOUT_ANY: + return BRW_PSCDEPTH_ON; + case FRAG_DEPTH_LAYOUT_GREATER: + return BRW_PSCDEPTH_ON_GE; + case FRAG_DEPTH_LAYOUT_LESS: + return BRW_PSCDEPTH_ON_LE; + case FRAG_DEPTH_LAYOUT_UNCHANGED: + return BRW_PSCDEPTH_OFF; + } } + return BRW_PSCDEPTH_OFF; } +bool +brw_wm_prog_data_compare(const void *in_a, const void *in_b) +{ + const struct brw_wm_prog_data *a = in_a; + const struct brw_wm_prog_data *b = in_b; + + /* Compare the base structure. */ + if (!brw_stage_prog_data_compare(&a->base, &b->base)) + return false; + + /* Compare the rest of the structure. */ + const unsigned offset = sizeof(struct brw_stage_prog_data); + if (memcmp(((char *) a) + offset, ((char *) b) + offset, + sizeof(struct brw_wm_prog_data) - offset)) + return false; + + return true; +} /** - * Do GPU code generation for non-GLSL shader. non-GLSL shaders have - * no flow control instructions so we can more readily do SSA-style - * optimizations. + * All Mesa program -> GPU code generation goes through this function. + * Depending on the instructions used (i.e. flow control instructions) + * we'll use one of two code generators. */ -static void -brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) +bool +brw_codegen_wm_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_fragment_program *fp, + struct brw_wm_prog_key *key) { - /* Augment fragment program. Add instructions for pre- and - * post-fragment-program tasks such as interpolation and fogging. - */ - brw_wm_pass_fp(c); + struct gl_context *ctx = &brw->ctx; + void *mem_ctx = ralloc_context(NULL); + struct brw_wm_prog_data prog_data; + const GLuint *program; + struct brw_shader *fs = NULL; + GLuint program_size; + bool start_busy = false; + double start_time = 0; - /* Translate to intermediate representation. Build register usage - * chains. - */ - brw_wm_pass0(c); + if (prog) + fs = (struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; - /* Dead code removal. + memset(&prog_data, 0, sizeof(prog_data)); + /* key->alpha_test_func means simulating alpha testing via discards, + * so the shader definitely kills pixels. */ - brw_wm_pass1(c); + prog_data.uses_kill = fp->program.UsesKill || key->alpha_test_func; + prog_data.uses_omask = + fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK); + prog_data.computed_depth_mode = computed_depth_mode(&fp->program); + + prog_data.early_fragment_tests = fs && fs->base.EarlyFragmentTests; - /* Register allocation. - * Divide by two because we operate on 16 pixels at a time and require - * two GRF entries for each logical shader register. + /* Use ALT floating point mode for ARB programs so that 0^0 == 1. */ + if (!prog) + prog_data.base.use_alt_mode = true; + + /* Allocate the references to the uniforms that will end up in the + * prog_data associated with the compiled program, and which will be freed + * by the state cache. */ - c->grf_limit = BRW_WM_MAX_GRF / 2; + int param_count; + if (fs) { + param_count = fs->base.num_uniform_components + + fs->base.NumImages * BRW_IMAGE_PARAM_SIZE; + prog_data.base.nr_image_params = fs->base.NumImages; + } else { + param_count = fp->program.Base.Parameters->NumParameters * 4; + } + /* The backend also sometimes adds params for texture size. */ + param_count += 2 * ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; + prog_data.base.param = + rzalloc_array(NULL, const gl_constant_value *, param_count); + prog_data.base.pull_param = + rzalloc_array(NULL, const gl_constant_value *, param_count); + prog_data.base.image_param = + rzalloc_array(NULL, struct brw_image_param, + prog_data.base.nr_image_params); + prog_data.base.nr_params = param_count; + + prog_data.barycentric_interp_modes = + brw_compute_barycentric_interp_modes(brw, key->flat_shade, + key->persample_shading, + &fp->program); + + if (unlikely(brw->perf_debug)) { + start_busy = (brw->batch.last_bo && + drm_intel_bo_busy(brw->batch.last_bo)); + start_time = get_time(); + } - brw_wm_pass2(c); + program = brw_wm_fs_emit(brw, mem_ctx, key, &prog_data, + &fp->program, prog, &program_size); + if (program == NULL) { + ralloc_free(mem_ctx); + return false; + } - /* how many general-purpose registers are used */ - c->prog_data.total_grf = c->max_wm_grf; + if (unlikely(brw->perf_debug) && fs) { + if (fs->compiled_once) + brw_wm_debug_recompile(brw, prog, key); + fs->compiled_once = true; - /* Emit GEN4 code. - */ - brw_wm_emit(c); + if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) { + perf_debug("FS compile took %.03f ms and stalled the GPU\n", + (get_time() - start_time) * 1000); + } + } + + if (prog_data.base.total_scratch) { + brw_get_scratch_bo(brw, &brw->wm.base.scratch_bo, + prog_data.base.total_scratch * brw->max_wm_threads); + } + + if (unlikely(INTEL_DEBUG & DEBUG_WM)) + fprintf(stderr, "\n"); + + brw_upload_cache(&brw->cache, BRW_CACHE_FS_PROG, + key, sizeof(struct brw_wm_prog_key), + program, program_size, + &prog_data, sizeof(prog_data), + &brw->wm.base.prog_offset, &brw->wm.prog_data); + + ralloc_free(mem_ctx); + + return true; } -static void -brw_wm_payload_setup(struct brw_context *brw, - struct brw_wm_compile *c) +static bool +key_debug(struct brw_context *brw, const char *name, int a, int b) { - struct intel_context *intel = &brw->intel; - bool uses_depth = (c->fp->program.Base.InputsRead & - (1 << FRAG_ATTRIB_WPOS)) != 0; - - if (intel->gen >= 6) { - /* R0-1: masks, pixel X/Y coordinates. */ - c->nr_payload_regs = 2; - /* R2: only for 32-pixel dispatch.*/ - /* R3-4: perspective pixel location barycentric */ - c->nr_payload_regs += 2; - /* R5-6: perspective pixel location bary for dispatch width != 8 */ - if (c->dispatch_width == 16) { - c->nr_payload_regs += 2; - } - /* R7-10: perspective centroid barycentric */ - /* R11-14: perspective sample barycentric */ - /* R15-18: linear pixel location barycentric */ - /* R19-22: linear centroid barycentric */ - /* R23-26: linear sample barycentric */ - - /* R27: interpolated depth if uses source depth */ - if (uses_depth) { - c->source_depth_reg = c->nr_payload_regs; - c->nr_payload_regs++; - if (c->dispatch_width == 16) { - /* R28: interpolated depth if not 8-wide. */ - c->nr_payload_regs++; - } - } - /* R29: interpolated W set if GEN6_WM_USES_SOURCE_W. - */ - if (uses_depth) { - c->source_w_reg = c->nr_payload_regs; - c->nr_payload_regs++; - if (c->dispatch_width == 16) { - /* R30: interpolated W if not 8-wide. */ - c->nr_payload_regs++; - } - } - /* R31: MSAA position offsets. */ - /* R32-: bary for 32-pixel. */ - /* R58-59: interp W for 32-pixel. */ - - if (c->fp->program.Base.OutputsWritten & - BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { - c->source_depth_to_render_target = GL_TRUE; - c->computes_depth = GL_TRUE; - } + if (a != b) { + perf_debug(" %s %d->%d\n", name, a, b); + return true; } else { - brw_wm_lookup_iz(intel, c); + return false; } } -/** - * All Mesa program -> GPU code generation goes through this function. - * Depending on the instructions used (i.e. flow control instructions) - * we'll use one of two code generators. - */ -static void do_wm_prog( struct brw_context *brw, - struct brw_fragment_program *fp, - struct brw_wm_prog_key *key) +bool +brw_debug_recompile_sampler_key(struct brw_context *brw, + const struct brw_sampler_prog_key_data *old_key, + const struct brw_sampler_prog_key_data *key) { - struct intel_context *intel = &brw->intel; - struct brw_wm_compile *c; - const GLuint *program; - GLuint program_size; + bool found = false; - c = brw->wm.compile_data; - if (c == NULL) { - brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data)); - c = brw->wm.compile_data; - if (c == NULL) { - /* Ouch - big out of memory problem. Can't continue - * without triggering a segfault, no way to signal, - * so just return. - */ - return; - } - c->instruction = calloc(1, BRW_WM_MAX_INSN * sizeof(*c->instruction)); - c->prog_instructions = calloc(1, BRW_WM_MAX_INSN * - sizeof(*c->prog_instructions)); - c->vreg = calloc(1, BRW_WM_MAX_VREG * sizeof(*c->vreg)); - c->refs = calloc(1, BRW_WM_MAX_REF * sizeof(*c->refs)); - } else { - void *instruction = c->instruction; - void *prog_instructions = c->prog_instructions; - void *vreg = c->vreg; - void *refs = c->refs; - memset(c, 0, sizeof(*brw->wm.compile_data)); - c->instruction = instruction; - c->prog_instructions = prog_instructions; - c->vreg = vreg; - c->refs = refs; + for (unsigned int i = 0; i < MAX_SAMPLERS; i++) { + found |= key_debug(brw, "EXT_texture_swizzle or DEPTH_TEXTURE_MODE", + old_key->swizzles[i], key->swizzles[i]); + } + found |= key_debug(brw, "GL_CLAMP enabled on any texture unit's 1st coordinate", + old_key->gl_clamp_mask[0], key->gl_clamp_mask[0]); + found |= key_debug(brw, "GL_CLAMP enabled on any texture unit's 2nd coordinate", + old_key->gl_clamp_mask[1], key->gl_clamp_mask[1]); + found |= key_debug(brw, "GL_CLAMP enabled on any texture unit's 3rd coordinate", + old_key->gl_clamp_mask[2], key->gl_clamp_mask[2]); + found |= key_debug(brw, "gather channel quirk on any texture unit", + old_key->gather_channel_quirk_mask, key->gather_channel_quirk_mask); + found |= key_debug(brw, "compressed multisample layout", + old_key->compressed_multisample_layout_mask, + key->compressed_multisample_layout_mask); + + for (unsigned int i = 0; i < MAX_SAMPLERS; i++) { + found |= key_debug(brw, "textureGather workarounds", + old_key->gen6_gather_wa[i], key->gen6_gather_wa[i]); } - memcpy(&c->key, key, sizeof(*key)); - - c->fp = fp; - c->env_param = brw->intel.ctx.FragmentProgram.Parameters; - - brw_init_compile(brw, &c->func); - brw_wm_payload_setup(brw, c); + return found; +} - if (!brw_wm_fs_emit(brw, c)) { - /* - * Shader which use GLSL features such as flow control are handled - * differently from "simple" shaders. - */ - c->dispatch_width = 16; - brw_wm_payload_setup(brw, c); - brw_wm_non_glsl_emit(brw, c); - } - c->prog_data.dispatch_width = c->dispatch_width; +void +brw_wm_debug_recompile(struct brw_context *brw, + struct gl_shader_program *prog, + const struct brw_wm_prog_key *key) +{ + struct brw_cache_item *c = NULL; + const struct brw_wm_prog_key *old_key = NULL; + bool found = false; - /* Scratch space is used for register spilling */ - if (c->last_scratch) { - uint32_t total_scratch; + perf_debug("Recompiling fragment shader for program %d\n", prog->Name); - /* Per-thread scratch space is power-of-two sized. */ - for (c->prog_data.total_scratch = 1024; - c->prog_data.total_scratch <= c->last_scratch; - c->prog_data.total_scratch *= 2) { - /* empty */ - } - total_scratch = c->prog_data.total_scratch * brw->wm_max_threads; + for (unsigned int i = 0; i < brw->cache.size; i++) { + for (c = brw->cache.items[i]; c; c = c->next) { + if (c->cache_id == BRW_CACHE_FS_PROG) { + old_key = c->key; - if (brw->wm.scratch_bo && total_scratch > brw->wm.scratch_bo->size) { - drm_intel_bo_unreference(brw->wm.scratch_bo); - brw->wm.scratch_bo = NULL; - } - if (brw->wm.scratch_bo == NULL) { - brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr, - "wm scratch", - total_scratch, - 4096); + if (old_key->program_string_id == key->program_string_id) + break; + } } + if (c) + break; } - else { - c->prog_data.total_scratch = 0; + + if (!c) { + perf_debug(" Didn't find previous compile in the shader cache for debug\n"); + return; } - if (unlikely(INTEL_DEBUG & DEBUG_WM)) - fprintf(stderr, "\n"); + found |= key_debug(brw, "alphatest, computed depth, depth test, or " + "depth write", + old_key->iz_lookup, key->iz_lookup); + found |= key_debug(brw, "depth statistics", + old_key->stats_wm, key->stats_wm); + found |= key_debug(brw, "flat shading", + old_key->flat_shade, key->flat_shade); + found |= key_debug(brw, "per-sample shading", + old_key->persample_shading, key->persample_shading); + found |= key_debug(brw, "per-sample shading and 2x MSAA", + old_key->persample_2x, key->persample_2x); + found |= key_debug(brw, "number of color buffers", + old_key->nr_color_regions, key->nr_color_regions); + found |= key_debug(brw, "MRT alpha test or alpha-to-coverage", + old_key->replicate_alpha, key->replicate_alpha); + found |= key_debug(brw, "rendering to FBO", + old_key->render_to_fbo, key->render_to_fbo); + found |= key_debug(brw, "fragment color clamping", + old_key->clamp_fragment_color, key->clamp_fragment_color); + found |= key_debug(brw, "line smoothing", + old_key->line_aa, key->line_aa); + found |= key_debug(brw, "renderbuffer height", + old_key->drawable_height, key->drawable_height); + found |= key_debug(brw, "input slots valid", + old_key->input_slots_valid, key->input_slots_valid); + found |= key_debug(brw, "mrt alpha test function", + old_key->alpha_test_func, key->alpha_test_func); + found |= key_debug(brw, "mrt alpha test reference value", + old_key->alpha_test_ref, key->alpha_test_ref); + + found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex); + + if (!found) { + perf_debug(" Something else\n"); + } +} - /* get the program - */ - program = brw_get_program(&c->func, &program_size); - - drm_intel_bo_unreference(brw->wm.prog_bo); - brw->wm.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_WM_PROG, - &c->key, sizeof(c->key), - NULL, 0, - program, program_size, - &c->prog_data, - sizeof(c->prog_data), - &brw->wm.prog_data); +static uint8_t +gen6_gather_workaround(GLenum internalformat) +{ + switch (internalformat) { + case GL_R8I: return WA_SIGN | WA_8BIT; + case GL_R8UI: return WA_8BIT; + case GL_R16I: return WA_SIGN | WA_16BIT; + case GL_R16UI: return WA_16BIT; + default: + /* Note that even though GL_R32I and GL_R32UI have format overrides in + * the surface state, there is no shader w/a required. + */ + return 0; + } } +void +brw_populate_sampler_prog_key_data(struct gl_context *ctx, + const struct gl_program *prog, + unsigned sampler_count, + struct brw_sampler_prog_key_data *key) +{ + struct brw_context *brw = brw_context(ctx); + + for (int s = 0; s < sampler_count; s++) { + key->swizzles[s] = SWIZZLE_NOOP; + + if (!(prog->SamplersUsed & (1 << s))) + continue; + + int unit_id = prog->SamplerUnits[s]; + const struct gl_texture_unit *unit = &ctx->Texture.Unit[unit_id]; + + if (unit->_Current && unit->_Current->Target != GL_TEXTURE_BUFFER) { + const struct gl_texture_object *t = unit->_Current; + const struct gl_texture_image *img = t->Image[0][t->BaseLevel]; + struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit_id); + + const bool alpha_depth = t->DepthMode == GL_ALPHA && + (img->_BaseFormat == GL_DEPTH_COMPONENT || + img->_BaseFormat == GL_DEPTH_STENCIL); + + /* Haswell handles texture swizzling as surface format overrides + * (except for GL_ALPHA); all other platforms need MOVs in the shader. + */ + if (alpha_depth || (brw->gen < 8 && !brw->is_haswell)) + key->swizzles[s] = brw_get_texture_swizzle(ctx, t); + + if (brw->gen < 8 && + sampler->MinFilter != GL_NEAREST && + sampler->MagFilter != GL_NEAREST) { + if (sampler->WrapS == GL_CLAMP) + key->gl_clamp_mask[0] |= 1 << s; + if (sampler->WrapT == GL_CLAMP) + key->gl_clamp_mask[1] |= 1 << s; + if (sampler->WrapR == GL_CLAMP) + key->gl_clamp_mask[2] |= 1 << s; + } + + /* gather4's channel select for green from RG32F is broken; requires + * a shader w/a on IVB; fixable with just SCS on HSW. + */ + if (brw->gen == 7 && !brw->is_haswell && prog->UsesGather) { + if (img->InternalFormat == GL_RG32F) + key->gather_channel_quirk_mask |= 1 << s; + } + + /* Gen6's gather4 is broken for UINT/SINT; we treat them as + * UNORM/FLOAT instead and fix it in the shader. + */ + if (brw->gen == 6 && prog->UsesGather) { + key->gen6_gather_wa[s] = gen6_gather_workaround(img->InternalFormat); + } + + /* If this is a multisample sampler, and uses the CMS MSAA layout, + * then we need to emit slightly different code to first sample the + * MCS surface. + */ + struct intel_texture_object *intel_tex = + intel_texture_object((struct gl_texture_object *)t); + + if (brw->gen >= 7 && + intel_tex->mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) { + key->compressed_multisample_layout_mask |= 1 << s; + } + } + } +} +static bool +brw_wm_state_dirty (struct brw_context *brw) +{ + return brw_state_dirty(brw, + _NEW_BUFFERS | + _NEW_COLOR | + _NEW_DEPTH | + _NEW_FRAG_CLAMP | + _NEW_HINT | + _NEW_LIGHT | + _NEW_LINE | + _NEW_MULTISAMPLE | + _NEW_POLYGON | + _NEW_STENCIL | + _NEW_TEXTURE, + BRW_NEW_FRAGMENT_PROGRAM | + BRW_NEW_REDUCED_PRIMITIVE | + BRW_NEW_STATS_WM | + BRW_NEW_VUE_MAP_GEOM_OUT); +} -static void brw_wm_populate_key( struct brw_context *brw, - struct brw_wm_prog_key *key ) +static void +brw_wm_populate_key(struct brw_context *brw, struct brw_wm_prog_key *key) { - struct gl_context *ctx = &brw->intel.ctx; + struct gl_context *ctx = &brw->ctx; /* BRW_NEW_FRAGMENT_PROGRAM */ - const struct brw_fragment_program *fp = - (struct brw_fragment_program *)brw->fragment_program; + const struct brw_fragment_program *fp = + (struct brw_fragment_program *) brw->fragment_program; + const struct gl_program *prog = (struct gl_program *) brw->fragment_program; GLuint lookup = 0; GLuint line_aa; - GLuint i; + bool program_uses_dfdy = fp->program.UsesDFdy; + const bool multisample_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1; memset(key, 0, sizeof(*key)); /* Build the index for table lookup */ - /* _NEW_COLOR */ - key->alpha_test = ctx->Color.AlphaEnabled; - if (fp->program.UsesKill || - ctx->Color.AlphaEnabled) - lookup |= IZ_PS_KILL_ALPHATEST_BIT; - - if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) - lookup |= IZ_PS_COMPUTES_DEPTH_BIT; - - /* _NEW_DEPTH */ - if (ctx->Depth.Test) - lookup |= IZ_DEPTH_TEST_ENABLE_BIT; - - if (ctx->Depth.Test && - ctx->Depth.Mask) /* ?? */ - lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; - - /* _NEW_STENCIL */ - if (ctx->Stencil._Enabled) { - lookup |= IZ_STENCIL_TEST_ENABLE_BIT; - - if (ctx->Stencil.WriteMask[0] || - ctx->Stencil.WriteMask[ctx->Stencil._BackFace]) - lookup |= IZ_STENCIL_WRITE_ENABLE_BIT; + if (brw->gen < 6) { + /* _NEW_COLOR */ + if (fp->program.UsesKill || ctx->Color.AlphaEnabled) + lookup |= IZ_PS_KILL_ALPHATEST_BIT; + + if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) + lookup |= IZ_PS_COMPUTES_DEPTH_BIT; + + /* _NEW_DEPTH */ + if (ctx->Depth.Test) + lookup |= IZ_DEPTH_TEST_ENABLE_BIT; + + if (ctx->Depth.Test && ctx->Depth.Mask) /* ?? */ + lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; + + /* _NEW_STENCIL | _NEW_BUFFERS */ + if (ctx->Stencil._Enabled) { + lookup |= IZ_STENCIL_TEST_ENABLE_BIT; + + if (ctx->Stencil.WriteMask[0] || + ctx->Stencil.WriteMask[ctx->Stencil._BackFace]) + lookup |= IZ_STENCIL_WRITE_ENABLE_BIT; + } + key->iz_lookup = lookup; } line_aa = AA_NEVER; /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */ if (ctx->Line.SmoothFlag) { - if (brw->intel.reduced_primitive == GL_LINES) { + if (brw->reduced_primitive == GL_LINES) { line_aa = AA_ALWAYS; } - else if (brw->intel.reduced_primitive == GL_TRIANGLES) { + else if (brw->reduced_primitive == GL_TRIANGLES) { if (ctx->Polygon.FrontMode == GL_LINE) { line_aa = AA_SOMETIMES; @@ -351,79 +546,24 @@ static void brw_wm_populate_key( struct brw_context *brw, } } - key->iz_lookup = lookup; key->line_aa = line_aa; - key->stats_wm = brw->intel.stats_wm; - /* BRW_NEW_WM_INPUT_DIMENSIONS */ - key->proj_attrib_mask = brw->wm.input_size_masks[4-1]; + /* _NEW_HINT */ + key->high_quality_derivatives = + ctx->Hint.FragmentShaderDerivative == GL_NICEST; + + if (brw->gen < 6) + key->stats_wm = brw->stats_wm; /* _NEW_LIGHT */ key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT); - /* _NEW_HINT */ - key->linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST); - /* _NEW_FRAG_CLAMP | _NEW_BUFFERS */ key->clamp_fragment_color = ctx->Color._ClampFragmentColor; /* _NEW_TEXTURE */ - for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { - const struct gl_texture_unit *unit = &ctx->Texture.Unit[i]; - - if (unit->_ReallyEnabled) { - const struct gl_texture_object *t = unit->_Current; - const struct gl_texture_image *img = t->Image[0][t->BaseLevel]; - struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, i); - int swizzles[SWIZZLE_NIL + 1] = { - SWIZZLE_X, - SWIZZLE_Y, - SWIZZLE_Z, - SWIZZLE_W, - SWIZZLE_ZERO, - SWIZZLE_ONE, - SWIZZLE_NIL - }; - - /* GL_DEPTH_TEXTURE_MODE is normally handled through - * brw_wm_surface_state, but it applies to shadow compares as - * well and our shadow compares always return the result in - * all 4 channels. - */ - if (sampler->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB) { - if (sampler->DepthMode == GL_ALPHA) { - swizzles[0] = SWIZZLE_ZERO; - swizzles[1] = SWIZZLE_ZERO; - swizzles[2] = SWIZZLE_ZERO; - } else if (sampler->DepthMode == GL_LUMINANCE) { - swizzles[3] = SWIZZLE_ONE; - } else if (sampler->DepthMode == GL_RED) { - /* See table 3.23 of the GL 3.0 spec. */ - swizzles[1] = SWIZZLE_ZERO; - swizzles[2] = SWIZZLE_ZERO; - swizzles[3] = SWIZZLE_ONE; - } - } - - if (img->InternalFormat == GL_YCBCR_MESA) { - key->yuvtex_mask |= 1 << i; - if (img->TexFormat == MESA_FORMAT_YCBCR) - key->yuvtex_swap_mask |= 1 << i; - } - - key->tex_swizzles[i] = - MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)], - swizzles[GET_SWZ(t->_Swizzle, 1)], - swizzles[GET_SWZ(t->_Swizzle, 2)], - swizzles[GET_SWZ(t->_Swizzle, 3)]); - } - else { - key->tex_swizzles[i] = SWIZZLE_NOOP; - } - } - - /* Shadow */ - key->shadowtex_mask = fp->program.Base.ShadowSamplers; + brw_populate_sampler_prog_key_data(ctx, prog, brw->wm.base.sampler_count, + &key->tex); /* _NEW_BUFFERS */ /* @@ -446,59 +586,137 @@ static void brw_wm_populate_key( struct brw_context *brw, * For DRI2 the origin_x/y will always be (0,0) but we still need the * drawable height in order to invert the Y axis. */ - if (fp->program.Base.InputsRead & FRAG_BIT_WPOS) { - key->drawable_height = ctx->DrawBuffer->Height; - key->render_to_fbo = ctx->DrawBuffer->Name != 0; + if (fp->program.Base.InputsRead & VARYING_BIT_POS) { + key->drawable_height = _mesa_geometric_height(ctx->DrawBuffer); + } + + if ((fp->program.Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) { + key->render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); } /* _NEW_BUFFERS */ key->nr_color_regions = ctx->DrawBuffer->_NumColorDrawBuffers; - /* CACHE_NEW_VS_PROG */ - key->vp_outputs_written = brw->vs.prog_data->outputs_written; + /* _NEW_MULTISAMPLE, _NEW_COLOR, _NEW_BUFFERS */ + key->replicate_alpha = ctx->DrawBuffer->_NumColorDrawBuffers > 1 && + (ctx->Multisample.SampleAlphaToCoverage || ctx->Color.AlphaEnabled); + + /* _NEW_BUFFERS _NEW_MULTISAMPLE */ + /* Ignore sample qualifier while computing this flag. */ + key->persample_shading = + _mesa_get_min_invocations_per_fragment(ctx, &fp->program, true) > 1; + if (key->persample_shading) + key->persample_2x = _mesa_geometric_samples(ctx->DrawBuffer) == 2; + + key->compute_pos_offset = + _mesa_get_min_invocations_per_fragment(ctx, &fp->program, false) > 1 && + fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_POS; + + key->compute_sample_id = + multisample_fbo && + ctx->Multisample.Enabled && + (fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_ID); + + /* BRW_NEW_VUE_MAP_GEOM_OUT */ + if (brw->gen < 6 || _mesa_bitcount_64(fp->program.Base.InputsRead & + BRW_FS_VARYING_INPUT_MASK) > 16) + key->input_slots_valid = brw->vue_map_geom_out.slots_valid; + + + /* _NEW_COLOR | _NEW_BUFFERS */ + /* Pre-gen6, the hardware alpha test always used each render + * target's alpha to do alpha test, as opposed to render target 0's alpha + * like GL requires. Fix that by building the alpha test into the + * shader, and we'll skip enabling the fixed function alpha test. + */ + if (brw->gen < 6 && ctx->DrawBuffer->_NumColorDrawBuffers > 1 && + ctx->Color.AlphaEnabled) { + key->alpha_test_func = ctx->Color.AlphaFunc; + key->alpha_test_ref = ctx->Color.AlphaRef; + } /* The unique fragment program ID */ key->program_string_id = fp->id; } - -static void brw_prepare_wm_prog(struct brw_context *brw) +void +brw_upload_wm_prog(struct brw_context *brw) { + struct gl_context *ctx = &brw->ctx; + struct gl_shader_program *current = ctx->_Shader->_CurrentFragmentProgram; struct brw_wm_prog_key key; struct brw_fragment_program *fp = (struct brw_fragment_program *) brw->fragment_program; - + + if (!brw_wm_state_dirty(brw)) + return; + brw_wm_populate_key(brw, &key); - /* Make an early check for the key. - */ - drm_intel_bo_unreference(brw->wm.prog_bo); - brw->wm.prog_bo = brw_search_cache(&brw->cache, BRW_WM_PROG, - &key, sizeof(key), - NULL, 0, - &brw->wm.prog_data); - if (brw->wm.prog_bo == NULL) - do_wm_prog(brw, fp, &key); + if (!brw_search_cache(&brw->cache, BRW_CACHE_FS_PROG, + &key, sizeof(key), + &brw->wm.base.prog_offset, &brw->wm.prog_data)) { + bool success = brw_codegen_wm_prog(brw, current, fp, &key); + (void) success; + assert(success); + } + brw->wm.base.prog_data = &brw->wm.prog_data->base; } +bool +brw_fs_precompile(struct gl_context *ctx, + struct gl_shader_program *shader_prog, + struct gl_program *prog) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_wm_prog_key key; + + struct gl_fragment_program *fp = (struct gl_fragment_program *) prog; + struct brw_fragment_program *bfp = brw_fragment_program(fp); + bool program_uses_dfdy = fp->UsesDFdy; + + memset(&key, 0, sizeof(key)); + + if (brw->gen < 6) { + if (fp->UsesKill) + key.iz_lookup |= IZ_PS_KILL_ALPHATEST_BIT; + + if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) + key.iz_lookup |= IZ_PS_COMPUTES_DEPTH_BIT; + + /* Just assume depth testing. */ + key.iz_lookup |= IZ_DEPTH_TEST_ENABLE_BIT; + key.iz_lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; + } + + if (brw->gen < 6 || _mesa_bitcount_64(fp->Base.InputsRead & + BRW_FS_VARYING_INPUT_MASK) > 16) + key.input_slots_valid = fp->Base.InputsRead | VARYING_BIT_POS; + + brw_setup_tex_for_precompile(brw, &key.tex, &fp->Base); -const struct brw_tracked_state brw_wm_prog = { - .dirty = { - .mesa = (_NEW_COLOR | - _NEW_DEPTH | - _NEW_HINT | - _NEW_STENCIL | - _NEW_POLYGON | - _NEW_LINE | - _NEW_LIGHT | - _NEW_FRAG_CLAMP | - _NEW_BUFFERS | - _NEW_TEXTURE), - .brw = (BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_WM_INPUT_DIMENSIONS | - BRW_NEW_REDUCED_PRIMITIVE), - .cache = CACHE_NEW_VS_PROG, - }, - .prepare = brw_prepare_wm_prog -}; + if (fp->Base.InputsRead & VARYING_BIT_POS) { + key.drawable_height = ctx->DrawBuffer->Height; + } + + key.nr_color_regions = _mesa_bitcount_64(fp->Base.OutputsWritten & + ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) | + BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK))); + + if ((fp->Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) { + key.render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer) || + key.nr_color_regions > 1; + } + + key.program_string_id = bfp->id; + uint32_t old_prog_offset = brw->wm.base.prog_offset; + struct brw_wm_prog_data *old_prog_data = brw->wm.prog_data; + + bool success = brw_codegen_wm_prog(brw, shader_prog, bfp, &key); + + brw->wm.base.prog_offset = old_prog_offset; + brw->wm.prog_data = old_prog_data; + + return success; +}