X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_wm.c;h=bfb36db3fcc1810167f76e617e8e82cd7d10ec83;hb=098acf6c843;hp=4a7225c7228d3f930cf7740b5250e8198a96b60a;hpb=d1056541e239dfcee0ad6af2fd2d9fab37dbf025;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 4a7225c7228..bfb36db3fcc 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -39,98 +39,13 @@ #include "glsl/ralloc.h" -/** Return number of src args for given instruction */ -GLuint brw_wm_nr_args( GLuint opcode ) -{ - switch (opcode) { - case WM_FRONTFACING: - case WM_PIXELXY: - return 0; - case WM_CINTERP: - case WM_WPOSXY: - case WM_DELTAXY: - return 1; - case WM_LINTERP: - case WM_PIXELW: - return 2; - case WM_FB_WRITE: - case WM_PINTERP: - return 3; - default: - assert(opcode < MAX_OPCODE); - return _mesa_num_inst_src_regs(opcode); - } -} - - -GLuint brw_wm_is_scalar_result( GLuint opcode ) -{ - switch (opcode) { - case OPCODE_COS: - case OPCODE_EX2: - case OPCODE_LG2: - case OPCODE_POW: - case OPCODE_RCP: - case OPCODE_RSQ: - case OPCODE_SIN: - case OPCODE_DP2: - case OPCODE_DP3: - case OPCODE_DP4: - case OPCODE_DPH: - case OPCODE_DST: - return 1; - - default: - return 0; - } -} - - -/** - * Do GPU code generation for non-GLSL shader. non-GLSL shaders have - * no flow control instructions so we can more readily do SSA-style - * optimizations. - */ -static void -brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) -{ - /* Augment fragment program. Add instructions for pre- and - * post-fragment-program tasks such as interpolation and fogging. - */ - brw_wm_pass_fp(c); - - /* Translate to intermediate representation. Build register usage - * chains. - */ - brw_wm_pass0(c); - - /* Dead code removal. - */ - brw_wm_pass1(c); - - /* Register allocation. - * Divide by two because we operate on 16 pixels at a time and require - * two GRF entries for each logical shader register. - */ - c->grf_limit = BRW_WM_MAX_GRF / 2; - - brw_wm_pass2(c); - - /* how many general-purpose registers are used */ - c->prog_data.reg_blocks = brw_register_blocks(c->max_wm_grf); - - /* Emit GEN4 code. - */ - brw_wm_emit(c); -} - - /** * Return a bitfield where bit n is set if barycentric interpolation mode n * (see enum brw_wm_barycentric_interp_mode) is needed by the fragment shader. */ static unsigned -brw_compute_barycentric_interp_modes(bool shade_model_flat, +brw_compute_barycentric_interp_modes(struct brw_context *brw, + bool shade_model_flat, const struct gl_fragment_program *fprog) { unsigned barycentric_interp_modes = 0; @@ -154,11 +69,18 @@ brw_compute_barycentric_interp_modes(bool shade_model_flat, if (attr == FRAG_ATTRIB_WPOS || attr == FRAG_ATTRIB_FACE) continue; + /* Determine the set (or sets) of barycentric coordinates needed to + * interpolate this variable. Note that when + * brw->needs_unlit_centroid_workaround is set, centroid interpolation + * uses PIXEL interpolation for unlit pixels and CENTROID interpolation + * for lit pixels, so we need both sets of barycentric coordinates. + */ if (interp_qualifier == INTERP_QUALIFIER_NOPERSPECTIVE) { if (is_centroid) { barycentric_interp_modes |= 1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC; - } else { + } + if (!is_centroid || brw->needs_unlit_centroid_workaround) { barycentric_interp_modes |= 1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC; } @@ -168,7 +90,8 @@ brw_compute_barycentric_interp_modes(bool shade_model_flat, if (is_centroid) { barycentric_interp_modes |= 1 << BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC; - } else { + } + if (!is_centroid || brw->needs_unlit_centroid_workaround) { barycentric_interp_modes |= 1 << BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC; } @@ -244,6 +167,35 @@ brw_wm_payload_setup(struct brw_context *brw, } } +bool +brw_wm_prog_data_compare(const void *in_a, const void *in_b, + int aux_size, const void *in_key) +{ + const struct brw_wm_prog_data *a = in_a; + const struct brw_wm_prog_data *b = in_b; + + /* Compare all the struct up to the pointers. */ + if (memcmp(a, b, offsetof(struct brw_wm_prog_data, param))) + return false; + + if (memcmp(a->param, b->param, a->nr_params * sizeof(void *))) + return false; + + if (memcmp(a->pull_param, b->pull_param, a->nr_pull_params * sizeof(void *))) + return false; + + return true; +} + +void +brw_wm_prog_data_free(const void *in_prog_data) +{ + const struct brw_wm_prog_data *prog_data = in_prog_data; + + ralloc_free((void *)prog_data->param); + ralloc_free((void *)prog_data->pull_param); +} + /** * All Mesa program -> GPU code generation goes through this function. * Depending on the instructions used (i.e. flow control instructions) @@ -257,8 +209,12 @@ bool do_wm_prog(struct brw_context *brw, struct intel_context *intel = &brw->intel; struct brw_wm_compile *c; const GLuint *program; + struct gl_shader *fs = NULL; GLuint program_size; + if (prog) + fs = prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; + c = brw->wm.compile_data; if (c == NULL) { brw->wm.compile_data = rzalloc(NULL, struct brw_wm_compile); @@ -271,46 +227,48 @@ bool do_wm_prog(struct brw_context *brw, return false; } } else { - void *instruction = c->instruction; - void *prog_instructions = c->prog_instructions; - void *vreg = c->vreg; - void *refs = c->refs; memset(c, 0, sizeof(*brw->wm.compile_data)); - c->instruction = instruction; - c->prog_instructions = prog_instructions; - c->vreg = vreg; - c->refs = refs; } + + /* Allocate the references to the uniforms that will end up in the + * prog_data associated with the compiled program, and which will be freed + * by the state cache. + */ + if (fs) { + int param_count = fs->num_uniform_components; + /* The backend also sometimes adds params for texture size. */ + param_count += 2 * BRW_MAX_TEX_UNIT; + + c->prog_data.param = rzalloc_array(c, const float *, param_count); + c->prog_data.pull_param = rzalloc_array(c, const float *, param_count); + } else { + /* brw_wm_pass0.c will also add references to 0.0 and 1.0 which are + * uploaded as push parameters. + */ + int param_count = (fp->program.Base.Parameters->NumParameters + 2) * 4; + c->prog_data.param = rzalloc_array(c, const float *, param_count); + /* The old backend never does pull constants. */ + c->prog_data.pull_param = NULL; + } + memcpy(&c->key, key, sizeof(*key)); c->fp = fp; - c->env_param = brw->intel.ctx.FragmentProgram.Parameters; brw_init_compile(brw, &c->func, c); c->prog_data.barycentric_interp_modes = - brw_compute_barycentric_interp_modes(c->key.flat_shade, &fp->program); + brw_compute_barycentric_interp_modes(brw, c->key.flat_shade, + &fp->program); - if (prog && prog->_LinkedShaders[MESA_SHADER_FRAGMENT]) { - if (!brw_wm_fs_emit(brw, c, prog)) - return false; - } else { - if (!c->instruction) { - c->instruction = rzalloc_array(c, struct brw_wm_instruction, BRW_WM_MAX_INSN); - c->prog_instructions = rzalloc_array(c, struct prog_instruction, BRW_WM_MAX_INSN); - c->vreg = rzalloc_array(c, struct brw_wm_value, BRW_WM_MAX_VREG); - c->refs = rzalloc_array(c, struct brw_wm_ref, BRW_WM_MAX_REF); - } - - /* Fallback for fixed function and ARB_fp shaders. */ - c->dispatch_width = 16; - brw_wm_payload_setup(brw, c); - brw_wm_non_glsl_emit(brw, c); - c->prog_data.dispatch_width = 16; - } + brw_wm_fs_emit(brw, c, prog); /* Scratch space is used for register spilling */ if (c->last_scratch) { + perf_debug("Fragment shader triggered register spilling. " + "Try reducing the number of live scalar values to " + "improve performance.\n"); + c->prog_data.total_scratch = brw_get_scratch_size(c->last_scratch); brw_get_scratch_bo(intel, &brw->wm.scratch_bo, @@ -333,88 +291,137 @@ bool do_wm_prog(struct brw_context *brw, return true; } +static bool +key_debug(const char *name, int a, int b) +{ + if (a != b) { + perf_debug(" %s %d->%d\n", name, a, b); + return true; + } else { + return false; + } +} + +bool +brw_debug_recompile_sampler_key(const struct brw_sampler_prog_key_data *old_key, + const struct brw_sampler_prog_key_data *key) +{ + bool found = false; + + for (unsigned int i = 0; i < MAX_SAMPLERS; i++) { + found |= key_debug("EXT_texture_swizzle or DEPTH_TEXTURE_MODE", + old_key->swizzles[i], key->swizzles[i]); + } + found |= key_debug("GL_CLAMP enabled on any texture unit's 1st coordinate", + old_key->gl_clamp_mask[0], key->gl_clamp_mask[0]); + found |= key_debug("GL_CLAMP enabled on any texture unit's 2nd coordinate", + old_key->gl_clamp_mask[1], key->gl_clamp_mask[1]); + found |= key_debug("GL_CLAMP enabled on any texture unit's 3rd coordinate", + old_key->gl_clamp_mask[2], key->gl_clamp_mask[2]); + found |= key_debug("GL_MESA_ycbcr texturing\n", + old_key->yuvtex_mask, key->yuvtex_mask); + found |= key_debug("GL_MESA_ycbcr UV swapping\n", + old_key->yuvtex_swap_mask, key->yuvtex_swap_mask); + + return found; +} + void -brw_populate_sampler_prog_key_data(struct gl_context *ctx, - struct brw_sampler_prog_key_data *key, - int i) +brw_wm_debug_recompile(struct brw_context *brw, + struct gl_shader_program *prog, + const struct brw_wm_prog_key *key) { - const struct gl_texture_unit *unit = &ctx->Texture.Unit[i]; - - if (unit->_ReallyEnabled && unit->_Current->Target != GL_TEXTURE_BUFFER) { - const struct gl_texture_object *t = unit->_Current; - const struct gl_texture_image *img = t->Image[0][t->BaseLevel]; - struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, i); - int swizzles[SWIZZLE_NIL + 1] = { - SWIZZLE_X, - SWIZZLE_Y, - SWIZZLE_Z, - SWIZZLE_W, - SWIZZLE_ZERO, - SWIZZLE_ONE, - SWIZZLE_NIL - }; - - if (img->_BaseFormat == GL_DEPTH_COMPONENT || - img->_BaseFormat == GL_DEPTH_STENCIL) { - if (sampler->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB) - key->compare_funcs[i] = sampler->CompareFunc; - - /* We handle GL_DEPTH_TEXTURE_MODE here instead of as surface format - * overrides because shadow comparison always returns the result of - * the comparison in all channels anyway. - */ - switch (sampler->DepthMode) { - case GL_ALPHA: - swizzles[0] = SWIZZLE_ZERO; - swizzles[1] = SWIZZLE_ZERO; - swizzles[2] = SWIZZLE_ZERO; - swizzles[3] = SWIZZLE_X; - break; - case GL_LUMINANCE: - swizzles[0] = SWIZZLE_X; - swizzles[1] = SWIZZLE_X; - swizzles[2] = SWIZZLE_X; - swizzles[3] = SWIZZLE_ONE; - break; - case GL_INTENSITY: - swizzles[0] = SWIZZLE_X; - swizzles[1] = SWIZZLE_X; - swizzles[2] = SWIZZLE_X; - swizzles[3] = SWIZZLE_X; - break; - case GL_RED: - swizzles[0] = SWIZZLE_X; - swizzles[1] = SWIZZLE_ZERO; - swizzles[2] = SWIZZLE_ZERO; - swizzles[3] = SWIZZLE_ONE; - break; - } - } + struct brw_cache_item *c = NULL; + const struct brw_wm_prog_key *old_key = NULL; + bool found = false; - if (img->InternalFormat == GL_YCBCR_MESA) { - key->yuvtex_mask |= 1 << i; - if (img->TexFormat == MESA_FORMAT_YCBCR) - key->yuvtex_swap_mask |= 1 << i; - } + perf_debug("Recompiling fragment shader for program %d\n", prog->Name); + + for (unsigned int i = 0; i < brw->cache.size; i++) { + for (c = brw->cache.items[i]; c; c = c->next) { + if (c->cache_id == BRW_WM_PROG) { + old_key = c->key; - key->swizzles[i] = - MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)], - swizzles[GET_SWZ(t->_Swizzle, 1)], - swizzles[GET_SWZ(t->_Swizzle, 2)], - swizzles[GET_SWZ(t->_Swizzle, 3)]); - - if (sampler->MinFilter != GL_NEAREST && - sampler->MagFilter != GL_NEAREST) { - if (sampler->WrapS == GL_CLAMP) - key->gl_clamp_mask[0] |= 1 << i; - if (sampler->WrapT == GL_CLAMP) - key->gl_clamp_mask[1] |= 1 << i; - if (sampler->WrapR == GL_CLAMP) - key->gl_clamp_mask[2] |= 1 << i; + if (old_key->program_string_id == key->program_string_id) + break; + } } + if (c) + break; + } + + if (!c) { + perf_debug(" Didn't find previous compile in the shader cache for " + "debug\n"); + return; } - else { - key->swizzles[i] = SWIZZLE_NOOP; + + found |= key_debug("alphatest, computed depth, depth test, or depth write", + old_key->iz_lookup, key->iz_lookup); + found |= key_debug("depth statistics", old_key->stats_wm, key->stats_wm); + found |= key_debug("flat shading", old_key->flat_shade, key->flat_shade); + found |= key_debug("number of color buffers", old_key->nr_color_regions, key->nr_color_regions); + found |= key_debug("rendering to FBO", old_key->render_to_fbo, key->render_to_fbo); + found |= key_debug("fragment color clamping", old_key->clamp_fragment_color, key->clamp_fragment_color); + found |= key_debug("line smoothing", old_key->line_aa, key->line_aa); + found |= key_debug("proj_attrib_mask", old_key->proj_attrib_mask, key->proj_attrib_mask); + found |= key_debug("renderbuffer height", old_key->drawable_height, key->drawable_height); + found |= key_debug("vertex shader outputs", old_key->vp_outputs_written, key->vp_outputs_written); + + found |= brw_debug_recompile_sampler_key(&old_key->tex, &key->tex); + + if (!found) { + perf_debug(" Something else\n"); + } +} + +void +brw_populate_sampler_prog_key_data(struct gl_context *ctx, + const struct gl_program *prog, + struct brw_sampler_prog_key_data *key) +{ + struct intel_context *intel = intel_context(ctx); + + for (int s = 0; s < MAX_SAMPLERS; s++) { + key->swizzles[s] = SWIZZLE_NOOP; + + if (!(prog->SamplersUsed & (1 << s))) + continue; + + int unit_id = prog->SamplerUnits[s]; + const struct gl_texture_unit *unit = &ctx->Texture.Unit[unit_id]; + + if (unit->_ReallyEnabled && unit->_Current->Target != GL_TEXTURE_BUFFER) { + const struct gl_texture_object *t = unit->_Current; + const struct gl_texture_image *img = t->Image[0][t->BaseLevel]; + struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit_id); + + const bool alpha_depth = t->DepthMode == GL_ALPHA && + (img->_BaseFormat == GL_DEPTH_COMPONENT || + img->_BaseFormat == GL_DEPTH_STENCIL); + + /* Haswell handles texture swizzling as surface format overrides + * (except for GL_ALPHA); all other platforms need MOVs in the shader. + */ + if (!intel->is_haswell || alpha_depth) + key->swizzles[s] = brw_get_texture_swizzle(t); + + if (img->InternalFormat == GL_YCBCR_MESA) { + key->yuvtex_mask |= 1 << s; + if (img->TexFormat == MESA_FORMAT_YCBCR) + key->yuvtex_swap_mask |= 1 << s; + } + + if (sampler->MinFilter != GL_NEAREST && + sampler->MagFilter != GL_NEAREST) { + if (sampler->WrapS == GL_CLAMP) + key->gl_clamp_mask[0] |= 1 << s; + if (sampler->WrapT == GL_CLAMP) + key->gl_clamp_mask[1] |= 1 << s; + if (sampler->WrapR == GL_CLAMP) + key->gl_clamp_mask[2] |= 1 << s; + } + } } } @@ -429,23 +436,14 @@ static void brw_wm_populate_key( struct brw_context *brw, const struct gl_program *prog = (struct gl_program *) brw->fragment_program; GLuint lookup = 0; GLuint line_aa; - GLuint i; - - /* As a temporary measure we assume that all programs use dFdy() (and hence - * need to be compiled differently depending on whether we're rendering to - * an FBO). FIXME: set this bool correctly based on the contents of the - * program. - */ - bool program_uses_dfdy = true; + bool program_uses_dfdy = fp->program.UsesDFdy; memset(key, 0, sizeof(*key)); /* Build the index for table lookup */ - /* _NEW_COLOR */ - key->alpha_test = ctx->Color.AlphaEnabled; - if (intel->gen < 6) { + /* _NEW_COLOR */ if (fp->program.UsesKill || ctx->Color.AlphaEnabled) lookup |= IZ_PS_KILL_ALPHATEST_BIT; @@ -497,10 +495,18 @@ static void brw_wm_populate_key( struct brw_context *brw, } key->line_aa = line_aa; - key->stats_wm = brw->intel.stats_wm; + + if (intel->gen < 6) + key->stats_wm = brw->intel.stats_wm; /* BRW_NEW_WM_INPUT_DIMENSIONS */ - key->proj_attrib_mask = brw->wm.input_size_masks[4-1]; + /* Only set this for fixed function. The optimization it enables isn't + * useful for programs using shaders. + */ + if (ctx->Shader.CurrentFragmentProgram) + key->proj_attrib_mask = 0xffffffff; + else + key->proj_attrib_mask = brw->wm.input_size_masks[4-1]; /* _NEW_LIGHT */ key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT); @@ -509,10 +515,7 @@ static void brw_wm_populate_key( struct brw_context *brw, key->clamp_fragment_color = ctx->Color._ClampFragmentColor; /* _NEW_TEXTURE */ - for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { - if (prog->TexturesUsed[i]) - brw_populate_sampler_prog_key_data(ctx, &key->tex, i); - } + brw_populate_sampler_prog_key_data(ctx, prog, &key->tex); /* _NEW_BUFFERS */ /* @@ -545,9 +548,12 @@ static void brw_wm_populate_key( struct brw_context *brw, /* _NEW_BUFFERS */ key->nr_color_regions = ctx->DrawBuffer->_NumColorDrawBuffers; + /* _NEW_MULTISAMPLE */ + key->sample_alpha_to_coverage = ctx->Multisample.SampleAlphaToCoverage; /* CACHE_NEW_VS_PROG */ - key->vp_outputs_written = brw->vs.prog_data->outputs_written; + if (intel->gen < 6) + key->vp_outputs_written = brw->vs.prog_data->outputs_written; /* The unique fragment program ID */ key->program_string_id = fp->id; @@ -586,7 +592,8 @@ const struct brw_tracked_state brw_wm_prog = { _NEW_LIGHT | _NEW_FRAG_CLAMP | _NEW_BUFFERS | - _NEW_TEXTURE), + _NEW_TEXTURE | + _NEW_MULTISAMPLE), .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_WM_INPUT_DIMENSIONS | BRW_NEW_REDUCED_PRIMITIVE),