X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_wm.c;h=f746b31f66c6266431e0d6d63b0494191a6da154;hb=47b556fbcaea4660b21481e40d89167d883d47f5;hp=fb24379c90b174d56627782bfc89ada8a78c9aaa;hpb=7b5ad23c7f7f9016f725cb1caa3cf8971aeedbc8;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index fb24379c90b..f746b31f66c 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -30,10 +30,13 @@ */ #include "brw_context.h" -#include "brw_util.h" #include "brw_wm.h" #include "brw_state.h" +#include "main/formats.h" +#include "main/samplerobj.h" +#include "program/prog_parameter.h" +#include "glsl/ralloc.h" /** Return number of src args for given instruction */ GLuint brw_wm_nr_args( GLuint opcode ) @@ -69,6 +72,7 @@ GLuint brw_wm_is_scalar_result( GLuint opcode ) case OPCODE_RCP: case OPCODE_RSQ: case OPCODE_SIN: + case OPCODE_DP2: case OPCODE_DP3: case OPCODE_DP4: case OPCODE_DPH: @@ -112,51 +116,96 @@ brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) brw_wm_pass2(c); /* how many general-purpose registers are used */ - c->prog_data.total_grf = c->max_wm_grf; - - /* Scratch space is used for register spilling */ - if (c->last_scratch) { - c->prog_data.total_scratch = c->last_scratch + 0x40; - } - else { - c->prog_data.total_scratch = 0; - } + c->prog_data.reg_blocks = brw_register_blocks(c->max_wm_grf); /* Emit GEN4 code. */ brw_wm_emit(c); } +void +brw_wm_payload_setup(struct brw_context *brw, + struct brw_wm_compile *c) +{ + struct intel_context *intel = &brw->intel; + bool uses_depth = (c->fp->program.Base.InputsRead & + (1 << FRAG_ATTRIB_WPOS)) != 0; + + if (intel->gen >= 6) { + /* R0-1: masks, pixel X/Y coordinates. */ + c->nr_payload_regs = 2; + /* R2: only for 32-pixel dispatch.*/ + /* R3-4: perspective pixel location barycentric */ + c->nr_payload_regs += 2; + /* R5-6: perspective pixel location bary for dispatch width != 8 */ + if (c->dispatch_width == 16) { + c->nr_payload_regs += 2; + } + /* R7-10: perspective centroid barycentric */ + /* R11-14: perspective sample barycentric */ + /* R15-18: linear pixel location barycentric */ + /* R19-22: linear centroid barycentric */ + /* R23-26: linear sample barycentric */ + + /* R27: interpolated depth if uses source depth */ + if (uses_depth) { + c->source_depth_reg = c->nr_payload_regs; + c->nr_payload_regs++; + if (c->dispatch_width == 16) { + /* R28: interpolated depth if not 8-wide. */ + c->nr_payload_regs++; + } + } + /* R29: interpolated W set if GEN6_WM_USES_SOURCE_W. + */ + if (uses_depth) { + c->source_w_reg = c->nr_payload_regs; + c->nr_payload_regs++; + if (c->dispatch_width == 16) { + /* R30: interpolated W if not 8-wide. */ + c->nr_payload_regs++; + } + } + /* R31: MSAA position offsets. */ + /* R32-: bary for 32-pixel. */ + /* R58-59: interp W for 32-pixel. */ + + if (c->fp->program.Base.OutputsWritten & + BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { + c->source_depth_to_render_target = GL_TRUE; + c->computes_depth = GL_TRUE; + } + } else { + brw_wm_lookup_iz(intel, c); + } +} /** * All Mesa program -> GPU code generation goes through this function. * Depending on the instructions used (i.e. flow control instructions) * we'll use one of two code generators. */ -static void do_wm_prog( struct brw_context *brw, - struct brw_fragment_program *fp, - struct brw_wm_prog_key *key) +bool do_wm_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_fragment_program *fp, + struct brw_wm_prog_key *key) { + struct intel_context *intel = &brw->intel; struct brw_wm_compile *c; const GLuint *program; GLuint program_size; c = brw->wm.compile_data; if (c == NULL) { - brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data)); + brw->wm.compile_data = rzalloc(NULL, struct brw_wm_compile); c = brw->wm.compile_data; if (c == NULL) { /* Ouch - big out of memory problem. Can't continue * without triggering a segfault, no way to signal, * so just return. */ - return; + return false; } - c->instruction = _mesa_calloc(BRW_WM_MAX_INSN * sizeof(*c->instruction)); - c->prog_instructions = _mesa_calloc(BRW_WM_MAX_INSN * - sizeof(*c->prog_instructions)); - c->vreg = _mesa_calloc(BRW_WM_MAX_VREG * sizeof(*c->vreg)); - c->refs = _mesa_calloc(BRW_WM_MAX_REF * sizeof(*c->refs)); } else { void *instruction = c->instruction; void *prog_instructions = c->prog_instructions; @@ -173,39 +222,48 @@ static void do_wm_prog( struct brw_context *brw, c->fp = fp; c->env_param = brw->intel.ctx.FragmentProgram.Parameters; - brw_init_compile(brw, &c->func); + brw_init_compile(brw, &c->func, c); - /* temporary sanity check assertion */ - ASSERT(fp->isGLSL == brw_wm_is_glsl(&c->fp->program)); + if (prog && prog->FragmentProgram) { + if (!brw_wm_fs_emit(brw, c, prog)) + return false; + } else { + if (!c->instruction) { + c->instruction = rzalloc_array(c, struct brw_wm_instruction, BRW_WM_MAX_INSN); + c->prog_instructions = rzalloc_array(c, struct prog_instruction, BRW_WM_MAX_INSN); + c->vreg = rzalloc_array(c, struct brw_wm_value, BRW_WM_MAX_VREG); + c->refs = rzalloc_array(c, struct brw_wm_ref, BRW_WM_MAX_REF); + } - /* - * Shader which use GLSL features such as flow control are handled - * differently from "simple" shaders. - */ - if (fp->isGLSL) { - c->dispatch_width = 8; - brw_wm_glsl_emit(brw, c); - } - else { + /* Fallback for fixed function and ARB_fp shaders. */ c->dispatch_width = 16; + brw_wm_payload_setup(brw, c); brw_wm_non_glsl_emit(brw, c); + c->prog_data.dispatch_width = 16; } - if (INTEL_DEBUG & DEBUG_WM) + /* Scratch space is used for register spilling */ + if (c->last_scratch) { + c->prog_data.total_scratch = brw_get_scratch_size(c->last_scratch); + + brw_get_scratch_bo(intel, &brw->wm.scratch_bo, + c->prog_data.total_scratch * brw->wm_max_threads); + } + + if (unlikely(INTEL_DEBUG & DEBUG_WM)) fprintf(stderr, "\n"); /* get the program */ program = brw_get_program(&c->func, &program_size); - dri_bo_unreference(brw->wm.prog_bo); - brw->wm.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_WM_PROG, - &c->key, sizeof(c->key), - NULL, 0, - program, program_size, - &c->prog_data, - sizeof(c->prog_data), - &brw->wm.prog_data); + brw_upload_cache(&brw->cache, BRW_WM_PROG, + &c->key, sizeof(c->key), + program, program_size, + &c->prog_data, sizeof(c->prog_data), + &brw->wm.prog_offset, &brw->wm.prog_data); + + return true; } @@ -213,11 +271,10 @@ static void do_wm_prog( struct brw_context *brw, static void brw_wm_populate_key( struct brw_context *brw, struct brw_wm_prog_key *key ) { - GLcontext *ctx = &brw->intel.ctx; + struct gl_context *ctx = &brw->intel.ctx; /* BRW_NEW_FRAGMENT_PROGRAM */ const struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program; - GLboolean uses_depth = (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0; GLuint lookup = 0; GLuint line_aa; GLuint i; @@ -227,6 +284,7 @@ static void brw_wm_populate_key( struct brw_context *brw, /* Build the index for table lookup */ /* _NEW_COLOR */ + key->alpha_test = ctx->Color.AlphaEnabled; if (fp->program.UsesKill || ctx->Color.AlphaEnabled) lookup |= IZ_PS_KILL_ALPHATEST_BIT; @@ -276,12 +334,10 @@ static void brw_wm_populate_key( struct brw_context *brw, } } } - - brw_wm_lookup_iz(line_aa, - lookup, - uses_depth, - key); + key->iz_lookup = lookup; + key->line_aa = line_aa; + key->stats_wm = brw->intel.stats_wm; /* BRW_NEW_WM_INPUT_DIMENSIONS */ key->proj_attrib_mask = brw->wm.input_size_masks[4-1]; @@ -289,8 +345,8 @@ static void brw_wm_populate_key( struct brw_context *brw, /* _NEW_LIGHT */ key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT); - /* _NEW_HINT */ - key->linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST); + /* _NEW_FRAG_CLAMP | _NEW_BUFFERS */ + key->clamp_fragment_color = ctx->Color._ClampFragmentColor; /* _NEW_TEXTURE */ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { @@ -299,22 +355,66 @@ static void brw_wm_populate_key( struct brw_context *brw, if (unit->_ReallyEnabled) { const struct gl_texture_object *t = unit->_Current; const struct gl_texture_image *img = t->Image[0][t->BaseLevel]; + struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, i); + int swizzles[SWIZZLE_NIL + 1] = { + SWIZZLE_X, + SWIZZLE_Y, + SWIZZLE_Z, + SWIZZLE_W, + SWIZZLE_ZERO, + SWIZZLE_ONE, + SWIZZLE_NIL + }; + + /* GL_DEPTH_TEXTURE_MODE is normally handled through + * brw_wm_surface_state, but it applies to shadow compares as + * well and our shadow compares always return the result in + * all 4 channels. + */ + if (sampler->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB) { + key->compare_funcs[i] = sampler->CompareFunc; + + if (sampler->DepthMode == GL_ALPHA) { + swizzles[0] = SWIZZLE_ZERO; + swizzles[1] = SWIZZLE_ZERO; + swizzles[2] = SWIZZLE_ZERO; + } else if (sampler->DepthMode == GL_LUMINANCE) { + swizzles[3] = SWIZZLE_ONE; + } else if (sampler->DepthMode == GL_RED) { + /* See table 3.23 of the GL 3.0 spec. */ + swizzles[1] = SWIZZLE_ZERO; + swizzles[2] = SWIZZLE_ZERO; + swizzles[3] = SWIZZLE_ONE; + } + } + if (img->InternalFormat == GL_YCBCR_MESA) { key->yuvtex_mask |= 1 << i; if (img->TexFormat == MESA_FORMAT_YCBCR) key->yuvtex_swap_mask |= 1 << i; } - key->tex_swizzles[i] = t->_Swizzle; + key->tex_swizzles[i] = + MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)], + swizzles[GET_SWZ(t->_Swizzle, 1)], + swizzles[GET_SWZ(t->_Swizzle, 2)], + swizzles[GET_SWZ(t->_Swizzle, 3)]); + + if (sampler->MinFilter != GL_NEAREST && + sampler->MagFilter != GL_NEAREST) { + if (sampler->WrapS == GL_CLAMP) + key->gl_clamp_mask[0] |= 1 << i; + if (sampler->WrapT == GL_CLAMP) + key->gl_clamp_mask[1] |= 1 << i; + if (sampler->WrapR == GL_CLAMP) + key->gl_clamp_mask[2] |= 1 << i; + } } else { key->tex_swizzles[i] = SWIZZLE_NOOP; } } - /* Shadow */ - key->shadowtex_mask = fp->program.Base.ShadowSamplers; - /* _NEW_BUFFERS */ /* * Include the draw buffer origin and height so that we can calculate @@ -337,14 +437,12 @@ static void brw_wm_populate_key( struct brw_context *brw, * drawable height in order to invert the Y axis. */ if (fp->program.Base.InputsRead & FRAG_BIT_WPOS) { - if (brw->intel.driDrawable != NULL) { - key->origin_x = brw->intel.driDrawable->x; - key->origin_y = brw->intel.driDrawable->y; - key->drawable_height = brw->intel.driDrawable->h; - } + key->drawable_height = ctx->DrawBuffer->Height; + key->render_to_fbo = ctx->DrawBuffer->Name != 0; } - key->nr_color_regions = brw->state.nr_color_regions; + /* _NEW_BUFFERS */ + key->nr_color_regions = ctx->DrawBuffer->_NumColorDrawBuffers; /* CACHE_NEW_VS_PROG */ key->vp_outputs_written = brw->vs.prog_data->outputs_written; @@ -356,21 +454,21 @@ static void brw_wm_populate_key( struct brw_context *brw, static void brw_prepare_wm_prog(struct brw_context *brw) { + struct intel_context *intel = &brw->intel; + struct gl_context *ctx = &intel->ctx; struct brw_wm_prog_key key; struct brw_fragment_program *fp = (struct brw_fragment_program *) brw->fragment_program; - + brw_wm_populate_key(brw, &key); - /* Make an early check for the key. - */ - dri_bo_unreference(brw->wm.prog_bo); - brw->wm.prog_bo = brw_search_cache(&brw->cache, BRW_WM_PROG, - &key, sizeof(key), - NULL, 0, - &brw->wm.prog_data); - if (brw->wm.prog_bo == NULL) - do_wm_prog(brw, fp, &key); + if (!brw_search_cache(&brw->cache, BRW_WM_PROG, + &key, sizeof(key), + &brw->wm.prog_offset, &brw->wm.prog_data)) { + bool success = do_wm_prog(brw, ctx->Shader.CurrentFragmentProgram, fp, + &key); + assert(success); + } } @@ -378,11 +476,11 @@ const struct brw_tracked_state brw_wm_prog = { .dirty = { .mesa = (_NEW_COLOR | _NEW_DEPTH | - _NEW_HINT | _NEW_STENCIL | _NEW_POLYGON | _NEW_LINE | _NEW_LIGHT | + _NEW_FRAG_CLAMP | _NEW_BUFFERS | _NEW_TEXTURE), .brw = (BRW_NEW_FRAGMENT_PROGRAM |