X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_wm.c;h=656501b4f79c33993bf7325ece53056be0c10595;hb=5b3eb7538cd9ceb967b6e9e765896183e7c2c4d4;hp=bad76793af74ec163d3fe1bb76a01901bf10e81d;hpb=e228433823b90127a217950433e31f0ef44df813;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index bad76793af7..656501b4f79 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -29,70 +29,31 @@ * Keith Whitwell */ -#include "main/texformat.h" #include "brw_context.h" -#include "brw_util.h" #include "brw_wm.h" #include "brw_state.h" +#include "main/formats.h" - +/** Return number of src args for given instruction */ GLuint brw_wm_nr_args( GLuint opcode ) { switch (opcode) { - + case WM_FRONTFACING: case WM_PIXELXY: - case OPCODE_ABS: - case OPCODE_FLR: - case OPCODE_FRC: - case OPCODE_SWZ: - case OPCODE_MOV: - case OPCODE_COS: - case OPCODE_EX2: - case OPCODE_LG2: - case OPCODE_RCP: - case OPCODE_RSQ: - case OPCODE_SIN: - case OPCODE_SCS: - case OPCODE_TEX: - case OPCODE_TXB: - case OPCODE_TXP: - case OPCODE_KIL: - case OPCODE_LIT: - case WM_CINTERP: - case WM_WPOSXY: - return 1; - - case OPCODE_POW: - case OPCODE_SUB: - case OPCODE_SGE: - case OPCODE_SGT: - case OPCODE_SLE: - case OPCODE_SLT: - case OPCODE_SEQ: - case OPCODE_SNE: - case OPCODE_ADD: - case OPCODE_MAX: - case OPCODE_MIN: - case OPCODE_MUL: - case OPCODE_XPD: - case OPCODE_DP3: - case OPCODE_DP4: - case OPCODE_DPH: - case OPCODE_DST: - case WM_LINTERP: + return 0; + case WM_CINTERP: + case WM_WPOSXY: case WM_DELTAXY: + return 1; + case WM_LINTERP: case WM_PIXELW: return 2; - case WM_FB_WRITE: - case WM_PINTERP: - case OPCODE_MAD: - case OPCODE_CMP: - case OPCODE_LRP: + case WM_PINTERP: return 3; - default: - return 0; + assert(opcode < MAX_OPCODE); + return _mesa_num_inst_src_regs(opcode); } } @@ -107,6 +68,7 @@ GLuint brw_wm_is_scalar_result( GLuint opcode ) case OPCODE_RCP: case OPCODE_RSQ: case OPCODE_SIN: + case OPCODE_DP2: case OPCODE_DP3: case OPCODE_DP4: case OPCODE_DPH: @@ -119,6 +81,106 @@ GLuint brw_wm_is_scalar_result( GLuint opcode ) } +/** + * Do GPU code generation for non-GLSL shader. non-GLSL shaders have + * no flow control instructions so we can more readily do SSA-style + * optimizations. + */ +static void +brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) +{ + /* Augment fragment program. Add instructions for pre- and + * post-fragment-program tasks such as interpolation and fogging. + */ + brw_wm_pass_fp(c); + + /* Translate to intermediate representation. Build register usage + * chains. + */ + brw_wm_pass0(c); + + /* Dead code removal. + */ + brw_wm_pass1(c); + + /* Register allocation. + * Divide by two because we operate on 16 pixels at a time and require + * two GRF entries for each logical shader register. + */ + c->grf_limit = BRW_WM_MAX_GRF / 2; + + brw_wm_pass2(c); + + /* how many general-purpose registers are used */ + c->prog_data.total_grf = c->max_wm_grf; + + /* Emit GEN4 code. + */ + brw_wm_emit(c); +} + +static void +brw_wm_payload_setup(struct brw_context *brw, + struct brw_wm_compile *c) +{ + struct intel_context *intel = &brw->intel; + bool uses_depth = (c->fp->program.Base.InputsRead & + (1 << FRAG_ATTRIB_WPOS)) != 0; + + if (intel->gen >= 6) { + /* R0-1: masks, pixel X/Y coordinates. */ + c->nr_payload_regs = 2; + /* R2: only for 32-pixel dispatch.*/ + /* R3-4: perspective pixel location barycentric */ + c->nr_payload_regs += 2; + /* R5-6: perspective pixel location bary for dispatch width != 8 */ + if (c->dispatch_width == 16) { + c->nr_payload_regs += 2; + } + /* R7-10: perspective centroid barycentric */ + /* R11-14: perspective sample barycentric */ + /* R15-18: linear pixel location barycentric */ + /* R19-22: linear centroid barycentric */ + /* R23-26: linear sample barycentric */ + + /* R27: interpolated depth if uses source depth */ + if (uses_depth) { + c->source_depth_reg = c->nr_payload_regs; + c->nr_payload_regs++; + if (c->dispatch_width == 16) { + /* R28: interpolated depth if not 8-wide. */ + c->nr_payload_regs++; + } + } + /* R29: interpolated W set if GEN6_WM_USES_SOURCE_W. + */ + if (uses_depth) { + c->source_w_reg = c->nr_payload_regs; + c->nr_payload_regs++; + if (c->dispatch_width == 16) { + /* R30: interpolated W if not 8-wide. */ + c->nr_payload_regs++; + } + } + /* R31: MSAA position offsets. */ + /* R32-: bary for 32-pixel. */ + /* R58-59: interp W for 32-pixel. */ + + if (c->fp->program.Base.OutputsWritten & + BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { + c->source_depth_to_render_target = GL_TRUE; + c->computes_depth = GL_TRUE; + } + } else { + brw_wm_lookup_iz(intel, c); + } +} + +/** + * All Mesa program -> GPU code generation goes through this function. + * Depending on the instructions used (i.e. flow control instructions) + * we'll use one of two code generators. + */ static void do_wm_prog( struct brw_context *brw, struct brw_fragment_program *fp, struct brw_wm_prog_key *key) @@ -129,63 +191,79 @@ static void do_wm_prog( struct brw_context *brw, c = brw->wm.compile_data; if (c == NULL) { - brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data)); - c = brw->wm.compile_data; + brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data)); + c = brw->wm.compile_data; + if (c == NULL) { + /* Ouch - big out of memory problem. Can't continue + * without triggering a segfault, no way to signal, + * so just return. + */ + return; + } + c->instruction = calloc(1, BRW_WM_MAX_INSN * sizeof(*c->instruction)); + c->prog_instructions = calloc(1, BRW_WM_MAX_INSN * + sizeof(*c->prog_instructions)); + c->vreg = calloc(1, BRW_WM_MAX_VREG * sizeof(*c->vreg)); + c->refs = calloc(1, BRW_WM_MAX_REF * sizeof(*c->refs)); } else { - memset(c, 0, sizeof(*brw->wm.compile_data)); + void *instruction = c->instruction; + void *prog_instructions = c->prog_instructions; + void *vreg = c->vreg; + void *refs = c->refs; + memset(c, 0, sizeof(*brw->wm.compile_data)); + c->instruction = instruction; + c->prog_instructions = prog_instructions; + c->vreg = vreg; + c->refs = refs; } memcpy(&c->key, key, sizeof(*key)); c->fp = fp; c->env_param = brw->intel.ctx.FragmentProgram.Parameters; - brw_init_compile(brw, &c->func); - if (brw_wm_is_glsl(&c->fp->program)) { - brw_wm_glsl_emit(brw, c); - } else { - /* Augment fragment program. Add instructions for pre- and - * post-fragment-program tasks such as interpolation and fogging. - */ - brw_wm_pass_fp(c); - - /* Translate to intermediate representation. Build register usage - * chains. - */ - brw_wm_pass0(c); - - /* Dead code removal. - */ - brw_wm_pass1(c); - - /* Register allocation. - */ - c->grf_limit = BRW_WM_MAX_GRF/2; - - brw_wm_pass2(c); - - c->prog_data.total_grf = c->max_wm_grf; - if (c->last_scratch) { - c->prog_data.total_scratch = - c->last_scratch + 0x40; - } else { - c->prog_data.total_scratch = 0; - } - - /* Emit GEN4 code. - */ - brw_wm_emit(c); + brw_init_compile(brw, &c->func); + + brw_wm_payload_setup(brw, c); + + if (!brw_wm_fs_emit(brw, c)) { + /* + * Shader which use GLSL features such as flow control are handled + * differently from "simple" shaders. + */ + c->dispatch_width = 16; + brw_wm_payload_setup(brw, c); + brw_wm_non_glsl_emit(brw, c); } + c->prog_data.dispatch_width = c->dispatch_width; + + /* Scratch space is used for register spilling */ + if (c->last_scratch) { + /* Per-thread scratch space is power-of-two sized. */ + for (c->prog_data.total_scratch = 1024; + c->prog_data.total_scratch <= c->last_scratch; + c->prog_data.total_scratch *= 2) { + /* empty */ + } + } + else { + c->prog_data.total_scratch = 0; + } + + if (unlikely(INTEL_DEBUG & DEBUG_WM)) + fprintf(stderr, "\n"); + /* get the program */ program = brw_get_program(&c->func, &program_size); - dri_bo_unreference(brw->wm.prog_bo); - brw->wm.prog_bo = brw_upload_cache( &brw->cache, BRW_WM_PROG, - &c->key, sizeof(c->key), - NULL, 0, - program, program_size, - &c->prog_data, - &brw->wm.prog_data ); + drm_intel_bo_unreference(brw->wm.prog_bo); + brw->wm.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_WM_PROG, + &c->key, sizeof(c->key), + NULL, 0, + program, program_size, + &c->prog_data, + sizeof(c->prog_data), + &brw->wm.prog_data); } @@ -193,8 +271,9 @@ static void do_wm_prog( struct brw_context *brw, static void brw_wm_populate_key( struct brw_context *brw, struct brw_wm_prog_key *key ) { + struct gl_context *ctx = &brw->intel.ctx; /* BRW_NEW_FRAGMENT_PROGRAM */ - struct brw_fragment_program *fp = + const struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program; GLuint lookup = 0; GLuint line_aa; @@ -206,79 +285,120 @@ static void brw_wm_populate_key( struct brw_context *brw, */ /* _NEW_COLOR */ if (fp->program.UsesKill || - brw->attribs.Color->AlphaEnabled) + ctx->Color.AlphaEnabled) lookup |= IZ_PS_KILL_ALPHATEST_BIT; - if (fp->program.Base.OutputsWritten & (1<program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) lookup |= IZ_PS_COMPUTES_DEPTH_BIT; /* _NEW_DEPTH */ - if (brw->attribs.Depth->Test) + if (ctx->Depth.Test) lookup |= IZ_DEPTH_TEST_ENABLE_BIT; - if (brw->attribs.Depth->Test && - brw->attribs.Depth->Mask) /* ?? */ + if (ctx->Depth.Test && + ctx->Depth.Mask) /* ?? */ lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; /* _NEW_STENCIL */ - if (brw->attribs.Stencil->Enabled) { + if (ctx->Stencil._Enabled) { lookup |= IZ_STENCIL_TEST_ENABLE_BIT; - if (brw->attribs.Stencil->WriteMask[0] || - (brw->attribs.Stencil->_TestTwoSide && - brw->attribs.Stencil->WriteMask[1])) + if (ctx->Stencil.WriteMask[0] || + ctx->Stencil.WriteMask[ctx->Stencil._BackFace]) lookup |= IZ_STENCIL_WRITE_ENABLE_BIT; } line_aa = AA_NEVER; /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */ - if (brw->attribs.Line->SmoothFlag) { + if (ctx->Line.SmoothFlag) { if (brw->intel.reduced_primitive == GL_LINES) { line_aa = AA_ALWAYS; } else if (brw->intel.reduced_primitive == GL_TRIANGLES) { - if (brw->attribs.Polygon->FrontMode == GL_LINE) { + if (ctx->Polygon.FrontMode == GL_LINE) { line_aa = AA_SOMETIMES; - if (brw->attribs.Polygon->BackMode == GL_LINE || - (brw->attribs.Polygon->CullFlag && - brw->attribs.Polygon->CullFaceMode == GL_BACK)) + if (ctx->Polygon.BackMode == GL_LINE || + (ctx->Polygon.CullFlag && + ctx->Polygon.CullFaceMode == GL_BACK)) line_aa = AA_ALWAYS; } - else if (brw->attribs.Polygon->BackMode == GL_LINE) { + else if (ctx->Polygon.BackMode == GL_LINE) { line_aa = AA_SOMETIMES; - if ((brw->attribs.Polygon->CullFlag && - brw->attribs.Polygon->CullFaceMode == GL_FRONT)) + if ((ctx->Polygon.CullFlag && + ctx->Polygon.CullFaceMode == GL_FRONT)) line_aa = AA_ALWAYS; } } } - - brw_wm_lookup_iz(line_aa, - lookup, - key); + key->iz_lookup = lookup; + key->line_aa = line_aa; + key->stats_wm = brw->intel.stats_wm; /* BRW_NEW_WM_INPUT_DIMENSIONS */ - key->projtex_mask = brw->wm.input_size_masks[4-1] >> (FRAG_ATTRIB_TEX0 - FRAG_ATTRIB_WPOS); + key->proj_attrib_mask = brw->wm.input_size_masks[4-1]; /* _NEW_LIGHT */ - key->flat_shade = (brw->attribs.Light->ShadeModel == GL_FLAT); + key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT); + + /* _NEW_HINT */ + key->linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST); /* _NEW_TEXTURE */ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { - const struct gl_texture_unit *unit = &brw->attribs.Texture->Unit[i]; - const struct gl_texture_object *t = unit->_Current; + const struct gl_texture_unit *unit = &ctx->Texture.Unit[i]; if (unit->_ReallyEnabled) { - if (t->Image[0][t->BaseLevel]->InternalFormat == GL_YCBCR_MESA) { - key->yuvtex_mask |= 1<Image[0][t->BaseLevel]->TexFormat->MesaFormat == - MESA_FORMAT_YCBCR) - key->yuvtex_swap_mask |= 1<< i; + const struct gl_texture_object *t = unit->_Current; + const struct gl_texture_image *img = t->Image[0][t->BaseLevel]; + int swizzles[SWIZZLE_NIL + 1] = { + SWIZZLE_X, + SWIZZLE_Y, + SWIZZLE_Z, + SWIZZLE_W, + SWIZZLE_ZERO, + SWIZZLE_ONE, + SWIZZLE_NIL + }; + + key->tex_swizzles[i] = SWIZZLE_NOOP; + + /* GL_DEPTH_TEXTURE_MODE is normally handled through + * brw_wm_surface_state, but it applies to shadow compares as + * well and our shadow compares always return the result in + * all 4 channels. + */ + if (t->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB) { + if (t->DepthMode == GL_ALPHA) { + swizzles[0] = SWIZZLE_ZERO; + swizzles[1] = SWIZZLE_ZERO; + swizzles[2] = SWIZZLE_ZERO; + } else if (t->DepthMode == GL_LUMINANCE) { + swizzles[3] = SWIZZLE_ONE; + } else if (t->DepthMode == GL_RED) { + swizzles[1] = SWIZZLE_ZERO; + swizzles[2] = SWIZZLE_ZERO; + swizzles[3] = SWIZZLE_ZERO; + } + } + + if (img->InternalFormat == GL_YCBCR_MESA) { + key->yuvtex_mask |= 1 << i; + if (img->TexFormat == MESA_FORMAT_YCBCR) + key->yuvtex_swap_mask |= 1 << i; } + + key->tex_swizzles[i] = + MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)], + swizzles[GET_SWZ(t->_Swizzle, 1)], + swizzles[GET_SWZ(t->_Swizzle, 2)], + swizzles[GET_SWZ(t->_Swizzle, 3)]); + } + else { + key->tex_swizzles[i] = SWIZZLE_NOOP; } } @@ -292,6 +412,9 @@ static void brw_wm_populate_key( struct brw_context *brw, * from the incoming screen origin relative position we get as part of our * payload. * + * This is only needed for the WM_WPOSXY opcode when the fragment program + * uses the gl_FragCoord input. + * * We could avoid recompiling by including this as a constant referenced by * our program, but if we were to do that it would also be nice to handle * getting that constant updated at batchbuffer submit time (when we @@ -300,19 +423,21 @@ static void brw_wm_populate_key( struct brw_context *brw, * just avoid using this as key data if the program doesn't use * fragment.position. * - * This pretty much becomes moot with DRI2 and redirected buffers anyway, - * as our origins will always be zero then. + * For DRI2 the origin_x/y will always be (0,0) but we still need the + * drawable height in order to invert the Y axis. */ - if (brw->intel.driDrawable != NULL) { - key->origin_x = brw->intel.driDrawable->x; - key->origin_y = brw->intel.driDrawable->y; - key->drawable_height = brw->intel.driDrawable->h; + if (fp->program.Base.InputsRead & FRAG_BIT_WPOS) { + key->drawable_height = ctx->DrawBuffer->Height; + key->render_to_fbo = ctx->DrawBuffer->Name != 0; } - /* Extra info: - */ - key->program_string_id = fp->id; + key->nr_color_regions = brw->state.nr_color_regions; + + /* CACHE_NEW_VS_PROG */ + key->vp_outputs_written = brw->vs.prog_data->outputs_written; + /* The unique fragment program ID */ + key->program_string_id = fp->id; } @@ -326,7 +451,7 @@ static void brw_prepare_wm_prog(struct brw_context *brw) /* Make an early check for the key. */ - dri_bo_unreference(brw->wm.prog_bo); + drm_intel_bo_unreference(brw->wm.prog_bo); brw->wm.prog_bo = brw_search_cache(&brw->cache, BRW_WM_PROG, &key, sizeof(key), NULL, 0, @@ -336,12 +461,11 @@ static void brw_prepare_wm_prog(struct brw_context *brw) } -/* See brw_wm.c: - */ const struct brw_tracked_state brw_wm_prog = { .dirty = { .mesa = (_NEW_COLOR | _NEW_DEPTH | + _NEW_HINT | _NEW_STENCIL | _NEW_POLYGON | _NEW_LINE | @@ -351,7 +475,7 @@ const struct brw_tracked_state brw_wm_prog = { .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_WM_INPUT_DIMENSIONS | BRW_NEW_REDUCED_PRIMITIVE), - .cache = 0 + .cache = CACHE_NEW_VS_PROG, }, .prepare = brw_prepare_wm_prog };