X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_sampler_state.c;h=0eed8f962ee4d27390e787474ee40ec6526de085;hb=ac08bc8ac220f22333536a9f881fde1e5607148e;hp=5855af6d2bfa16a84f500be3e3447d2b7e9c0c4d;hpb=d8c5c4f3e480a6edfd9b4b83c35cee07c70dd421;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_sampler_state.c b/src/mesa/drivers/dri/i965/brw_sampler_state.c index 5855af6d2bf..0eed8f962ee 100644 --- a/src/mesa/drivers/dri/i965/brw_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_sampler_state.c @@ -44,6 +44,7 @@ #include "main/macros.h" #include "main/samplerobj.h" +#include "util/half_float.h" /** * Emit a 3DSTATE_SAMPLER_STATE_POINTERS_{VS,HS,GS,DS,PS} packet. @@ -54,6 +55,8 @@ gen7_emit_sampler_state_pointers_xs(struct brw_context *brw, { static const uint16_t packet_headers[] = { [MESA_SHADER_VERTEX] = _3DSTATE_SAMPLER_STATE_POINTERS_VS, + [MESA_SHADER_TESS_CTRL] = _3DSTATE_SAMPLER_STATE_POINTERS_HS, + [MESA_SHADER_TESS_EVAL] = _3DSTATE_SAMPLER_STATE_POINTERS_DS, [MESA_SHADER_GEOMETRY] = _3DSTATE_SAMPLER_STATE_POINTERS_GS, [MESA_SHADER_FRAGMENT] = _3DSTATE_SAMPLER_STATE_POINTERS_PS, }; @@ -88,13 +91,11 @@ brw_emit_sampler_state(struct brw_context *brw, unsigned min_lod, unsigned max_lod, int lod_bias, - unsigned base_level, unsigned shadow_function, bool non_normalized_coordinates, uint32_t border_color_offset) { ss[0] = BRW_SAMPLER_LOD_PRECLAMP_ENABLE | - SET_FIELD(base_level, BRW_SAMPLER_BASE_MIPLEVEL) | SET_FIELD(mip_filter, BRW_SAMPLER_MIP_FILTER) | SET_FIELD(mag_filter, BRW_SAMPLER_MAG_FILTER) | SET_FIELD(min_filter, BRW_SAMPLER_MIN_FILTER); @@ -185,61 +186,78 @@ translate_wrap_mode(struct brw_context *brw, GLenum wrap, bool using_nearest) } } +/** + * Return true if the given wrap mode requires the border color to exist. + */ +static bool +wrap_mode_needs_border_color(unsigned wrap_mode) +{ + return wrap_mode == BRW_TEXCOORDMODE_CLAMP_BORDER || + wrap_mode == GEN8_TEXCOORDMODE_HALF_BORDER; +} + +static bool +has_component(mesa_format format, int i) +{ + if (_mesa_is_format_color_format(format)) + return _mesa_format_has_color_component(format, i); + + /* depth and stencil have only one component */ + return i == 0; +} + /** * Upload SAMPLER_BORDER_COLOR_STATE. */ static void upload_default_color(struct brw_context *brw, const struct gl_sampler_object *sampler, - int unit, + mesa_format format, GLenum base_format, + bool is_integer_format, uint32_t *sdc_offset) { - struct gl_context *ctx = &brw->ctx; - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - struct gl_texture_object *texObj = texUnit->_Current; - struct gl_texture_image *firstImage = texObj->Image[0][texObj->BaseLevel]; - float color[4]; + union gl_color_union color; - switch (firstImage->_BaseFormat) { + switch (base_format) { case GL_DEPTH_COMPONENT: /* GL specs that border color for depth textures is taken from the * R channel, while the hardware uses A. Spam R into all the * channels for safety. */ - color[0] = sampler->BorderColor.f[0]; - color[1] = sampler->BorderColor.f[0]; - color[2] = sampler->BorderColor.f[0]; - color[3] = sampler->BorderColor.f[0]; + color.ui[0] = sampler->BorderColor.ui[0]; + color.ui[1] = sampler->BorderColor.ui[0]; + color.ui[2] = sampler->BorderColor.ui[0]; + color.ui[3] = sampler->BorderColor.ui[0]; break; case GL_ALPHA: - color[0] = 0.0; - color[1] = 0.0; - color[2] = 0.0; - color[3] = sampler->BorderColor.f[3]; + color.ui[0] = 0u; + color.ui[1] = 0u; + color.ui[2] = 0u; + color.ui[3] = sampler->BorderColor.ui[3]; break; case GL_INTENSITY: - color[0] = sampler->BorderColor.f[0]; - color[1] = sampler->BorderColor.f[0]; - color[2] = sampler->BorderColor.f[0]; - color[3] = sampler->BorderColor.f[0]; + color.ui[0] = sampler->BorderColor.ui[0]; + color.ui[1] = sampler->BorderColor.ui[0]; + color.ui[2] = sampler->BorderColor.ui[0]; + color.ui[3] = sampler->BorderColor.ui[0]; break; case GL_LUMINANCE: - color[0] = sampler->BorderColor.f[0]; - color[1] = sampler->BorderColor.f[0]; - color[2] = sampler->BorderColor.f[0]; - color[3] = 1.0; + color.ui[0] = sampler->BorderColor.ui[0]; + color.ui[1] = sampler->BorderColor.ui[0]; + color.ui[2] = sampler->BorderColor.ui[0]; + color.ui[3] = float_as_int(1.0); break; case GL_LUMINANCE_ALPHA: - color[0] = sampler->BorderColor.f[0]; - color[1] = sampler->BorderColor.f[0]; - color[2] = sampler->BorderColor.f[0]; - color[3] = sampler->BorderColor.f[3]; + color.ui[0] = sampler->BorderColor.ui[0]; + color.ui[1] = sampler->BorderColor.ui[0]; + color.ui[2] = sampler->BorderColor.ui[0]; + color.ui[3] = sampler->BorderColor.ui[3]; break; default: - color[0] = sampler->BorderColor.f[0]; - color[1] = sampler->BorderColor.f[1]; - color[2] = sampler->BorderColor.f[2]; - color[3] = sampler->BorderColor.f[3]; + color.ui[0] = sampler->BorderColor.ui[0]; + color.ui[1] = sampler->BorderColor.ui[1]; + color.ui[2] = sampler->BorderColor.ui[2]; + color.ui[3] = sampler->BorderColor.ui[3]; break; } @@ -247,19 +265,82 @@ upload_default_color(struct brw_context *brw, * where we've initialized the A channel to 1.0. We also have to set * the border color alpha to 1.0 in that case. */ - if (firstImage->_BaseFormat == GL_RGB) - color[3] = 1.0; + if (base_format == GL_RGB) + color.ui[3] = float_as_int(1.0); if (brw->gen >= 8) { /* On Broadwell, the border color is represented as four 32-bit floats, * integers, or unsigned values, interpreted according to the surface - * format. This matches the sampler->BorderColor union exactly. Since - * we use floats both here and in the above reswizzling code, we preserve - * the original bit pattern. So we actually handle all three formats. + * format. This matches the sampler->BorderColor union exactly; just + * memcpy the values. */ - float *sdc = brw_state_batch(brw, AUB_TRACE_SAMPLER_DEFAULT_COLOR, - 4 * 4, 64, sdc_offset); - COPY_4FV(sdc, color); + uint32_t *sdc = brw_state_batch(brw, AUB_TRACE_SAMPLER_DEFAULT_COLOR, + 4 * 4, 64, sdc_offset); + memcpy(sdc, color.ui, 4 * 4); + } else if (brw->is_haswell && is_integer_format) { + /* Haswell's integer border color support is completely insane: + * SAMPLER_BORDER_COLOR_STATE is 20 DWords. The first four are + * for float colors. The next 12 DWords are MBZ and only exist to + * pad it out to a 64 byte cacheline boundary. DWords 16-19 then + * contain integer colors; these are only used if SURFACE_STATE + * has the "Integer Surface Format" bit set. Even then, the + * arrangement of the RGBA data devolves into madness. + */ + uint32_t *sdc = brw_state_batch(brw, AUB_TRACE_SAMPLER_DEFAULT_COLOR, + 20 * 4, 512, sdc_offset); + memset(sdc, 0, 20 * 4); + sdc = &sdc[16]; + + const int bits_per_channel = + _mesa_get_format_bits(format, + format == MESA_FORMAT_S_UINT8 ? + GL_STENCIL_BITS : GL_RED_BITS); + + /* From the Haswell PRM, "Command Reference: Structures", Page 36: + * "If any color channel is missing from the surface format, + * corresponding border color should be programmed as zero and if + * alpha channel is missing, corresponding Alpha border color should + * be programmed as 1." + */ + unsigned c[4] = { 0, 0, 0, 1 }; + for (int i = 0; i < 4; i++) { + if (has_component(format, i)) + c[i] = color.ui[i]; + } + + switch (bits_per_channel) { + case 8: + /* Copy RGBA in order. */ + for (int i = 0; i < 4; i++) + ((uint8_t *) sdc)[i] = c[i]; + break; + case 10: + /* R10G10B10A2_UINT is treated like a 16-bit format. */ + case 16: + ((uint16_t *) sdc)[0] = c[0]; /* R -> DWord 0, bits 15:0 */ + ((uint16_t *) sdc)[1] = c[1]; /* G -> DWord 0, bits 31:16 */ + /* DWord 1 is Reserved/MBZ! */ + ((uint16_t *) sdc)[4] = c[2]; /* B -> DWord 2, bits 15:0 */ + ((uint16_t *) sdc)[5] = c[3]; /* A -> DWord 3, bits 31:16 */ + break; + case 32: + if (base_format == GL_RG) { + /* Careful inspection of the tables reveals that for RG32 formats, + * the green channel needs to go where blue normally belongs. + */ + sdc[0] = c[0]; + sdc[2] = c[1]; + sdc[3] = 1; + } else { + /* Copy RGBA in order. */ + for (int i = 0; i < 4; i++) + sdc[i] = c[i]; + } + break; + default: + assert(!"Invalid number of bits per channel in integer format."); + break; + } } else if (brw->gen == 5 || brw->gen == 6) { struct gen5_sampler_default_color *sdc; @@ -268,39 +349,39 @@ upload_default_color(struct brw_context *brw, memset(sdc, 0, sizeof(*sdc)); - UNCLAMPED_FLOAT_TO_UBYTE(sdc->ub[0], color[0]); - UNCLAMPED_FLOAT_TO_UBYTE(sdc->ub[1], color[1]); - UNCLAMPED_FLOAT_TO_UBYTE(sdc->ub[2], color[2]); - UNCLAMPED_FLOAT_TO_UBYTE(sdc->ub[3], color[3]); + UNCLAMPED_FLOAT_TO_UBYTE(sdc->ub[0], color.f[0]); + UNCLAMPED_FLOAT_TO_UBYTE(sdc->ub[1], color.f[1]); + UNCLAMPED_FLOAT_TO_UBYTE(sdc->ub[2], color.f[2]); + UNCLAMPED_FLOAT_TO_UBYTE(sdc->ub[3], color.f[3]); - UNCLAMPED_FLOAT_TO_USHORT(sdc->us[0], color[0]); - UNCLAMPED_FLOAT_TO_USHORT(sdc->us[1], color[1]); - UNCLAMPED_FLOAT_TO_USHORT(sdc->us[2], color[2]); - UNCLAMPED_FLOAT_TO_USHORT(sdc->us[3], color[3]); + UNCLAMPED_FLOAT_TO_USHORT(sdc->us[0], color.f[0]); + UNCLAMPED_FLOAT_TO_USHORT(sdc->us[1], color.f[1]); + UNCLAMPED_FLOAT_TO_USHORT(sdc->us[2], color.f[2]); + UNCLAMPED_FLOAT_TO_USHORT(sdc->us[3], color.f[3]); - UNCLAMPED_FLOAT_TO_SHORT(sdc->s[0], color[0]); - UNCLAMPED_FLOAT_TO_SHORT(sdc->s[1], color[1]); - UNCLAMPED_FLOAT_TO_SHORT(sdc->s[2], color[2]); - UNCLAMPED_FLOAT_TO_SHORT(sdc->s[3], color[3]); + UNCLAMPED_FLOAT_TO_SHORT(sdc->s[0], color.f[0]); + UNCLAMPED_FLOAT_TO_SHORT(sdc->s[1], color.f[1]); + UNCLAMPED_FLOAT_TO_SHORT(sdc->s[2], color.f[2]); + UNCLAMPED_FLOAT_TO_SHORT(sdc->s[3], color.f[3]); - sdc->hf[0] = _mesa_float_to_half(color[0]); - sdc->hf[1] = _mesa_float_to_half(color[1]); - sdc->hf[2] = _mesa_float_to_half(color[2]); - sdc->hf[3] = _mesa_float_to_half(color[3]); + sdc->hf[0] = _mesa_float_to_half(color.f[0]); + sdc->hf[1] = _mesa_float_to_half(color.f[1]); + sdc->hf[2] = _mesa_float_to_half(color.f[2]); + sdc->hf[3] = _mesa_float_to_half(color.f[3]); sdc->b[0] = sdc->s[0] >> 8; sdc->b[1] = sdc->s[1] >> 8; sdc->b[2] = sdc->s[2] >> 8; sdc->b[3] = sdc->s[3] >> 8; - sdc->f[0] = color[0]; - sdc->f[1] = color[1]; - sdc->f[2] = color[2]; - sdc->f[3] = color[3]; + sdc->f[0] = color.f[0]; + sdc->f[1] = color.f[1]; + sdc->f[2] = color.f[2]; + sdc->f[3] = color.f[3]; } else { float *sdc = brw_state_batch(brw, AUB_TRACE_SAMPLER_DEFAULT_COLOR, 4 * 4, 32, sdc_offset); - memcpy(sdc, color, 4 * 4); + memcpy(sdc, color.f, 4 * 4); } } @@ -308,21 +389,16 @@ upload_default_color(struct brw_context *brw, * Sets the sampler state for a single unit based off of the sampler key * entry. */ -static void +void brw_update_sampler_state(struct brw_context *brw, - int unit, + GLenum target, bool tex_cube_map_seamless, + GLfloat tex_unit_lod_bias, + mesa_format format, GLenum base_format, + bool is_integer_format, + const struct gl_sampler_object *sampler, uint32_t *sampler_state, uint32_t batch_offset_for_sampler_state) { - struct gl_context *ctx = &brw->ctx; - const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - const struct gl_texture_object *texObj = texUnit->_Current; - const struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit); - - /* These don't use samplers at all. */ - if (texObj->Target == GL_TEXTURE_BUFFER) - return; - unsigned min_filter, mag_filter, mip_filter; /* Select min and mip filters. */ @@ -363,11 +439,11 @@ brw_update_sampler_state(struct brw_context *brw, /* Enable anisotropic filtering if desired. */ unsigned max_anisotropy = BRW_ANISORATIO_2; - if (sampler->MaxAnisotropy > 1.0) { + if (sampler->MaxAnisotropy > 1.0f) { min_filter = BRW_MAPFILTER_ANISOTROPIC; mag_filter = BRW_MAPFILTER_ANISOTROPIC; - if (sampler->MaxAnisotropy > 2.0) { + if (sampler->MaxAnisotropy > 2.0f) { max_anisotropy = MIN2((sampler->MaxAnisotropy - 2) / 2, BRW_ANISORATIO_16); } @@ -392,14 +468,16 @@ brw_update_sampler_state(struct brw_context *brw, unsigned wrap_t = translate_wrap_mode(brw, sampler->WrapT, either_nearest); unsigned wrap_r = translate_wrap_mode(brw, sampler->WrapR, either_nearest); - if (texObj->Target == GL_TEXTURE_CUBE_MAP || - texObj->Target == GL_TEXTURE_CUBE_MAP_ARRAY) { + if (target == GL_TEXTURE_CUBE_MAP || + target == GL_TEXTURE_CUBE_MAP_ARRAY) { /* Cube maps must use the same wrap mode for all three coordinate * dimensions. Prior to Haswell, only CUBE and CLAMP are valid. + * + * Ivybridge and Baytrail seem to have problems with CUBE mode and + * integer formats. Fall back to CLAMP for now. */ - if ((ctx->Texture.CubeMapSeamless || sampler->CubeMapSeamless) && - (sampler->MinFilter != GL_NEAREST || - sampler->MagFilter != GL_NEAREST)) { + if ((tex_cube_map_seamless || sampler->CubeMapSeamless) && + !(brw->gen == 7 && !brw->is_haswell && is_integer_format)) { wrap_s = BRW_TEXCOORDMODE_CUBE; wrap_t = BRW_TEXCOORDMODE_CUBE; wrap_r = BRW_TEXCOORDMODE_CUBE; @@ -408,7 +486,7 @@ brw_update_sampler_state(struct brw_context *brw, wrap_t = BRW_TEXCOORDMODE_CLAMP; wrap_r = BRW_TEXCOORDMODE_CLAMP; } - } else if (texObj->Target == GL_TEXTURE_1D) { + } else if (target == GL_TEXTURE_1D) { /* There's a bug in 1D texture sampling - it actually pays * attention to the wrap_t value, though it should not. * Override the wrap_t value here to GL_REPEAT to keep @@ -428,13 +506,22 @@ brw_update_sampler_state(struct brw_context *brw, const unsigned min_lod = U_FIXED(CLAMP(sampler->MinLod, 0, 13), lod_bits); const unsigned max_lod = U_FIXED(CLAMP(sampler->MaxLod, 0, 13), lod_bits); const int lod_bias = - S_FIXED(CLAMP(texUnit->LodBias + sampler->LodBias, -16, 15), lod_bits); - const unsigned base_level = U_FIXED(0, 1); + S_FIXED(CLAMP(tex_unit_lod_bias + sampler->LodBias, -16, 15), lod_bits); - uint32_t border_color_offset; - upload_default_color(brw, sampler, unit, &border_color_offset); + /* Upload the border color if necessary. If not, just point it at + * offset 0 (the start of the batch) - the color should be ignored, + * but that address won't fault in case something reads it anyway. + */ + uint32_t border_color_offset = 0; + if (wrap_mode_needs_border_color(wrap_s) || + wrap_mode_needs_border_color(wrap_t) || + wrap_mode_needs_border_color(wrap_r)) { + upload_default_color(brw, sampler, + format, base_format, is_integer_format, + &border_color_offset); + } - const bool non_normalized_coords = texObj->Target == GL_TEXTURE_RECTANGLE; + const bool non_normalized_coords = target == GL_TEXTURE_RECTANGLE; brw_emit_sampler_state(brw, sampler_state, @@ -443,12 +530,35 @@ brw_update_sampler_state(struct brw_context *brw, max_anisotropy, address_rounding, wrap_s, wrap_t, wrap_r, - min_lod, max_lod, lod_bias, base_level, + min_lod, max_lod, lod_bias, shadow_function, non_normalized_coords, border_color_offset); } +static void +update_sampler_state(struct brw_context *brw, + int unit, + uint32_t *sampler_state, + uint32_t batch_offset_for_sampler_state) +{ + struct gl_context *ctx = &brw->ctx; + const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + const struct gl_texture_object *texObj = texUnit->_Current; + const struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit); + + /* These don't use samplers at all. */ + if (texObj->Target == GL_TEXTURE_BUFFER) + return; + + struct gl_texture_image *firstImage = texObj->Image[0][texObj->BaseLevel]; + brw_update_sampler_state(brw, texObj->Target, ctx->Texture.CubeMapSeamless, + texUnit->LodBias, + firstImage->TexFormat, firstImage->_BaseFormat, + texObj->_IsIntegerFormat, + sampler, + sampler_state, batch_offset_for_sampler_state); +} static void brw_upload_sampler_state_table(struct brw_context *brw, @@ -478,7 +588,7 @@ brw_upload_sampler_state_table(struct brw_context *brw, if (SamplersUsed & (1 << s)) { const unsigned unit = prog->SamplerUnits[s]; if (ctx->Texture.Unit[unit]._Current) { - brw_update_sampler_state(brw, unit, sampler_state, + update_sampler_state(brw, unit, sampler_state, batch_offset_for_sampler_state); } } @@ -487,14 +597,14 @@ brw_upload_sampler_state_table(struct brw_context *brw, batch_offset_for_sampler_state += size_in_bytes; } - if (brw->gen >= 7) { + if (brw->gen >= 7 && stage_state->stage != MESA_SHADER_COMPUTE) { /* Emit a 3DSTATE_SAMPLER_STATE_POINTERS_XS packet. */ gen7_emit_sampler_state_pointers_xs(brw, stage_state); } else { /* Flag that the sampler state table pointer has changed; later atoms * will handle it. */ - brw->state.dirty.cache |= CACHE_NEW_SAMPLER; + brw->ctx.NewDriverState |= BRW_NEW_SAMPLER_STATE_TABLE; } } @@ -510,8 +620,8 @@ const struct brw_tracked_state brw_fs_samplers = { .dirty = { .mesa = _NEW_TEXTURE, .brw = BRW_NEW_BATCH | + BRW_NEW_BLORP | BRW_NEW_FRAGMENT_PROGRAM, - .cache = 0 }, .emit = brw_upload_fs_samplers, }; @@ -529,8 +639,8 @@ const struct brw_tracked_state brw_vs_samplers = { .dirty = { .mesa = _NEW_TEXTURE, .brw = BRW_NEW_BATCH | + BRW_NEW_BLORP | BRW_NEW_VERTEX_PROGRAM, - .cache = 0 }, .emit = brw_upload_vs_samplers, }; @@ -552,8 +662,75 @@ const struct brw_tracked_state brw_gs_samplers = { .dirty = { .mesa = _NEW_TEXTURE, .brw = BRW_NEW_BATCH | + BRW_NEW_BLORP | BRW_NEW_GEOMETRY_PROGRAM, - .cache = 0 }, .emit = brw_upload_gs_samplers, }; + + +static void +brw_upload_tcs_samplers(struct brw_context *brw) +{ + /* BRW_NEW_TESS_PROGRAMS */ + struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program; + if (!tcs) + return; + + brw_upload_sampler_state_table(brw, tcs, &brw->tcs.base); +} + + +const struct brw_tracked_state brw_tcs_samplers = { + .dirty = { + .mesa = _NEW_TEXTURE, + .brw = BRW_NEW_BATCH | + BRW_NEW_BLORP | + BRW_NEW_TESS_PROGRAMS, + }, + .emit = brw_upload_tcs_samplers, +}; + + +static void +brw_upload_tes_samplers(struct brw_context *brw) +{ + /* BRW_NEW_TESS_PROGRAMS */ + struct gl_program *tes = (struct gl_program *) brw->tess_eval_program; + if (!tes) + return; + + brw_upload_sampler_state_table(brw, tes, &brw->tes.base); +} + + +const struct brw_tracked_state brw_tes_samplers = { + .dirty = { + .mesa = _NEW_TEXTURE, + .brw = BRW_NEW_BATCH | + BRW_NEW_BLORP | + BRW_NEW_TESS_PROGRAMS, + }, + .emit = brw_upload_tes_samplers, +}; + +static void +brw_upload_cs_samplers(struct brw_context *brw) +{ + /* BRW_NEW_COMPUTE_PROGRAM */ + struct gl_program *cs = (struct gl_program *) brw->compute_program; + if (!cs) + return; + + brw_upload_sampler_state_table(brw, cs, &brw->cs.base); +} + +const struct brw_tracked_state brw_cs_samplers = { + .dirty = { + .mesa = _NEW_TEXTURE, + .brw = BRW_NEW_BATCH | + BRW_NEW_BLORP | + BRW_NEW_COMPUTE_PROGRAM, + }, + .emit = brw_upload_cs_samplers, +};