From b3c9e2161f22c3c6b8b2fc83ea4bc7ea883329d7 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Fri, 31 May 2013 11:52:47 +0800 Subject: [PATCH] ilo: introduce sampler CSO Introduce ilo_sampler_cso and initialize it in create_sampler_state(). This saves us from having to perform CPU-intensive calculations to construct hardware sampler states in draw_vbo(). --- .../drivers/ilo/ilo_3d_pipeline_gen6.c | 6 +- src/gallium/drivers/ilo/ilo_blit.c | 2 +- src/gallium/drivers/ilo/ilo_gpe.h | 23 +- src/gallium/drivers/ilo/ilo_gpe_gen6.c | 733 ++++++++++-------- src/gallium/drivers/ilo/ilo_gpe_gen6.h | 6 +- src/gallium/drivers/ilo/ilo_gpe_gen7.c | 20 +- src/gallium/drivers/ilo/ilo_shader.c | 15 +- src/gallium/drivers/ilo/ilo_state.c | 13 +- 8 files changed, 447 insertions(+), 371 deletions(-) diff --git a/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c index 43f0056522c..81cd6c22eaa 100644 --- a/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c +++ b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c @@ -1123,8 +1123,8 @@ gen6_pipeline_state_samplers(struct ilo_3d_pipeline *p, int shader_type, struct gen6_pipeline_session *session) { - const struct pipe_sampler_state **samplers = - (const struct pipe_sampler_state **) ilo->sampler[shader_type].states; + const struct ilo_sampler_cso * const *samplers = + ilo->sampler[shader_type].cso; const struct pipe_sampler_view **views = (const struct pipe_sampler_view **) ilo->view[shader_type].states; const int num_samplers = ilo->sampler[shader_type].count; @@ -1177,7 +1177,7 @@ gen6_pipeline_state_samplers(struct ilo_3d_pipeline *p, for (i = 0; i < num_samplers; i++) { border_color_state[i] = (samplers[i]) ? 
p->gen6_SAMPLER_BORDER_COLOR_STATE(p->dev, - &samplers[i]->border_color, p->cp) : 0; + samplers[i], p->cp) : 0; } } diff --git a/src/gallium/drivers/ilo/ilo_blit.c b/src/gallium/drivers/ilo/ilo_blit.c index 362128c2aa8..06a93c54739 100644 --- a/src/gallium/drivers/ilo/ilo_blit.c +++ b/src/gallium/drivers/ilo/ilo_blit.c @@ -570,7 +570,7 @@ ilo_blitter_begin(struct ilo_context *ilo, enum ilo_blitter_op op) util_blitter_save_fragment_sampler_states(ilo->blitter, ilo->sampler[PIPE_SHADER_FRAGMENT].count, - (void **) ilo->sampler[PIPE_SHADER_FRAGMENT].states); + (void **) ilo->sampler[PIPE_SHADER_FRAGMENT].cso); util_blitter_save_fragment_sampler_views(ilo->blitter, ilo->view[PIPE_SHADER_FRAGMENT].count, diff --git a/src/gallium/drivers/ilo/ilo_gpe.h b/src/gallium/drivers/ilo/ilo_gpe.h index 810a625730c..467a6b3a216 100644 --- a/src/gallium/drivers/ilo/ilo_gpe.h +++ b/src/gallium/drivers/ilo/ilo_gpe.h @@ -111,8 +111,24 @@ struct ilo_blend_state { struct pipe_blend_state state; }; +struct ilo_sampler_cso { + /* SAMPLER_STATE and SAMPLER_BORDER_COLOR_STATE */ + uint32_t payload[15]; + + uint32_t dw_filter; + uint32_t dw_filter_aniso; + uint32_t dw_wrap; + uint32_t dw_wrap_1d; + uint32_t dw_wrap_cube; + + bool anisotropic; + bool saturate_r; + bool saturate_s; + bool saturate_t; +}; + struct ilo_sampler_state { - struct pipe_sampler_state *states[ILO_MAX_SAMPLERS]; + const struct ilo_sampler_cso *cso[ILO_MAX_SAMPLERS]; unsigned count; }; @@ -175,4 +191,9 @@ void ilo_gpe_set_scissor_null(const struct ilo_dev_info *dev, struct ilo_scissor_state *scissor); +void +ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev, + const struct pipe_sampler_state *state, + struct ilo_sampler_cso *sampler); + #endif /* ILO_GPE_H */ diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen6.c b/src/gallium/drivers/ilo/ilo_gpe_gen6.c index 91dff27be72..082392bdeea 100644 --- a/src/gallium/drivers/ilo/ilo_gpe_gen6.c +++ b/src/gallium/drivers/ilo/ilo_gpe_gen6.c @@ -4114,10 +4114,368 @@ 
gen6_emit_so_SURFACE_STATE(const struct ilo_dev_info *dev, return gen6_emit_SURFACE_STATE(dev, buf->bo, false, dw, Elements(dw), cp); } +static void +sampler_init_border_color_gen6(const struct ilo_dev_info *dev, + const union pipe_color_union *color, + uint32_t *dw, int num_dwords) +{ + float rgba[4] = { + color->f[0], color->f[1], color->f[2], color->f[3], + }; + + ILO_GPE_VALID_GEN(dev, 6, 6); + + assert(num_dwords >= 12); + + /* + * This state is not documented in the Sandy Bridge PRM, but in the + * Ironlake PRM. SNORM8 seems to be in DW11 instead of DW1. + */ + + /* IEEE_FP */ + dw[1] = fui(rgba[0]); + dw[2] = fui(rgba[1]); + dw[3] = fui(rgba[2]); + dw[4] = fui(rgba[3]); + + /* FLOAT_16 */ + dw[5] = util_float_to_half(rgba[0]) | + util_float_to_half(rgba[1]) << 16; + dw[6] = util_float_to_half(rgba[2]) | + util_float_to_half(rgba[3]) << 16; + + /* clamp to [-1.0f, 1.0f] */ + rgba[0] = CLAMP(rgba[0], -1.0f, 1.0f); + rgba[1] = CLAMP(rgba[1], -1.0f, 1.0f); + rgba[2] = CLAMP(rgba[2], -1.0f, 1.0f); + rgba[3] = CLAMP(rgba[3], -1.0f, 1.0f); + + /* SNORM16 */ + dw[9] = (int16_t) util_iround(rgba[0] * 32767.0f) | + (int16_t) util_iround(rgba[1] * 32767.0f) << 16; + dw[10] = (int16_t) util_iround(rgba[2] * 32767.0f) | + (int16_t) util_iround(rgba[3] * 32767.0f) << 16; + + /* SNORM8 */ + dw[11] = (int8_t) util_iround(rgba[0] * 127.0f) | + (int8_t) util_iround(rgba[1] * 127.0f) << 8 | + (int8_t) util_iround(rgba[2] * 127.0f) << 16 | + (int8_t) util_iround(rgba[3] * 127.0f) << 24; + + /* clamp to [0.0f, 1.0f] */ + rgba[0] = CLAMP(rgba[0], 0.0f, 1.0f); + rgba[1] = CLAMP(rgba[1], 0.0f, 1.0f); + rgba[2] = CLAMP(rgba[2], 0.0f, 1.0f); + rgba[3] = CLAMP(rgba[3], 0.0f, 1.0f); + + /* UNORM8 */ + dw[0] = (uint8_t) util_iround(rgba[0] * 255.0f) | + (uint8_t) util_iround(rgba[1] * 255.0f) << 8 | + (uint8_t) util_iround(rgba[2] * 255.0f) << 16 | + (uint8_t) util_iround(rgba[3] * 255.0f) << 24; + + /* UNORM16 */ + dw[7] = (uint16_t) util_iround(rgba[0] * 65535.0f) | + (uint16_t) 
util_iround(rgba[1] * 65535.0f) << 16; + dw[8] = (uint16_t) util_iround(rgba[2] * 65535.0f) | + (uint16_t) util_iround(rgba[3] * 65535.0f) << 16; +} + +void +ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev, + const struct pipe_sampler_state *state, + struct ilo_sampler_cso *sampler) +{ + int mip_filter, min_filter, mag_filter, max_aniso; + int lod_bias, max_lod, min_lod; + int wrap_s, wrap_t, wrap_r, wrap_cube; + bool clamp_is_to_edge; + uint32_t dw0, dw1, dw3; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + memset(sampler, 0, sizeof(*sampler)); + + mip_filter = gen6_translate_tex_mipfilter(state->min_mip_filter); + min_filter = gen6_translate_tex_filter(state->min_img_filter); + mag_filter = gen6_translate_tex_filter(state->mag_img_filter); + + sampler->anisotropic = state->max_anisotropy; + + if (state->max_anisotropy >= 2 && state->max_anisotropy <= 16) + max_aniso = state->max_anisotropy / 2 - 1; + else if (state->max_anisotropy > 16) + max_aniso = BRW_ANISORATIO_16; + else + max_aniso = BRW_ANISORATIO_2; + + /* + * + * Here is how the hardware calculates per-pixel LOD, from my reading of the + * PRMs: + * + * 1) LOD is set to log2(ratio of texels to pixels) if not specified in + * other ways. The number of texels is measured using level + * SurfMinLod. + * 2) Bias is added to LOD. + * 3) LOD is clamped to [MinLod, MaxLod], and the clamped value is + * compared with Base to determine whether magnification or + * minification is needed. (if preclamp is disabled, LOD is compared + * with Base before clamping) + * 4) If magnification is needed, or no mipmapping is requested, LOD is + * set to floor(MinLod). + * 5) LOD is clamped to [0, MIPCnt], and SurfMinLod is added to LOD. + * + * With Gallium interface, Base is always zero and + * pipe_sampler_view::u.tex.first_level specifies SurfMinLod. 
+ */ + if (dev->gen >= ILO_GEN(7)) { + const float scale = 256.0f; + + /* [-16.0, 16.0) in S4.8 */ + lod_bias = (int) + (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale); + lod_bias &= 0x1fff; + + /* [0.0, 14.0] in U4.8 */ + max_lod = (int) (CLAMP(state->max_lod, 0.0f, 14.0f) * scale); + min_lod = (int) (CLAMP(state->min_lod, 0.0f, 14.0f) * scale); + } + else { + const float scale = 64.0f; + + /* [-16.0, 16.0) in S4.6 */ + lod_bias = (int) + (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale); + lod_bias &= 0x7ff; + + /* [0.0, 13.0] in U4.6 */ + max_lod = (int) (CLAMP(state->max_lod, 0.0f, 13.0f) * scale); + min_lod = (int) (CLAMP(state->min_lod, 0.0f, 13.0f) * scale); + } + + /* + * We want LOD to be clamped to determine magnification/minification, and + * get set to zero when it is magnification or when mipmapping is disabled. + * The hardware would set LOD to floor(MinLod) and that is a problem when + * MinLod is greater than or equal to 1.0f. + * + * With Base being zero, it is always minification when MinLod is non-zero. + * To achieve our goal, we just need to set MinLod to zero and set + * MagFilter to MinFilter when mipmapping is disabled. + */ + if (state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && min_lod) { + min_lod = 0; + mag_filter = min_filter; + } + + /* + * For nearest filtering, PIPE_TEX_WRAP_CLAMP means + * PIPE_TEX_WRAP_CLAMP_TO_EDGE; for linear filtering, PIPE_TEX_WRAP_CLAMP + * means PIPE_TEX_WRAP_CLAMP_TO_BORDER while additionally clamping the + * texture coordinates to [0.0, 1.0]. + * + * The clamping will be taken care of in the shaders. There are two + * filters here, but let the minification one have a say. 
+ */ + clamp_is_to_edge = (state->min_img_filter == PIPE_TEX_FILTER_NEAREST); + if (!clamp_is_to_edge) { + sampler->saturate_s = (state->wrap_s == PIPE_TEX_WRAP_CLAMP); + sampler->saturate_t = (state->wrap_t == PIPE_TEX_WRAP_CLAMP); + sampler->saturate_r = (state->wrap_r == PIPE_TEX_WRAP_CLAMP); + } + + /* determine wrap s/t/r */ + wrap_s = gen6_translate_tex_wrap(state->wrap_s, clamp_is_to_edge); + wrap_t = gen6_translate_tex_wrap(state->wrap_t, clamp_is_to_edge); + wrap_r = gen6_translate_tex_wrap(state->wrap_r, clamp_is_to_edge); + + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 107: + * + * "When using cube map texture coordinates, only TEXCOORDMODE_CLAMP + * and TEXCOORDMODE_CUBE settings are valid, and each TC component + * must have the same Address Control mode." + * + * From the Ivy Bridge PRM, volume 4 part 1, page 96: + * + * "This field (Cube Surface Control Mode) must be set to + * CUBECTRLMODE_PROGRAMMED" + * + * Therefore, we cannot use "Cube Surface Control Mode" for seamless cube + * map filtering. + */ + if (state->seamless_cube_map && + (state->min_img_filter != PIPE_TEX_FILTER_NEAREST || + state->mag_img_filter != PIPE_TEX_FILTER_NEAREST)) { + wrap_cube = BRW_TEXCOORDMODE_CUBE; + } + else { + wrap_cube = BRW_TEXCOORDMODE_CLAMP; + } + + if (!state->normalized_coords) { + /* + * From the Ivy Bridge PRM, volume 4 part 1, page 98: + * + * "The following state must be set as indicated if this field + * (Non-normalized Coordinate Enable) is enabled: + * + * - TCX/Y/Z Address Control Mode must be TEXCOORDMODE_CLAMP, + * TEXCOORDMODE_HALF_BORDER, or TEXCOORDMODE_CLAMP_BORDER. + * - Surface Type must be SURFTYPE_2D or SURFTYPE_3D. + * - Mag Mode Filter must be MAPFILTER_NEAREST or + * MAPFILTER_LINEAR. + * - Min Mode Filter must be MAPFILTER_NEAREST or + * MAPFILTER_LINEAR. + * - Mip Mode Filter must be MIPFILTER_NONE. + * - Min LOD must be 0. + * - Max LOD must be 0. + * - MIP Count must be 0. + * - Surface Min LOD must be 0. 
+ * - Texture LOD Bias must be 0." + */ + assert(wrap_s == BRW_TEXCOORDMODE_CLAMP || + wrap_s == BRW_TEXCOORDMODE_CLAMP_BORDER); + assert(wrap_t == BRW_TEXCOORDMODE_CLAMP || + wrap_t == BRW_TEXCOORDMODE_CLAMP_BORDER); + assert(wrap_r == BRW_TEXCOORDMODE_CLAMP || + wrap_r == BRW_TEXCOORDMODE_CLAMP_BORDER); + + assert(mag_filter == BRW_MAPFILTER_NEAREST || + mag_filter == BRW_MAPFILTER_LINEAR); + assert(min_filter == BRW_MAPFILTER_NEAREST || + min_filter == BRW_MAPFILTER_LINEAR); + + /* work around a bug in util_blitter */ + mip_filter = BRW_MIPFILTER_NONE; + + assert(mip_filter == BRW_MIPFILTER_NONE); + } + + if (dev->gen >= ILO_GEN(7)) { + dw0 = 1 << 28 | + mip_filter << 20 | + lod_bias << 1; + + sampler->dw_filter = mag_filter << 17 | + min_filter << 14; + + sampler->dw_filter_aniso = BRW_MAPFILTER_ANISOTROPIC << 17 | + BRW_MAPFILTER_ANISOTROPIC << 14 | + 1; + + dw1 = min_lod << 20 | + max_lod << 8; + + if (state->compare_mode != PIPE_TEX_COMPARE_NONE) + dw1 |= gen6_translate_shadow_func(state->compare_func) << 1; + + dw3 = max_aniso << 19; + + /* round the coordinates for linear filtering */ + if (min_filter != BRW_MAPFILTER_NEAREST) { + dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MIN | + BRW_ADDRESS_ROUNDING_ENABLE_V_MIN | + BRW_ADDRESS_ROUNDING_ENABLE_R_MIN) << 13; + } + if (mag_filter != BRW_MAPFILTER_NEAREST) { + dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MAG | + BRW_ADDRESS_ROUNDING_ENABLE_V_MAG | + BRW_ADDRESS_ROUNDING_ENABLE_R_MAG) << 13; + } + + if (!state->normalized_coords) + dw3 |= 1 << 10; + + sampler->dw_wrap = wrap_s << 6 | + wrap_t << 3 | + wrap_r; + + /* + * As noted in the classic i965 driver, the HW may still reference + * wrap_t and wrap_r for 1D textures. 
We need to set them to a safe + * mode + */ + sampler->dw_wrap_1d = wrap_s << 6 | + BRW_TEXCOORDMODE_WRAP << 3 | + BRW_TEXCOORDMODE_WRAP; + + sampler->dw_wrap_cube = wrap_cube << 6 | + wrap_cube << 3 | + wrap_cube; + + STATIC_ASSERT(Elements(sampler->payload) >= 7); + + sampler->payload[0] = dw0; + sampler->payload[1] = dw1; + sampler->payload[2] = dw3; + + memcpy(&sampler->payload[3], + state->border_color.ui, sizeof(state->border_color.ui)); + } + else { + dw0 = 1 << 28 | + mip_filter << 20 | + lod_bias << 3; + + if (state->compare_mode != PIPE_TEX_COMPARE_NONE) + dw0 |= gen6_translate_shadow_func(state->compare_func); + + sampler->dw_filter = (min_filter != mag_filter) << 27 | + mag_filter << 17 | + min_filter << 14; + + sampler->dw_filter_aniso = BRW_MAPFILTER_ANISOTROPIC << 17 | + BRW_MAPFILTER_ANISOTROPIC << 14; + + dw1 = min_lod << 22 | + max_lod << 12; + + sampler->dw_wrap = wrap_s << 6 | + wrap_t << 3 | + wrap_r; + + sampler->dw_wrap_1d = wrap_s << 6 | + BRW_TEXCOORDMODE_WRAP << 3 | + BRW_TEXCOORDMODE_WRAP; + + sampler->dw_wrap_cube = wrap_cube << 6 | + wrap_cube << 3 | + wrap_cube; + + dw3 = max_aniso << 19; + + /* round the coordinates for linear filtering */ + if (min_filter != BRW_MAPFILTER_NEAREST) { + dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MIN | + BRW_ADDRESS_ROUNDING_ENABLE_V_MIN | + BRW_ADDRESS_ROUNDING_ENABLE_R_MIN) << 13; + } + if (mag_filter != BRW_MAPFILTER_NEAREST) { + dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MAG | + BRW_ADDRESS_ROUNDING_ENABLE_V_MAG | + BRW_ADDRESS_ROUNDING_ENABLE_R_MAG) << 13; + } + + if (!state->normalized_coords) + dw3 |= 1; + + STATIC_ASSERT(Elements(sampler->payload) >= 15); + + sampler->payload[0] = dw0; + sampler->payload[1] = dw1; + sampler->payload[2] = dw3; + + sampler_init_border_color_gen6(dev, + &state->border_color, &sampler->payload[3], 12); + } +} + static uint32_t gen6_emit_SAMPLER_STATE(const struct ilo_dev_info *dev, - const struct pipe_sampler_state **samplers, - const struct pipe_sampler_view **sampler_views, 
+ const struct ilo_sampler_cso * const *samplers, + const struct pipe_sampler_view * const *sampler_views, const uint32_t *sampler_border_colors, int num_samplers, struct ilo_cp *cp) @@ -4143,14 +4501,10 @@ gen6_emit_SAMPLER_STATE(const struct ilo_dev_info *dev, state_len, state_align, &state_offset); for (i = 0; i < num_samplers; i++) { - const struct pipe_sampler_state *sampler = samplers[i]; + const struct ilo_sampler_cso *sampler = samplers[i]; const struct pipe_sampler_view *view = sampler_views[i]; const uint32_t border_color = sampler_border_colors[i]; - enum pipe_texture_target target; - int mip_filter, min_filter, mag_filter, max_aniso; - int lod_bias, max_lod, min_lod, base_level; - int wrap_s, wrap_t, wrap_r; - bool clamp_to_edge; + uint32_t dw_filter, dw_wrap; /* there may be holes */ if (!sampler || !view) { @@ -4164,287 +4518,62 @@ gen6_emit_SAMPLER_STATE(const struct ilo_dev_info *dev, continue; } - target = view->texture->target; - - /* determine mip/min/mag filters */ - mip_filter = gen6_translate_tex_mipfilter(sampler->min_mip_filter); - - /* - * From the Sandy Bridge PRM, volume 4 part 1, page 103: - * - * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for - * surfaces of type SURFTYPE_3D." 
- */ - if (sampler->max_anisotropy && target != PIPE_TEXTURE_3D) { - min_filter = BRW_MAPFILTER_ANISOTROPIC; - mag_filter = BRW_MAPFILTER_ANISOTROPIC; - - if (sampler->max_anisotropy >= 2 && sampler->max_anisotropy <= 16) - max_aniso = sampler->max_anisotropy / 2 - 1; - else if (sampler->max_anisotropy > 16) - max_aniso = BRW_ANISORATIO_16; - else - max_aniso = BRW_ANISORATIO_2; - } - else { - min_filter = gen6_translate_tex_filter(sampler->min_img_filter); - mag_filter = gen6_translate_tex_filter(sampler->mag_img_filter); - - /* ignored */ - max_aniso = 0; - } - - /* - * For nearest filtering, PIPE_TEX_WRAP_CLAMP means - * PIPE_TEX_WRAP_CLAMP_TO_EDGE; for linear filtering, - * PIPE_TEX_WRAP_CLAMP means PIPE_TEX_WRAP_CLAMP_TO_BORDER while - * additionally clamping the texture coordinates to [0.0, 1.0]. - * - * The clamping is taken care of in the shaders. There are two filters - * here, but let the minification one has a say. - */ - clamp_to_edge = (sampler->min_img_filter == PIPE_TEX_FILTER_NEAREST); - - switch (target) { - case PIPE_TEXTURE_CUBE: + /* determine filter and wrap modes */ + switch (view->texture->target) { + case PIPE_TEXTURE_1D: + dw_filter = (sampler->anisotropic) ? + sampler->dw_filter_aniso : sampler->dw_filter; + dw_wrap = sampler->dw_wrap_1d; + break; + case PIPE_TEXTURE_3D: /* - * From the Sandy Bridge PRM, volume 4 part 1, page 107: - * - * "When using cube map texture coordinates, only - * TEXCOORDMODE_CLAMP and TEXCOORDMODE_CUBE settings are valid, - * and each TC component must have the same Address Control - * mode." - * - * From the Ivy Bridge PRM, volume 4 part 1, page 96: + * From the Sandy Bridge PRM, volume 4 part 1, page 103: * - * "This field (Cube Surface Control Mode) must be set to - * CUBECTRLMODE_PROGRAMMED" - * - * Therefore, we cannot use "Cube Surface Control Mode" for semless - * cube map filtering. + * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for + * surfaces of type SURFTYPE_3D." 
*/ - if (sampler->seamless_cube_map && - (sampler->min_img_filter != PIPE_TEX_FILTER_NEAREST || - sampler->mag_img_filter != PIPE_TEX_FILTER_NEAREST)) { - wrap_s = BRW_TEXCOORDMODE_CUBE; - wrap_t = BRW_TEXCOORDMODE_CUBE; - wrap_r = BRW_TEXCOORDMODE_CUBE; - } - else { - wrap_s = BRW_TEXCOORDMODE_CLAMP; - wrap_t = BRW_TEXCOORDMODE_CLAMP; - wrap_r = BRW_TEXCOORDMODE_CLAMP; - } + dw_filter = sampler->dw_filter; + dw_wrap = sampler->dw_wrap; break; - case PIPE_TEXTURE_1D: - wrap_s = gen6_translate_tex_wrap(sampler->wrap_s, clamp_to_edge); - /* - * as noted in the classic i965 driver, the HW may look at these - * values so we need to set them to a safe mode - */ - wrap_t = BRW_TEXCOORDMODE_WRAP; - wrap_r = BRW_TEXCOORDMODE_WRAP; + case PIPE_TEXTURE_CUBE: + dw_filter = (sampler->anisotropic) ? + sampler->dw_filter_aniso : sampler->dw_filter; + dw_wrap = sampler->dw_wrap_cube; break; default: - wrap_s = gen6_translate_tex_wrap(sampler->wrap_s, clamp_to_edge); - wrap_t = gen6_translate_tex_wrap(sampler->wrap_t, clamp_to_edge); - wrap_r = gen6_translate_tex_wrap(sampler->wrap_r, clamp_to_edge); + dw_filter = (sampler->anisotropic) ? + sampler->dw_filter_aniso : sampler->dw_filter; + dw_wrap = sampler->dw_wrap; break; } - /* - * Here is how the hardware calculate per-pixel LOD, from my reading of - * the PRMs: - * - * 1) LOD is set to log2(ratio of texels to pixels) if not specified in - * other ways. The number of texels is measured using level - * SurfMinLod. - * 2) Bias is added to LOD. - * 3) LOD is clamped to [MinLod, MaxLod], and the clamped value is - * compared with Base to determine whether magnification or - * minification is needed. - * (if preclamp is disabled, LOD is compared with Base before - * clamping) - * 4) If magnification is needed, or no mipmapping is requested, LOD is - * set to floor(MinLod). - * 5) LOD is clamped to [0, MIPCnt], and SurfMinLod is added to LOD. 
- * - * With Gallium interface, Base is always zero and view->u.tex.first_level - * specifies SurfMinLod. - * - * From the Sandy Bridge PRM, volume 4 part 1, page 21: - * - * "[DevSNB] Errata: Incorrect behavior is observed in cases where - * the min and mag mode filters are different and SurfMinLOD is - * nonzero. The determination of MagMode uses the following equation - * instead of the one in the above pseudocode: MagMode = (LOD + - * SurfMinLOD - Base <= 0)" - * - * As a way to work around that, we set Base to view->u.tex.first_level - * on GEN6. - */ - if (dev->gen >= ILO_GEN(7)) { - const float scale = 256.0f; - - /* [-16.0, 16.0) in S4.8 */ - lod_bias = (int) - (CLAMP(sampler->lod_bias, -16.0f, 15.9f) * scale); - lod_bias &= 0x1fff; + dw[0] = sampler->payload[0]; + dw[1] = sampler->payload[1]; + assert(!(border_color & 0x1f)); + dw[2] = border_color; + dw[3] = sampler->payload[2]; - base_level = 0; + dw[0] |= dw_filter; - /* [0.0, 14.0] in U4.8 */ - max_lod = (int) (CLAMP(sampler->max_lod, 0.0f, 14.0f) * scale); - min_lod = (int) (CLAMP(sampler->min_lod, 0.0f, 14.0f) * scale); + if (dev->gen >= ILO_GEN(7)) { + dw[3] |= dw_wrap; } else { - const float scale = 64.0f; - - /* [-16.0, 16.0) in S4.6 */ - lod_bias = (int) - (CLAMP(sampler->lod_bias, -16.0f, 15.9f) * scale); - lod_bias &= 0x7ff; - - base_level = view->u.tex.first_level; - - /* [0.0, 13.0] in U4.6 */ - max_lod = (int) (CLAMP(sampler->max_lod, 0.0f, 13.0f) * scale); - min_lod = (int) (CLAMP(sampler->min_lod, 0.0f, 13.0f) * scale); - } - - /* - * We want LOD to be clamped to determine magnification/minification, - * and get set to zero when it is magnification or when mipmapping is - * disabled. The hardware would set LOD to floor(MinLod) and that is a - * problem when MinLod is greater than or equal to 1.0f. - * - * We know that with Base being zero, it is always minification when - * MinLod is non-zero. 
To meet our need, we just need to set MinLod to - * zero and set MagFilter to MinFilter when mipmapping is disabled. - */ - if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && min_lod) { - min_lod = 0; - mag_filter = min_filter; - } - - if (!sampler->normalized_coords) { - /* work around a bug in util_blitter */ - mip_filter = BRW_MIPFILTER_NONE; - /* - * From the Ivy Bridge PRM, volume 4 part 1, page 98: + * From the Sandy Bridge PRM, volume 4 part 1, page 21: * - * "The following state must be set as indicated if this field - * (Non-normalized Coordinate Enable) is enabled: + * "[DevSNB] Errata: Incorrect behavior is observed in cases + * where the min and mag mode filters are different and + * SurfMinLOD is nonzero. The determination of MagMode uses the + * following equation instead of the one in the above + * pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)" * - * - TCX/Y/Z Address Control Mode must be TEXCOORDMODE_CLAMP, - * TEXCOORDMODE_HALF_BORDER, or TEXCOORDMODE_CLAMP_BORDER. - * - Surface Type must be SURFTYPE_2D or SURFTYPE_3D. - * - Mag Mode Filter must be MAPFILTER_NEAREST or - * MAPFILTER_LINEAR. - * - Min Mode Filter must be MAPFILTER_NEAREST or - * MAPFILTER_LINEAR. - * - Mip Mode Filter must be MIPFILTER_NONE. - * - Min LOD must be 0. - * - Max LOD must be 0. - * - MIP Count must be 0. - * - Surface Min LOD must be 0. - * - Texture LOD Bias must be 0." + * As a way to work around that, we set Base to + * view->u.tex.first_level. 
*/ - assert(wrap_s == BRW_TEXCOORDMODE_CLAMP || - wrap_s == BRW_TEXCOORDMODE_CLAMP_BORDER); - assert(wrap_t == BRW_TEXCOORDMODE_CLAMP || - wrap_t == BRW_TEXCOORDMODE_CLAMP_BORDER); - assert(wrap_r == BRW_TEXCOORDMODE_CLAMP || - wrap_r == BRW_TEXCOORDMODE_CLAMP_BORDER); - - assert(target == PIPE_TEXTURE_RECT); - - assert(mag_filter == BRW_MAPFILTER_NEAREST || - mag_filter == BRW_MAPFILTER_LINEAR); - assert(min_filter == BRW_MAPFILTER_NEAREST || - min_filter == BRW_MAPFILTER_LINEAR); - assert(mip_filter == BRW_MIPFILTER_NONE); - } - - if (dev->gen >= ILO_GEN(7)) { - dw[0] = 1 << 28 | - base_level << 22 | - mip_filter << 20 | - mag_filter << 17 | - min_filter << 14 | - lod_bias << 1; - - /* enable EWA filtering unconditionally breaks some piglit tests */ - if (sampler->max_anisotropy) - dw[0] |= 1; - - dw[1] = min_lod << 20 | - max_lod << 8; - - if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) - dw[1] |= gen6_translate_shadow_func(sampler->compare_func) << 1; - - assert(!(border_color & 0x1f)); - dw[2] = border_color; - - dw[3] = max_aniso << 19 | - wrap_s << 6 | - wrap_t << 3 | - wrap_r; - - /* round the coordinates for linear filtering */ - if (min_filter != BRW_MAPFILTER_NEAREST) { - dw[3] |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MIN | - BRW_ADDRESS_ROUNDING_ENABLE_V_MIN | - BRW_ADDRESS_ROUNDING_ENABLE_R_MIN) << 13; - } - if (mag_filter != BRW_MAPFILTER_NEAREST) { - dw[3] |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MAG | - BRW_ADDRESS_ROUNDING_ENABLE_V_MAG | - BRW_ADDRESS_ROUNDING_ENABLE_R_MAG) << 13; - } + dw[0] |= view->u.tex.first_level << 22; - if (!sampler->normalized_coords) - dw[3] |= 1 << 10; - } - else { - dw[0] = 1 << 28 | - (min_filter != mag_filter) << 27 | - base_level << 22 | - mip_filter << 20 | - mag_filter << 17 | - min_filter << 14 | - lod_bias << 3; - - if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) - dw[0] |= gen6_translate_shadow_func(sampler->compare_func); - - dw[1] = min_lod << 22 | - max_lod << 12 | - wrap_s << 6 | - wrap_t << 3 | - wrap_r; - - 
assert(!(border_color & 0x1f)); - dw[2] = border_color; - - dw[3] = max_aniso << 19; - - /* round the coordinates for linear filtering */ - if (min_filter != BRW_MAPFILTER_NEAREST) { - dw[3] |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MIN | - BRW_ADDRESS_ROUNDING_ENABLE_V_MIN | - BRW_ADDRESS_ROUNDING_ENABLE_R_MIN) << 13; - } - if (mag_filter != BRW_MAPFILTER_NEAREST) { - dw[3] |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MAG | - BRW_ADDRESS_ROUNDING_ENABLE_V_MAG | - BRW_ADDRESS_ROUNDING_ENABLE_R_MAG) << 13; - } - - if (!sampler->normalized_coords) - dw[3] |= 1; + dw[1] |= dw_wrap; } dw += 4; @@ -4455,73 +4584,19 @@ gen6_emit_SAMPLER_STATE(const struct ilo_dev_info *dev, static uint32_t gen6_emit_SAMPLER_BORDER_COLOR_STATE(const struct ilo_dev_info *dev, - const union pipe_color_union *color, + const struct ilo_sampler_cso *sampler, struct ilo_cp *cp) { const int state_align = 32 / 4; - const int state_len = 12; + const int state_len = (dev->gen >= ILO_GEN(7)) ? 4 : 12; uint32_t state_offset, *dw; - float rgba[4] = { - color->f[0], color->f[1], color->f[2], color->f[3], - }; - ILO_GPE_VALID_GEN(dev, 6, 6); + ILO_GPE_VALID_GEN(dev, 6, 7); dw = ilo_cp_steal_ptr(cp, "SAMPLER_BORDER_COLOR_STATE", state_len, state_align, &state_offset); - /* - * This state is not documented in the Sandy Bridge PRM, but in the - * Ironlake PRM. SNORM8 seems to be in DW11 instead of DW1. 
- */ - - /* IEEE_FP */ - dw[1] = fui(rgba[0]); - dw[2] = fui(rgba[1]); - dw[3] = fui(rgba[2]); - dw[4] = fui(rgba[3]); - - /* FLOAT_16 */ - dw[5] = util_float_to_half(rgba[0]) | - util_float_to_half(rgba[1]) << 16; - dw[6] = util_float_to_half(rgba[2]) | - util_float_to_half(rgba[3]) << 16; - - /* clamp to [-1.0f, 1.0f] */ - rgba[0] = CLAMP(rgba[0], -1.0f, 1.0f); - rgba[1] = CLAMP(rgba[1], -1.0f, 1.0f); - rgba[2] = CLAMP(rgba[2], -1.0f, 1.0f); - rgba[3] = CLAMP(rgba[3], -1.0f, 1.0f); - - /* SNORM16 */ - dw[9] = (int16_t) util_iround(rgba[0] * 32767.0f) | - (int16_t) util_iround(rgba[1] * 32767.0f) << 16; - dw[10] = (int16_t) util_iround(rgba[2] * 32767.0f) | - (int16_t) util_iround(rgba[3] * 32767.0f) << 16; - - /* SNORM8 */ - dw[11] = (int8_t) util_iround(rgba[0] * 127.0f) | - (int8_t) util_iround(rgba[1] * 127.0f) << 8 | - (int8_t) util_iround(rgba[2] * 127.0f) << 16 | - (int8_t) util_iround(rgba[3] * 127.0f) << 24; - - /* clamp to [0.0f, 1.0f] */ - rgba[0] = CLAMP(rgba[0], 0.0f, 1.0f); - rgba[1] = CLAMP(rgba[1], 0.0f, 1.0f); - rgba[2] = CLAMP(rgba[2], 0.0f, 1.0f); - rgba[3] = CLAMP(rgba[3], 0.0f, 1.0f); - - /* UNORM8 */ - dw[0] = (uint8_t) util_iround(rgba[0] * 255.0f) | - (uint8_t) util_iround(rgba[1] * 255.0f) << 8 | - (uint8_t) util_iround(rgba[2] * 255.0f) << 16 | - (uint8_t) util_iround(rgba[3] * 255.0f) << 24; - - /* UNORM16 */ - dw[7] = (uint16_t) util_iround(rgba[0] * 65535.0f) | - (uint16_t) util_iround(rgba[1] * 65535.0f) << 16; - dw[8] = (uint16_t) util_iround(rgba[2] * 65535.0f) | - (uint16_t) util_iround(rgba[3] * 65535.0f) << 16; + memcpy(dw, &sampler->payload[3], state_len * 4); return state_offset; } diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen6.h b/src/gallium/drivers/ilo/ilo_gpe_gen6.h index a44e1ebf148..bb09dc0ff94 100644 --- a/src/gallium/drivers/ilo/ilo_gpe_gen6.h +++ b/src/gallium/drivers/ilo/ilo_gpe_gen6.h @@ -448,15 +448,15 @@ typedef uint32_t typedef uint32_t (*ilo_gpe_gen6_SAMPLER_STATE)(const struct ilo_dev_info *dev, - const 
struct pipe_sampler_state **samplers, - const struct pipe_sampler_view **sampler_views, + const struct ilo_sampler_cso * const *samplers, + const struct pipe_sampler_view * const *sampler_views, const uint32_t *sampler_border_colors, int num_samplers, struct ilo_cp *cp); typedef uint32_t (*ilo_gpe_gen6_SAMPLER_BORDER_COLOR_STATE)(const struct ilo_dev_info *dev, - const union pipe_color_union *color, + const struct ilo_sampler_cso *sampler, struct ilo_cp *cp); typedef uint32_t diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen7.c b/src/gallium/drivers/ilo/ilo_gpe_gen7.c index dfb5fe15dc8..383b7d9a3d2 100644 --- a/src/gallium/drivers/ilo/ilo_gpe_gen7.c +++ b/src/gallium/drivers/ilo/ilo_gpe_gen7.c @@ -1768,24 +1768,6 @@ gen7_emit_cbuf_SURFACE_STATE(const struct ilo_dev_info *dev, return gen7_emit_SURFACE_STATE(dev, buf->bo, false, dw, Elements(dw), cp); } -static uint32_t -gen7_emit_SAMPLER_BORDER_COLOR_STATE(const struct ilo_dev_info *dev, - const union pipe_color_union *color, - struct ilo_cp *cp) -{ - const int state_align = 32 / 4; - const int state_len = 4; - uint32_t state_offset, *dw; - - ILO_GPE_VALID_GEN(dev, 7, 7); - - dw = ilo_cp_steal_ptr(cp, "SAMPLER_BORDER_COLOR_STATE", - state_len, state_align, &state_offset); - memcpy(dw, color->f, 4 * 4); - - return state_offset; -} - static int gen7_estimate_command_size(const struct ilo_dev_info *dev, enum ilo_gpe_gen7_command cmd, @@ -2014,7 +1996,7 @@ gen7_init(struct ilo_gpe_gen7 *gen7) GEN7_SET(gen7, view_SURFACE_STATE); GEN7_SET(gen7, cbuf_SURFACE_STATE); GEN7_USE(gen7, SAMPLER_STATE, gen6); - GEN7_SET(gen7, SAMPLER_BORDER_COLOR_STATE); + GEN7_USE(gen7, SAMPLER_BORDER_COLOR_STATE, gen6); GEN7_USE(gen7, push_constant_buffer, gen6); #undef GEN7_USE #undef GEN7_SET diff --git a/src/gallium/drivers/ilo/ilo_shader.c b/src/gallium/drivers/ilo/ilo_shader.c index 76ba7f04c66..754b59baec2 100644 --- a/src/gallium/drivers/ilo/ilo_shader.c +++ b/src/gallium/drivers/ilo/ilo_shader.c @@ -79,8 +79,8 @@ 
ilo_shader_variant_init(struct ilo_shader_variant *variant, for (i = 0; i < info->num_samplers; i++) { const struct pipe_sampler_view *view = ilo->view[info->type].states[i]; - const struct pipe_sampler_state *sampler = - ilo->sampler[info->type].states[i]; + const struct ilo_sampler_cso *sampler = + ilo->sampler[info->type].cso[i]; if (view) { variant->sampler_view_swizzles[i].r = view->swizzle_r; @@ -106,13 +106,10 @@ ilo_shader_variant_init(struct ilo_shader_variant *variant, * the HW wrap mode is set to BRW_TEXCOORDMODE_CLAMP_BORDER, and we need * to manually saturate the texture coordinates. */ - if (sampler && sampler->min_img_filter != PIPE_TEX_FILTER_NEAREST) { - if (sampler->wrap_s == PIPE_TEX_WRAP_CLAMP) - variant->saturate_tex_coords[0] |= 1 << i; - if (sampler->wrap_t == PIPE_TEX_WRAP_CLAMP) - variant->saturate_tex_coords[1] |= 1 << i; - if (sampler->wrap_r == PIPE_TEX_WRAP_CLAMP) - variant->saturate_tex_coords[2] |= 1 << i; + if (sampler) { + variant->saturate_tex_coords[0] |= sampler->saturate_s << i; + variant->saturate_tex_coords[1] |= sampler->saturate_t << i; + variant->saturate_tex_coords[2] |= sampler->saturate_r << i; } } } diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c index cc74d03c49b..ab2a4e9ae10 100644 --- a/src/gallium/drivers/ilo/ilo_state.c +++ b/src/gallium/drivers/ilo/ilo_state.c @@ -186,12 +186,13 @@ static void * ilo_create_sampler_state(struct pipe_context *pipe, const struct pipe_sampler_state *state) { - struct pipe_sampler_state *sampler; + struct ilo_context *ilo = ilo_context(pipe); + struct ilo_sampler_cso *sampler; - sampler = MALLOC_STRUCT(pipe_sampler_state); + sampler = MALLOC_STRUCT(ilo_sampler_cso); assert(sampler); - *sampler = *state; + ilo_gpe_init_sampler_cso(ilo->dev, state, sampler); return sampler; } @@ -201,10 +202,10 @@ bind_samplers(struct ilo_context *ilo, unsigned shader, unsigned start, unsigned count, void **samplers, bool unbind_old) { - struct pipe_sampler_state 
**dst = ilo->sampler[shader].states; + const struct ilo_sampler_cso **dst = ilo->sampler[shader].cso; unsigned i; - assert(start + count <= Elements(ilo->sampler[shader].states)); + assert(start + count <= Elements(ilo->sampler[shader].cso)); if (unbind_old) { if (!samplers) { @@ -237,7 +238,7 @@ bind_samplers(struct ilo_context *ilo, if (ilo->sampler[shader].count <= start + count) { count += start; - while (count > 0 && !ilo->sampler[shader].states[count - 1]) + while (count > 0 && !ilo->sampler[shader].cso[count - 1]) count--; ilo->sampler[shader].count = count; -- 2.30.2