X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fblorp%2Fblorp_blit.c;h=3aac0abf274c1612e807d0150c531a6d5b81fc1d;hb=HEAD;hp=cdabf441e52890740cce26ddf39381a47b0ba2ec;hpb=9fbe2a20078242594db788e5abec41651cbc6991;p=mesa.git diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c index cdabf441e52..3aac0abf274 100644 --- a/src/intel/blorp/blorp_blit.c +++ b/src/intel/blorp/blorp_blit.c @@ -29,26 +29,12 @@ #include "util/format_rgb9e5.h" /* header-only include needed for _mesa_unorm_to_float and friends. */ #include "mesa/main/format_utils.h" +#include "util/u_math.h" #define FILE_DEBUG_FLAG DEBUG_BLORP static const bool split_blorp_blit_debug = false; -/** - * Enum to specify the order of arguments in a sampler message - */ -enum sampler_message_arg -{ - SAMPLER_MESSAGE_ARG_U_FLOAT, - SAMPLER_MESSAGE_ARG_V_FLOAT, - SAMPLER_MESSAGE_ARG_U_INT, - SAMPLER_MESSAGE_ARG_V_INT, - SAMPLER_MESSAGE_ARG_R_INT, - SAMPLER_MESSAGE_ARG_SI_INT, - SAMPLER_MESSAGE_ARG_MCS_INT, - SAMPLER_MESSAGE_ARG_ZERO_INT, -}; - struct brw_blorp_blit_vars { /* Input values from brw_blorp_wm_inputs */ nir_variable *v_discard_rect; @@ -58,12 +44,6 @@ struct brw_blorp_blit_vars { nir_variable *v_src_offset; nir_variable *v_dst_offset; nir_variable *v_src_inv_size; - - /* gl_FragCoord */ - nir_variable *frag_coord; - - /* gl_FragColor */ - nir_variable *color_out; }; static void @@ -82,15 +62,6 @@ brw_blorp_blit_vars_init(nir_builder *b, struct brw_blorp_blit_vars *v, LOAD_INPUT(src_inv_size, glsl_vector_type(GLSL_TYPE_FLOAT, 2)) #undef LOAD_INPUT - - v->frag_coord = nir_variable_create(b->shader, nir_var_shader_in, - glsl_vec4_type(), "gl_FragCoord"); - v->frag_coord->data.location = VARYING_SLOT_POS; - v->frag_coord->data.origin_upper_left = true; - - v->color_out = nir_variable_create(b->shader, nir_var_shader_out, - glsl_vec4_type(), "gl_FragColor"); - v->color_out->data.location = FRAG_RESULT_COLOR; } static nir_ssa_def * @@ -98,7 +69,7 @@ 
blorp_blit_get_frag_coords(nir_builder *b, const struct brw_blorp_blit_prog_key *key, struct brw_blorp_blit_vars *v) { - nir_ssa_def *coord = nir_f2i32(b, nir_load_var(b, v->frag_coord)); + nir_ssa_def *coord = nir_f2i32(b, nir_load_frag_coord(b)); /* Account for destination surface intratile offset * @@ -582,15 +553,16 @@ static inline int count_trailing_one_bits(unsigned value) #ifdef HAVE___BUILTIN_CTZ return __builtin_ctz(~value); #else - return _mesa_bitcount(value & ~(value + 1)); + return util_bitcount(value & ~(value + 1)); #endif } static nir_ssa_def * -blorp_nir_manual_blend_average(nir_builder *b, struct brw_blorp_blit_vars *v, - nir_ssa_def *pos, unsigned tex_samples, - enum isl_aux_usage tex_aux_usage, - nir_alu_type dst_type) +blorp_nir_combine_samples(nir_builder *b, struct brw_blorp_blit_vars *v, + nir_ssa_def *pos, unsigned tex_samples, + enum isl_aux_usage tex_aux_usage, + nir_alu_type dst_type, + enum blorp_filter filter) { /* If non-null, this is the outer-most if statement */ nir_if *outer_if = NULL; @@ -599,9 +571,38 @@ blorp_nir_manual_blend_average(nir_builder *b, struct brw_blorp_blit_vars *v, nir_local_variable_create(b->impl, glsl_vec4_type(), "color"); nir_ssa_def *mcs = NULL; - if (tex_aux_usage == ISL_AUX_USAGE_MCS) + if (isl_aux_usage_has_mcs(tex_aux_usage)) mcs = blorp_blit_txf_ms_mcs(b, v, pos); + nir_op combine_op; + switch (filter) { + case BLORP_FILTER_AVERAGE: + assert(dst_type == nir_type_float); + combine_op = nir_op_fadd; + break; + + case BLORP_FILTER_MIN_SAMPLE: + switch (dst_type) { + case nir_type_int: combine_op = nir_op_imin; break; + case nir_type_uint: combine_op = nir_op_umin; break; + case nir_type_float: combine_op = nir_op_fmin; break; + default: unreachable("Invalid dst_type"); + } + break; + + case BLORP_FILTER_MAX_SAMPLE: + switch (dst_type) { + case nir_type_int: combine_op = nir_op_imax; break; + case nir_type_uint: combine_op = nir_op_umax; break; + case nir_type_float: combine_op = nir_op_fmax; break; + 
default: unreachable("Invalid dst_type"); + } + break; + + default: + unreachable("Invalid filter"); + } + /* We add together samples using a binary tree structure, e.g. for 4x MSAA: * * result = ((sample[0] + sample[1]) + (sample[2] + sample[3])) / 4 @@ -634,7 +635,7 @@ blorp_nir_manual_blend_average(nir_builder *b, struct brw_blorp_blit_vars *v, nir_ssa_def *texture_data[5]; unsigned stack_depth = 0; for (unsigned i = 0; i < tex_samples; ++i) { - assert(stack_depth == _mesa_bitcount(i)); /* Loop invariant */ + assert(stack_depth == util_bitcount(i)); /* Loop invariant */ /* Push sample i onto the stack */ assert(stack_depth < ARRAY_SIZE(texture_data)); @@ -644,7 +645,7 @@ blorp_nir_manual_blend_average(nir_builder *b, struct brw_blorp_blit_vars *v, nir_imm_int(b, i)); texture_data[stack_depth++] = blorp_nir_txf_ms(b, v, ms_pos, mcs, dst_type); - if (i == 0 && tex_aux_usage == ISL_AUX_USAGE_MCS) { + if (i == 0 && isl_aux_usage_has_mcs(tex_aux_usage)) { /* The Ivy Bridge PRM, Vol4 Part1 p27 (Multisample Control Surface) * suggests an optimization: * @@ -688,18 +689,22 @@ blorp_nir_manual_blend_average(nir_builder *b, struct brw_blorp_blit_vars *v, assert(stack_depth >= 2); --stack_depth; - assert(dst_type == nir_type_float); texture_data[stack_depth - 1] = - nir_fadd(b, texture_data[stack_depth - 1], - texture_data[stack_depth]); + nir_build_alu(b, combine_op, + texture_data[stack_depth - 1], + texture_data[stack_depth], + NULL, NULL); } } /* We should have just 1 sample on the stack now. 
*/ assert(stack_depth == 1); - texture_data[0] = nir_fmul(b, texture_data[0], - nir_imm_float(b, 1.0 / tex_samples)); + if (filter == BLORP_FILTER_AVERAGE) { + assert(dst_type == nir_type_float); + texture_data[0] = nir_fmul(b, texture_data[0], + nir_imm_float(b, 1.0 / tex_samples)); + } nir_store_var(b, color, texture_data[0], 0xf); @@ -709,18 +714,6 @@ blorp_nir_manual_blend_average(nir_builder *b, struct brw_blorp_blit_vars *v, return nir_load_var(b, color); } -static inline nir_ssa_def * -nir_imm_vec2(nir_builder *build, float x, float y) -{ - nir_const_value v; - - memset(&v, 0, sizeof(v)); - v.f32[0] = x; - v.f32[1] = y; - - return nir_build_imm(build, 4, 32, v); -} - static nir_ssa_def * blorp_nir_manual_blend_bilinear(nir_builder *b, nir_ssa_def *pos, unsigned tex_samples, @@ -768,7 +761,7 @@ blorp_nir_manual_blend_bilinear(nir_builder *b, nir_ssa_def *pos, * here inside the loop after computing the pixel coordinates. */ nir_ssa_def *mcs = NULL; - if (key->tex_aux_usage == ISL_AUX_USAGE_MCS) + if (isl_aux_usage_has_mcs(key->tex_aux_usage)) mcs = blorp_blit_txf_ms_mcs(b, v, sample_coords_int); /* Compute sample index and map the sample index to a sample number. @@ -776,6 +769,14 @@ blorp_nir_manual_blend_bilinear(nir_builder *b, nir_ssa_def *pos, * grid of samples with in a pixel. Sample number layout shows the * rectangular grid of samples roughly corresponding to the real sample * locations with in a pixel. 
+ * + * In the case of 2x MSAA, the layout of sample indices is reversed from + * the layout of sample numbers: + * + * sample index layout : --------- sample number layout : --------- + * | 0 | 1 | | 1 | 0 | + * --------- --------- + * * In case of 4x MSAA, layout of sample indices matches the layout of * sample numbers: * --------- @@ -819,7 +820,9 @@ blorp_nir_manual_blend_bilinear(nir_builder *b, nir_ssa_def *pos, key->x_scale * key->y_scale)); sample = nir_f2i32(b, sample); - if (tex_samples == 8) { + if (tex_samples == 2) { + sample = nir_isub(b, nir_imm_int(b, 1), sample); + } else if (tex_samples == 8) { sample = nir_iand(b, nir_ishr(b, nir_imm_int(b, 0x64210573), nir_ishl(b, sample, nir_imm_int(b, 2))), nir_imm_int(b, 0xf)); @@ -864,8 +867,6 @@ static nir_ssa_def * bit_cast_color(struct nir_builder *b, nir_ssa_def *color, const struct brw_blorp_blit_prog_key *key) { - assert(key->texture_data_type == nir_type_uint); - if (key->src_format == key->dst_format) return color; @@ -874,40 +875,51 @@ bit_cast_color(struct nir_builder *b, nir_ssa_def *color, const struct isl_format_layout *dst_fmtl = isl_format_get_layout(key->dst_format); - /* They must be uint formats with the same bit size */ + /* They must be formats with the same bit size */ assert(src_fmtl->bpb == dst_fmtl->bpb); - assert(src_fmtl->channels.r.type == ISL_UINT); - assert(dst_fmtl->channels.r.type == ISL_UINT); - /* They must be in regular color formats (no luminance or alpha) */ - assert(src_fmtl->channels.r.bits > 0); - assert(dst_fmtl->channels.r.bits > 0); + if (src_fmtl->bpb <= 32) { + assert(src_fmtl->channels.r.type == ISL_UINT || + src_fmtl->channels.r.type == ISL_UNORM); + assert(dst_fmtl->channels.r.type == ISL_UINT || + dst_fmtl->channels.r.type == ISL_UNORM); + + nir_ssa_def *packed = nir_imm_int(b, 0); + for (unsigned c = 0; c < 4; c++) { + if (src_fmtl->channels_array[c].bits == 0) + continue; - /* They must be in RGBA order (possibly with channels missing) */ - 
assert(src_fmtl->channels.r.start_bit == 0); - assert(dst_fmtl->channels.r.start_bit == 0); + const unsigned chan_start_bit = src_fmtl->channels_array[c].start_bit; + const unsigned chan_bits = src_fmtl->channels_array[c].bits; - if (src_fmtl->bpb <= 32) { - const unsigned src_channels = - isl_format_get_num_channels(key->src_format); - const unsigned src_bits[4] = { - src_fmtl->channels.r.bits, - src_fmtl->channels.g.bits, - src_fmtl->channels.b.bits, - src_fmtl->channels.a.bits, - }; - const unsigned dst_channels = - isl_format_get_num_channels(key->dst_format); - const unsigned dst_bits[4] = { - dst_fmtl->channels.r.bits, - dst_fmtl->channels.g.bits, - dst_fmtl->channels.b.bits, - dst_fmtl->channels.a.bits, - }; - nir_ssa_def *packed = - nir_format_pack_uint_unmasked(b, color, src_bits, src_channels); - color = nir_format_unpack_uint(b, packed, dst_bits, dst_channels); + nir_ssa_def *chan = nir_channel(b, color, c); + if (src_fmtl->channels_array[c].type == ISL_UNORM) + chan = nir_format_float_to_unorm(b, chan, &chan_bits); + + packed = nir_ior(b, packed, nir_shift(b, chan, chan_start_bit)); + } + + nir_ssa_def *chans[4] = { }; + for (unsigned c = 0; c < 4; c++) { + if (dst_fmtl->channels_array[c].bits == 0) { + chans[c] = nir_imm_int(b, 0); + continue; + } + + const unsigned chan_start_bit = dst_fmtl->channels_array[c].start_bit; + const unsigned chan_bits = dst_fmtl->channels_array[c].bits; + chans[c] = nir_iand(b, nir_shift(b, packed, -(int)chan_start_bit), + nir_imm_int(b, BITFIELD_MASK(chan_bits))); + + if (dst_fmtl->channels_array[c].type == ISL_UNORM) + chans[c] = nir_format_unorm_to_float(b, chans[c], &chan_bits); + } + color = nir_vec(b, chans, 4); } else { + /* This path only supports UINT formats */ + assert(src_fmtl->channels.r.type == ISL_UINT); + assert(dst_fmtl->channels.r.type == ISL_UINT); + const unsigned src_bpc = src_fmtl->channels.r.bits; const unsigned dst_bpc = dst_fmtl->channels.r.bits; @@ -929,7 +941,7 @@ bit_cast_color(struct 
nir_builder *b, nir_ssa_def *color, isl_format_get_num_channels(key->src_format); color = nir_channels(b, color, (1 << src_channels) - 1); - color = nir_format_bitcast_uint_vec_unmasked(b, color, src_bpc, dst_bpc); + color = nir_format_bitcast_uvec_unmasked(b, color, src_bpc, dst_bpc); } /* Blorp likes to assume that colors are vec4s */ @@ -984,15 +996,17 @@ convert_color(struct nir_builder *b, nir_ssa_def *color, nir_ssa_def *value; if (key->dst_format == ISL_FORMAT_R24_UNORM_X8_TYPELESS) { - /* The destination image is bound as R32_UNORM but the data needs to be + /* The destination image is bound as R32_UINT but the data needs to be * in R24_UNORM_X8_TYPELESS. The bottom 24 are the actual data and the * top 8 need to be zero. We can accomplish this by simply multiplying * by a factor to scale things down. */ - float factor = (float)((1 << 24) - 1) / (float)UINT32_MAX; - value = nir_fmul(b, nir_fsat(b, nir_channel(b, color, 0)), - nir_imm_float(b, factor)); + unsigned factor = (1 << 24) - 1; + value = nir_fsat(b, nir_channel(b, color, 0)); + value = nir_f2i32(b, nir_fmul(b, value, nir_imm_float(b, factor))); } else if (key->dst_format == ISL_FORMAT_L8_UNORM_SRGB) { + value = nir_format_linear_to_srgb(b, nir_channel(b, color, 0)); + } else if (key->dst_format == ISL_FORMAT_R8G8B8_UNORM_SRGB) { value = nir_format_linear_to_srgb(b, color); } else if (key->dst_format == ISL_FORMAT_R9G9B9E5_SHAREDEXP) { value = nir_format_pack_r9g9b9e5(b, color); @@ -1000,8 +1014,14 @@ convert_color(struct nir_builder *b, nir_ssa_def *color, unreachable("Unsupported format conversion"); } - nir_ssa_def *u = nir_ssa_undef(b, 1, 32); - return nir_vec4(b, value, u, u, u); + nir_ssa_def *out_comps[4]; + for (unsigned i = 0; i < 4; i++) { + if (i < value->num_components) + out_comps[i] = nir_channel(b, value, i); + else + out_comps[i] = nir_ssa_undef(b, 1, 32); + } + return nir_vec(b, out_comps, 4); } /** @@ -1179,7 +1199,7 @@ brw_blorp_build_nir_shader(struct blorp_context *blorp, void 
*mem_ctx, (key->dst_samples <= 1)); nir_builder b; - nir_builder_init_simple_shader(&b, mem_ctx, MESA_SHADER_FRAGMENT, NULL); + blorp_nir_init_shader(&b, mem_ctx, MESA_SHADER_FRAGMENT, NULL); struct brw_blorp_blit_vars v; brw_blorp_blit_vars_init(&b, &v, key); @@ -1310,7 +1330,7 @@ brw_blorp_build_nir_shader(struct blorp_context *blorp, void *mem_ctx, color = blorp_nir_txf(&b, &v, src_pos, key->texture_data_type); } else { nir_ssa_def *mcs = NULL; - if (key->tex_aux_usage == ISL_AUX_USAGE_MCS) + if (isl_aux_usage_has_mcs(key->tex_aux_usage)) mcs = blorp_blit_txf_ms_mcs(&b, &v, src_pos); color = blorp_nir_txf_ms(&b, &v, src_pos, mcs, key->texture_data_type); @@ -1332,6 +1352,8 @@ brw_blorp_build_nir_shader(struct blorp_context *blorp, void *mem_ctx, break; case BLORP_FILTER_AVERAGE: + case BLORP_FILTER_MIN_SAMPLE: + case BLORP_FILTER_MAX_SAMPLE: assert(!key->src_tiled_w); assert(key->tex_samples == key->src_samples); assert(key->tex_layout == key->src_layout); @@ -1350,15 +1372,17 @@ brw_blorp_build_nir_shader(struct blorp_context *blorp, void *mem_ctx, * to multiply our X and Y coordinates each by 2 and then add 1. */ assert(key->src_coords_normalized); + assert(key->filter == BLORP_FILTER_AVERAGE); src_pos = nir_fadd(&b, nir_i2f32(&b, src_pos), nir_imm_float(&b, 0.5f)); color = blorp_nir_tex(&b, &v, key, src_pos); } else { /* Gen7+ hardware doesn't automaticaly blend. 
*/ - color = blorp_nir_manual_blend_average(&b, &v, src_pos, key->src_samples, - key->tex_aux_usage, - key->texture_data_type); + color = blorp_nir_combine_samples(&b, &v, src_pos, key->src_samples, + key->tex_aux_usage, + key->texture_data_type, + key->filter); } break; @@ -1382,6 +1406,29 @@ brw_blorp_build_nir_shader(struct blorp_context *blorp, void *mem_ctx, color = bit_cast_color(&b, color, key); } else if (key->dst_format) { color = convert_color(&b, color, key); + } else if (key->uint32_to_sint) { + /* Normally the hardware will take care of converting values from/to + * the source and destination formats. But a few cases need help. + * + * The Skylake PRM, volume 07, page 658 has a programming note: + * + * "When using SINT or UINT rendertarget surface formats, Blending + * must be DISABLED. The Pre-Blend Color Clamp Enable and Color + * Clamp Range fields are ignored, and an implied clamp to the + * rendertarget surface format is performed." + * + * For UINT to SINT blits, our sample operation gives us a uint32_t, + * but our render target write expects a signed int32_t number. If we + * simply passed the value along, the hardware would interpret a value + * with bit 31 set as a negative value, clamping it to the largest + * negative number the destination format could represent. But the + * actual source value is a positive number, so we want to clamp it + * to INT_MAX. To fix this, we explicitly take min(color, INT_MAX). + */ + color = nir_umin(&b, color, nir_imm_int(&b, INT32_MAX)); + } else if (key->sint32_to_uint) { + /* Similar to above, but clamping negative numbers to zero. 
*/ + color = nir_imax(&b, color, nir_imm_int(&b, 0)); } if (key->dst_rgb) { @@ -1403,17 +1450,39 @@ brw_blorp_build_nir_shader(struct blorp_context *blorp, void *mem_ctx, color = nir_vec4(&b, color_component, u, u, u); } - nir_store_var(&b, v.color_out, color, 0xf); + if (key->dst_usage == ISL_SURF_USAGE_RENDER_TARGET_BIT) { + nir_variable *color_out = + nir_variable_create(b.shader, nir_var_shader_out, + glsl_vec4_type(), "gl_FragColor"); + color_out->data.location = FRAG_RESULT_COLOR; + nir_store_var(&b, color_out, color, 0xf); + } else if (key->dst_usage == ISL_SURF_USAGE_DEPTH_BIT) { + nir_variable *depth_out = + nir_variable_create(b.shader, nir_var_shader_out, + glsl_float_type(), "gl_FragDepth"); + depth_out->data.location = FRAG_RESULT_DEPTH; + nir_store_var(&b, depth_out, nir_channel(&b, color, 0), 0x1); + } else if (key->dst_usage == ISL_SURF_USAGE_STENCIL_BIT) { + nir_variable *stencil_out = + nir_variable_create(b.shader, nir_var_shader_out, + glsl_int_type(), "gl_FragStencilRef"); + stencil_out->data.location = FRAG_RESULT_STENCIL; + nir_store_var(&b, stencil_out, nir_channel(&b, color, 0), 0x1); + } else { + unreachable("Invalid destination usage"); + } return b.shader; } static bool -brw_blorp_get_blit_kernel(struct blorp_context *blorp, +brw_blorp_get_blit_kernel(struct blorp_batch *batch, struct blorp_params *params, const struct brw_blorp_blit_prog_key *prog_key) { - if (blorp->lookup_shader(blorp, prog_key, sizeof(*prog_key), + struct blorp_context *blorp = batch->blorp; + + if (blorp->lookup_shader(batch, prog_key, sizeof(*prog_key), &params->wm_prog_kernel, &params->wm_prog_data)) return true; @@ -1427,16 +1496,17 @@ brw_blorp_get_blit_kernel(struct blorp_context *blorp, struct brw_wm_prog_key wm_key; brw_blorp_init_wm_prog_key(&wm_key); - wm_key.tex.compressed_multisample_layout_mask = - prog_key->tex_aux_usage == ISL_AUX_USAGE_MCS; - wm_key.tex.msaa_16 = prog_key->tex_samples == 16; + wm_key.base.tex.compressed_multisample_layout_mask = + 
isl_aux_usage_has_mcs(prog_key->tex_aux_usage); + wm_key.base.tex.msaa_16 = prog_key->tex_samples == 16; wm_key.multisample_fbo = prog_key->rt_samples > 1; program = blorp_compile_fs(blorp, mem_ctx, nir, &wm_key, false, &prog_data); bool result = - blorp->upload_shader(blorp, prog_key, sizeof(*prog_key), + blorp->upload_shader(batch, MESA_SHADER_FRAGMENT, + prog_key, sizeof(*prog_key), program, prog_data.base.program_size, &prog_data.base, sizeof(prog_data), &params->wm_prog_kernel, &params->wm_prog_data); @@ -1499,6 +1569,9 @@ blorp_surf_convert_to_single_slice(const struct isl_device *isl_dev, { bool ok UNUSED; + /* It would be insane to try and do this on a compressed surface */ + assert(info->aux_usage == ISL_AUX_USAGE_NONE); + /* Just bail if we have nothing to do. */ if (info->surf.dim == ISL_SURF_DIM_2D && info->view.base_level == 0 && info->view.base_array_layer == 0 && @@ -1544,9 +1617,9 @@ blorp_surf_convert_to_single_slice(const struct isl_device *isl_dev, info->z_offset = 0; } -static void -surf_fake_interleaved_msaa(const struct isl_device *isl_dev, - struct brw_blorp_surface_info *info) +void +blorp_surf_fake_interleaved_msaa(const struct isl_device *isl_dev, + struct brw_blorp_surface_info *info) { assert(info->surf.msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED); @@ -1558,9 +1631,9 @@ surf_fake_interleaved_msaa(const struct isl_device *isl_dev, info->surf.msaa_layout = ISL_MSAA_LAYOUT_NONE; } -static void -surf_retile_w_to_y(const struct isl_device *isl_dev, - struct brw_blorp_surface_info *info) +void +blorp_surf_retile_w_to_y(const struct isl_device *isl_dev, + struct brw_blorp_surface_info *info) { assert(info->surf.tiling == ISL_TILING_W); @@ -1574,7 +1647,7 @@ surf_retile_w_to_y(const struct isl_device *isl_dev, */ if (isl_dev->info->gen > 6 && info->surf.msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) { - surf_fake_interleaved_msaa(isl_dev, info); + blorp_surf_fake_interleaved_msaa(isl_dev, info); } if (isl_dev->info->gen == 6) { @@ -1623,20 +1696,12 @@ 
can_shrink_surface(const struct brw_blorp_surface_info *surf) return true; } -static bool -can_shrink_surfaces(const struct blorp_params *params) -{ - return - can_shrink_surface(&params->src) && - can_shrink_surface(&params->dst); -} - static unsigned get_max_surface_size(const struct gen_device_info *devinfo, - const struct blorp_params *params) + const struct brw_blorp_surface_info *surf) { const unsigned max = devinfo->gen >= 7 ? 16384 : 8192; - if (split_blorp_blit_debug && can_shrink_surfaces(params)) + if (split_blorp_blit_debug && can_shrink_surface(surf)) return max >> 4; /* A smaller restriction when debug is enabled */ else return max; @@ -1724,8 +1789,10 @@ surf_fake_rgb_with_red(const struct isl_device *isl_dev, enum blit_shrink_status { BLIT_NO_SHRINK = 0, - BLIT_WIDTH_SHRINK = 1, - BLIT_HEIGHT_SHRINK = 2, + BLIT_SRC_WIDTH_SHRINK = (1 << 0), + BLIT_DST_WIDTH_SHRINK = (1 << 1), + BLIT_SRC_HEIGHT_SHRINK = (1 << 2), + BLIT_DST_HEIGHT_SHRINK = (1 << 3), }; /* Try to blit. If the surface parameters exceed the size allowed by hardware, @@ -1740,6 +1807,30 @@ try_blorp_blit(struct blorp_batch *batch, { const struct gen_device_info *devinfo = batch->blorp->isl_dev->info; + if (params->dst.surf.usage & ISL_SURF_USAGE_DEPTH_BIT) { + if (devinfo->gen >= 7) { + /* We can render as depth on Gen5 but there's no real advantage since + * it doesn't support MSAA or HiZ. On Gen4, we can't always render + * to depth due to issues with depth buffers and mip-mapping. On + * Gen6, we can do everything but we have weird offsetting for HiZ + * and stencil. It's easier to just render using the color pipe + * on those platforms. 
+ */ + wm_prog_key->dst_usage = ISL_SURF_USAGE_DEPTH_BIT; + } else { + wm_prog_key->dst_usage = ISL_SURF_USAGE_RENDER_TARGET_BIT; + } + } else if (params->dst.surf.usage & ISL_SURF_USAGE_STENCIL_BIT) { + assert(params->dst.surf.format == ISL_FORMAT_R8_UINT); + if (devinfo->gen >= 9) { + wm_prog_key->dst_usage = ISL_SURF_USAGE_STENCIL_BIT; + } else { + wm_prog_key->dst_usage = ISL_SURF_USAGE_RENDER_TARGET_BIT; + } + } else { + wm_prog_key->dst_usage = ISL_SURF_USAGE_RENDER_TARGET_BIT; + } + if (isl_format_has_sint_channel(params->src.view.format)) { wm_prog_key->texture_data_type = nir_type_int; } else if (isl_format_has_uint_channel(params->src.view.format)) { @@ -1794,6 +1885,7 @@ try_blorp_blit(struct blorp_batch *batch, } if (devinfo->gen > 6 && + !isl_surf_usage_is_depth_or_stencil(wm_prog_key->dst_usage) && params->dst.surf.msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) { assert(params->dst.surf.samples > 1); @@ -1816,13 +1908,14 @@ try_blorp_blit(struct blorp_batch *batch, params->x1 = ALIGN(params->x1, 2) * px_size_sa.width; params->y1 = ALIGN(params->y1, 2) * px_size_sa.height; - surf_fake_interleaved_msaa(batch->blorp->isl_dev, &params->dst); + blorp_surf_fake_interleaved_msaa(batch->blorp->isl_dev, &params->dst); wm_prog_key->use_kill = true; wm_prog_key->need_dst_offset = true; } - if (params->dst.surf.tiling == ISL_TILING_W) { + if (params->dst.surf.tiling == ISL_TILING_W && + wm_prog_key->dst_usage != ISL_SURF_USAGE_STENCIL_BIT) { /* We must modify the rectangle we send through the rendering pipeline * (and the size and x/y offset of the destination surface), to account * for the fact that we are mapping it as Y-tiled when it is in fact @@ -1877,7 +1970,7 @@ try_blorp_blit(struct blorp_batch *batch, params->y1 = ALIGN(params->y1, y_align) / 2; /* Retile the surface to Y-tiled */ - surf_retile_w_to_y(batch->blorp->isl_dev, &params->dst); + blorp_surf_retile_w_to_y(batch->blorp->isl_dev, &params->dst); wm_prog_key->dst_tiled_w = true; wm_prog_key->use_kill = true; @@ 
-1903,7 +1996,7 @@ try_blorp_blit(struct blorp_batch *batch, * * TODO: what if this makes the texture size too large? */ - surf_retile_w_to_y(batch->blorp->isl_dev, &params->src); + blorp_surf_retile_w_to_y(batch->blorp->isl_dev, &params->src); wm_prog_key->src_tiled_w = true; wm_prog_key->need_src_offset = true; @@ -1956,7 +2049,7 @@ try_blorp_blit(struct blorp_batch *batch, /* If it happens to be sRGB, we need to force a conversion */ if (params->dst.view.format == ISL_FORMAT_R8G8B8_UNORM_SRGB) - wm_prog_key->dst_format = ISL_FORMAT_R9G9B9E5_SHAREDEXP; + wm_prog_key->dst_format = ISL_FORMAT_R8G8B8_UNORM_SRGB; surf_fake_rgb_with_red(batch->blorp->isl_dev, &params->dst); @@ -1966,9 +2059,10 @@ try_blorp_blit(struct blorp_batch *batch, /* We can handle RGBX formats easily enough by treating them as RGBA */ params->dst.view.format = isl_format_rgbx_to_rgba(params->dst.view.format); - } else if (params->dst.view.format == ISL_FORMAT_R24_UNORM_X8_TYPELESS) { + } else if (params->dst.view.format == ISL_FORMAT_R24_UNORM_X8_TYPELESS && + wm_prog_key->dst_usage != ISL_SURF_USAGE_DEPTH_BIT) { wm_prog_key->dst_format = params->dst.view.format; - params->dst.view.format = ISL_FORMAT_R32_UNORM; + params->dst.view.format = ISL_FORMAT_R32_UINT; } else if (params->dst.view.format == ISL_FORMAT_A4B4G4R4_UNORM) { params->dst.view.swizzle = isl_swizzle_compose(params->dst.view.swizzle, @@ -2018,22 +2112,35 @@ try_blorp_blit(struct blorp_batch *batch, /* For some texture types, we need to pass the layer through the sampler. 
*/ params->wm_inputs.src_z = params->src.z_offset; - if (!brw_blorp_get_blit_kernel(batch->blorp, params, wm_prog_key)) + if (!brw_blorp_get_blit_kernel(batch, params, wm_prog_key)) return 0; - if (!blorp_ensure_sf_program(batch->blorp, params)) + if (!blorp_ensure_sf_program(batch, params)) return 0; unsigned result = 0; - unsigned max_surface_size = get_max_surface_size(devinfo, params); - if (params->src.surf.logical_level0_px.width > max_surface_size || - params->dst.surf.logical_level0_px.width > max_surface_size) - result |= BLIT_WIDTH_SHRINK; - if (params->src.surf.logical_level0_px.height > max_surface_size || - params->dst.surf.logical_level0_px.height > max_surface_size) - result |= BLIT_HEIGHT_SHRINK; + unsigned max_src_surface_size = get_max_surface_size(devinfo, &params->src); + if (params->src.surf.logical_level0_px.width > max_src_surface_size) + result |= BLIT_SRC_WIDTH_SHRINK; + if (params->src.surf.logical_level0_px.height > max_src_surface_size) + result |= BLIT_SRC_HEIGHT_SHRINK; + + unsigned max_dst_surface_size = get_max_surface_size(devinfo, &params->dst); + if (params->dst.surf.logical_level0_px.width > max_dst_surface_size) + result |= BLIT_DST_WIDTH_SHRINK; + if (params->dst.surf.logical_level0_px.height > max_dst_surface_size) + result |= BLIT_DST_HEIGHT_SHRINK; if (result == 0) { + if (wm_prog_key->dst_usage == ISL_SURF_USAGE_DEPTH_BIT) { + params->depth = params->dst; + memset(&params->dst, 0, sizeof(params->dst)); + } else if (wm_prog_key->dst_usage == ISL_SURF_USAGE_STENCIL_BIT) { + params->stencil = params->dst; + params->stencil_mask = 0xff; + memset(&params->dst, 0, sizeof(params->dst)); + } + batch->blorp->exec(batch, params); } @@ -2090,7 +2197,7 @@ shrink_surface_params(const struct isl_device *dev, x_offset_sa = (uint32_t)*x0 * px_size_sa.w + info->tile_x_sa; y_offset_sa = (uint32_t)*y0 * px_size_sa.h + info->tile_y_sa; isl_tiling_get_intratile_offset_sa(info->surf.tiling, - info->surf.format, info->surf.row_pitch, + info->surf.format, 
info->surf.row_pitch_B, x_offset_sa, y_offset_sa, &byte_offset, &info->tile_x_sa, &info->tile_y_sa); @@ -2116,23 +2223,6 @@ shrink_surface_params(const struct isl_device *dev, info->surf.phys_level0_sa.height = size * px_size_sa.h; } -static void -shrink_surfaces(const struct isl_device *dev, - struct blorp_params *params, - struct brw_blorp_blit_prog_key *wm_prog_key, - struct blt_coords *coords) -{ - /* Shrink source surface */ - shrink_surface_params(dev, &params->src, &coords->x.src0, &coords->x.src1, - &coords->y.src0, &coords->y.src1); - wm_prog_key->need_src_offset = false; - - /* Shrink destination surface */ - shrink_surface_params(dev, &params->dst, &coords->x.dst0, &coords->x.dst1, - &coords->y.dst0, &coords->y.dst1); - wm_prog_key->need_dst_offset = false; -} - static void do_blorp_blit(struct blorp_batch *batch, const struct blorp_params *orig_params, @@ -2151,33 +2241,60 @@ do_blorp_blit(struct blorp_batch *batch, if (orig->y.mirror) y_scale = -y_scale; + enum blit_shrink_status shrink = BLIT_NO_SHRINK; + if (split_blorp_blit_debug) { + if (can_shrink_surface(&orig_params->src)) + shrink |= BLIT_SRC_WIDTH_SHRINK | BLIT_SRC_HEIGHT_SHRINK; + if (can_shrink_surface(&orig_params->dst)) + shrink |= BLIT_DST_WIDTH_SHRINK | BLIT_DST_HEIGHT_SHRINK; + } + bool x_done, y_done; - bool shrink = split_blorp_blit_debug && can_shrink_surfaces(orig_params); do { params = *orig_params; blit_coords = split_coords; - if (shrink) - shrink_surfaces(batch->blorp->isl_dev, &params, wm_prog_key, - &blit_coords); + + if (shrink & (BLIT_SRC_WIDTH_SHRINK | BLIT_SRC_HEIGHT_SHRINK)) { + shrink_surface_params(batch->blorp->isl_dev, &params.src, + &blit_coords.x.src0, &blit_coords.x.src1, + &blit_coords.y.src0, &blit_coords.y.src1); + wm_prog_key->need_src_offset = false; + } + + if (shrink & (BLIT_DST_WIDTH_SHRINK | BLIT_DST_HEIGHT_SHRINK)) { + shrink_surface_params(batch->blorp->isl_dev, &params.dst, + &blit_coords.x.dst0, &blit_coords.x.dst1, + &blit_coords.y.dst0, &blit_coords.y.dst1); + 
wm_prog_key->need_dst_offset = false; + } + enum blit_shrink_status result = try_blorp_blit(batch, &params, wm_prog_key, &blit_coords); - if (result & BLIT_WIDTH_SHRINK) { + if (result & (BLIT_SRC_WIDTH_SHRINK | BLIT_SRC_HEIGHT_SHRINK)) + assert(can_shrink_surface(&orig_params->src)); + + if (result & (BLIT_DST_WIDTH_SHRINK | BLIT_DST_HEIGHT_SHRINK)) + assert(can_shrink_surface(&orig_params->dst)); + + if (result & (BLIT_SRC_WIDTH_SHRINK | BLIT_DST_WIDTH_SHRINK)) { w /= 2.0; assert(w >= 1.0); split_coords.x.dst1 = MIN2(split_coords.x.dst0 + w, orig->x.dst1); adjust_split_source_coords(&orig->x, &split_coords.x, x_scale); } - if (result & BLIT_HEIGHT_SHRINK) { + if (result & (BLIT_SRC_HEIGHT_SHRINK | BLIT_DST_HEIGHT_SHRINK)) { h /= 2.0; assert(h >= 1.0); split_coords.y.dst1 = MIN2(split_coords.y.dst0 + h, orig->y.dst1); adjust_split_source_coords(&orig->y, &split_coords.y, y_scale); } - if (result != 0) { - assert(can_shrink_surfaces(orig_params)); - shrink = true; + if (result) { + /* We may get less bits set on result than we had already, so make + * sure we remember all the ways in which a resize is required. 
+ */ + shrink |= result; continue; } @@ -2211,7 +2328,8 @@ blorp_blit(struct blorp_batch *batch, float src_x1, float src_y1, float dst_x0, float dst_y0, float dst_x1, float dst_y1, - GLenum filter, bool mirror_x, bool mirror_y) + enum blorp_filter filter, + bool mirror_x, bool mirror_y) { struct blorp_params params; blorp_params_init(&params); @@ -2239,15 +2357,20 @@ blorp_blit(struct blorp_batch *batch, params.src.view.swizzle = src_swizzle; params.dst.view.swizzle = dst_swizzle; + const struct isl_format_layout *src_fmtl = + isl_format_get_layout(params.src.view.format); + struct brw_blorp_blit_prog_key wm_prog_key = { - .shader_type = BLORP_SHADER_TYPE_BLIT + .shader_type = BLORP_SHADER_TYPE_BLIT, + .filter = filter, + .sint32_to_uint = src_fmtl->channels.r.bits == 32 && + isl_format_has_sint_channel(params.src.view.format) && + isl_format_has_uint_channel(params.dst.view.format), + .uint32_to_sint = src_fmtl->channels.r.bits == 32 && + isl_format_has_uint_channel(params.src.view.format) && + isl_format_has_sint_channel(params.dst.view.format), }; - /* Scaled blitting or not. */ - const bool blit_scaled = - ((dst_x1 - dst_x0) == (src_x1 - src_x0) && - (dst_y1 - dst_y0) == (src_y1 - src_y0)) ? false : true; - /* Scaling factors used for bilinear filtering in multisample scaled * blits. */ @@ -2257,39 +2380,6 @@ blorp_blit(struct blorp_batch *batch, wm_prog_key.x_scale = 2.0f; wm_prog_key.y_scale = params.src.surf.samples / wm_prog_key.x_scale; - const bool bilinear_filter = filter == GL_LINEAR && - params.src.surf.samples <= 1 && - params.dst.surf.samples <= 1; - - /* If we are downsampling a non-integer color buffer, blend. - * - * Regarding integer color buffers, the OpenGL ES 3.2 spec says: - * - * "If the source formats are integer types or stencil values, a - * single sample's value is selected for each pixel." - * - * This implies we should not blend in that case. 
- */ - const bool blend = - (params.src.surf.usage & ISL_SURF_USAGE_DEPTH_BIT) == 0 && - (params.src.surf.usage & ISL_SURF_USAGE_STENCIL_BIT) == 0 && - !isl_format_has_int_channel(params.src.surf.format) && - params.src.surf.samples > 1 && - params.dst.surf.samples <= 1; - - if (blend && !blit_scaled) { - wm_prog_key.filter = BLORP_FILTER_AVERAGE; - } else if (blend && blit_scaled) { - wm_prog_key.filter = BLORP_FILTER_BILINEAR; - } else if (bilinear_filter) { - wm_prog_key.filter = BLORP_FILTER_BILINEAR; - } else { - if (params.src.surf.samples > 1) - wm_prog_key.filter = BLORP_FILTER_SAMPLE_0; - else - wm_prog_key.filter = BLORP_FILTER_NEAREST; - } - params.wm_inputs.rect_grid.x1 = minify(params.src.surf.logical_level0_px.width, src_level) * wm_prog_key.x_scale - 1.0f; @@ -2379,7 +2469,7 @@ get_copy_format_for_bpb(const struct isl_device *isl_dev, unsigned bpb) * operation between the two bit layouts. */ static enum isl_format -get_ccs_compatible_uint_format(const struct isl_format_layout *fmtl) +get_ccs_compatible_copy_format(const struct isl_format_layout *fmtl) { switch (fmtl->format) { case ISL_FORMAT_R32G32B32A32_FLOAT: @@ -2436,9 +2526,49 @@ get_ccs_compatible_uint_format(const struct isl_format_layout *fmtl) case ISL_FORMAT_B10G10R10A2_UNORM: case ISL_FORMAT_B10G10R10A2_UNORM_SRGB: case ISL_FORMAT_R10G10B10A2_UNORM: + case ISL_FORMAT_R10G10B10A2_UNORM_SRGB: + case ISL_FORMAT_R10G10B10_FLOAT_A2_UNORM: case ISL_FORMAT_R10G10B10A2_UINT: return ISL_FORMAT_R10G10B10A2_UINT; + case ISL_FORMAT_R16_UNORM: + case ISL_FORMAT_R16_SNORM: + case ISL_FORMAT_R16_SINT: + case ISL_FORMAT_R16_UINT: + case ISL_FORMAT_R16_FLOAT: + return ISL_FORMAT_R16_UINT; + + case ISL_FORMAT_R8G8_UNORM: + case ISL_FORMAT_R8G8_SNORM: + case ISL_FORMAT_R8G8_SINT: + case ISL_FORMAT_R8G8_UINT: + return ISL_FORMAT_R8G8_UINT; + + case ISL_FORMAT_B5G5R5X1_UNORM: + case ISL_FORMAT_B5G5R5X1_UNORM_SRGB: + case ISL_FORMAT_B5G5R5A1_UNORM: + case ISL_FORMAT_B5G5R5A1_UNORM_SRGB: + return 
ISL_FORMAT_B5G5R5A1_UNORM; + + case ISL_FORMAT_A4B4G4R4_UNORM: + case ISL_FORMAT_B4G4R4A4_UNORM: + case ISL_FORMAT_B4G4R4A4_UNORM_SRGB: + return ISL_FORMAT_B4G4R4A4_UNORM; + + case ISL_FORMAT_B5G6R5_UNORM: + case ISL_FORMAT_B5G6R5_UNORM_SRGB: + return ISL_FORMAT_B5G6R5_UNORM; + + case ISL_FORMAT_A1B5G5R5_UNORM: + return ISL_FORMAT_A1B5G5R5_UNORM; + + case ISL_FORMAT_A8_UNORM: + case ISL_FORMAT_R8_UNORM: + case ISL_FORMAT_R8_SNORM: + case ISL_FORMAT_R8_SINT: + case ISL_FORMAT_R8_UINT: + return ISL_FORMAT_R8_UINT; + default: unreachable("Not a compressible format"); } @@ -2482,15 +2612,8 @@ blorp_surf_convert_to_uncompressed(const struct isl_device *isl_dev, *y /= fmtl->bh; } - info->surf.logical_level0_px.width = - DIV_ROUND_UP(info->surf.logical_level0_px.width, fmtl->bw); - info->surf.logical_level0_px.height = - DIV_ROUND_UP(info->surf.logical_level0_px.height, fmtl->bh); - - assert(info->surf.phys_level0_sa.width % fmtl->bw == 0); - assert(info->surf.phys_level0_sa.height % fmtl->bh == 0); - info->surf.phys_level0_sa.width /= fmtl->bw; - info->surf.phys_level0_sa.height /= fmtl->bh; + info->surf.logical_level0_px = isl_surf_get_logical_level0_el(&info->surf); + info->surf.phys_level0_sa = isl_surf_get_phys_level0_el(&info->surf); assert(info->tile_x_sa % fmtl->bw == 0); assert(info->tile_y_sa % fmtl->bh == 0); @@ -2536,24 +2659,42 @@ blorp_copy(struct blorp_batch *batch, isl_format_get_layout(params.dst.surf.format); assert(params.src.aux_usage == ISL_AUX_USAGE_NONE || + params.src.aux_usage == ISL_AUX_USAGE_HIZ || + params.src.aux_usage == ISL_AUX_USAGE_HIZ_CCS_WT || params.src.aux_usage == ISL_AUX_USAGE_MCS || - params.src.aux_usage == ISL_AUX_USAGE_CCS_E); - assert(params.dst.aux_usage == ISL_AUX_USAGE_NONE || - params.dst.aux_usage == ISL_AUX_USAGE_MCS || - params.dst.aux_usage == ISL_AUX_USAGE_CCS_E); - - if (params.dst.aux_usage == ISL_AUX_USAGE_CCS_E) { - params.dst.view.format = get_ccs_compatible_uint_format(dst_fmtl); - if (params.src.aux_usage == 
ISL_AUX_USAGE_CCS_E) { - params.src.view.format = get_ccs_compatible_uint_format(src_fmtl); + params.src.aux_usage == ISL_AUX_USAGE_MCS_CCS || + params.src.aux_usage == ISL_AUX_USAGE_CCS_E || + params.src.aux_usage == ISL_AUX_USAGE_GEN12_CCS_E || + params.src.aux_usage == ISL_AUX_USAGE_STC_CCS); + + if (isl_aux_usage_has_hiz(params.src.aux_usage)) { + /* In order to use HiZ, we have to use the real format for the source. + * Depth <-> Color copies are not allowed. + */ + params.src.view.format = params.src.surf.format; + params.dst.view.format = params.src.surf.format; + } else if ((params.dst.surf.usage & ISL_SURF_USAGE_DEPTH_BIT) && + isl_dev->info->gen >= 7) { + /* On Gen7 and higher, we use actual depth writes for blits into depth + * buffers so we need the real format. + */ + params.src.view.format = params.dst.surf.format; + params.dst.view.format = params.dst.surf.format; + } else if (params.dst.aux_usage == ISL_AUX_USAGE_CCS_E || + params.dst.aux_usage == ISL_AUX_USAGE_GEN12_CCS_E) { + params.dst.view.format = get_ccs_compatible_copy_format(dst_fmtl); + if (params.src.aux_usage == ISL_AUX_USAGE_CCS_E || + params.src.aux_usage == ISL_AUX_USAGE_GEN12_CCS_E) { + params.src.view.format = get_ccs_compatible_copy_format(src_fmtl); } else if (src_fmtl->bpb == dst_fmtl->bpb) { params.src.view.format = params.dst.view.format; } else { params.src.view.format = get_copy_format_for_bpb(isl_dev, src_fmtl->bpb); } - } else if (params.src.aux_usage == ISL_AUX_USAGE_CCS_E) { - params.src.view.format = get_ccs_compatible_uint_format(src_fmtl); + } else if (params.src.aux_usage == ISL_AUX_USAGE_CCS_E || + params.src.aux_usage == ISL_AUX_USAGE_GEN12_CCS_E) { + params.src.view.format = get_ccs_compatible_copy_format(src_fmtl); if (src_fmtl->bpb == dst_fmtl->bpb) { params.dst.view.format = params.src.view.format; } else { @@ -2610,9 +2751,9 @@ blorp_copy(struct blorp_batch *batch, * because BLORP likes to treat things as if they have vec4 colors all * the time anyway. 
*/ - if (isl_format_is_rgb(src_cast_format)) + if (isl_format_get_layout(src_cast_format)->bpb % 3 == 0) src_cast_format = isl_format_rgb_to_rgba(src_cast_format); - if (isl_format_is_rgb(dst_cast_format)) + if (isl_format_get_layout(dst_cast_format)->bpb % 3 == 0) dst_cast_format = isl_format_rgb_to_rgba(dst_cast_format); if (src_cast_format != dst_cast_format) { @@ -2715,7 +2856,7 @@ do_buffer_copy(struct blorp_batch *batch, .levels = 1, .array_len = 1, .samples = 1, - .row_pitch = width * block_size, + .row_pitch_B = width * block_size, .usage = ISL_SURF_USAGE_TEXTURE_BIT | ISL_SURF_USAGE_RENDER_TARGET_BIT, .tiling_flags = ISL_TILING_LINEAR_BIT);