#include "util/format_rgb9e5.h"
/* header-only include needed for _mesa_unorm_to_float and friends. */
#include "mesa/main/format_utils.h"
+#include "util/u_math.h"
#define FILE_DEBUG_FLAG DEBUG_BLORP
static const bool split_blorp_blit_debug = false;
-/**
- * Enum to specify the order of arguments in a sampler message
- */
-enum sampler_message_arg
-{
- SAMPLER_MESSAGE_ARG_U_FLOAT,
- SAMPLER_MESSAGE_ARG_V_FLOAT,
- SAMPLER_MESSAGE_ARG_U_INT,
- SAMPLER_MESSAGE_ARG_V_INT,
- SAMPLER_MESSAGE_ARG_R_INT,
- SAMPLER_MESSAGE_ARG_SI_INT,
- SAMPLER_MESSAGE_ARG_MCS_INT,
- SAMPLER_MESSAGE_ARG_ZERO_INT,
-};
-
/* NIR variables handed to the helpers that build a blorp blit shader. */
struct brw_blorp_blit_vars {
/* Input values from brw_blorp_wm_inputs */
nir_variable *v_discard_rect;
nir_variable *v_src_offset;
nir_variable *v_dst_offset;
nir_variable *v_src_inv_size;
-
- /* gl_FragCoord */
- nir_variable *frag_coord;
-
- /* gl_FragColor */
- nir_variable *color_out;
};
static void
LOAD_INPUT(src_inv_size, glsl_vector_type(GLSL_TYPE_FLOAT, 2))
#undef LOAD_INPUT
-
- v->frag_coord = nir_variable_create(b->shader, nir_var_shader_in,
- glsl_vec4_type(), "gl_FragCoord");
- v->frag_coord->data.location = VARYING_SLOT_POS;
- v->frag_coord->data.origin_upper_left = true;
-
- v->color_out = nir_variable_create(b->shader, nir_var_shader_out,
- glsl_vec4_type(), "gl_FragColor");
- v->color_out->data.location = FRAG_RESULT_COLOR;
}
static nir_ssa_def *
const struct brw_blorp_blit_prog_key *key,
struct brw_blorp_blit_vars *v)
{
- nir_ssa_def *coord = nir_f2i32(b, nir_load_var(b, v->frag_coord));
+ nir_ssa_def *coord = nir_f2i32(b, nir_load_frag_coord(b));
/* Account for destination surface intratile offset
*
#ifdef HAVE___BUILTIN_CTZ
return __builtin_ctz(~value);
#else
- return _mesa_bitcount(value & ~(value + 1));
+ return util_bitcount(value & ~(value + 1));
#endif
}
static nir_ssa_def *
-blorp_nir_manual_blend_average(nir_builder *b, struct brw_blorp_blit_vars *v,
- nir_ssa_def *pos, unsigned tex_samples,
- enum isl_aux_usage tex_aux_usage,
- nir_alu_type dst_type)
+blorp_nir_combine_samples(nir_builder *b, struct brw_blorp_blit_vars *v,
+ nir_ssa_def *pos, unsigned tex_samples,
+ enum isl_aux_usage tex_aux_usage,
+ nir_alu_type dst_type,
+ enum blorp_filter filter)
{
/* If non-null, this is the outer-most if statement */
nir_if *outer_if = NULL;
nir_local_variable_create(b->impl, glsl_vec4_type(), "color");
nir_ssa_def *mcs = NULL;
- if (tex_aux_usage == ISL_AUX_USAGE_MCS)
+ if (isl_aux_usage_has_mcs(tex_aux_usage))
mcs = blorp_blit_txf_ms_mcs(b, v, pos);
+ nir_op combine_op;
+ switch (filter) {
+ case BLORP_FILTER_AVERAGE:
+ assert(dst_type == nir_type_float);
+ combine_op = nir_op_fadd;
+ break;
+
+ case BLORP_FILTER_MIN_SAMPLE:
+ switch (dst_type) {
+ case nir_type_int: combine_op = nir_op_imin; break;
+ case nir_type_uint: combine_op = nir_op_umin; break;
+ case nir_type_float: combine_op = nir_op_fmin; break;
+ default: unreachable("Invalid dst_type");
+ }
+ break;
+
+ case BLORP_FILTER_MAX_SAMPLE:
+ switch (dst_type) {
+ case nir_type_int: combine_op = nir_op_imax; break;
+ case nir_type_uint: combine_op = nir_op_umax; break;
+ case nir_type_float: combine_op = nir_op_fmax; break;
+ default: unreachable("Invalid dst_type");
+ }
+ break;
+
+ default:
+ unreachable("Invalid filter");
+ }
+
/* We add together samples using a binary tree structure, e.g. for 4x MSAA:
*
* result = ((sample[0] + sample[1]) + (sample[2] + sample[3])) / 4
nir_ssa_def *texture_data[5];
unsigned stack_depth = 0;
for (unsigned i = 0; i < tex_samples; ++i) {
- assert(stack_depth == _mesa_bitcount(i)); /* Loop invariant */
+ assert(stack_depth == util_bitcount(i)); /* Loop invariant */
/* Push sample i onto the stack */
assert(stack_depth < ARRAY_SIZE(texture_data));
nir_imm_int(b, i));
texture_data[stack_depth++] = blorp_nir_txf_ms(b, v, ms_pos, mcs, dst_type);
- if (i == 0 && tex_aux_usage == ISL_AUX_USAGE_MCS) {
+ if (i == 0 && isl_aux_usage_has_mcs(tex_aux_usage)) {
/* The Ivy Bridge PRM, Vol4 Part1 p27 (Multisample Control Surface)
* suggests an optimization:
*
assert(stack_depth >= 2);
--stack_depth;
- assert(dst_type == nir_type_float);
texture_data[stack_depth - 1] =
- nir_fadd(b, texture_data[stack_depth - 1],
- texture_data[stack_depth]);
+ nir_build_alu(b, combine_op,
+ texture_data[stack_depth - 1],
+ texture_data[stack_depth],
+ NULL, NULL);
}
}
/* We should have just 1 sample on the stack now. */
assert(stack_depth == 1);
- texture_data[0] = nir_fmul(b, texture_data[0],
- nir_imm_float(b, 1.0 / tex_samples));
+ if (filter == BLORP_FILTER_AVERAGE) {
+ assert(dst_type == nir_type_float);
+ texture_data[0] = nir_fmul(b, texture_data[0],
+ nir_imm_float(b, 1.0 / tex_samples));
+ }
nir_store_var(b, color, texture_data[0], 0xf);
return nir_load_var(b, color);
}
-static inline nir_ssa_def *
-nir_imm_vec2(nir_builder *build, float x, float y)
-{
- nir_const_value v;
-
- memset(&v, 0, sizeof(v));
- v.f32[0] = x;
- v.f32[1] = y;
-
- return nir_build_imm(build, 4, 32, v);
-}
-
static nir_ssa_def *
blorp_nir_manual_blend_bilinear(nir_builder *b, nir_ssa_def *pos,
unsigned tex_samples,
* here inside the loop after computing the pixel coordinates.
*/
nir_ssa_def *mcs = NULL;
- if (key->tex_aux_usage == ISL_AUX_USAGE_MCS)
+ if (isl_aux_usage_has_mcs(key->tex_aux_usage))
mcs = blorp_blit_txf_ms_mcs(b, v, sample_coords_int);
/* Compute sample index and map the sample index to a sample number.
* grid of samples within a pixel. Sample number layout shows the
* rectangular grid of samples roughly corresponding to the real sample
* locations within a pixel.
+ *
+ * In the case of 2x MSAA, the layout of sample indices is reversed from
+ * the layout of sample numbers:
+ *
+ * sample index layout : --------- sample number layout : ---------
+ * | 0 | 1 | | 1 | 0 |
+ * --------- ---------
+ *
* In case of 4x MSAA, layout of sample indices matches the layout of
* sample numbers:
* ---------
key->x_scale * key->y_scale));
sample = nir_f2i32(b, sample);
- if (tex_samples == 8) {
+ if (tex_samples == 2) {
+ sample = nir_isub(b, nir_imm_int(b, 1), sample);
+ } else if (tex_samples == 8) {
sample = nir_iand(b, nir_ishr(b, nir_imm_int(b, 0x64210573),
nir_ishl(b, sample, nir_imm_int(b, 2))),
nir_imm_int(b, 0xf));
bit_cast_color(struct nir_builder *b, nir_ssa_def *color,
const struct brw_blorp_blit_prog_key *key)
{
- assert(key->texture_data_type == nir_type_uint);
-
if (key->src_format == key->dst_format)
return color;
const struct isl_format_layout *dst_fmtl =
isl_format_get_layout(key->dst_format);
- /* They must be uint formats with the same bit size */
+ /* They must be formats with the same bit size */
assert(src_fmtl->bpb == dst_fmtl->bpb);
- assert(src_fmtl->channels.r.type == ISL_UINT);
- assert(dst_fmtl->channels.r.type == ISL_UINT);
- /* They must be in regular color formats (no luminance or alpha) */
- assert(src_fmtl->channels.r.bits > 0);
- assert(dst_fmtl->channels.r.bits > 0);
+ if (src_fmtl->bpb <= 32) {
+ assert(src_fmtl->channels.r.type == ISL_UINT ||
+ src_fmtl->channels.r.type == ISL_UNORM);
+ assert(dst_fmtl->channels.r.type == ISL_UINT ||
+ dst_fmtl->channels.r.type == ISL_UNORM);
- /* They must be in RGBA order (possibly with channels missing) */
- assert(src_fmtl->channels.r.start_bit == 0);
- assert(dst_fmtl->channels.r.start_bit == 0);
+ nir_ssa_def *packed = nir_imm_int(b, 0);
+ for (unsigned c = 0; c < 4; c++) {
+ if (src_fmtl->channels_array[c].bits == 0)
+ continue;
- if (src_fmtl->bpb <= 32) {
- const unsigned src_channels =
- isl_format_get_num_channels(key->src_format);
- const unsigned src_bits[4] = {
- src_fmtl->channels.r.bits,
- src_fmtl->channels.g.bits,
- src_fmtl->channels.b.bits,
- src_fmtl->channels.a.bits,
- };
- const unsigned dst_channels =
- isl_format_get_num_channels(key->dst_format);
- const unsigned dst_bits[4] = {
- dst_fmtl->channels.r.bits,
- dst_fmtl->channels.g.bits,
- dst_fmtl->channels.b.bits,
- dst_fmtl->channels.a.bits,
- };
- nir_ssa_def *packed =
- nir_format_pack_uint_unmasked(b, color, src_bits, src_channels);
- color = nir_format_unpack_uint(b, packed, dst_bits, dst_channels);
+ const unsigned chan_start_bit = src_fmtl->channels_array[c].start_bit;
+ const unsigned chan_bits = src_fmtl->channels_array[c].bits;
+
+ nir_ssa_def *chan = nir_channel(b, color, c);
+ if (src_fmtl->channels_array[c].type == ISL_UNORM)
+ chan = nir_format_float_to_unorm(b, chan, &chan_bits);
+
+ packed = nir_ior(b, packed, nir_shift(b, chan, chan_start_bit));
+ }
+
+ nir_ssa_def *chans[4] = { };
+ for (unsigned c = 0; c < 4; c++) {
+ if (dst_fmtl->channels_array[c].bits == 0) {
+ chans[c] = nir_imm_int(b, 0);
+ continue;
+ }
+
+ const unsigned chan_start_bit = dst_fmtl->channels_array[c].start_bit;
+ const unsigned chan_bits = dst_fmtl->channels_array[c].bits;
+ chans[c] = nir_iand(b, nir_shift(b, packed, -(int)chan_start_bit),
+ nir_imm_int(b, BITFIELD_MASK(chan_bits)));
+
+ if (dst_fmtl->channels_array[c].type == ISL_UNORM)
+ chans[c] = nir_format_unorm_to_float(b, chans[c], &chan_bits);
+ }
+ color = nir_vec(b, chans, 4);
} else {
+ /* This path only supports UINT formats */
+ assert(src_fmtl->channels.r.type == ISL_UINT);
+ assert(dst_fmtl->channels.r.type == ISL_UINT);
+
const unsigned src_bpc = src_fmtl->channels.r.bits;
const unsigned dst_bpc = dst_fmtl->channels.r.bits;
isl_format_get_num_channels(key->src_format);
color = nir_channels(b, color, (1 << src_channels) - 1);
- color = nir_format_bitcast_uint_vec_unmasked(b, color, src_bpc, dst_bpc);
+ color = nir_format_bitcast_uvec_unmasked(b, color, src_bpc, dst_bpc);
}
/* Blorp likes to assume that colors are vec4s */
nir_ssa_def *value;
if (key->dst_format == ISL_FORMAT_R24_UNORM_X8_TYPELESS) {
- /* The destination image is bound as R32_UNORM but the data needs to be
+ /* The destination image is bound as R32_UINT but the data needs to be
* in R24_UNORM_X8_TYPELESS. The bottom 24 bits are the actual data and
* the top 8 bits need to be zero. We can accomplish this by clamping the
* value to [0, 1], scaling it by 2^24 - 1 and converting it to an integer.
*/
- float factor = (float)((1 << 24) - 1) / (float)UINT32_MAX;
- value = nir_fmul(b, nir_fsat(b, nir_channel(b, color, 0)),
- nir_imm_float(b, factor));
+ unsigned factor = (1 << 24) - 1;
+ value = nir_fsat(b, nir_channel(b, color, 0));
+ value = nir_f2i32(b, nir_fmul(b, value, nir_imm_float(b, factor)));
} else if (key->dst_format == ISL_FORMAT_L8_UNORM_SRGB) {
value = nir_format_linear_to_srgb(b, nir_channel(b, color, 0));
} else if (key->dst_format == ISL_FORMAT_R8G8B8_UNORM_SRGB) {
(key->dst_samples <= 1));
nir_builder b;
- nir_builder_init_simple_shader(&b, mem_ctx, MESA_SHADER_FRAGMENT, NULL);
+ blorp_nir_init_shader(&b, mem_ctx, MESA_SHADER_FRAGMENT, NULL);
struct brw_blorp_blit_vars v;
brw_blorp_blit_vars_init(&b, &v, key);
color = blorp_nir_txf(&b, &v, src_pos, key->texture_data_type);
} else {
nir_ssa_def *mcs = NULL;
- if (key->tex_aux_usage == ISL_AUX_USAGE_MCS)
+ if (isl_aux_usage_has_mcs(key->tex_aux_usage))
mcs = blorp_blit_txf_ms_mcs(&b, &v, src_pos);
color = blorp_nir_txf_ms(&b, &v, src_pos, mcs, key->texture_data_type);
break;
case BLORP_FILTER_AVERAGE:
+ case BLORP_FILTER_MIN_SAMPLE:
+ case BLORP_FILTER_MAX_SAMPLE:
assert(!key->src_tiled_w);
assert(key->tex_samples == key->src_samples);
assert(key->tex_layout == key->src_layout);
* to multiply our X and Y coordinates each by 2 and then add 1.
*/
assert(key->src_coords_normalized);
+ assert(key->filter == BLORP_FILTER_AVERAGE);
src_pos = nir_fadd(&b,
nir_i2f32(&b, src_pos),
nir_imm_float(&b, 0.5f));
color = blorp_nir_tex(&b, &v, key, src_pos);
} else {
/* Gen7+ hardware doesn't automatically blend. */
- color = blorp_nir_manual_blend_average(&b, &v, src_pos, key->src_samples,
- key->tex_aux_usage,
- key->texture_data_type);
+ color = blorp_nir_combine_samples(&b, &v, src_pos, key->src_samples,
+ key->tex_aux_usage,
+ key->texture_data_type,
+ key->filter);
}
break;
color = bit_cast_color(&b, color, key);
} else if (key->dst_format) {
color = convert_color(&b, color, key);
+ } else if (key->uint32_to_sint) {
+ /* Normally the hardware will take care of converting values from/to
+ * the source and destination formats. But a few cases need help.
+ *
+ * The Skylake PRM, volume 07, page 658 has a programming note:
+ *
+ * "When using SINT or UINT rendertarget surface formats, Blending
+ * must be DISABLED. The Pre-Blend Color Clamp Enable and Color
+ * Clamp Range fields are ignored, and an implied clamp to the
+ * rendertarget surface format is performed."
+ *
+ * For UINT to SINT blits, our sample operation gives us a uint32_t,
+ * but our render target write expects a signed int32_t number. If we
+ * simply passed the value along, the hardware would interpret a value
+ * with bit 31 set as a negative value, clamping it to the largest
+ * negative number the destination format could represent. But the
+ * actual source value is a positive number, so we want to clamp it
+ * to INT_MAX. To fix this, we explicitly take min(color, INT_MAX).
+ */
+ color = nir_umin(&b, color, nir_imm_int(&b, INT32_MAX));
+ } else if (key->sint32_to_uint) {
+ /* Similar to above, but clamping negative numbers to zero. */
+ color = nir_imax(&b, color, nir_imm_int(&b, 0));
}
if (key->dst_rgb) {
color = nir_vec4(&b, color_component, u, u, u);
}
- nir_store_var(&b, v.color_out, color, 0xf);
+ if (key->dst_usage == ISL_SURF_USAGE_RENDER_TARGET_BIT) {
+ nir_variable *color_out =
+ nir_variable_create(b.shader, nir_var_shader_out,
+ glsl_vec4_type(), "gl_FragColor");
+ color_out->data.location = FRAG_RESULT_COLOR;
+ nir_store_var(&b, color_out, color, 0xf);
+ } else if (key->dst_usage == ISL_SURF_USAGE_DEPTH_BIT) {
+ nir_variable *depth_out =
+ nir_variable_create(b.shader, nir_var_shader_out,
+ glsl_float_type(), "gl_FragDepth");
+ depth_out->data.location = FRAG_RESULT_DEPTH;
+ nir_store_var(&b, depth_out, nir_channel(&b, color, 0), 0x1);
+ } else if (key->dst_usage == ISL_SURF_USAGE_STENCIL_BIT) {
+ nir_variable *stencil_out =
+ nir_variable_create(b.shader, nir_var_shader_out,
+ glsl_int_type(), "gl_FragStencilRef");
+ stencil_out->data.location = FRAG_RESULT_STENCIL;
+ nir_store_var(&b, stencil_out, nir_channel(&b, color, 0), 0x1);
+ } else {
+ unreachable("Invalid destination usage");
+ }
return b.shader;
}
static bool
-brw_blorp_get_blit_kernel(struct blorp_context *blorp,
+brw_blorp_get_blit_kernel(struct blorp_batch *batch,
struct blorp_params *params,
const struct brw_blorp_blit_prog_key *prog_key)
{
- if (blorp->lookup_shader(blorp, prog_key, sizeof(*prog_key),
+ struct blorp_context *blorp = batch->blorp;
+
+ if (blorp->lookup_shader(batch, prog_key, sizeof(*prog_key),
¶ms->wm_prog_kernel, ¶ms->wm_prog_data))
return true;
struct brw_wm_prog_key wm_key;
brw_blorp_init_wm_prog_key(&wm_key);
- wm_key.tex.compressed_multisample_layout_mask =
- prog_key->tex_aux_usage == ISL_AUX_USAGE_MCS;
- wm_key.tex.msaa_16 = prog_key->tex_samples == 16;
+ wm_key.base.tex.compressed_multisample_layout_mask =
+ isl_aux_usage_has_mcs(prog_key->tex_aux_usage);
+ wm_key.base.tex.msaa_16 = prog_key->tex_samples == 16;
wm_key.multisample_fbo = prog_key->rt_samples > 1;
program = blorp_compile_fs(blorp, mem_ctx, nir, &wm_key, false,
&prog_data);
bool result =
- blorp->upload_shader(blorp, prog_key, sizeof(*prog_key),
+ blorp->upload_shader(batch, MESA_SHADER_FRAGMENT,
+ prog_key, sizeof(*prog_key),
program, prog_data.base.program_size,
&prog_data.base, sizeof(prog_data),
¶ms->wm_prog_kernel, ¶ms->wm_prog_data);
{
bool ok UNUSED;
+ /* It would be insane to try and do this on a compressed surface */
+ assert(info->aux_usage == ISL_AUX_USAGE_NONE);
+
/* Just bail if we have nothing to do. */
if (info->surf.dim == ISL_SURF_DIM_2D &&
info->view.base_level == 0 && info->view.base_array_layer == 0 &&
info->z_offset = 0;
}
-static void
-surf_fake_interleaved_msaa(const struct isl_device *isl_dev,
- struct brw_blorp_surface_info *info)
+void
+blorp_surf_fake_interleaved_msaa(const struct isl_device *isl_dev,
+ struct brw_blorp_surface_info *info)
{
assert(info->surf.msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED);
info->surf.msaa_layout = ISL_MSAA_LAYOUT_NONE;
}
-static void
-surf_retile_w_to_y(const struct isl_device *isl_dev,
- struct brw_blorp_surface_info *info)
+void
+blorp_surf_retile_w_to_y(const struct isl_device *isl_dev,
+ struct brw_blorp_surface_info *info)
{
assert(info->surf.tiling == ISL_TILING_W);
*/
if (isl_dev->info->gen > 6 &&
info->surf.msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) {
- surf_fake_interleaved_msaa(isl_dev, info);
+ blorp_surf_fake_interleaved_msaa(isl_dev, info);
}
if (isl_dev->info->gen == 6) {
return true;
}
-static bool
-can_shrink_surfaces(const struct blorp_params *params)
-{
- return
- can_shrink_surface(¶ms->src) &&
- can_shrink_surface(¶ms->dst);
-}
-
static unsigned
get_max_surface_size(const struct gen_device_info *devinfo,
- const struct blorp_params *params)
+ const struct brw_blorp_surface_info *surf)
{
const unsigned max = devinfo->gen >= 7 ? 16384 : 8192;
- if (split_blorp_blit_debug && can_shrink_surfaces(params))
+ if (split_blorp_blit_debug && can_shrink_surface(surf))
return max >> 4; /* A smaller restriction when debug is enabled */
else
return max;
/* Bit flags naming the surface dimensions that exceed the maximum surface
 * size. Values may be OR'ed together; BLIT_NO_SHRINK (0) means no dimension
 * is too large.
 */
enum blit_shrink_status {
BLIT_NO_SHRINK = 0,
- BLIT_WIDTH_SHRINK = 1,
- BLIT_HEIGHT_SHRINK = 2,
+ BLIT_SRC_WIDTH_SHRINK = (1 << 0), /* source width is too large */
+ BLIT_DST_WIDTH_SHRINK = (1 << 1), /* destination width is too large */
+ BLIT_SRC_HEIGHT_SHRINK = (1 << 2), /* source height is too large */
+ BLIT_DST_HEIGHT_SHRINK = (1 << 3), /* destination height is too large */
};
/* Try to blit. If the surface parameters exceed the size allowed by hardware,
{
const struct gen_device_info *devinfo = batch->blorp->isl_dev->info;
+ if (params->dst.surf.usage & ISL_SURF_USAGE_DEPTH_BIT) {
+ if (devinfo->gen >= 7) {
+ /* We can render as depth on Gen5 but there's no real advantage since
+ * it doesn't support MSAA or HiZ. On Gen4, we can't always render
+ * to depth due to issues with depth buffers and mip-mapping. On
+ * Gen6, we can do everything but we have weird offsetting for HiZ
+ * and stencil. It's easier to just render using the color pipe
+ * on those platforms.
+ */
+ wm_prog_key->dst_usage = ISL_SURF_USAGE_DEPTH_BIT;
+ } else {
+ wm_prog_key->dst_usage = ISL_SURF_USAGE_RENDER_TARGET_BIT;
+ }
+ } else if (params->dst.surf.usage & ISL_SURF_USAGE_STENCIL_BIT) {
+ assert(params->dst.surf.format == ISL_FORMAT_R8_UINT);
+ if (devinfo->gen >= 9) {
+ wm_prog_key->dst_usage = ISL_SURF_USAGE_STENCIL_BIT;
+ } else {
+ wm_prog_key->dst_usage = ISL_SURF_USAGE_RENDER_TARGET_BIT;
+ }
+ } else {
+ wm_prog_key->dst_usage = ISL_SURF_USAGE_RENDER_TARGET_BIT;
+ }
+
if (isl_format_has_sint_channel(params->src.view.format)) {
wm_prog_key->texture_data_type = nir_type_int;
} else if (isl_format_has_uint_channel(params->src.view.format)) {
}
if (devinfo->gen > 6 &&
+ !isl_surf_usage_is_depth_or_stencil(wm_prog_key->dst_usage) &&
params->dst.surf.msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) {
assert(params->dst.surf.samples > 1);
params->x1 = ALIGN(params->x1, 2) * px_size_sa.width;
params->y1 = ALIGN(params->y1, 2) * px_size_sa.height;
- surf_fake_interleaved_msaa(batch->blorp->isl_dev, ¶ms->dst);
+ blorp_surf_fake_interleaved_msaa(batch->blorp->isl_dev, ¶ms->dst);
wm_prog_key->use_kill = true;
wm_prog_key->need_dst_offset = true;
}
- if (params->dst.surf.tiling == ISL_TILING_W) {
+ if (params->dst.surf.tiling == ISL_TILING_W &&
+ wm_prog_key->dst_usage != ISL_SURF_USAGE_STENCIL_BIT) {
/* We must modify the rectangle we send through the rendering pipeline
* (and the size and x/y offset of the destination surface), to account
* for the fact that we are mapping it as Y-tiled when it is in fact
params->y1 = ALIGN(params->y1, y_align) / 2;
/* Retile the surface to Y-tiled */
- surf_retile_w_to_y(batch->blorp->isl_dev, ¶ms->dst);
+ blorp_surf_retile_w_to_y(batch->blorp->isl_dev, ¶ms->dst);
wm_prog_key->dst_tiled_w = true;
wm_prog_key->use_kill = true;
*
* TODO: what if this makes the texture size too large?
*/
- surf_retile_w_to_y(batch->blorp->isl_dev, ¶ms->src);
+ blorp_surf_retile_w_to_y(batch->blorp->isl_dev, ¶ms->src);
wm_prog_key->src_tiled_w = true;
wm_prog_key->need_src_offset = true;
/* We can handle RGBX formats easily enough by treating them as RGBA */
params->dst.view.format =
isl_format_rgbx_to_rgba(params->dst.view.format);
- } else if (params->dst.view.format == ISL_FORMAT_R24_UNORM_X8_TYPELESS) {
+ } else if (params->dst.view.format == ISL_FORMAT_R24_UNORM_X8_TYPELESS &&
+ wm_prog_key->dst_usage != ISL_SURF_USAGE_DEPTH_BIT) {
wm_prog_key->dst_format = params->dst.view.format;
- params->dst.view.format = ISL_FORMAT_R32_UNORM;
+ params->dst.view.format = ISL_FORMAT_R32_UINT;
} else if (params->dst.view.format == ISL_FORMAT_A4B4G4R4_UNORM) {
params->dst.view.swizzle =
isl_swizzle_compose(params->dst.view.swizzle,
/* For some texture types, we need to pass the layer through the sampler. */
params->wm_inputs.src_z = params->src.z_offset;
- if (!brw_blorp_get_blit_kernel(batch->blorp, params, wm_prog_key))
+ if (!brw_blorp_get_blit_kernel(batch, params, wm_prog_key))
return 0;
- if (!blorp_ensure_sf_program(batch->blorp, params))
+ if (!blorp_ensure_sf_program(batch, params))
return 0;
unsigned result = 0;
- unsigned max_surface_size = get_max_surface_size(devinfo, params);
- if (params->src.surf.logical_level0_px.width > max_surface_size ||
- params->dst.surf.logical_level0_px.width > max_surface_size)
- result |= BLIT_WIDTH_SHRINK;
- if (params->src.surf.logical_level0_px.height > max_surface_size ||
- params->dst.surf.logical_level0_px.height > max_surface_size)
- result |= BLIT_HEIGHT_SHRINK;
+ unsigned max_src_surface_size = get_max_surface_size(devinfo, ¶ms->src);
+ if (params->src.surf.logical_level0_px.width > max_src_surface_size)
+ result |= BLIT_SRC_WIDTH_SHRINK;
+ if (params->src.surf.logical_level0_px.height > max_src_surface_size)
+ result |= BLIT_SRC_HEIGHT_SHRINK;
+
+ unsigned max_dst_surface_size = get_max_surface_size(devinfo, ¶ms->dst);
+ if (params->dst.surf.logical_level0_px.width > max_dst_surface_size)
+ result |= BLIT_DST_WIDTH_SHRINK;
+ if (params->dst.surf.logical_level0_px.height > max_dst_surface_size)
+ result |= BLIT_DST_HEIGHT_SHRINK;
if (result == 0) {
+ if (wm_prog_key->dst_usage == ISL_SURF_USAGE_DEPTH_BIT) {
+ params->depth = params->dst;
+ memset(¶ms->dst, 0, sizeof(params->dst));
+ } else if (wm_prog_key->dst_usage == ISL_SURF_USAGE_STENCIL_BIT) {
+ params->stencil = params->dst;
+ params->stencil_mask = 0xff;
+ memset(¶ms->dst, 0, sizeof(params->dst));
+ }
+
batch->blorp->exec(batch, params);
}
x_offset_sa = (uint32_t)*x0 * px_size_sa.w + info->tile_x_sa;
y_offset_sa = (uint32_t)*y0 * px_size_sa.h + info->tile_y_sa;
isl_tiling_get_intratile_offset_sa(info->surf.tiling,
- info->surf.format, info->surf.row_pitch,
+ info->surf.format, info->surf.row_pitch_B,
x_offset_sa, y_offset_sa,
&byte_offset,
&info->tile_x_sa, &info->tile_y_sa);
info->surf.phys_level0_sa.height = size * px_size_sa.h;
}
-static void
-shrink_surfaces(const struct isl_device *dev,
- struct blorp_params *params,
- struct brw_blorp_blit_prog_key *wm_prog_key,
- struct blt_coords *coords)
-{
- /* Shrink source surface */
- shrink_surface_params(dev, ¶ms->src, &coords->x.src0, &coords->x.src1,
- &coords->y.src0, &coords->y.src1);
- wm_prog_key->need_src_offset = false;
-
- /* Shrink destination surface */
- shrink_surface_params(dev, ¶ms->dst, &coords->x.dst0, &coords->x.dst1,
- &coords->y.dst0, &coords->y.dst1);
- wm_prog_key->need_dst_offset = false;
-}
-
static void
do_blorp_blit(struct blorp_batch *batch,
const struct blorp_params *orig_params,
if (orig->y.mirror)
y_scale = -y_scale;
+ enum blit_shrink_status shrink = BLIT_NO_SHRINK;
+ if (split_blorp_blit_debug) {
+ if (can_shrink_surface(&orig_params->src))
+ shrink |= BLIT_SRC_WIDTH_SHRINK | BLIT_SRC_HEIGHT_SHRINK;
+ if (can_shrink_surface(&orig_params->dst))
+ shrink |= BLIT_DST_WIDTH_SHRINK | BLIT_DST_HEIGHT_SHRINK;
+ }
+
bool x_done, y_done;
- bool shrink = split_blorp_blit_debug && can_shrink_surfaces(orig_params);
do {
params = *orig_params;
blit_coords = split_coords;
- if (shrink)
- shrink_surfaces(batch->blorp->isl_dev, ¶ms, wm_prog_key,
- &blit_coords);
+
+ if (shrink & (BLIT_SRC_WIDTH_SHRINK | BLIT_SRC_HEIGHT_SHRINK)) {
+ shrink_surface_params(batch->blorp->isl_dev, ¶ms.src,
+ &blit_coords.x.src0, &blit_coords.x.src1,
+ &blit_coords.y.src0, &blit_coords.y.src1);
+ wm_prog_key->need_src_offset = false;
+ }
+
+ if (shrink & (BLIT_DST_WIDTH_SHRINK | BLIT_DST_HEIGHT_SHRINK)) {
+ shrink_surface_params(batch->blorp->isl_dev, ¶ms.dst,
+ &blit_coords.x.dst0, &blit_coords.x.dst1,
+ &blit_coords.y.dst0, &blit_coords.y.dst1);
+ wm_prog_key->need_dst_offset = false;
+ }
+
enum blit_shrink_status result =
try_blorp_blit(batch, ¶ms, wm_prog_key, &blit_coords);
- if (result & BLIT_WIDTH_SHRINK) {
+ if (result & (BLIT_SRC_WIDTH_SHRINK | BLIT_SRC_HEIGHT_SHRINK))
+ assert(can_shrink_surface(&orig_params->src));
+
+ if (result & (BLIT_DST_WIDTH_SHRINK | BLIT_DST_HEIGHT_SHRINK))
+ assert(can_shrink_surface(&orig_params->dst));
+
+ if (result & (BLIT_SRC_WIDTH_SHRINK | BLIT_DST_WIDTH_SHRINK)) {
w /= 2.0;
assert(w >= 1.0);
split_coords.x.dst1 = MIN2(split_coords.x.dst0 + w, orig->x.dst1);
adjust_split_source_coords(&orig->x, &split_coords.x, x_scale);
}
- if (result & BLIT_HEIGHT_SHRINK) {
+ if (result & (BLIT_SRC_HEIGHT_SHRINK | BLIT_DST_HEIGHT_SHRINK)) {
h /= 2.0;
assert(h >= 1.0);
split_coords.y.dst1 = MIN2(split_coords.y.dst0 + h, orig->y.dst1);
adjust_split_source_coords(&orig->y, &split_coords.y, y_scale);
}
- if (result != 0) {
- assert(can_shrink_surfaces(orig_params));
- shrink = true;
+ if (result) {
+ /* We may get fewer bits set on result than we had already, so make
+ * sure we remember all the ways in which a resize is required.
+ */
+ shrink |= result;
continue;
}
params.src.view.swizzle = src_swizzle;
params.dst.view.swizzle = dst_swizzle;
+ const struct isl_format_layout *src_fmtl =
+ isl_format_get_layout(params.src.view.format);
+
struct brw_blorp_blit_prog_key wm_prog_key = {
.shader_type = BLORP_SHADER_TYPE_BLIT,
.filter = filter,
+ .sint32_to_uint = src_fmtl->channels.r.bits == 32 &&
+ isl_format_has_sint_channel(params.src.view.format) &&
+ isl_format_has_uint_channel(params.dst.view.format),
+ .uint32_to_sint = src_fmtl->channels.r.bits == 32 &&
+ isl_format_has_uint_channel(params.src.view.format) &&
+ isl_format_has_sint_channel(params.dst.view.format),
};
/* Scaling factors used for bilinear filtering in multisample scaled
* operation between the two bit layouts.
*/
static enum isl_format
-get_ccs_compatible_uint_format(const struct isl_format_layout *fmtl)
+get_ccs_compatible_copy_format(const struct isl_format_layout *fmtl)
{
switch (fmtl->format) {
case ISL_FORMAT_R32G32B32A32_FLOAT:
case ISL_FORMAT_B10G10R10A2_UNORM:
case ISL_FORMAT_B10G10R10A2_UNORM_SRGB:
case ISL_FORMAT_R10G10B10A2_UNORM:
+ case ISL_FORMAT_R10G10B10A2_UNORM_SRGB:
+ case ISL_FORMAT_R10G10B10_FLOAT_A2_UNORM:
case ISL_FORMAT_R10G10B10A2_UINT:
return ISL_FORMAT_R10G10B10A2_UINT;
+ case ISL_FORMAT_R16_UNORM:
+ case ISL_FORMAT_R16_SNORM:
+ case ISL_FORMAT_R16_SINT:
+ case ISL_FORMAT_R16_UINT:
+ case ISL_FORMAT_R16_FLOAT:
+ return ISL_FORMAT_R16_UINT;
+
+ case ISL_FORMAT_R8G8_UNORM:
+ case ISL_FORMAT_R8G8_SNORM:
+ case ISL_FORMAT_R8G8_SINT:
+ case ISL_FORMAT_R8G8_UINT:
+ return ISL_FORMAT_R8G8_UINT;
+
+ case ISL_FORMAT_B5G5R5X1_UNORM:
+ case ISL_FORMAT_B5G5R5X1_UNORM_SRGB:
+ case ISL_FORMAT_B5G5R5A1_UNORM:
+ case ISL_FORMAT_B5G5R5A1_UNORM_SRGB:
+ return ISL_FORMAT_B5G5R5A1_UNORM;
+
+ case ISL_FORMAT_A4B4G4R4_UNORM:
+ case ISL_FORMAT_B4G4R4A4_UNORM:
+ case ISL_FORMAT_B4G4R4A4_UNORM_SRGB:
+ return ISL_FORMAT_B4G4R4A4_UNORM;
+
+ case ISL_FORMAT_B5G6R5_UNORM:
+ case ISL_FORMAT_B5G6R5_UNORM_SRGB:
+ return ISL_FORMAT_B5G6R5_UNORM;
+
+ case ISL_FORMAT_A1B5G5R5_UNORM:
+ return ISL_FORMAT_A1B5G5R5_UNORM;
+
+ case ISL_FORMAT_A8_UNORM:
+ case ISL_FORMAT_R8_UNORM:
+ case ISL_FORMAT_R8_SNORM:
+ case ISL_FORMAT_R8_SINT:
+ case ISL_FORMAT_R8_UINT:
+ return ISL_FORMAT_R8_UINT;
+
default:
unreachable("Not a compressible format");
}
*y /= fmtl->bh;
}
- info->surf.logical_level0_px.width =
- DIV_ROUND_UP(info->surf.logical_level0_px.width, fmtl->bw);
- info->surf.logical_level0_px.height =
- DIV_ROUND_UP(info->surf.logical_level0_px.height, fmtl->bh);
-
- assert(info->surf.phys_level0_sa.width % fmtl->bw == 0);
- assert(info->surf.phys_level0_sa.height % fmtl->bh == 0);
- info->surf.phys_level0_sa.width /= fmtl->bw;
- info->surf.phys_level0_sa.height /= fmtl->bh;
+ info->surf.logical_level0_px = isl_surf_get_logical_level0_el(&info->surf);
+ info->surf.phys_level0_sa = isl_surf_get_phys_level0_el(&info->surf);
assert(info->tile_x_sa % fmtl->bw == 0);
assert(info->tile_y_sa % fmtl->bh == 0);
isl_format_get_layout(params.dst.surf.format);
assert(params.src.aux_usage == ISL_AUX_USAGE_NONE ||
+ params.src.aux_usage == ISL_AUX_USAGE_HIZ ||
+ params.src.aux_usage == ISL_AUX_USAGE_HIZ_CCS_WT ||
params.src.aux_usage == ISL_AUX_USAGE_MCS ||
- params.src.aux_usage == ISL_AUX_USAGE_CCS_E);
- assert(params.dst.aux_usage == ISL_AUX_USAGE_NONE ||
- params.dst.aux_usage == ISL_AUX_USAGE_MCS ||
- params.dst.aux_usage == ISL_AUX_USAGE_CCS_E);
-
- if (params.dst.aux_usage == ISL_AUX_USAGE_CCS_E) {
- params.dst.view.format = get_ccs_compatible_uint_format(dst_fmtl);
- if (params.src.aux_usage == ISL_AUX_USAGE_CCS_E) {
- params.src.view.format = get_ccs_compatible_uint_format(src_fmtl);
+ params.src.aux_usage == ISL_AUX_USAGE_MCS_CCS ||
+ params.src.aux_usage == ISL_AUX_USAGE_CCS_E ||
+ params.src.aux_usage == ISL_AUX_USAGE_GEN12_CCS_E ||
+ params.src.aux_usage == ISL_AUX_USAGE_STC_CCS);
+
+ if (isl_aux_usage_has_hiz(params.src.aux_usage)) {
+ /* In order to use HiZ, we have to use the real format for the source.
+ * Depth <-> Color copies are not allowed.
+ */
+ params.src.view.format = params.src.surf.format;
+ params.dst.view.format = params.src.surf.format;
+ } else if ((params.dst.surf.usage & ISL_SURF_USAGE_DEPTH_BIT) &&
+ isl_dev->info->gen >= 7) {
+ /* On Gen7 and higher, we use actual depth writes for blits into depth
+ * buffers so we need the real format.
+ */
+ params.src.view.format = params.dst.surf.format;
+ params.dst.view.format = params.dst.surf.format;
+ } else if (params.dst.aux_usage == ISL_AUX_USAGE_CCS_E ||
+ params.dst.aux_usage == ISL_AUX_USAGE_GEN12_CCS_E) {
+ params.dst.view.format = get_ccs_compatible_copy_format(dst_fmtl);
+ if (params.src.aux_usage == ISL_AUX_USAGE_CCS_E ||
+ params.src.aux_usage == ISL_AUX_USAGE_GEN12_CCS_E) {
+ params.src.view.format = get_ccs_compatible_copy_format(src_fmtl);
} else if (src_fmtl->bpb == dst_fmtl->bpb) {
params.src.view.format = params.dst.view.format;
} else {
params.src.view.format =
get_copy_format_for_bpb(isl_dev, src_fmtl->bpb);
}
- } else if (params.src.aux_usage == ISL_AUX_USAGE_CCS_E) {
- params.src.view.format = get_ccs_compatible_uint_format(src_fmtl);
+ } else if (params.src.aux_usage == ISL_AUX_USAGE_CCS_E ||
+ params.src.aux_usage == ISL_AUX_USAGE_GEN12_CCS_E) {
+ params.src.view.format = get_ccs_compatible_copy_format(src_fmtl);
if (src_fmtl->bpb == dst_fmtl->bpb) {
params.dst.view.format = params.src.view.format;
} else {
* because BLORP likes to treat things as if they have vec4 colors all
* the time anyway.
*/
- if (isl_format_is_rgb(src_cast_format))
+ if (isl_format_get_layout(src_cast_format)->bpb % 3 == 0)
src_cast_format = isl_format_rgb_to_rgba(src_cast_format);
- if (isl_format_is_rgb(dst_cast_format))
+ if (isl_format_get_layout(dst_cast_format)->bpb % 3 == 0)
dst_cast_format = isl_format_rgb_to_rgba(dst_cast_format);
if (src_cast_format != dst_cast_format) {
.levels = 1,
.array_len = 1,
.samples = 1,
- .row_pitch = width * block_size,
+ .row_pitch_B = width * block_size,
.usage = ISL_SURF_USAGE_TEXTURE_BIT |
ISL_SURF_USAGE_RENDER_TARGET_BIT,
.tiling_flags = ISL_TILING_LINEAR_BIT);