#include "main/teximage.h"
#include "main/fbobject.h"
+#include "main/renderbuffer.h"
#include "glsl/ralloc.h"
#include "brw_eu.h"
#include "brw_state.h"
+#define FILE_DEBUG_FLAG DEBUG_BLORP
/**
* Helper function for handling mirror image blits.
* If coord0 > coord1, swap them and invert the "mirror" boolean.
*/
static inline void
-fixup_mirroring(bool &mirror, GLint &coord0, GLint &coord1)
+fixup_mirroring(bool &mirror, GLfloat &coord0, GLfloat &coord1)
{
if (coord0 > coord1) {
mirror = !mirror;
- GLint tmp = coord0;
+ GLfloat tmp = coord0;
coord0 = coord1;
coord1 = tmp;
}
* coordinates, by swapping the roles of src and dst.
*/
static inline bool
-clip_or_scissor(bool mirror, GLint &src_x0, GLint &src_x1, GLint &dst_x0,
- GLint &dst_x1, GLint fb_xmin, GLint fb_xmax)
+clip_or_scissor(bool mirror, GLfloat &src_x0, GLfloat &src_x1, GLfloat &dst_x0,
+ GLfloat &dst_x1, GLfloat fb_xmin, GLfloat fb_xmax)
{
+ float scale = (float) (src_x1 - src_x0) / (dst_x1 - dst_x0);
/* If we are going to scissor everything away, stop. */
if (!(fb_xmin < fb_xmax &&
dst_x0 < fb_xmax &&
/* Adjust the source rectangle to remove the pixels corresponding to those
* that were clipped/scissored out of the destination rectangle.
*/
- src_x0 += pixels_clipped_left;
- src_x1 -= pixels_clipped_right;
+ src_x0 += pixels_clipped_left * scale;
+ src_x1 -= pixels_clipped_right * scale;
return true;
}
void
brw_blorp_blit_miptrees(struct intel_context *intel,
struct intel_mipmap_tree *src_mt,
+ unsigned src_level, unsigned src_layer,
struct intel_mipmap_tree *dst_mt,
- int src_x0, int src_y0,
- int dst_x0, int dst_y0,
- int dst_x1, int dst_y1,
+ unsigned dst_level, unsigned dst_layer,
+ float src_x0, float src_y0,
+ float src_x1, float src_y1,
+ float dst_x0, float dst_y0,
+ float dst_x1, float dst_y1,
bool mirror_x, bool mirror_y)
{
+ /* Get ready to blit. This includes depth resolving the src and dst
+ * buffers if necessary. Note: it's not necessary to do a color resolve on
+ * the destination buffer because we use the standard render path to render
+ * to destination color buffers, and the standard render path is
+ * fast-color-aware.
+ */
+ intel_miptree_resolve_color(intel, src_mt);
+ intel_miptree_slice_resolve_depth(intel, src_mt, src_level, src_layer);
+ intel_miptree_slice_resolve_depth(intel, dst_mt, dst_level, dst_layer);
+
+ DBG("%s from %s mt %p %d %d (%f,%f) (%f,%f)"
+ "to %s mt %p %d %d (%f,%f) (%f,%f) (flip %d,%d)\n",
+ __FUNCTION__,
+ _mesa_get_format_name(src_mt->format), src_mt,
+ src_level, src_layer, src_x0, src_y0, src_x1, src_y1,
+ _mesa_get_format_name(dst_mt->format), dst_mt,
+ dst_level, dst_layer, dst_x0, dst_y0, dst_x1, dst_y1,
+ mirror_x, mirror_y);
+
brw_blorp_blit_params params(brw_context(&intel->ctx),
- src_mt, dst_mt,
+ src_mt, src_level, src_layer,
+ dst_mt, dst_level, dst_layer,
src_x0, src_y0,
+ src_x1, src_y1,
dst_x0, dst_y0,
dst_x1, dst_y1,
mirror_x, mirror_y);
brw_blorp_exec(intel, ¶ms);
+
+ intel_miptree_slice_set_needs_hiz_resolve(dst_mt, dst_level, dst_layer);
}
static void
do_blorp_blit(struct intel_context *intel, GLbitfield buffer_bit,
struct intel_renderbuffer *src_irb,
struct intel_renderbuffer *dst_irb,
- GLint srcX0, GLint srcY0,
- GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
+ GLfloat srcX0, GLfloat srcY0, GLfloat srcX1, GLfloat srcY1,
+ GLfloat dstX0, GLfloat dstY0, GLfloat dstX1, GLfloat dstY1,
bool mirror_x, bool mirror_y)
{
/* Find source/dst miptrees */
struct intel_mipmap_tree *src_mt = find_miptree(buffer_bit, src_irb);
struct intel_mipmap_tree *dst_mt = find_miptree(buffer_bit, dst_irb);
- /* Get ready to blit. This includes depth resolving the src and dst
- * buffers if necessary.
- */
- intel_renderbuffer_resolve_depth(intel, src_irb);
- intel_renderbuffer_resolve_depth(intel, dst_irb);
-
/* Do the blit */
- brw_blorp_blit_miptrees(intel, src_mt, dst_mt,
- srcX0, srcY0, dstX0, dstY0, dstX1, dstY1,
+ brw_blorp_blit_miptrees(intel,
+ src_mt, src_irb->mt_level, src_irb->mt_layer,
+ dst_mt, dst_irb->mt_level, dst_irb->mt_layer,
+ srcX0, srcY0, srcX1, srcY1,
+ dstX0, dstY0, dstX1, dstY1,
mirror_x, mirror_y);
- intel_renderbuffer_set_needs_hiz_resolve(dst_irb);
intel_renderbuffer_set_needs_downsample(dst_irb);
}
+static bool
+color_formats_match(gl_format src_format, gl_format dst_format)
+{
+ gl_format linear_src_format = _mesa_get_srgb_format_linear(src_format);
+ gl_format linear_dst_format = _mesa_get_srgb_format_linear(dst_format);
+
+ /* Normally, we require the formats to be equal. However, we also support
+ * blitting from ARGB to XRGB (discarding alpha), and from XRGB to ARGB
+ * (overriding alpha to 1.0 via blending).
+ */
+ return linear_src_format == linear_dst_format ||
+ (linear_src_format == MESA_FORMAT_XRGB8888 &&
+ linear_dst_format == MESA_FORMAT_ARGB8888) ||
+ (linear_src_format == MESA_FORMAT_ARGB8888 &&
+ linear_dst_format == MESA_FORMAT_XRGB8888);
+}
static bool
formats_match(GLbitfield buffer_bit, struct intel_renderbuffer *src_irb,
* example MESA_FORMAT_X8_Z24 and MESA_FORMAT_S8_Z24), and we can blit
* between those formats.
*/
- return find_miptree(buffer_bit, src_irb)->format ==
- find_miptree(buffer_bit, dst_irb)->format;
-}
+ gl_format src_format = find_miptree(buffer_bit, src_irb)->format;
+ gl_format dst_format = find_miptree(buffer_bit, dst_irb)->format;
+ return color_formats_match(src_format, dst_format);
+}
static bool
try_blorp_blit(struct intel_context *intel,
- GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
- GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
+ GLfloat srcX0, GLfloat srcY0, GLfloat srcX1, GLfloat srcY1,
+ GLfloat dstX0, GLfloat dstY0, GLfloat dstX1, GLfloat dstY1,
GLenum filter, GLbitfield buffer_bit)
{
struct gl_context *ctx = &intel->ctx;
fixup_mirroring(mirror_y, srcY0, srcY1);
fixup_mirroring(mirror_y, dstY0, dstY1);
- /* Make sure width and height match */
- if (srcX1 - srcX0 != dstX1 - dstX0) return false;
- if (srcY1 - srcY0 != dstY1 - dstY0) return false;
+ /* Linear filtering is not yet implemented in blorp engine. So, fallback
+ * to other blit paths.
+ */
+ if ((srcX1 - srcX0 != dstX1 - dstX0 ||
+ srcY1 - srcY0 != dstY1 - dstY0) &&
+ filter == GL_LINEAR)
+ return false;
/* If the destination rectangle needs to be clipped or scissored, do so.
*/
}
for (unsigned i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; ++i) {
dst_irb = intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i]);
- do_blorp_blit(intel, buffer_bit, src_irb, dst_irb, srcX0, srcY0,
- dstX0, dstY0, dstX1, dstY1, mirror_x, mirror_y);
+ if (dst_irb)
+ do_blorp_blit(intel, buffer_bit, src_irb, dst_irb, srcX0, srcY0,
+ srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
+ mirror_x, mirror_y);
}
break;
case GL_DEPTH_BUFFER_BIT:
if (!formats_match(buffer_bit, src_irb, dst_irb))
return false;
do_blorp_blit(intel, buffer_bit, src_irb, dst_irb, srcX0, srcY0,
- dstX0, dstY0, dstX1, dstY1, mirror_x, mirror_y);
+ srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
+ mirror_x, mirror_y);
break;
case GL_STENCIL_BUFFER_BIT:
src_irb =
if (!formats_match(buffer_bit, src_irb, dst_irb))
return false;
do_blorp_blit(intel, buffer_bit, src_irb, dst_irb, srcX0, srcY0,
- dstX0, dstY0, dstX1, dstY1, mirror_x, mirror_y);
+ srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
+ mirror_x, mirror_y);
break;
default:
assert(false);
return true;
}
+bool
+brw_blorp_copytexsubimage(struct intel_context *intel,
+ struct gl_renderbuffer *src_rb,
+ struct gl_texture_image *dst_image,
+ int slice,
+ int srcX0, int srcY0,
+ int dstX0, int dstY0,
+ int width, int height)
+{
+ struct gl_context *ctx = &intel->ctx;
+ struct intel_renderbuffer *src_irb = intel_renderbuffer(src_rb);
+ struct intel_texture_image *intel_image = intel_texture_image(dst_image);
+
+ /* Sync up the state of window system buffers. We need to do this before
+ * we go looking at the src renderbuffer's miptree.
+ */
+ intel_prepare_render(intel);
+
+ struct intel_mipmap_tree *src_mt = src_irb->mt;
+ struct intel_mipmap_tree *dst_mt = intel_image->mt;
+
+ /* BLORP is not supported before Gen6. */
+ if (intel->gen < 6)
+ return false;
+
+ if (!color_formats_match(src_mt->format, dst_mt->format)) {
+ return false;
+ }
+
+ /* Source clipping shouldn't be necessary, since copytexsubimage (in
+ * src/mesa/main/teximage.c) calls _mesa_clip_copytexsubimage() which
+ * takes care of it.
+ *
+ * Destination clipping shouldn't be necessary since the restrictions on
+ * glCopyTexSubImage prevent the user from specifying a destination rectangle
+ * that falls outside the bounds of the destination texture.
+ * See error_check_subtexture_dimensions().
+ */
+
+ int srcY1 = srcY0 + height;
+ int srcX1 = srcX0 + width;
+ int dstX1 = dstX0 + width;
+ int dstY1 = dstY0 + height;
+
+ /* Account for the fact that in the system framebuffer, the origin is at
+ * the lower left.
+ */
+ bool mirror_y = false;
+ if (_mesa_is_winsys_fbo(ctx->ReadBuffer)) {
+ GLint tmp = src_rb->Height - srcY0;
+ srcY0 = src_rb->Height - srcY1;
+ srcY1 = tmp;
+ mirror_y = true;
+ }
+
+ brw_blorp_blit_miptrees(intel,
+ src_mt, src_irb->mt_level, src_irb->mt_layer,
+ dst_mt, dst_image->Level, dst_image->Face + slice,
+ srcX0, srcY0, srcX1, srcY1,
+ dstX0, dstY0, dstX1, dstY1,
+ false, mirror_y);
+
+ /* If we're copying to a packed depth stencil texture and the source
+ * framebuffer has separate stencil, we need to also copy the stencil data
+ * over.
+ */
+ src_rb = ctx->ReadBuffer->Attachment[BUFFER_STENCIL].Renderbuffer;
+ if (_mesa_get_format_bits(dst_image->TexFormat, GL_STENCIL_BITS) > 0 &&
+ src_rb != NULL) {
+ src_irb = intel_renderbuffer(src_rb);
+ src_mt = src_irb->mt;
+
+ if (src_mt->stencil_mt)
+ src_mt = src_mt->stencil_mt;
+ if (dst_mt->stencil_mt)
+ dst_mt = dst_mt->stencil_mt;
+
+ if (src_mt != dst_mt) {
+ brw_blorp_blit_miptrees(intel,
+ src_mt, src_irb->mt_level, src_irb->mt_layer,
+ dst_mt, dst_image->Level,
+ dst_image->Face + slice,
+ srcX0, srcY0, srcX1, srcY1,
+ dstX0, dstY0, dstX1, dstY1,
+ false, mirror_y);
+ }
+ }
+
+ return true;
+}
+
+
GLbitfield
brw_blorp_framebuffer(struct intel_context *intel,
GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
void sample(struct brw_reg dst);
void texel_fetch(struct brw_reg dst);
void mcs_fetch();
- void expand_to_32_bits(struct brw_reg src, struct brw_reg dst);
void texture_lookup(struct brw_reg dst, GLuint msg_type,
const sampler_message_arg *args, int num_args);
void render_target_write();
* terminate the thread.
*/
render_target_write();
+
+ if (unlikely(INTEL_DEBUG & DEBUG_BLORP)) {
+ printf("Native code for BLORP blit:\n");
+ brw_dump_compile(&func, stdout, 0, func.next_insn_offset);
+ printf("\n");
+ }
return brw_get_program(&func, program_size);
}
#define CONST_LOC(name) offsetof(brw_blorp_wm_push_constants, name)
#define ALLOC_REG(name) \
this->name = \
- brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, base_reg, CONST_LOC(name) / 2)
+ brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, base_reg, CONST_LOC(name) / 4)
ALLOC_REG(dst_x0);
ALLOC_REG(dst_x1);
}
this->mcs_data =
retype(brw_vec8_grf(reg, 0), BRW_REGISTER_TYPE_UD); reg += 8;
+
for (int i = 0; i < 2; ++i) {
this->x_coords[i]
- = vec16(retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW));
+ = retype(brw_vec8_grf(reg, 0), BRW_REGISTER_TYPE_UD);
+ reg += 2;
this->y_coords[i]
- = vec16(retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW));
+ = retype(brw_vec8_grf(reg, 0), BRW_REGISTER_TYPE_UD);
+ reg += 2;
}
this->xy_coord_index = 0;
this->sample_index
- = vec16(retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW));
- this->t1 = vec16(retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW));
- this->t2 = vec16(retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW));
+ = retype(brw_vec8_grf(reg, 0), BRW_REGISTER_TYPE_UD);
+ reg += 2;
+ this->t1 = retype(brw_vec8_grf(reg, 0), BRW_REGISTER_TYPE_UD);
+ reg += 2;
+ this->t2 = retype(brw_vec8_grf(reg, 0), BRW_REGISTER_TYPE_UD);
+ reg += 2;
/* Make sure we didn't run out of registers */
assert(reg <= GEN7_MRF_HACK_START);
* Then, we need to add the repeating sequence (0, 1, 0, 1, ...) to the
* result, since pixels n+1 and n+3 are in the right half of the subspan.
*/
- brw_ADD(&func, X, stride(suboffset(R1, 4), 2, 4, 0), brw_imm_v(0x10101010));
+ brw_ADD(&func, vec16(retype(X, BRW_REGISTER_TYPE_UW)),
+ stride(suboffset(R1, 4), 2, 4, 0), brw_imm_v(0x10101010));
/* Similarly, Y coordinates for subspans come from R1.2[31:16] through
* R1.5[31:16], so to get pixel Y coordinates we need to start at the 5th
* And we need to add the repeating sequence (0, 0, 1, 1, ...), since
* pixels n+2 and n+3 are in the bottom half of the subspan.
*/
- brw_ADD(&func, Y, stride(suboffset(R1, 5), 2, 4, 0), brw_imm_v(0x11001100));
+ brw_ADD(&func, vec16(retype(Y, BRW_REGISTER_TYPE_UW)),
+ stride(suboffset(R1, 5), 2, 4, 0), brw_imm_v(0x11001100));
+
+ /* Move the coordinates to UD registers. */
+ brw_MOV(&func, vec16(Xp), retype(X, BRW_REGISTER_TYPE_UW));
+ brw_MOV(&func, vec16(Yp), retype(Y, BRW_REGISTER_TYPE_UW));
+ SWAP_XY_AND_XPYP();
if (key->persample_msaa_dispatch) {
switch (key->rt_samples) {
- case 4:
+ case 4: {
/* The WM will be run in MSDISPMODE_PERSAMPLE with num_samples == 4.
* Therefore, subspan 0 will represent sample 0, subspan 1 will
* represent sample 1, and so on.
* populate a temporary variable with the sequence (0, 1, 2, 3), and
* then copy from it using vstride=1, width=4, hstride=0.
*/
- brw_MOV(&func, t1, brw_imm_v(0x3210));
- brw_MOV(&func, S, stride(t1, 1, 4, 0));
+ struct brw_reg t1_uw1 = retype(t1, BRW_REGISTER_TYPE_UW);
+ brw_MOV(&func, vec16(t1_uw1), brw_imm_v(0x3210));
+ /* Move to UD sample_index register. */
+ brw_MOV(&func, S, stride(t1_uw1, 1, 4, 0));
+ brw_MOV(&func, offset(S, 1), suboffset(stride(t1_uw1, 1, 4, 0), 2));
break;
+ }
case 8: {
/* The WM will be run in MSDISPMODE_PERSAMPLE with num_samples == 8.
* Therefore, subspan 0 will represent sample N (where N is 0 or 4),
* using vstride=1, width=4, hstride=0.
*/
struct brw_reg t1_ud1 = vec1(retype(t1, BRW_REGISTER_TYPE_UD));
+ struct brw_reg t2_uw1 = retype(t2, BRW_REGISTER_TYPE_UW);
struct brw_reg r0_ud1 = vec1(retype(R0, BRW_REGISTER_TYPE_UD));
brw_AND(&func, t1_ud1, r0_ud1, brw_imm_ud(0xc0));
brw_SHR(&func, t1_ud1, t1_ud1, brw_imm_ud(5));
- brw_MOV(&func, t2, brw_imm_v(0x3210));
- brw_ADD(&func, S, retype(t1_ud1, BRW_REGISTER_TYPE_UW),
- stride(t2, 1, 4, 0));
+ brw_MOV(&func, vec16(t2_uw1), brw_imm_v(0x3210));
+ brw_ADD(&func, vec16(S), retype(t1_ud1, BRW_REGISTER_TYPE_UW),
+ stride(t2_uw1, 1, 4, 0));
+ brw_ADD(&func, offset(S, 1),
+ retype(t1_ud1, BRW_REGISTER_TYPE_UW),
+ suboffset(stride(t2_uw1, 1, 4, 0), 2));
break;
}
default:
*/
assert(s_is_zero);
+ brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED);
if (new_tiled_w) {
/* Given X and Y coordinates that describe an address using Y tiling,
* translate to the X and Y coordinates that describe the same address
brw_OR(&func, Yp, t1, t2);
SWAP_XY_AND_XPYP();
}
+ brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
}
/**
brw_blorp_blit_program::encode_msaa(unsigned num_samples,
intel_msaa_layout layout)
{
+ brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED);
switch (layout) {
case INTEL_MSAA_LAYOUT_NONE:
/* No translation necessary, and S should already be zero. */
s_is_zero = true;
break;
}
+ brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
}
/**
brw_blorp_blit_program::decode_msaa(unsigned num_samples,
intel_msaa_layout layout)
{
+ brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED);
switch (layout) {
case INTEL_MSAA_LAYOUT_NONE:
/* No translation necessary, and S should already be zero. */
SWAP_XY_AND_XPYP();
break;
}
+ brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
}
/**
void
brw_blorp_blit_program::kill_if_outside_dst_rect()
{
- struct brw_reg f0 = brw_flag_reg();
+ struct brw_reg f0 = brw_flag_reg(0, 0);
struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
- struct brw_reg null16 = vec16(retype(brw_null_reg(), BRW_REGISTER_TYPE_UW));
+ struct brw_reg null32 = vec16(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
- brw_CMP(&func, null16, BRW_CONDITIONAL_GE, X, dst_x0);
- brw_CMP(&func, null16, BRW_CONDITIONAL_GE, Y, dst_y0);
- brw_CMP(&func, null16, BRW_CONDITIONAL_L, X, dst_x1);
- brw_CMP(&func, null16, BRW_CONDITIONAL_L, Y, dst_y1);
+ brw_CMP(&func, null32, BRW_CONDITIONAL_GE, X, dst_x0);
+ brw_CMP(&func, null32, BRW_CONDITIONAL_GE, Y, dst_y0);
+ brw_CMP(&func, null32, BRW_CONDITIONAL_L, X, dst_x1);
+ brw_CMP(&func, null32, BRW_CONDITIONAL_L, Y, dst_y1);
brw_set_predicate_control(&func, BRW_PREDICATE_NONE);
brw_push_insn_state(&func);
void
brw_blorp_blit_program::translate_dst_to_src()
{
- brw_MUL(&func, Xp, X, x_transform.multiplier);
- brw_MUL(&func, Yp, Y, y_transform.multiplier);
- brw_ADD(&func, Xp, Xp, x_transform.offset);
- brw_ADD(&func, Yp, Yp, y_transform.offset);
+ struct brw_reg X_f = retype(X, BRW_REGISTER_TYPE_F);
+ struct brw_reg Y_f = retype(Y, BRW_REGISTER_TYPE_F);
+ struct brw_reg Xp_f = retype(Xp, BRW_REGISTER_TYPE_F);
+ struct brw_reg Yp_f = retype(Yp, BRW_REGISTER_TYPE_F);
+
+ brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED);
+ /* Move the UD coordinates to float registers. */
+ brw_MOV(&func, Xp_f, X);
+ brw_MOV(&func, Yp_f, Y);
+ /* Scale and offset */
+ brw_MUL(&func, X_f, Xp_f, x_transform.multiplier);
+ brw_MUL(&func, Y_f, Yp_f, y_transform.multiplier);
+ brw_ADD(&func, X_f, X_f, x_transform.offset);
+ brw_ADD(&func, Y_f, Y_f, y_transform.offset);
+ /* Round the float coordinates down to nearest integer by moving to
+ * UD registers.
+ */
+ brw_MOV(&func, Xp, X_f);
+ brw_MOV(&func, Yp, Y_f);
SWAP_XY_AND_XPYP();
+ brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
}
/**
 * that make up a pixel). So we need to multiply our X and Y coordinates
* each by 2 and then add 1.
*/
+ brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED);
brw_SHL(&func, t1, X, brw_imm_w(1));
brw_SHL(&func, t2, Y, brw_imm_w(1));
brw_ADD(&func, Xp, t1, brw_imm_w(1));
brw_ADD(&func, Yp, t2, brw_imm_w(1));
+ brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
SWAP_XY_AND_XPYP();
}
s_is_zero = true;
} else {
s_is_zero = false;
- brw_MOV(&func, S, brw_imm_uw(i));
+ brw_MOV(&func, vec16(S), brw_imm_ud(i));
}
texel_fetch(texture_data[stack_depth++]);
gen7_ld_mcs_args, ARRAY_SIZE(gen7_ld_mcs_args));
}
-void
-brw_blorp_blit_program::expand_to_32_bits(struct brw_reg src,
- struct brw_reg dst)
-{
- brw_MOV(&func, vec8(dst), vec8(src));
- brw_set_compression_control(&func, BRW_COMPRESSION_2NDHALF);
- brw_MOV(&func, offset(vec8(dst), 1), suboffset(vec8(src), 8));
- brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
-}
-
void
brw_blorp_blit_program::texture_lookup(struct brw_reg dst,
GLuint msg_type,
for (int arg = 0; arg < num_args; ++arg) {
switch (args[arg]) {
case SAMPLER_MESSAGE_ARG_U_FLOAT:
- expand_to_32_bits(X, retype(mrf, BRW_REGISTER_TYPE_F));
+ brw_MOV(&func, retype(mrf, BRW_REGISTER_TYPE_F), X);
break;
case SAMPLER_MESSAGE_ARG_V_FLOAT:
- expand_to_32_bits(Y, retype(mrf, BRW_REGISTER_TYPE_F));
+ brw_MOV(&func, retype(mrf, BRW_REGISTER_TYPE_F), Y);
break;
case SAMPLER_MESSAGE_ARG_U_INT:
- expand_to_32_bits(X, mrf);
+ brw_MOV(&func, mrf, X);
break;
case SAMPLER_MESSAGE_ARG_V_INT:
- expand_to_32_bits(Y, mrf);
+ brw_MOV(&func, mrf, Y);
break;
case SAMPLER_MESSAGE_ARG_SI_INT:
/* Note: on Gen7, this code may be reached with s_is_zero==true
if (s_is_zero)
brw_MOV(&func, mrf, brw_imm_ud(0));
else
- expand_to_32_bits(S, mrf);
+ brw_MOV(&func, mrf, S);
break;
case SAMPLER_MESSAGE_ARG_MCS_INT:
switch (key->tex_layout) {
}
brw_SAMPLE(&func,
- retype(dst, BRW_REGISTER_TYPE_UW) /* dest */,
+ retype(dst, BRW_REGISTER_TYPE_F) /* dest */,
base_mrf /* msg_reg_nr */,
brw_message_reg(base_mrf) /* src0 */,
BRW_BLORP_TEXTURE_BINDING_TABLE_INDEX,
0 /* sampler */,
- WRITEMASK_XYZW,
msg_type,
8 /* response_length. TODO: should be smaller for non-RGBA formats? */,
mrf.nr - base_mrf /* msg_length */,
void
-brw_blorp_coord_transform_params::setup(GLuint src0, GLuint dst0, GLuint dst1,
+brw_blorp_coord_transform_params::setup(GLfloat src0, GLfloat src1,
+ GLfloat dst0, GLfloat dst1,
bool mirror)
{
+ float scale = (src1 - src0) / (dst1 - dst0);
if (!mirror) {
/* When not mirroring a coordinate (say, X), we need:
- * x' - src_x0 = x - dst_x0
+ * src_x - src_x0 = (dst_x - dst_x0 + 0.5) * scale
* Therefore:
- * x' = 1*x + (src_x0 - dst_x0)
+ * src_x = src_x0 + (dst_x - dst_x0 + 0.5) * scale
+ *
+ * blorp program uses "round toward zero" to convert the
+ * transformed floating point coordinates to integer coordinates,
+ * whereas the behaviour we actually want is "round to nearest",
+ * so 0.5 provides the necessary correction.
*/
- multiplier = 1;
- offset = src0 - dst0;
+ multiplier = scale;
+ offset = src0 + (-dst0 + 0.5) * scale;
} else {
/* When mirroring X we need:
- * x' - src_x0 = dst_x1 - x - 1
+ * src_x - src_x0 = dst_x1 - dst_x - 0.5
* Therefore:
- * x' = -1*x + (src_x0 + dst_x1 - 1)
+ * src_x = src_x0 + (dst_x1 - dst_x - 0.5) * scale
*/
- multiplier = -1;
- offset = src0 + dst1 - 1;
+ multiplier = -scale;
+ offset = src0 + (dst1 - 0.5) * scale;
}
}
brw_blorp_blit_params::brw_blorp_blit_params(struct brw_context *brw,
struct intel_mipmap_tree *src_mt,
+ unsigned src_level, unsigned src_layer,
struct intel_mipmap_tree *dst_mt,
- GLuint src_x0, GLuint src_y0,
- GLuint dst_x0, GLuint dst_y0,
- GLuint dst_x1, GLuint dst_y1,
+ unsigned dst_level, unsigned dst_layer,
+ GLfloat src_x0, GLfloat src_y0,
+ GLfloat src_x1, GLfloat src_y1,
+ GLfloat dst_x0, GLfloat dst_y0,
+ GLfloat dst_x1, GLfloat dst_y1,
bool mirror_x, bool mirror_y)
{
- src.set(brw, src_mt, 0, 0);
- dst.set(brw, dst_mt, 0, 0);
+ src.set(brw, src_mt, src_level, src_layer);
+ dst.set(brw, dst_mt, dst_level, dst_layer);
+
+ src.brw_surfaceformat = dst.brw_surfaceformat;
use_wm_prog = true;
memset(&wm_prog_key, 0, sizeof(wm_prog_key));
y0 = wm_push_consts.dst_y0 = dst_y0;
x1 = wm_push_consts.dst_x1 = dst_x1;
y1 = wm_push_consts.dst_y1 = dst_y1;
- wm_push_consts.x_transform.setup(src_x0, dst_x0, dst_x1, mirror_x);
- wm_push_consts.y_transform.setup(src_y0, dst_y0, dst_y1, mirror_y);
+ wm_push_consts.x_transform.setup(src_x0, src_x1, dst_x0, dst_x1, mirror_x);
+ wm_push_consts.y_transform.setup(src_y0, src_y1, dst_y0, dst_y1, mirror_y);
if (dst.num_samples <= 1 && dst_mt->num_samples > 1) {
/* We must expand the rectangle we send through the rendering pipeline,
}
if (dst.map_stencil_as_y_tiled) {
- /* We must modify the rectangle we send through the rendering pipeline,
- * to account for the fact that we are mapping it as Y-tiled when it is
- * in fact W-tiled. Y tiles have dimensions 128x32 whereas W tiles have
- * dimensions 64x64. We must also align it to a multiple of the tile
- * size, because the differences between W and Y tiling formats will
- * mean that pixels are scrambled within the tile.
+ /* We must modify the rectangle we send through the rendering pipeline
+ * (and the size and x/y offset of the destination surface), to account
+ * for the fact that we are mapping it as Y-tiled when it is in fact
+ * W-tiled.
*
- * Note: if the destination surface configured to use IMS layout, then
- * the effective tile size we need to align it to is smaller, because
- * each pixel covers a 2x2 or a 4x2 block of samples.
+ * Both Y tiling and W tiling can be understood as organizations of
+ * 32-byte sub-tiles; within each 32-byte sub-tile, the layout of pixels
+ * is different, but the layout of the 32-byte sub-tiles within the 4k
+ * tile is the same (8 sub-tiles across by 16 sub-tiles down, in
+ * column-major order). In Y tiling, the sub-tiles are 16 bytes wide
+ * and 2 rows high; in W tiling, they are 8 bytes wide and 4 rows high.
*
- * TODO: what if this makes the coordinates too large?
+ * Therefore, to account for the layout differences within the 32-byte
+ * sub-tiles, we must expand the rectangle so the X coordinates of its
+ * edges are multiples of 8 (the W sub-tile width), and its Y
+ * coordinates of its edges are multiples of 4 (the W sub-tile height).
+ * Then we need to scale the X and Y coordinates of the rectangle to
+ * account for the differences in aspect ratio between the Y and W
+ * sub-tiles. We need to modify the layer width and height similarly.
+ *
+ * A correction needs to be applied when MSAA is in use: since
+ * INTEL_MSAA_LAYOUT_IMS uses an interleaving pattern whose height is 4,
+ * we need to align the Y coordinates to multiples of 8, so that when
+ * they are divided by two they are still multiples of 4.
+ *
+ * Note: Since the x/y offset of the surface will be applied using the
+ * SURFACE_STATE command packet, it will be invisible to the swizzling
+ * code in the shader; therefore it needs to be in a multiple of the
+ * 32-byte sub-tile size. Fortunately it is, since the sub-tile is 8
+ * pixels wide and 4 pixels high (when viewed as a W-tiled stencil
+ * buffer), and the miplevel alignment used for stencil buffers is 8
+ * pixels horizontally and either 4 or 8 pixels vertically (see
+ * intel_horizontal_texture_alignment_unit() and
+ * intel_vertical_texture_alignment_unit()).
+ *
+ * Note: Also, since the SURFACE_STATE command packet can only apply
+ * offsets that are multiples of 4 pixels horizontally and 2 pixels
+ * vertically, it is important that the offsets will be multiples of
+ * these sizes after they are converted into Y-tiled coordinates.
+ * Fortunately they will be, since we know from above that the offsets
+ * are a multiple of the 32-byte sub-tile size, and in Y-tiled
+ * coordinates the sub-tile is 16 pixels wide and 2 pixels high.
+ *
+ * TODO: what if this makes the coordinates (or the texture size) too
+ * large?
*/
- unsigned x_align = 64, y_align = 64;
- if (dst_mt->msaa_layout == INTEL_MSAA_LAYOUT_IMS) {
- x_align /= (dst_mt->num_samples == 4 ? 2 : 4);
- y_align /= 2;
- }
+ const unsigned x_align = 8, y_align = dst.num_samples != 0 ? 8 : 4;
x0 = ROUND_DOWN_TO(x0, x_align) * 2;
y0 = ROUND_DOWN_TO(y0, y_align) / 2;
x1 = ALIGN(x1, x_align) * 2;
y1 = ALIGN(y1, y_align) / 2;
+ dst.width = ALIGN(dst.width, x_align) * 2;
+ dst.height = ALIGN(dst.height, y_align) / 2;
+ dst.x_offset *= 2;
+ dst.y_offset /= 2;
wm_prog_key.use_kill = true;
}
+
+ if (src.map_stencil_as_y_tiled) {
+ /* We must modify the size and x/y offset of the source surface to
+ * account for the fact that we are mapping it as Y-tiled when it is in
+ * fact W tiled.
+ *
+ * See the comments above concerning x/y offset alignment for the
+ * destination surface.
+ *
+ * TODO: what if this makes the texture size too large?
+ */
+ const unsigned x_align = 8, y_align = src.num_samples != 0 ? 8 : 4;
+ src.width = ALIGN(src.width, x_align) * 2;
+ src.height = ALIGN(src.height, y_align) / 2;
+ src.x_offset *= 2;
+ src.y_offset /= 2;
+ }
}
uint32_t