i965/fs: Lower 32x32 bit multiplication on BXT.

[mesa.git] / src / mesa / drivers / dri / i965 / brw_blorp_blit.cpp
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp

index 936feafdae4af9e0c0e21b40183116c25d982863..205c905b447e0655d8c21d2f5f372ae03956129e 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -78,7 +78,7 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
  
     DBG("%s from %dx %s mt %p %d %d (%f,%f) (%f,%f)"
         "to %dx %s mt %p %d %d (%f,%f) (%f,%f) (flip %d,%d)\n",
-       __FUNCTION__,
+       __func__,
         src_mt->num_samples, _mesa_get_format_name(src_mt->format), src_mt,
         src_level, src_layer, src_x0, src_y0, src_x1, src_y1,
         dst_mt->num_samples, _mesa_get_format_name(dst_mt->format), dst_mt,
@@ -125,6 +125,8 @@ do_blorp_blit(struct brw_context *brw, GLbitfield buffer_bit,
  
  static bool
  try_blorp_blit(struct brw_context *brw,
+               const struct gl_framebuffer *read_fb,
+               const struct gl_framebuffer *draw_fb,
                 GLfloat srcX0, GLfloat srcY0, GLfloat srcX1, GLfloat srcY1,
                 GLfloat dstX0, GLfloat dstY0, GLfloat dstX1, GLfloat dstY1,
                 GLenum filter, GLbitfield buffer_bit)
@@ -136,11 +138,8 @@ try_blorp_blit(struct brw_context *brw,
      */
     intel_prepare_render(brw);
  
-   const struct gl_framebuffer *read_fb = ctx->ReadBuffer;
-   const struct gl_framebuffer *draw_fb = ctx->DrawBuffer;
-
     bool mirror_x, mirror_y;
-   if (brw_meta_mirror_clip_and_scissor(ctx,
+   if (brw_meta_mirror_clip_and_scissor(ctx, read_fb, draw_fb,
                                          &srcX0, &srcY0, &srcX1, &srcY1,
                                          &dstX0, &dstY0, &dstX1, &dstY1,
                                          &mirror_x, &mirror_y))
@@ -154,8 +153,8 @@ try_blorp_blit(struct brw_context *brw,
     switch (buffer_bit) {
     case GL_COLOR_BUFFER_BIT:
        src_irb = intel_renderbuffer(read_fb->_ColorReadBuffer);
-      for (unsigned i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; ++i) {
-         dst_irb = intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i]);
+      for (unsigned i = 0; i < draw_fb->_NumColorDrawBuffers; ++i) {
+         dst_irb = intel_renderbuffer(draw_fb->_ColorDrawBuffers[i]);
          if (dst_irb)
              do_blorp_blit(brw, buffer_bit,
                            src_irb, src_irb->Base.Base.Format,
@@ -224,8 +223,8 @@ brw_blorp_copytexsubimage(struct brw_context *brw,
     struct intel_mipmap_tree *src_mt = src_irb->mt;
     struct intel_mipmap_tree *dst_mt = intel_image->mt;
  
-   /* BLORP is not supported before Gen6. */
-   if (brw->gen < 6 || brw->gen >= 8)
+   /* BLORP is only supported for Gen6-7. */
+   if (brw->gen < 6 || brw->gen > 7)
        return false;
  
     if (_mesa_get_format_base_format(src_rb->Format) !=
@@ -317,6 +316,8 @@ brw_blorp_copytexsubimage(struct brw_context *brw,
  
  GLbitfield
  brw_blorp_framebuffer(struct brw_context *brw,
+                      struct gl_framebuffer *readFb,
+                      struct gl_framebuffer *drawFb,
                        GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
                        GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
                        GLbitfield mask, GLenum filter)
@@ -333,7 +334,7 @@ brw_blorp_framebuffer(struct brw_context *brw,
  
     for (unsigned int i = 0; i < ARRAY_SIZE(buffer_bits); ++i) {
        if ((mask & buffer_bits[i]) &&
-       try_blorp_blit(brw,
+       try_blorp_blit(brw, readFb, drawFb,
                        srcX0, srcY0, srcX1, srcY1,
                        dstX0, dstY0, dstX1, dstY1,
                        filter, buffer_bits[i])) {
@@ -1254,10 +1255,8 @@ brw_blorp_blit_program::translate_dst_to_src()
     emit_mov(Xp_f, X);
     emit_mov(Yp_f, Y);
     /* Scale and offset */
-   emit_mul(X_f, Xp_f, x_transform.multiplier);
-   emit_mul(Y_f, Yp_f, y_transform.multiplier);
-   emit_add(X_f, X_f, x_transform.offset);
-   emit_add(Y_f, Y_f, y_transform.offset);
+   emit_mad(X_f, x_transform.offset, Xp_f, x_transform.multiplier);
+   emit_mad(Y_f, y_transform.offset, Yp_f, y_transform.multiplier);
     if (key->blit_scaled && key->blend) {
        /* Translate coordinates to lay out the samples in a rectangular  grid
         * roughly corresponding to sample locations.
@@ -1286,8 +1285,8 @@ brw_blorp_blit_program::translate_dst_to_src()
        /* Round the float coordinates down to nearest integer */
        emit_rndd(Xp_f, X_f);
        emit_rndd(Yp_f, Y_f);
-      emit_mul(X_f, Xp_f, brw_imm_f(1 / key->x_scale));
-      emit_mul(Y_f, Yp_f, brw_imm_f(1 / key->y_scale));
+      emit_mul(X_f, Xp_f, brw_imm_f(1.0f / key->x_scale));
+      emit_mul(Y_f, Yp_f, brw_imm_f(1.0f / key->y_scale));
        SWAP_XY_AND_XPYP();
     } else if (!key->bilinear_filter) {
        /* Round the float coordinates down to nearest integer by moving to
@@ -1307,10 +1306,10 @@ brw_blorp_blit_program::clamp_tex_coords(struct brw_reg regX,
                                           struct brw_reg clampX1,
                                           struct brw_reg clampY1)
  {
-   emit_cond_mov(regX, clampX0, BRW_CONDITIONAL_L, regX, clampX0);
-   emit_cond_mov(regX, clampX1, BRW_CONDITIONAL_G, regX, clampX1);
-   emit_cond_mov(regY, clampY0, BRW_CONDITIONAL_L, regY, clampY0);
-   emit_cond_mov(regY, clampY1, BRW_CONDITIONAL_G, regY, clampY1);
+   emit_max(regX, regX, clampX0);
+   emit_max(regY, regY, clampY0);
+   emit_min(regX, regX, clampX1);
+   emit_min(regY, regY, clampY1);
  }
  
  /**
@@ -1443,7 +1442,7 @@ brw_blorp_blit_program::manual_blend_average(unsigned num_samples)
        for (int j = 0; j < 4; ++j) {
           emit_mul(offset(texture_data[0], 2*j),
                   offset(vec8(texture_data[0]), 2*j),
-                 brw_imm_f(1.0/num_samples));
+                 brw_imm_f(1.0f / num_samples));
        }
     }
  
@@ -1476,9 +1475,9 @@ brw_blorp_blit_program::manual_blend_bilinear(unsigned num_samples)
  
        /* Compute pixel coordinates */
        emit_add(vec16(x_sample_coords), Xp_f,
-              brw_imm_f((float)(i & 0x1) * (1.0 / key->x_scale)));
+              brw_imm_f((float)(i & 0x1) * (1.0f / key->x_scale)));
        emit_add(vec16(y_sample_coords), Yp_f,
-              brw_imm_f((float)((i >> 1) & 0x1) * (1.0 / key->y_scale)));
+              brw_imm_f((float)((i >> 1) & 0x1) * (1.0f / key->y_scale)));
        emit_mov(vec16(X), x_sample_coords);
        emit_mov(vec16(Y), y_sample_coords);
  
@@ -1790,7 +1789,7 @@ brw_blorp_coord_transform_params::setup(GLfloat src0, GLfloat src1,
         * so 0.5 provides the necessary correction.
         */
        multiplier = scale;
-      offset = src0 + (-dst0 + 0.5) * scale;
+      offset = src0 + (-dst0 + 0.5f) * scale;
     } else {
        /* When mirroring X we need:
         *   src_x - src_x0 = dst_x1 - dst_x - 0.5
@@ -1798,7 +1797,7 @@ brw_blorp_coord_transform_params::setup(GLfloat src0, GLfloat src1,
         *   src_x = src_x0 + (dst_x1 -dst_x - 0.5) * scale
         */
        multiplier = -scale;
-      offset = src0 + (dst1 - 0.5) * scale;
+      offset = src0 + (dst1 - 0.5f) * scale;
     }
  }
  
@@ -1953,8 +1952,8 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct brw_context *brw,
     /* Scaling factors used for bilinear filtering in multisample scaled
      * blits.
      */
-   wm_prog_key.x_scale = 2.0;
-   wm_prog_key.y_scale = src_mt->num_samples / 2.0;
+   wm_prog_key.x_scale = 2.0f;
+   wm_prog_key.y_scale = src_mt->num_samples / 2.0f;
  
     if (filter == GL_LINEAR && src.num_samples <= 1 && dst.num_samples <= 1)
        wm_prog_key.bilinear_filter = true;
@@ -1993,14 +1992,17 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct brw_context *brw,
  
     wm_prog_key.src_tiled_w = src.map_stencil_as_y_tiled;
     wm_prog_key.dst_tiled_w = dst.map_stencil_as_y_tiled;
-   x0 = wm_push_consts.dst_x0 = dst_x0;
-   y0 = wm_push_consts.dst_y0 = dst_y0;
-   x1 = wm_push_consts.dst_x1 = dst_x1;
-   y1 = wm_push_consts.dst_y1 = dst_y1;
+   /* Round the floating-point destination coordinates to the nearest integer
+    * to avoid off-by-one-texel errors when blitting.
+    */
+   x0 = wm_push_consts.dst_x0 = roundf(dst_x0);
+   y0 = wm_push_consts.dst_y0 = roundf(dst_y0);
+   x1 = wm_push_consts.dst_x1 = roundf(dst_x1);
+   y1 = wm_push_consts.dst_y1 = roundf(dst_y1);
     wm_push_consts.rect_grid_x1 = (minify(src_mt->logical_width0, src_level) *
-                                  wm_prog_key.x_scale - 1.0);
+                                  wm_prog_key.x_scale - 1.0f);
     wm_push_consts.rect_grid_y1 = (minify(src_mt->logical_height0, src_level) *
-                                  wm_prog_key.y_scale - 1.0);
+                                  wm_prog_key.y_scale - 1.0f);
  
     wm_push_consts.x_transform.setup(src_x0, src_x1, dst_x0, dst_x1, mirror_x);
     wm_push_consts.y_transform.setup(src_y0, src_y1, dst_y0, dst_y1, mirror_y);