mesa: remove _mesa_ffs(), implement ffs() for non-GNU platforms
[mesa.git] / src / mesa / drivers / dri / intel / intel_blit.c
index 167140d274a37b94f0578f558a6af4e8dba701da..e484fd34bfd14b0373357bab1f9903efe8688602 100644 (file)
@@ -38,6 +38,7 @@
 #include "intel_reg.h"
 #include "intel_regions.h"
 #include "intel_batchbuffer.h"
+#include "intel_mipmap_tree.h"
 
 #define FILE_DEBUG_FLAG DEBUG_BLIT
 
@@ -64,18 +65,36 @@ static GLuint translate_raster_op(GLenum logicop)
    }
 }
 
+static uint32_t /* returns the BR13_* format constant selected by cpp */
+br13_for_cpp(int cpp) /* cpp: bytes per pixel; only 1, 2 and 4 are recognised */
+{
+   switch (cpp) {
+   case 4:
+      return BR13_8888;
+      break; /* not reached: the return above already left the function */
+   case 2:
+      return BR13_565;
+      break; /* not reached */
+   case 1:
+      return BR13_8;
+      break; /* not reached */
+   default:
+      assert(0); /* any other cpp is a caller bug; fires unless NDEBUG is defined */
+      return 0; /* with assert() compiled out, no format bits are contributed */
+   }
+}
 
 /* Copy BitBlt
  */
-GLboolean
+bool
 intelEmitCopyBlit(struct intel_context *intel,
                  GLuint cpp,
                  GLshort src_pitch,
-                 dri_bo *src_buffer,
+                 drm_intel_bo *src_buffer,
                  GLuint src_offset,
                  uint32_t src_tiling,
                  GLshort dst_pitch,
-                 dri_bo *dst_buffer,
+                 drm_intel_bo *dst_buffer,
                  GLuint dst_offset,
                  uint32_t dst_tiling,
                  GLshort src_x, GLshort src_y,
@@ -86,43 +105,39 @@ intelEmitCopyBlit(struct intel_context *intel,
    GLuint CMD, BR13, pass = 0;
    int dst_y2 = dst_y + h;
    int dst_x2 = dst_x + w;
-   dri_bo *aper_array[3];
+   drm_intel_bo *aper_array[3];
    BATCH_LOCALS;
 
-   /* Blits are in a different ringbuffer so we don't use them. */
-   if (intel->gen >= 6)
-      return GL_FALSE;
-
    if (dst_tiling != I915_TILING_NONE) {
       if (dst_offset & 4095)
-        return GL_FALSE;
+        return false;
       if (dst_tiling == I915_TILING_Y)
-        return GL_FALSE;
+        return false;
    }
    if (src_tiling != I915_TILING_NONE) {
       if (src_offset & 4095)
-        return GL_FALSE;
+        return false;
       if (src_tiling == I915_TILING_Y)
-        return GL_FALSE;
+        return false;
    }
 
    /* do space check before going any further */
    do {
-       aper_array[0] = intel->batch->buf;
+       aper_array[0] = intel->batch.bo;
        aper_array[1] = dst_buffer;
        aper_array[2] = src_buffer;
 
        if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) {
-           intel_batchbuffer_flush(intel->batch);
+           intel_batchbuffer_flush(intel);
            pass++;
        } else
            break;
    } while (pass < 2);
 
    if (pass >= 2)
-      return GL_FALSE;
+      return false;
 
-   intel_batchbuffer_require_space(intel->batch, 8 * 4);
+   intel_batchbuffer_require_space(intel, 8 * 4, true);
    DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
        __FUNCTION__,
        src_buffer, src_pitch, src_offset, src_x, src_y,
@@ -131,22 +146,35 @@ intelEmitCopyBlit(struct intel_context *intel,
    src_pitch *= cpp;
    dst_pitch *= cpp;
 
-   BR13 = translate_raster_op(logic_op) << 16;
+   /* Blit pitch must be dword-aligned.  Otherwise, the hardware appears to drop
+    * the low bits.
+    */
+   assert(src_pitch % 4 == 0);
+   assert(dst_pitch % 4 == 0);
+
+   /* For big formats (such as floating point), do the copy using 32bpp and
+    * multiply the coordinates.
+    */
+   if (cpp > 4) {
+      assert(cpp % 4 == 0);
+      dst_x *= cpp / 4;
+      dst_x2 *= cpp / 4;
+      src_x *= cpp / 4;
+      cpp = 4;
+   }
+
+   BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16;
 
    switch (cpp) {
    case 1:
-      CMD = XY_SRC_COPY_BLT_CMD;
-      break;
    case 2:
-      BR13 |= BR13_565;
       CMD = XY_SRC_COPY_BLT_CMD;
       break;
    case 4:
-      BR13 |= BR13_8888;
       CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
       break;
    default:
-      return GL_FALSE;
+      return false;
    }
 
 #ifndef I915
@@ -161,13 +189,13 @@ intelEmitCopyBlit(struct intel_context *intel,
 #endif
 
    if (dst_y2 <= dst_y || dst_x2 <= dst_x) {
-      return GL_TRUE;
+      return true;
    }
 
    assert(dst_x < dst_x2);
    assert(dst_y < dst_y2);
 
-   BEGIN_BATCH(8);
+   BEGIN_BATCH_BLT(8);
    OUT_BATCH(CMD);
    OUT_BATCH(BR13 | (uint16_t)dst_pitch);
    OUT_BATCH((dst_y << 16) | dst_x);
@@ -182,9 +210,9 @@ intelEmitCopyBlit(struct intel_context *intel,
                    src_offset);
    ADVANCE_BATCH();
 
-   intel_batchbuffer_emit_mi_flush(intel->batch);
+   intel_batchbuffer_emit_mi_flush(intel);
 
-   return GL_TRUE;
+   return true;
 }
 
 
@@ -195,28 +223,28 @@ intelEmitCopyBlit(struct intel_context *intel,
  * which we're clearing with triangles.
  * \param mask  bitmask of BUFFER_BIT_* values indicating buffers to clear
  */
-void
-intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
+GLbitfield
+intelClearWithBlit(struct gl_context *ctx, GLbitfield mask)
 {
    struct intel_context *intel = intel_context(ctx);
    struct gl_framebuffer *fb = ctx->DrawBuffer;
-   GLuint clear_depth;
-   GLboolean all;
+   GLuint clear_depth_value, clear_depth_mask;
    GLint cx, cy, cw, ch;
+   GLbitfield fail_mask = 0;
    BATCH_LOCALS;
 
-   /* Blits are in a different ringbuffer so we don't use them. */
-   assert(intel->gen < 6);
-
    /*
     * Compute values for clearing the buffers.
     */
-   clear_depth = 0;
+   clear_depth_value = 0;
+   clear_depth_mask = 0;
    if (mask & BUFFER_BIT_DEPTH) {
-      clear_depth = (GLuint) (fb->_DepthMax * ctx->Depth.Clear);
+      clear_depth_value = (GLuint) (fb->_DepthMax * ctx->Depth.Clear);
+      clear_depth_mask = XY_BLT_WRITE_RGB;
    }
    if (mask & BUFFER_BIT_STENCIL) {
-      clear_depth |= (ctx->Stencil.Clear & 0xff) << 24;
+      clear_depth_value |= (ctx->Stencil.Clear & 0xff) << 24;
+      clear_depth_mask |= XY_BLT_WRITE_ALPHA;
    }
 
    cx = fb->_Xmin;
@@ -228,43 +256,45 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
    ch = fb->_Ymax - fb->_Ymin;
 
    if (cw == 0 || ch == 0)
-      return;
-
-   GLuint buf;
-   all = (cw == fb->Width && ch == fb->Height);
-
-   intel_prepare_render(intel);
+      return 0;
 
    /* Loop over all renderbuffers */
-   for (buf = 0; buf < BUFFER_COUNT && mask; buf++) {
-      const GLbitfield bufBit = 1 << buf;
+   mask &= (1 << BUFFER_COUNT) - 1;
+   while (mask) {
+      GLuint buf = ffs(mask) - 1;
+      bool is_depth_stencil = buf == BUFFER_DEPTH || buf == BUFFER_STENCIL;
       struct intel_renderbuffer *irb;
-      drm_intel_bo *write_buffer;
       int x1, y1, x2, y2;
       uint32_t clear_val;
       uint32_t BR13, CMD;
+      struct intel_region *region;
       int pitch, cpp;
       drm_intel_bo *aper_array[2];
 
-      if (!(mask & bufBit))
-        continue;
+      mask &= ~(1 << buf);
 
-      /* OK, clear this renderbuffer */
       irb = intel_get_renderbuffer(fb, buf);
-      write_buffer = intel_region_buffer(intel, irb->region,
-                                        all ? INTEL_WRITE_FULL :
-                                        INTEL_WRITE_PART);
-      x1 = cx + irb->region->draw_x;
-      y1 = cy + irb->region->draw_y;
-      x2 = cx + cw + irb->region->draw_x;
-      y2 = cy + ch + irb->region->draw_y;
+      if (irb && irb->mt) {
+        region = irb->mt->region;
+        assert(region);
+        assert(region->bo);
+      } else {
+         fail_mask |= 1 << buf;
+         continue;
+      }
 
-      pitch = irb->region->pitch;
-      cpp = irb->region->cpp;
+      /* OK, clear this renderbuffer */
+      x1 = cx + irb->draw_x;
+      y1 = cy + irb->draw_y;
+      x2 = cx + cw + irb->draw_x;
+      y2 = cy + ch + irb->draw_y;
+
+      pitch = region->pitch;
+      cpp = region->cpp;
 
       DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n",
          __FUNCTION__,
-         irb->region->buffer, (pitch * cpp),
+         region->bo, (pitch * cpp),
          x1, y1, x2 - x1, y2 - y1);
 
       BR13 = 0xf0 << 16;
@@ -272,41 +302,31 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
 
       /* Setup the blit command */
       if (cpp == 4) {
-        BR13 |= BR13_8888;
-        if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL) {
-           if (mask & BUFFER_BIT_DEPTH)
-              CMD |= XY_BLT_WRITE_RGB;
-           if (mask & BUFFER_BIT_STENCIL)
-              CMD |= XY_BLT_WRITE_ALPHA;
+        if (is_depth_stencil) {
+           CMD |= clear_depth_mask;
         } else {
            /* clearing RGBA */
            CMD |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
         }
-      } else {
-        ASSERT(cpp == 2);
-        BR13 |= BR13_565;
       }
 
-      assert(irb->region->tiling != I915_TILING_Y);
+      assert(region->tiling != I915_TILING_Y);
 
 #ifndef I915
-      if (irb->region->tiling != I915_TILING_NONE) {
+      if (region->tiling != I915_TILING_NONE) {
         CMD |= XY_DST_TILED;
         pitch /= 4;
       }
 #endif
       BR13 |= (pitch * cpp);
 
-      if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL) {
-        clear_val = clear_depth;
+      if (is_depth_stencil) {
+        clear_val = clear_depth_value;
       } else {
         uint8_t clear[4];
-        GLclampf *color = ctx->Color.ClearColor;
+        GLfloat *color = ctx->Color.ClearColor.f;
 
-        CLAMPED_FLOAT_TO_UBYTE(clear[0], color[0]);
-        CLAMPED_FLOAT_TO_UBYTE(clear[1], color[1]);
-        CLAMPED_FLOAT_TO_UBYTE(clear[2], color[2]);
-        CLAMPED_FLOAT_TO_UBYTE(clear[3], color[3]);
+        _mesa_unclamped_float_rgba_to_ubyte(clear, color);
 
         switch (irb->Base.Format) {
         case MESA_FORMAT_ARGB8888:
@@ -325,50 +345,58 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
            clear_val = PACK_COLOR_1555(clear[3], clear[0],
                                        clear[1], clear[2]);
            break;
+        case MESA_FORMAT_A8:
+           clear_val = PACK_COLOR_8888(clear[3], clear[3],
+                                       clear[3], clear[3]);
+           break;
         default:
-           _mesa_problem(ctx, "Unexpected renderbuffer format: %d\n",
-                         irb->Base.Format);
-           clear_val = 0;
+           fail_mask |= 1 << buf;
+           continue;
         }
       }
 
+      BR13 |= br13_for_cpp(cpp);
+
       assert(x1 < x2);
       assert(y1 < y2);
 
       /* do space check before going any further */
-      aper_array[0] = intel->batch->buf;
-      aper_array[1] = write_buffer;
+      aper_array[0] = intel->batch.bo;
+      aper_array[1] = region->bo;
 
       if (drm_intel_bufmgr_check_aperture_space(aper_array,
                                                ARRAY_SIZE(aper_array)) != 0) {
-        intel_batchbuffer_flush(intel->batch);
+        intel_batchbuffer_flush(intel);
       }
 
-      BEGIN_BATCH(6);
+      BEGIN_BATCH_BLT(6);
       OUT_BATCH(CMD);
       OUT_BATCH(BR13);
       OUT_BATCH((y1 << 16) | x1);
       OUT_BATCH((y2 << 16) | x2);
-      OUT_RELOC_FENCED(write_buffer,
+      OUT_RELOC_FENCED(region->bo,
                       I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                       0);
       OUT_BATCH(clear_val);
       ADVANCE_BATCH();
 
+      if (intel->always_flush_cache)
+        intel_batchbuffer_emit_mi_flush(intel);
+
       if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL)
         mask &= ~(BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL);
-      else
-        mask &= ~bufBit;    /* turn off bit, for faster loop exit */
    }
+
+   return fail_mask;
 }
 
-GLboolean
+bool
 intelEmitImmediateColorExpandBlit(struct intel_context *intel,
                                  GLuint cpp,
                                  GLubyte *src_bits, GLuint src_size,
                                  GLuint fg_color,
                                  GLshort dst_pitch,
-                                 dri_bo *dst_buffer,
+                                 drm_intel_bo *dst_buffer,
                                  GLuint dst_offset,
                                  uint32_t dst_tiling,
                                  GLshort x, GLshort y,
@@ -378,15 +406,11 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
    int dwords = ALIGN(src_size, 8) / 4;
    uint32_t opcode, br13, blit_cmd;
 
-   /* Blits are in a different ringbuffer so we don't use them. */
-   if (intel->gen >= 6)
-      return GL_FALSE;
-
    if (dst_tiling != I915_TILING_NONE) {
       if (dst_offset & 4095)
-        return GL_FALSE;
+        return false;
       if (dst_tiling == I915_TILING_Y)
-        return GL_FALSE;
+        return false;
    }
 
    assert( logic_op - GL_CLEAR >= 0 );
@@ -394,7 +418,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
    assert(dst_pitch > 0);
 
    if (w < 0 || h < 0)
-      return GL_TRUE;
+      return true;
 
    dst_pitch *= cpp;
 
@@ -402,10 +426,10 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
        __FUNCTION__,
        dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords);
 
-   intel_batchbuffer_require_space( intel->batch,
-                                   (8 * 4) +
-                                   (3 * 4) +
-                                   dwords * 4 );
+   intel_batchbuffer_require_space(intel,
+                                  (8 * 4) +
+                                  (3 * 4) +
+                                  dwords * 4, true);
 
    opcode = XY_SETUP_BLT_CMD;
    if (cpp == 4)
@@ -418,16 +442,13 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
 #endif
 
    br13 = dst_pitch | (translate_raster_op(logic_op) << 16) | (1 << 29);
-   if (cpp == 2)
-      br13 |= BR13_565;
-   else
-      br13 |= BR13_8888;
+   br13 |= br13_for_cpp(cpp);
 
    blit_cmd = XY_TEXT_IMMEDIATE_BLIT_CMD | XY_TEXT_BYTE_PACKED; /* packing? */
    if (dst_tiling != I915_TILING_NONE)
       blit_cmd |= XY_DST_TILED;
 
-   BEGIN_BATCH(8 + 3);
+   BEGIN_BATCH_BLT(8 + 3);
    OUT_BATCH(opcode);
    OUT_BATCH(br13);
    OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */
@@ -444,13 +465,11 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
    OUT_BATCH(((y + h) << 16) | (x + w));
    ADVANCE_BATCH();
 
-   intel_batchbuffer_data( intel->batch,
-                          src_bits,
-                          dwords * 4 );
+   intel_batchbuffer_data(intel, src_bits, dwords * 4, true);
 
-   intel_batchbuffer_emit_mi_flush(intel->batch);
+   intel_batchbuffer_emit_mi_flush(intel);
 
-   return GL_TRUE;
+   return true;
 }
 
 /* We don't have a memmove-type blit like some other hardware, so we'll do a
@@ -466,13 +485,13 @@ intel_emit_linear_blit(struct intel_context *intel,
                       unsigned int size)
 {
    GLuint pitch, height;
-   GLboolean ok;
-
-   /* Blits are in a different ringbuffer so we don't use them. */
-   assert(intel->gen < 6);
+   bool ok;
 
-   /* The pitch is a signed value. */
-   pitch = MIN2(size, (1 << 15) - 1);
+   /* The pitch given to the GPU must be DWORD aligned, and
+    * we want width to match pitch. Max width is (1 << 15) - 1;
+    * rounding that down to the nearest DWORD gives (1 << 15) - 4.
+    */
+   pitch = ROUND_DOWN_TO(MIN2(size, (1 << 15) - 1), 4);
    height = size / pitch;
    ok = intelEmitCopyBlit(intel, 1,
                          pitch, src_bo, src_offset, I915_TILING_NONE,
@@ -487,10 +506,11 @@ intel_emit_linear_blit(struct intel_context *intel,
    dst_offset += pitch * height;
    size -= pitch * height;
    assert (size < (1 << 15));
+   pitch = ALIGN(size, 4);
    if (size != 0) {
       ok = intelEmitCopyBlit(intel, 1,
-                            size, src_bo, src_offset, I915_TILING_NONE,
-                            size, dst_bo, dst_offset, I915_TILING_NONE,
+                            pitch, src_bo, src_offset, I915_TILING_NONE,
+                            pitch, dst_bo, dst_offset, I915_TILING_NONE,
                             0, 0, /* src x/y */
                             0, 0, /* dst x/y */
                             size, 1, /* w, h */
@@ -498,3 +518,86 @@ intel_emit_linear_blit(struct intel_context *intel,
       assert(ok);
    }
 }
+
+/**
+ * Used to initialize the alpha value of an ARGB8888 teximage after
+ * loading it from an XRGB8888 source.
+ *
+ * This is very common with glCopyTexImage2D().
+ */
+void
+intel_set_teximage_alpha_to_one(struct gl_context *ctx,
+                               struct intel_texture_image *intel_image)
+{
+   struct intel_context *intel = intel_context(ctx);
+   unsigned int image_x, image_y; /* image origin inside the miptree, set by intel_miptree_get_image_offset() */
+   uint32_t x1, y1, x2, y2; /* corners of the rectangle handed to the blitter */
+   uint32_t BR13, CMD;
+   int pitch, cpp;
+   drm_intel_bo *aper_array[2]; /* buffers passed to the aperture-space check: batch bo and destination bo */
+   struct intel_region *region = intel_image->mt->region;
+   int width, height, depth; /* filled in by intel_miptree_get_dimensions_for_image() */
+   BATCH_LOCALS;
+
+   intel_miptree_get_dimensions_for_image(&intel_image->base.Base,
+                                          &width, &height, &depth);
+   assert(depth == 1); /* only images whose reported depth is 1 are handled here */
+
+   assert(intel_image->base.Base.TexFormat == MESA_FORMAT_ARGB8888); /* the alpha-only write below is specific to this format */
+
+   /* get dest x/y in destination texture */
+   intel_miptree_get_image_offset(intel_image->mt,
+                                 intel_image->base.Base.Level,
+                                 intel_image->base.Base.Face,
+                                 0,
+                                 &image_x, &image_y);
+
+   x1 = image_x;
+   y1 = image_y;
+   x2 = image_x + width; /* rectangle spans the full image width ... */
+   y2 = image_y + height; /* ... and the full image height */
+
+   pitch = region->pitch;
+   cpp = region->cpp;
+
+   DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n",
+       __FUNCTION__,
+       intel_image->mt->region->bo, (pitch * cpp),
+       x1, y1, x2 - x1, y2 - y1);
+
+   BR13 = br13_for_cpp(cpp) | 0xf0 << 16; /* << binds tighter than |: format bits OR'd with 0xf0 at bit 16 (presumably the raster op -- TODO confirm) */
+   CMD = XY_COLOR_BLT_CMD;
+   CMD |= XY_BLT_WRITE_ALPHA; /* XY_BLT_WRITE_RGB is deliberately not set, so only alpha is touched */
+
+   assert(region->tiling != I915_TILING_Y); /* this path does not handle Y-tiled regions */
+
+#ifndef I915
+   if (region->tiling != I915_TILING_NONE) {
+      CMD |= XY_DST_TILED;
+      pitch /= 4; /* NOTE(review): tiled destinations presumably take the pitch in dwords -- confirm against hw docs */
+   }
+#endif
+   BR13 |= (pitch * cpp); /* pitch goes into the low bits of BR13; region->pitch looks to be in pixels, hence * cpp -- TODO confirm */
+
+   /* do space check before going any further */
+   aper_array[0] = intel->batch.bo;
+   aper_array[1] = region->bo;
+
+   if (drm_intel_bufmgr_check_aperture_space(aper_array,
+                                            ARRAY_SIZE(aper_array)) != 0) {
+      intel_batchbuffer_flush(intel); /* check reported a problem: flush the current batch before emitting */
+   }
+
+   BEGIN_BATCH_BLT(6); /* six entries follow: CMD, BR13, two corner words, the relocation, the fill value */
+   OUT_BATCH(CMD);
+   OUT_BATCH(BR13);
+   OUT_BATCH((y1 << 16) | x1); /* y in the upper 16 bits, x in the lower 16 */
+   OUT_BATCH((y2 << 16) | x2);
+   OUT_RELOC_FENCED(region->bo,
+                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                   0);
+   OUT_BATCH(0xffffffff); /* white, but only alpha gets written */
+   ADVANCE_BATCH();
+
+   intel_batchbuffer_emit_mi_flush(intel);
+}