Merge commit 'fj/mesa-next'
[mesa.git] / src / mesa / drivers / dri / r300 / r300_blit.c
index 515a85caa292361d15d75b7be26712923dd13889..54ac2510e7a1f5419e22c4ed772c7ac90dfab5fb 100644 (file)
 #include "compiler/radeon_compiler.h"
 #include "compiler/radeon_opcodes.h"
 
-/**
- * TODO:
- * - handle depth buffer
- * - r300 fp and rs setup
- */
-
 static void vp_ins_outs(struct r300_vertex_program_compiler *c)
 {
     c->code->inputs[VERT_ATTRIB_POS] = 0;
@@ -120,7 +114,7 @@ static void create_fragment_program(struct r300_context *r300)
     inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
 
     compiler.Base.Program.InputsRead = (1 << FRAG_ATTRIB_TEX0);
-    compiler.OutputColor = FRAG_RESULT_COLOR;
+    compiler.OutputColor[0] = FRAG_RESULT_COLOR;
     compiler.OutputDepth = FRAG_RESULT_DEPTH;
     compiler.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515);
     compiler.code = &r300->blit.fp_code;
@@ -135,37 +129,6 @@ void r300_blit_init(struct r300_context *r300)
     create_fragment_program(r300);
 }
 
-static void r500_emit_rs_setup(struct r300_context *r300)
-{
-    BATCH_LOCALS(&r300->radeon);
-
-    BEGIN_BATCH(7);
-    OUT_BATCH_REGSEQ(R300_RS_COUNT, 2);
-    OUT_BATCH((4 << R300_IT_COUNT_SHIFT) | R300_HIRES_EN);
-    OUT_BATCH(0);
-    OUT_BATCH_REGVAL(R500_RS_INST_0,
-                     (0 << R500_RS_INST_TEX_ID_SHIFT) |
-                     (0 << R500_RS_INST_TEX_ADDR_SHIFT) |
-                     R500_RS_INST_TEX_CN_WRITE |
-                     R500_RS_INST_COL_CN_NO_WRITE);
-    OUT_BATCH_REGVAL(R500_RS_IP_0,
-                     (0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
-                     (1 << R500_RS_IP_TEX_PTR_T_SHIFT) |
-                     (2 << R500_RS_IP_TEX_PTR_R_SHIFT) |
-                     (3 << R500_RS_IP_TEX_PTR_Q_SHIFT));
-    END_BATCH();
-}
-
-static void r300_emit_fp_setup(struct r300_context *r300)
-{
-    assert(0);
-}
-
-static void r300_emit_rs_setup(struct r300_context *r300)
-{
-    assert(0);
-}
-
 static void r300_emit_tx_setup(struct r300_context *r300,
                                gl_format mesa_format,
                                struct radeon_bo *bo,
@@ -178,7 +141,7 @@ static void r300_emit_tx_setup(struct r300_context *r300,
 
     assert(width <= 2048);
     assert(height <= 2048);
-    assert(r300TranslateTexFormat(mesa_format) != 0);
+    assert(r300TranslateTexFormat(mesa_format) >= 0);
     assert(offset % 32 == 0);
 
     BEGIN_BATCH(17);
@@ -187,8 +150,8 @@ static void r300_emit_tx_setup(struct r300_context *r300,
                      (R300_TX_CLAMP_TO_EDGE  << R300_TX_WRAP_T_SHIFT) |
                      (R300_TX_CLAMP_TO_EDGE  << R300_TX_WRAP_R_SHIFT) |
                      R300_TX_MIN_FILTER_MIP_NONE |
-                     R300_TX_MIN_FILTER_LINEAR |
-                     R300_TX_MAG_FILTER_LINEAR |
+                     R300_TX_MIN_FILTER_NEAREST |
+                     R300_TX_MAG_FILTER_NEAREST |
                      (0 << 28));
     OUT_BATCH_REGVAL(R300_TX_FILTER1_0, 0);
     OUT_BATCH_REGVAL(R300_TX_SIZE_0,
@@ -199,7 +162,7 @@ static void r300_emit_tx_setup(struct r300_context *r300,
                      R300_TX_SIZE_TXPITCH_EN);
 
     OUT_BATCH_REGVAL(R300_TX_FORMAT_0, r300TranslateTexFormat(mesa_format));
-    OUT_BATCH_REGVAL(R300_TX_FORMAT2_0, pitch/_mesa_get_format_bytes(mesa_format) - 1);
+    OUT_BATCH_REGVAL(R300_TX_FORMAT2_0, pitch - 1);
     OUT_BATCH_REGSEQ(R300_TX_OFFSET_0, 1);
     OUT_BATCH_RELOC(0, bo, offset, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
 
@@ -251,7 +214,8 @@ static uint32_t mesa_format_to_us_format(gl_format mesa_format)
             return EASY_US_FORMAT(R500_OUT_FMT_C4_16, R, G, B, A, 0xf);
 
         default:
-            assert(!"Invalid format for US output\n");
+            fprintf(stderr, "Unsupported format %s for US output\n", _mesa_get_format_name(mesa_format));
+            assert(0);
             return 0;
     }
 }
@@ -275,6 +239,87 @@ static void r500_emit_fp_setup(struct r300_context *r300,
     END_BATCH();
 }
 
+/**
+ * Emit the rasterizer (RS) state for the blit using the r500 register layout.
+ *
+ * Writes R300_RS_COUNT (plus the register that follows it), R500_RS_INST_0
+ * and R500_RS_IP_0:
+ *  - RS_COUNT gets an IT count of 4 with R300_HIRES_EN set; the register
+ *    after it is written as 0.
+ *  - RS_INST_0 uses texture id 0 and texture address 0 with TEX_CN_WRITE
+ *    and COL_CN_NO_WRITE.
+ *  - RS_IP_0 sets the S, T, R and Q texture pointers to 0, 1, 2 and 3.
+ *
+ * NOTE(review): presumably this routes the single texcoord set of the blit
+ * quad to the fragment program and disables colour output from the
+ * rasterizer -- confirm against the register reference.
+ *
+ * @param r300 context whose command buffer receives the packets
+ */
+static void r500_emit_rs_setup(struct r300_context *r300)
+{
+    BATCH_LOCALS(&r300->radeon);
+
+    BEGIN_BATCH(7); /* presumably 3 (REGSEQ of 2) + 2 + 2 (REGVALs) dwords -- TODO confirm */
+    OUT_BATCH_REGSEQ(R300_RS_COUNT, 2);
+    OUT_BATCH((4 << R300_IT_COUNT_SHIFT) | R300_HIRES_EN);
+    OUT_BATCH(0); /* value for the register following R300_RS_COUNT */
+    OUT_BATCH_REGVAL(R500_RS_INST_0,
+                     (0 << R500_RS_INST_TEX_ID_SHIFT) |
+                     (0 << R500_RS_INST_TEX_ADDR_SHIFT) |
+                     R500_RS_INST_TEX_CN_WRITE |
+                     R500_RS_INST_COL_CN_NO_WRITE);
+    OUT_BATCH_REGVAL(R500_RS_IP_0,
+                     (0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
+                     (1 << R500_RS_IP_TEX_PTR_T_SHIFT) |
+                     (2 << R500_RS_IP_TEX_PTR_R_SHIFT) |
+                     (3 << R500_RS_IP_TEX_PTR_Q_SHIFT));
+    END_BATCH();
+}
+
+/**
+ * Upload a compiled r300 fragment program and the colour output format.
+ *
+ * Emits, in this order:
+ *  - the four ALU register banks (RGB_INST, RGB_ADDR, ALPHA_INST,
+ *    ALPHA_ADDR), each with code->alu.length entries taken from
+ *    code->alu.inst[];
+ *  - code->tex.length texture instructions from code->tex.inst;
+ *  - three values starting at R300_US_CONFIG:
+ *    R300_PFS_CNTL_FIRST_NODE_HAS_TEX, code->pixsize, code->code_offset;
+ *  - the four code->code_addr entries starting at R300_US_CODE_ADDR_0;
+ *  - the output format derived from dst_format.
+ *
+ * Batch size: (alu.length + 1) * 4 covers the four ALU sequences,
+ * tex.length + 1 the texture sequence, and 11 the remaining packets
+ * (presumably 4 + 5 + 2 dwords, assuming one header dword per
+ * REGSEQ/REGVAL -- TODO confirm against the macro definitions).
+ *
+ * NOTE(review): the output format is written through the name
+ * R500_US_OUT_FMT_0 although this is the r300 path; presumably the
+ * register is shared between both families -- confirm.
+ *
+ * @param r300       context whose command buffer receives the packets
+ * @param code       compiled r300 fragment program to upload
+ * @param dst_format destination format, translated with
+ *                   mesa_format_to_us_format()
+ */
+static void r300_emit_fp_setup(struct r300_context *r300,
+                               struct r300_fragment_program_code *code,
+                               gl_format dst_format)
+{
+    unsigned i;
+    BATCH_LOCALS(&r300->radeon);
+
+    BEGIN_BATCH((code->alu.length + 1) * 4 + code->tex.length + 1 + 11);
+
+    OUT_BATCH_REGSEQ(R300_US_ALU_RGB_INST_0, code->alu.length);
+    for (i = 0; i < code->alu.length; i++) {
+        OUT_BATCH(code->alu.inst[i].rgb_inst);
+    }
+    OUT_BATCH_REGSEQ(R300_US_ALU_RGB_ADDR_0, code->alu.length);
+    for (i = 0; i < code->alu.length; i++) {
+        OUT_BATCH(code->alu.inst[i].rgb_addr);
+    }
+    OUT_BATCH_REGSEQ(R300_US_ALU_ALPHA_INST_0, code->alu.length);
+    for (i = 0; i < code->alu.length; i++) {
+        OUT_BATCH(code->alu.inst[i].alpha_inst);
+    }
+    OUT_BATCH_REGSEQ(R300_US_ALU_ALPHA_ADDR_0, code->alu.length);
+    for (i = 0; i < code->alu.length; i++) {
+        OUT_BATCH(code->alu.inst[i].alpha_addr);
+    }
+
+    OUT_BATCH_REGSEQ(R300_US_TEX_INST_0, code->tex.length);
+    OUT_BATCH_TABLE(code->tex.inst, code->tex.length);
+
+    OUT_BATCH_REGSEQ(R300_US_CONFIG, 3);
+    OUT_BATCH(R300_PFS_CNTL_FIRST_NODE_HAS_TEX);
+    OUT_BATCH(code->pixsize);
+    OUT_BATCH(code->code_offset);
+    OUT_BATCH_REGSEQ(R300_US_CODE_ADDR_0, 4);
+    OUT_BATCH_TABLE(code->code_addr, 4);
+    OUT_BATCH_REGVAL(R500_US_OUT_FMT_0, mesa_format_to_us_format(dst_format));
+    END_BATCH();
+}
+
+/**
+ * Emit the rasterizer (RS) state for the blit using the r300 register layout.
+ *
+ * Writes R300_RS_COUNT (plus the register that follows it), R300_RS_INST_0
+ * and R300_RS_IP_0:
+ *  - RS_COUNT gets an IT count of 4 with R300_HIRES_EN set; the register
+ *    after it is written as 0.
+ *  - RS_INST_0 uses texture id 0 and texture address 0 with TEX_CN_WRITE.
+ *  - RS_IP_0 uses texture pointer 0 and selects C0, C1 for S, T and
+ *    K0, K1 for R, Q.
+ *
+ * NOTE(review): presumably C0/C1 are the interpolated components and K0/K1
+ * are constants, so only a 2D texcoord is interpolated -- confirm against
+ * the register reference.
+ *
+ * @param r300 context whose command buffer receives the packets
+ */
+static void r300_emit_rs_setup(struct r300_context *r300)
+{
+    BATCH_LOCALS(&r300->radeon);
+
+    BEGIN_BATCH(7); /* presumably 3 (REGSEQ of 2) + 2 + 2 (REGVALs) dwords -- TODO confirm */
+    OUT_BATCH_REGSEQ(R300_RS_COUNT, 2);
+    OUT_BATCH((4 << R300_IT_COUNT_SHIFT) | R300_HIRES_EN);
+    OUT_BATCH(0); /* value for the register following R300_RS_COUNT */
+    OUT_BATCH_REGVAL(R300_RS_INST_0,
+                     R300_RS_INST_TEX_ID(0) |
+                     R300_RS_INST_TEX_ADDR(0) |
+                     R300_RS_INST_TEX_CN_WRITE);
+    OUT_BATCH_REGVAL(R300_RS_IP_0,
+                     R300_RS_TEX_PTR(0) |
+                     R300_RS_SEL_S(R300_RS_SEL_C0) |
+                     R300_RS_SEL_T(R300_RS_SEL_C1) |
+                     R300_RS_SEL_R(R300_RS_SEL_K0) |
+                     R300_RS_SEL_Q(R300_RS_SEL_K1));
+    END_BATCH();
+}
+
 static void emit_pvs_setup(struct r300_context *r300,
                            uint32_t *vp_code,
                            unsigned vp_len)
@@ -293,7 +338,7 @@ static void emit_pvs_setup(struct r300_context *r300,
     END_BATCH();
 }
 
-static void emit_vap_setup(struct r300_context *r300, unsigned width, unsigned height)
+static void emit_vap_setup(struct r300_context *r300)
 {
     BATCH_LOCALS(&r300->radeon);
 
@@ -343,12 +388,48 @@ static GLboolean validate_buffers(struct r300_context *r300,
     return GL_TRUE;
 }
 
-static void emit_draw_packet(struct r300_context *r300, float width, float height)
+/**
+ * Calculate normalized texcoords for a region of an image.
+ *
+ * The region origin and size are divided by the image dimensions, so the
+ * results are fractions of the image size.
+ *
+ * Output layout in @a buf (four floats):
+ *  - buf[0] = x / img_width                      (left edge)
+ *  - buf[1] = buf[0] + reg_width / img_width     (right edge)
+ *  - buf[2] = y / img_height                     (first y edge)
+ *  - buf[3] = buf[2] + reg_height / img_height   (second y edge)
+ *
+ * When @a flip_y is non-zero, buf[2] and buf[3] are each replaced by
+ * 1.0 - value. They are not swapped, so buf[2] is then the larger of the
+ * two for a positive reg_height; the y pair is only [miny, maxy] in the
+ * non-flipped case.
+ *
+ * No check is made for zero image dimensions.
+ *
+ * @param img_width  width used to normalize x values
+ * @param img_height height used to normalize y values
+ * @param x          x origin of the region
+ * @param y          y origin of the region
+ * @param reg_width  region width
+ * @param reg_height region height
+ * @param flip_y     non-zero to mirror the y coordinates around 1.0
+ * @param buf        output array, must have room for four floats
+ */
+static void calc_tex_coords(float img_width, float img_height,
+                            float x, float y,
+                            float reg_width, float reg_height,
+                            unsigned flip_y, float *buf)
+{
+    buf[0] = x / img_width;
+    buf[1] = buf[0] + reg_width / img_width;
+    buf[2] = y / img_height;
+    buf[3] = buf[2] + reg_height / img_height;
+    if (flip_y)
+    {
+        buf[2] = 1.0 - buf[2];
+        buf[3] = 1.0 - buf[3];
+    }
+}
+
+static void emit_draw_packet(struct r300_context *r300,
+                             unsigned src_width, unsigned src_height,
+                             unsigned src_x_offset, unsigned src_y_offset,
+                             unsigned dst_x_offset, unsigned dst_y_offset,
+                             unsigned reg_width, unsigned reg_height,
+                             unsigned flip_y)
 {
-    float verts[] = {   0.0,    0.0, 0.0, 1.0,
-                        0.0, height, 0.0, 0.0,
-                      width, height, 1.0, 0.0,
-                      width,    0.0, 1.0, 1.0 };
+    float texcoords[4];
+
+    calc_tex_coords(src_width, src_height,
+                    src_x_offset, src_y_offset,
+                    reg_width, reg_height,
+                    flip_y, texcoords);
+
+    float verts[] = { dst_x_offset, dst_y_offset,
+                      texcoords[0], texcoords[2],
+                      dst_x_offset, dst_y_offset + reg_height,
+                      texcoords[0], texcoords[3],
+                      dst_x_offset + reg_width, dst_y_offset + reg_height,
+                      texcoords[1], texcoords[3],
+                      dst_x_offset + reg_width, dst_y_offset,
+                      texcoords[1], texcoords[2] };
 
     BATCH_LOCALS(&r300->radeon);
 
@@ -382,6 +463,7 @@ static void emit_cb_setup(struct r300_context *r300,
                           struct radeon_bo *bo,
                           intptr_t offset,
                           gl_format mesa_format,
+                          unsigned pitch,
                           unsigned width,
                           unsigned height)
 {
@@ -402,50 +484,136 @@ static void emit_cb_setup(struct r300_context *r300,
 
     r300_emit_cb_setup(r300, bo, offset, mesa_format,
                        _mesa_get_format_bytes(mesa_format),
-                       _mesa_format_row_stride(mesa_format, width));
+                       _mesa_format_row_stride(mesa_format, pitch));
 
-    BEGIN_BATCH_NO_AUTOSTATE(3);
+    BEGIN_BATCH_NO_AUTOSTATE(5);
     OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2);
     OUT_BATCH((x1 << R300_SCISSORS_X_SHIFT)|(y1 << R300_SCISSORS_Y_SHIFT));
     OUT_BATCH((x2 << R300_SCISSORS_X_SHIFT)|(y2 << R300_SCISSORS_Y_SHIFT));
+    OUT_BATCH_REGVAL(R300_RB3D_CCTL, 0);
     END_BATCH();
 }
 
-GLboolean r300_blit(struct r300_context *r300,
-                    struct radeon_bo *src_bo,
-                    intptr_t src_offset,
-                    gl_format src_mesaformat,
-                    unsigned src_pitch,
-                    unsigned src_width,
-                    unsigned src_height,
-                    struct radeon_bo *dst_bo,
-                    intptr_t dst_offset,
-                    gl_format dst_mesaformat,
-                    unsigned dst_width,
-                    unsigned dst_height)
+/**
+ * Tell whether the blit path can render to the given destination format.
+ *
+ * Only the formats listed in the switch are accepted; every other format
+ * is rejected. Formats that report depth bits are rejected as well.
+ *
+ * @param dst_format destination image format
+ * @return 1 if the format is supported, 0 otherwise
+ */
+static unsigned is_blit_supported(gl_format dst_format)
+{
+    switch (dst_format) {
+        case MESA_FORMAT_RGB565:
+        case MESA_FORMAT_ARGB1555:
+        case MESA_FORMAT_RGBA8888:
+        case MESA_FORMAT_RGBA8888_REV:
+        case MESA_FORMAT_ARGB8888:
+        case MESA_FORMAT_ARGB8888_REV:
+        case MESA_FORMAT_XRGB8888:
+            break;
+        default:
+            return 0;
+    }
+
+    /* NOTE(review): none of the formats accepted above looks like a depth
+     * format, so this check appears to be purely defensive -- confirm. */
+    if (_mesa_get_format_bits(dst_format, GL_DEPTH_BITS) > 0)
+        return 0;
+
+    return 1;
+}
+
+/**
+ * Copy a region of [@a reg_width x @a reg_height] pixels from source buffer
+ * to destination buffer.
+ * @param[in] ctx GL context; the r300 context is obtained from it
+ * @param[in] src_bo source radeon buffer object
+ * @param[in] src_offset offset of the source image in the @a src_bo
+ * @param[in] src_mesaformat source image format
+ * @param[in] src_pitch aligned source image width
+ * @param[in] src_width source image width
+ * @param[in] src_height source image height
+ * @param[in] src_x_offset x offset in the source image
+ * @param[in] src_y_offset y offset in the source image
+ * @param[in] dst_bo destination radeon buffer object
+ * @param[in] dst_offset offset of the destination image in the @a dst_bo
+ * @param[in] dst_mesaformat destination image format
+ * @param[in] dst_pitch aligned destination image width
+ * @param[in] dst_width destination image width
+ * @param[in] dst_height destination image height
+ * @param[in] dst_x_offset x offset in the destination image
+ * @param[in] dst_y_offset y offset in the destination image
+ * @param[in] reg_width region width
+ * @param[in] reg_height region height
+ * @param[in] flip_y set if y coords of the source image need to be flipped
+ * @return 1 if the blit was emitted, 0 if it is not supported or could not
+ *         be performed
+ */
+unsigned r300_blit(GLcontext *ctx,
+                   struct radeon_bo *src_bo,
+                   intptr_t src_offset,
+                   gl_format src_mesaformat,
+                   unsigned src_pitch,
+                   unsigned src_width,
+                   unsigned src_height,
+                   unsigned src_x_offset,
+                   unsigned src_y_offset,
+                   struct radeon_bo *dst_bo,
+                   intptr_t dst_offset,
+                   gl_format dst_mesaformat,
+                   unsigned dst_pitch,
+                   unsigned dst_width,
+                   unsigned dst_height,
+                   unsigned dst_x_offset,
+                   unsigned dst_y_offset,
+                   unsigned reg_width,
+                   unsigned reg_height,
+                   unsigned flip_y)
 {
-    //assert(src_width == dst_width);
-    //assert(src_height == dst_height);
+    r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+    if (!is_blit_supported(dst_mesaformat))
+        return 0;
+
+    /* Make sure that colorbuffer has even width - hw limitation */
+    if (dst_pitch % 2 > 0)
+        ++dst_pitch;
+
+    /* Rendering to small buffer doesn't work.
+     * Looks like a hw limitation.
+     */
+    if (dst_pitch < 32)
+        return 0;
+
+    /* Need to clamp the region size to make sure
+     * we don't read outside of the source buffer
+     * or write outside of the destination buffer.
+     */
+    if (reg_width + src_x_offset > src_width)
+        reg_width = src_width - src_x_offset;
+    if (reg_height + src_y_offset > src_height)
+        reg_height = src_height - src_y_offset;
+    if (reg_width + dst_x_offset > dst_width)
+        reg_width = dst_width - dst_x_offset;
+    if (reg_height + dst_y_offset > dst_height)
+        reg_height = dst_height - dst_y_offset;
 
     if (src_bo == dst_bo) {
-        return GL_FALSE;
+        return 0;
     }
 
-    //return GL_FALSE;
+    if (src_offset % 32 || dst_offset % 32) {
+        return GL_FALSE;
+    }
 
-    if (1) {
-        fprintf(stderr, "src: width %d, height %d, pitch %d vs %d, format %s\n",
+    if (0) {
+        fprintf(stderr, "src: size [%d x %d], pitch %d, "
+                "offset [%d x %d], format %s, bo %p\n",
                 src_width, src_height, src_pitch,
-                _mesa_format_row_stride(src_mesaformat, src_width),
-                _mesa_get_format_name(src_mesaformat));
-        fprintf(stderr, "dst: width %d, height %d, pitch %d, format %s\n",
-                dst_width, dst_height,
-                _mesa_format_row_stride(dst_mesaformat, dst_width),
-                _mesa_get_format_name(dst_mesaformat));
+                src_x_offset, src_y_offset,
+                _mesa_get_format_name(src_mesaformat),
+                src_bo);
+        fprintf(stderr, "dst: pitch %d, offset[%d x %d], format %s, bo %p\n",
+                dst_pitch, dst_x_offset, dst_y_offset,
+                _mesa_get_format_name(dst_mesaformat), dst_bo);
+        fprintf(stderr, "region: %d x %d\n", reg_width, reg_height);
     }
 
+    /* Flush is needed to make sure that source buffer has correct data */
+    radeonFlush(r300->radeon.glCtx);
+
     if (!validate_buffers(r300, src_bo, dst_bo))
-        return GL_FALSE;
+        return 0;
 
     rcommonEnsureCmdBufSpace(&r300->radeon, 200, __FUNCTION__);
 
@@ -457,21 +625,24 @@ GLboolean r300_blit(struct r300_context *r300,
         r500_emit_fp_setup(r300, &r300->blit.fp_code.code.r500, dst_mesaformat);
         r500_emit_rs_setup(r300);
     } else {
-        r300_emit_fp_setup(r300);
+        r300_emit_fp_setup(r300, &r300->blit.fp_code.code.r300, dst_mesaformat);
         r300_emit_rs_setup(r300);
     }
 
     emit_pvs_setup(r300, r300->blit.vp_code.body.d, 2);
-    emit_vap_setup(r300, dst_width, dst_height);
+    emit_vap_setup(r300);
 
-    emit_cb_setup(r300, dst_bo, dst_offset, dst_mesaformat, dst_width, dst_height);
+    emit_cb_setup(r300, dst_bo, dst_offset, dst_mesaformat, dst_pitch, dst_width, dst_height);
 
-    emit_draw_packet(r300, dst_width, dst_height);
+    emit_draw_packet(r300, src_width, src_height,
+                     src_x_offset, src_y_offset,
+                     dst_x_offset, dst_y_offset,
+                     reg_width, reg_height,
+                     flip_y);
 
     r300EmitCacheFlush(r300);
 
     radeonFlush(r300->radeon.glCtx);
-    //r300ResetHwState(r300);
 
-    return GL_TRUE;
-}
\ No newline at end of file
+    return 1;
+}