Merge remote branch 'origin/master' into gallium_draw_llvm
[mesa.git] / src / mesa / drivers / dri / r600 / r700_render.c
index 3e1ce9fb72f6d425f5b479a265cd21f33bf44499..1929b7cc129798afb1fa9ff51ae53429715bd536 100644 (file)
@@ -42,7 +42,6 @@
 #include "tnl/t_vp_build.h"
 #include "tnl/t_context.h"
 #include "tnl/t_vertex.h"
-#include "tnl/t_pipeline.h"
 #include "vbo/vbo_context.h"
 
 #include "r600_context.h"
@@ -59,9 +58,7 @@
 
 void r700WaitForIdle(context_t *context);
 void r700WaitForIdleClean(context_t *context);
-GLboolean r700SendTextureState(context_t *context);
 static unsigned int r700PrimitiveType(int prim);
-void r600UpdateTextureState(GLcontext * ctx);
 GLboolean r700SyncSurf(context_t *context,
                       struct radeon_bo *pbo,
                       uint32_t read_domain,
@@ -118,8 +115,6 @@ void r700Start3D(context_t *context)
     END_BATCH();
 
     COMMIT_BATCH();
-
-    r700WaitForIdleClean(context);
 }
 
 GLboolean r700SyncSurf(context_t *context,
@@ -253,24 +248,12 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim
 {
     context_t *context = R700_CONTEXT(ctx);
     BATCH_LOCALS(&context->radeon);
-    int type, i, total_emit;
+    int type, total_emit;
     int num_indices;
     uint32_t vgt_draw_initiator = 0;
     uint32_t vgt_index_type     = 0;
     uint32_t vgt_primitive_type = 0;
     uint32_t vgt_num_indices    = 0;
-    TNLcontext *tnl = TNL_CONTEXT(ctx);
-    struct vertex_buffer *vb = &tnl->vb;
-    GLboolean bUseDrawIndex;
-
-    if(NULL != context->ind_buf.bo)
-    {
-        bUseDrawIndex = GL_TRUE;
-    }
-    else
-    {
-        bUseDrawIndex = GL_FALSE;
-    }
 
     type = r700PrimitiveType(prim);
     num_indices = r700NumVerts(end - start, prim);
@@ -282,90 +265,153 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim
     if (type < 0 || num_indices <= 0)
            return;
 
-    if(GL_TRUE == bUseDrawIndex)
-    {
-        total_emit =   3  /* VGT_PRIMITIVE_TYPE */
-                    + 2  /* VGT_INDEX_TYPE */
-                    + 2  /* NUM_INSTANCES */
-                     + 5 + 2; /* DRAW_INDEX */
-    }
-    else
-    {
-        total_emit =   3 /* VGT_PRIMITIVE_TYPE */
-                    + 2 /* VGT_INDEX_TYPE */
-                    + 2 /* NUM_INSTANCES */
-                     + num_indices + 3; /* DRAW_INDEX_IMMD */
-    }
-
-    BEGIN_BATCH_NO_AUTOSTATE(total_emit);
-    // prim
     SETfield(vgt_primitive_type, type,
             VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask);
-    R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
-    R600_OUT_BATCH(mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX);
-    R600_OUT_BATCH(vgt_primitive_type);
 
-       // index type
     SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
 
-    if(GL_TRUE == bUseDrawIndex)
+    if(GL_TRUE != context->ind_buf.is_32bit)
     {
-        if(GL_TRUE != context->ind_buf.is_32bit)
-        {
             SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
-        }
     }
 
+    vgt_num_indices = num_indices;
+    SETfield(vgt_draw_initiator, DI_SRC_SEL_DMA, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
+    SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask);
+
+    total_emit =   3  /* VGT_PRIMITIVE_TYPE */
+                + 2  /* VGT_INDEX_TYPE */
+                + 2  /* NUM_INSTANCES */
+                + 5 + 2; /* DRAW_INDEX */
+
+    BEGIN_BATCH_NO_AUTOSTATE(total_emit);
+    // prim
+    R600_OUT_BATCH_REGSEQ(VGT_PRIMITIVE_TYPE, 1);
+    R600_OUT_BATCH(vgt_primitive_type);
+    // index type
     R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
     R600_OUT_BATCH(vgt_index_type);
-
     // num instances
     R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
     R600_OUT_BATCH(1);
-
     // draw packet
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX, 3));
+    R600_OUT_BATCH(context->ind_buf.bo_offset);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(vgt_num_indices);
+    R600_OUT_BATCH(vgt_draw_initiator);
+    R600_OUT_BATCH_RELOC(context->ind_buf.bo_offset,
+                        context->ind_buf.bo,
+                        context->ind_buf.bo_offset,
+                        RADEON_GEM_DOMAIN_GTT, 0, 0);
+    END_BATCH();
+    COMMIT_BATCH();
+}
+
+static void r700RunRenderPrimitiveImmediate(GLcontext * ctx, int start, int end, int prim)
+{
+    context_t *context = R700_CONTEXT(ctx);
+    BATCH_LOCALS(&context->radeon);
+    int type, i;
+    uint32_t num_indices, total_emit = 0;
+    uint32_t vgt_draw_initiator = 0;
+    uint32_t vgt_index_type     = 0;
+    uint32_t vgt_primitive_type = 0;
+    uint32_t vgt_num_indices    = 0;
+
+    type = r700PrimitiveType(prim);
+    num_indices = r700NumVerts(end - start, prim);
+
+    radeon_print(RADEON_RENDER, RADEON_TRACE,
+                "%s type %x num_indices %d\n",
+                __func__, type, num_indices);
+
+    if (type < 0 || num_indices <= 0)
+           return;
+
+    SETfield(vgt_primitive_type, type,
+            VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask);
+
+    if (num_indices > 0xffff)
+    {
+           SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
+    }
+    else
+    {
+            SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
+    }
+
     vgt_num_indices = num_indices;
+    SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask);
 
-    if(GL_TRUE == bUseDrawIndex)
+    if (start == 0)
     {
-        SETfield(vgt_draw_initiator, DI_SRC_SEL_DMA, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
+       SETfield(vgt_draw_initiator, DI_SRC_SEL_AUTO_INDEX, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
     }
     else
     {
-        SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
+       if (num_indices > 0xffff)
+       {
+               total_emit += num_indices;
+       }
+       else
+       {
+               total_emit += (num_indices + 1) / 2;
+       }
+       SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
     }
 
-       SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask);
+    total_emit +=   3 /* VGT_PRIMITIVE_TYPE */
+                 + 2 /* VGT_INDEX_TYPE */
+                 + 2 /* NUM_INSTANCES */
+                 + 3; /* DRAW */
 
-    if(GL_TRUE == bUseDrawIndex)
+    BEGIN_BATCH_NO_AUTOSTATE(total_emit);
+    // prim
+    R600_OUT_BATCH_REGSEQ(VGT_PRIMITIVE_TYPE, 1);
+    R600_OUT_BATCH(vgt_primitive_type);
+    // index type
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
+    R600_OUT_BATCH(vgt_index_type);
+    // num instances
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
+    R600_OUT_BATCH(1);
+    // draw packet
+    if(start == 0)
     {
-        R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX, 3));
-        R600_OUT_BATCH(context->ind_buf.bo_offset);
-        R600_OUT_BATCH(0);
+        R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1));
         R600_OUT_BATCH(vgt_num_indices);
         R600_OUT_BATCH(vgt_draw_initiator);
-        R600_OUT_BATCH_RELOC(context->ind_buf.bo_offset,
-                             context->ind_buf.bo,
-                             context->ind_buf.bo_offset,
-                             RADEON_GEM_DOMAIN_GTT, 0, 0);
     }
     else
     {
-        R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1)));
-        R600_OUT_BATCH(vgt_num_indices);
-        R600_OUT_BATCH(vgt_draw_initiator);
-
-        for (i = start; i < (start + num_indices); i++)
-       {
-            if(vb->Elts)
-            {
-                R600_OUT_BATCH(vb->Elts[i]);
-            }
-            else
+       if (num_indices > 0xffff)
+        {
+           R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1)));
+           R600_OUT_BATCH(vgt_num_indices);
+           R600_OUT_BATCH(vgt_draw_initiator);
+           for (i = start; i < (start + num_indices); i++)
            {
-                R600_OUT_BATCH(i);
+               R600_OUT_BATCH(i);
            }
-        }
+       }
+       else
+        {
+           R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (((num_indices + 1) / 2) + 1)));
+           R600_OUT_BATCH(vgt_num_indices);
+           R600_OUT_BATCH(vgt_draw_initiator);
+           for (i = start; i < (start + num_indices); i += 2)
+           {
+               if ((i + 1) == (start + num_indices))
+               {
+                   R600_OUT_BATCH(i);
+               }
+               else
+               {
+                   R600_OUT_BATCH(((i + 1) << 16) | (i));
+               }
+           }
+       }
     }
 
     END_BATCH();
@@ -373,7 +419,7 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim
 }
 
 /* start 3d, idle, cb/db flush */
-#define PRE_EMIT_STATE_BUFSZ 10 + 5 + 14
+#define PRE_EMIT_STATE_BUFSZ 5 + 5 + 14
 
 static GLuint r700PredictRenderSize(GLcontext* ctx,
                                    const struct _mesa_prim *prim,
@@ -391,7 +437,12 @@ static GLuint r700PredictRenderSize(GLcontext* ctx,
     else {
            for (i = 0; i < nr_prims; ++i)
            {
-                   dwords += prim[i].count + 10;
+                   if (prim[i].start == 0)
+                           dwords += 10;
+                   else if (prim[i].count > 0xffff)
+                           dwords += prim[i].count + 10;
+                   else
+                           dwords += ((prim[i].count + 1) / 2) + 10;
            }
     }
 
@@ -472,6 +523,9 @@ static void r700ConvertAttrib(GLcontext *ctx, int count,
 
     radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset, 
                          sizeof(GLfloat) * input->Size * count, 32);
+
+    radeon_bo_map(attr->bo, 1);
+
     dst_ptr = (GLfloat *)ADD_POINTERS(attr->bo->ptr, attr->bo_offset);
 
     assert(src_ptr != NULL);
@@ -505,6 +559,8 @@ static void r700ConvertAttrib(GLcontext *ctx, int count,
             break;
     }
 
+    radeon_bo_unmap(attr->bo);
+
     if (mapped_named_bo) 
     {
         ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
@@ -523,6 +579,8 @@ static void r700AlignDataToDword(GLcontext *ctx,
 
     radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset, size, 32);
 
+    radeon_bo_map(attr->bo, 1);
+
     if (!input->BufferObj->Pointer) 
     {
         ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
@@ -536,12 +594,13 @@ static void r700AlignDataToDword(GLcontext *ctx,
 
         for (i = 0; i < count; ++i) 
         {
-            _mesa_memcpy(dst_ptr, src_ptr, input->StrideB);
+            memcpy(dst_ptr, src_ptr, input->StrideB);
             src_ptr += input->StrideB;
             dst_ptr += dst_stride;
         }
     }
 
+    radeon_bo_unmap(attr->bo);
     if (mapped_named_bo) 
     {
         ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
@@ -610,32 +669,33 @@ static void r700SetupStreams(GLcontext *ctx, const struct gl_client_array *input
 
                 radeonAllocDmaRegion(&context->radeon, &context->stream_desc[index].bo, 
                                      &context->stream_desc[index].bo_offset, size, 32);
+
+                radeon_bo_map(context->stream_desc[index].bo, 1);
                 assert(context->stream_desc[index].bo->ptr != NULL);
+
+
                 dst = (uint32_t *)ADD_POINTERS(context->stream_desc[index].bo->ptr, 
                                                context->stream_desc[index].bo_offset);
 
                 switch (context->stream_desc[index].dwords) 
                 {
                 case 1:                     
-                    radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count);                         
-                                       context->stream_desc[index].stride = 4; 
+                    radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count);
                     break;
                 case 2: 
                     radeonEmitVec8(dst, input[i]->Ptr, input[i]->StrideB, local_count); 
-                                       context->stream_desc[index].stride = 8; 
                     break;
                 case 3: 
                     radeonEmitVec12(dst, input[i]->Ptr, input[i]->StrideB, local_count); 
-                                       context->stream_desc[index].stride = 12; 
                     break;
                 case 4: 
                     radeonEmitVec16(dst, input[i]->Ptr, input[i]->StrideB, local_count); 
-                                       context->stream_desc[index].stride = 16; 
                     break;
                 default: 
                     assert(0); 
                     break;
                 }
+               radeon_bo_unmap(context->stream_desc[index].bo);
             }
         }
 
@@ -707,6 +767,7 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer
        radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
                             &context->ind_buf.bo_offset, size, 4);
 
+       radeon_bo_map(context->ind_buf.bo, 1);
        assert(context->ind_buf.bo->ptr != NULL);
        out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
 
@@ -720,6 +781,7 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer
             *out++ = in[i];
         }
 
+       radeon_bo_unmap(context->ind_buf.bo);
 #if MESA_BIG_ENDIAN
     }
     else
@@ -730,6 +792,7 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer
        radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
                             &context->ind_buf.bo_offset, size, 4);
 
+       radeon_bo_map(context->ind_buf.bo, 1);
        assert(context->ind_buf.bo->ptr != NULL);
        out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
 
@@ -742,6 +805,7 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer
         {
             *out++ = in[i];
         }
+       radeon_bo_unmap(context->ind_buf.bo);
 #endif
     }
 
@@ -765,11 +829,10 @@ static void r700SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer
 
 #if MESA_BIG_ENDIAN
     if (mesa_ind_buf->type == GL_UNSIGNED_INT)
-    {
 #else
     if (mesa_ind_buf->type != GL_UNSIGNED_BYTE)
-    {
 #endif
+    {
         const GLvoid *src_ptr;
         GLvoid *dst_ptr;
         GLboolean mapped_named_bo = GL_FALSE;
@@ -787,11 +850,13 @@ static void r700SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer
 
        radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
                             &context->ind_buf.bo_offset, size, 4);
+       radeon_bo_map(context->ind_buf.bo, 1);
        assert(context->ind_buf.bo->ptr != NULL);
        dst_ptr = ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
 
-        _mesa_memcpy(dst_ptr, src_ptr, size);
+        memcpy(dst_ptr, src_ptr, size);
 
+       radeon_bo_unmap(context->ind_buf.bo);
         context->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT);
         context->ind_buf.count = mesa_ind_buf->count;
 
@@ -806,6 +871,14 @@ static void r700SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer
     }
 }
 
+static GLboolean check_fallbacks(GLcontext *ctx)
+{
+       if (ctx->RenderMode != GL_RENDER)
+               return GL_TRUE;
+
+       return GL_FALSE;
+}
+
 static GLboolean r700TryDrawPrims(GLcontext *ctx,
                                  const struct gl_client_array *arrays[],
                                  const struct _mesa_prim *prim,
@@ -822,6 +895,9 @@ static GLboolean r700TryDrawPrims(GLcontext *ctx,
     if (ctx->NewState)
         _mesa_update_state( ctx );
 
+    if (check_fallbacks(ctx))
+           return GL_FALSE;
+
     _tnl_UpdateFixedFunctionProgram(ctx);
     r700SetVertexFormat(ctx, arrays, max_index + 1);
     /* shaders need to be updated before buffers are validated */
@@ -839,7 +915,7 @@ static GLboolean r700TryDrawPrims(GLcontext *ctx,
     r700SetScissor(context);
     r700SetupVertexProgram(ctx);
     r700SetupFragmentProgram(ctx);
-    r600UpdateTextureState(ctx);
+    r700UpdateShaderStates(ctx);
 
     GLuint emit_end = r700PredictRenderSize(ctx, prim, ib, nr_prims)
                     + context->radeon.cmdbuf.cs->cdw;
@@ -852,14 +928,21 @@ static GLboolean r700TryDrawPrims(GLcontext *ctx,
     radeon_debug_add_indent();
     for (i = 0; i < nr_prims; ++i)
     {
-           r700RunRenderPrimitive(ctx,
-                               prim[i].start,
-                               prim[i].start + prim[i].count,
-                               prim[i].mode);
+           if (context->ind_buf.bo)
+                   r700RunRenderPrimitive(ctx,
+                                          prim[i].start,
+                                          prim[i].start + prim[i].count,
+                                          prim[i].mode);
+           else
+                   r700RunRenderPrimitiveImmediate(ctx,
+                                                   prim[i].start,
+                                                   prim[i].start + prim[i].count,
+                                                   prim[i].mode);
     }
     radeon_debug_remove_indent();
 
     /* Flush render op cached for last several quads. */
+    /* XXX drm should handle this in fence submit */
     r700WaitForIdleClean(context);
 
     rrb = radeon_get_colorbuffer(&context->radeon);
@@ -910,8 +993,10 @@ static void r700DrawPrims(GLcontext *ctx,
        retval = r700TryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
 
        /* If failed run tnl pipeline - it should take care of fallbacks */
-       if (!retval)
+       if (!retval) {
+               _swsetup_Wakeup(ctx);
                _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
+       }
 }
 
 void r700InitDraw(GLcontext *ctx)