draw: cleanup and fix instance id computation
author Zack Rusin <zackr@vmware.com>
Tue, 23 Jul 2013 05:16:55 +0000 (01:16 -0400)
committer Zack Rusin <zackr@vmware.com>
Thu, 25 Jul 2013 06:02:36 +0000 (02:02 -0400)
The instance id system value always starts at 0, even if the
specified start instance is larger than 0. Instead of implicitly
setting instance id to instance id plus start instance and then
having to subtract the start instance when computing the buffer
offsets, let's just set instance id to the proper instance id. This
fixes instance id computation and cleans up buffer offset computation.

Signed-off-by: Zack Rusin <zackr@vmware.com>
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
src/gallium/auxiliary/draw/draw_llvm.c
src/gallium/auxiliary/draw/draw_pt.c
src/gallium/auxiliary/translate/translate_generic.c
src/gallium/auxiliary/translate/translate_sse.c

index 79e7a9b6d12cff81125b98c48dfba7234ac98ca7..a3174b404c1d163ba255b4243282bd9cbb3c1e64 100644 (file)
@@ -707,15 +707,14 @@ generate_fetch(struct gallivm_state *gallivm,
    if (velem->instance_divisor) {
       /* Index is equal to the start instance plus the number of current 
        * instance divided by the divisor. In this case we compute it as:
-       * index = start_instance + ((instance_id - start_instance) / divisor)
+       * index = start_instance + (instance_id  / divisor)
        */
       LLVMValueRef current_instance;
       index = lp_build_const_int32(gallivm, draw->start_instance);
-      current_instance = LLVMBuildSub(builder, instance_id, index, "");
-      current_instance = LLVMBuildUDiv(builder, current_instance,
+      current_instance = LLVMBuildUDiv(builder, instance_id,
                                        lp_build_const_int32(gallivm, velem->instance_divisor),
                                        "instance_divisor");
-      index = LLVMBuildAdd(builder, index, current_instance, "instance");
+      index = lp_build_uadd_overflow(gallivm, index, current_instance, &ofbit);
    }
 
    stride = lp_build_umul_overflow(gallivm, vb_stride, index, &ofbit);
index ccde371ffcd7ee4ce7a12d04c521bce0788aaae8..fcc24057db074a8a1b35e6e5f6d6ce0efe6be3d7 100644 (file)
@@ -542,11 +542,12 @@ draw_vbo(struct draw_context *draw,
     */
 
    for (instance = 0; instance < info->instance_count; instance++) {
-      draw->instance_id = instance + info->start_instance;
+      unsigned instance_idx = instance + info->start_instance;
       draw->start_instance = info->start_instance;
+      draw->instance_id = instance;
       /* check for overflow */
-      if (draw->instance_id < instance ||
-          draw->instance_id < info->start_instance) {
+      if (instance_idx < instance ||
+          instance_idx < draw->start_instance) {
          /* if we overflown just set the instance id to the max */
          draw->instance_id = 0xffffffff;
       }
index 96e35b0eb41f1431217b4102423c81c852725c24..fdab0f34a1a944225cb36b1bb3c7fbeded2e54cf 100644 (file)
@@ -625,8 +625,7 @@ static ALWAYS_INLINE void PIPE_CDECL generic_run_one( struct translate_generic *
 
          if (tg->attrib[attr].instance_divisor) {
             index = start_instance;
-            index += (instance_id - start_instance) /
-               tg->attrib[attr].instance_divisor;
+            index += (instance_id  / tg->attrib[attr].instance_divisor);
             /* XXX we need to clamp the index here too, but to a
              * per-array max value, not the draw->pt.max_index value
              * that's being given to us via translate->set_buffer().
index a4f7b243c135c03cddd23e9d0ab6c3b9fd074a11..726a9b1e34b683ea48772d4cf8747d5e8bef0a7b 100644 (file)
@@ -1094,10 +1094,6 @@ static boolean init_inputs( struct translate_sse *p,
                struct x86_reg tmp_EDX = p->tmp2_EDX;
                struct x86_reg tmp_ECX = p->src_ECX;
 
-               /* instance_num = instance_id - start_instance */
-               x86_mov(p->func, tmp_EDX, start_instance);
-               x86_sub(p->func, tmp_EAX, tmp_EDX);
-
                /* TODO: Add x86_shr() to rtasm and use it whenever
                 *       instance divisor is power of two.
                 */