i965/vs: Use samplers for UBOs in the VS like we do for non-UBO pulls.
author Eric Anholt <eric@anholt.net>
Sat, 15 Feb 2014 00:06:31 +0000 (16:06 -0800)
committer Eric Anholt <eric@anholt.net>
Thu, 20 Feb 2014 18:15:13 +0000 (10:15 -0800)
Improves performance of a dolphin emulator trace I had lying around by
3.60131% +/- 0.995887% (n=128).

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp

index ba299be72de877172a1d48cf35c05dd58839bb14..601b364b29521d06758e76f57028044e9e410143 100644 (file)
@@ -1573,14 +1573,27 @@ vec4_visitor::visit(ir_expression *ir)
          emit(SHR(dst_reg(offset), op[1], src_reg(4)));
       }
 
-      vec4_instruction *pull =
+      if (brw->gen >= 7) {
+         dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
+         grf_offset.type = offset.type;
+
+         emit(MOV(grf_offset, offset));
+
          emit(new(mem_ctx) vec4_instruction(this,
-                                            VS_OPCODE_PULL_CONSTANT_LOAD,
+                                            VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
                                             dst_reg(packed_consts),
                                             surf_index,
-                                            offset));
-      pull->base_mrf = 14;
-      pull->mlen = 1;
+                                            src_reg(grf_offset)));
+      } else {
+         vec4_instruction *pull =
+            emit(new(mem_ctx) vec4_instruction(this,
+                                               VS_OPCODE_PULL_CONSTANT_LOAD,
+                                               dst_reg(packed_consts),
+                                               surf_index,
+                                               offset));
+         pull->base_mrf = 14;
+         pull->mlen = 1;
+      }
 
       packed_consts.swizzle = swizzle_for_size(ir->type->vector_elements);
       packed_consts.swizzle += BRW_SWIZZLE4(const_offset % 16 / 4,