With the switch-statement, GCC 4.8.3 produces a small pile of code with
a branch.
00000000 <brw_get_index_type>:
000000: 8b 54 24 04 mov 0x4(%esp),%edx
000004: b8 01 00 00 00 mov $0x1,%eax
000009: 81 fa 03 14 00 00 cmp $0x1403,%edx
00000f: 74 0d je 00001e <brw_get_index_type+0x1e>
000011: 31 c0 xor %eax,%eax
000013: 81 fa 05 14 00 00 cmp $0x1405,%edx
000019: 0f 94 c0 sete %al
00001c: 01 c0 add %eax,%eax
00001e: c3 ret
However, this could be two instructions.
00000000 <brw_get_index_type>:
000000: 2d 01 14 00 00 sub $0x1401,%eax
000005: d1 e8 shr %eax
000007: 90 nop
000008: 90 nop
000009: 90 nop
00000a: 90 nop
00000b: c3 ret
The function was also moved to the header so that it could be inlined at
the two call sites. Without this, 32-bit also needs to pull the
parameter from the stack. This means there is a push, a call, a move,
and a ret added to a two instruction function. The above code shows the
function with __attribute__((regparm=1)), but even this adds several
extra instructions. There is also an extra instruction on 64-bit to
move the parameter to %eax for the subtract.
On Bay Trail-D using Fedora 20 compile flags (-m64 -O2 -mtune=generic
for 64-bit and -m32 -march=i686 -mtune=atom for 32-bit), affects
Gl32Batch7:
32-bit: Difference at 95.0% confidence 0.818589% +/- 0.234661% (n=40)
64-bit: Difference at 95.0% confidence 0.54554% +/- 0.354092% (n=40)
Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Matt Turner <mattst88@gmail.com>
/* brw_draw_upload.c */
unsigned brw_get_vertex_surface_type(struct brw_context *brw,
const struct gl_client_array *glarray);
-unsigned brw_get_index_type(GLenum type);
+
+static inline unsigned
+brw_get_index_type(GLenum type)
+{
+ assert((type == GL_UNSIGNED_BYTE)
+ || (type == GL_UNSIGNED_SHORT)
+ || (type == GL_UNSIGNED_INT));
+
+ /* The possible values for type are GL_UNSIGNED_BYTE (0x1401),
+ * GL_UNSIGNED_SHORT (0x1403), and GL_UNSIGNED_INT (0x1405) which we want
+ * to map to scale factors of 0, 1, and 2, respectively. These scale
+ * factors are then left-shfited by 8 to be in the correct position in the
+ * CMD_INDEX_BUFFER packet.
+ *
+ * Subtracting 0x1401 gives 0, 2, and 4. Shifting left by 7 afterwards
+ * gives 0x00000000, 0x00000100, and 0x00000200. These just happen to be
+ * the values the need to be written in the CMD_INDEX_BUFFER packet.
+ */
+ return (type - 0x1401) << 7;
+}
+
void brw_prepare_vertices(struct brw_context *brw);
/* brw_wm_surface_state.c */
}
}
-unsigned
-brw_get_index_type(GLenum type)
-{
- switch (type) {
- case GL_UNSIGNED_BYTE: return BRW_INDEX_BYTE;
- case GL_UNSIGNED_SHORT: return BRW_INDEX_WORD;
- case GL_UNSIGNED_INT: return BRW_INDEX_DWORD;
- default: unreachable("not reached");
- }
-}
-
static void
copy_array_to_vbo_array(struct brw_context *brw,
struct brw_vertex_element *element,
BEGIN_BATCH(3);
OUT_BATCH(CMD_INDEX_BUFFER << 16 |
cut_index_setting |
- brw_get_index_type(index_buffer->type) << 8 |
+ brw_get_index_type(index_buffer->type) |
1);
OUT_RELOC(brw->ib.bo,
I915_GEM_DOMAIN_VERTEX, 0,
BEGIN_BATCH(5);
OUT_BATCH(CMD_INDEX_BUFFER << 16 | (5 - 2));
- OUT_BATCH(brw_get_index_type(index_buffer->type) << 8 | mocs_wb);
+ OUT_BATCH(brw_get_index_type(index_buffer->type) | mocs_wb);
OUT_RELOC64(brw->ib.bo, I915_GEM_DOMAIN_VERTEX, 0, 0);
OUT_BATCH(brw->ib.bo->size);
ADVANCE_BATCH();