From: Eric Anholt Date: Fri, 6 Jan 2012 18:20:09 +0000 (-0800) Subject: i965/vs: Try to emit more components of constant vectors at once. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=2c623c2be529edc974ca293f3c71929c639ec0c9;p=mesa.git i965/vs: Try to emit more components of constant vectors at once. We were naively emitting one component at a time, even if we were emitting the same value to multiple channels. Improves on a codegen regression from the old VS to the new VS on some Unigine shaders (because we emit constant vecs/matrices as immediates instead of loading them as push constants, so we had over 4x the instructions for using them). shader-db results: Total instructions: 58594 -> 58540 11/870 programs affected (1.3%) 765 -> 711 instructions in affected programs (7.1% reduction) --- diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 20da487399f..5df2470dc1b 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1701,22 +1701,45 @@ vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir) if (ir->type->is_matrix()) { for (int i = 0; i < ir->type->matrix_columns; i++) { + float *vec = &ir->value.f[i * ir->type->vector_elements]; + for (int j = 0; j < ir->type->vector_elements; j++) { dst->writemask = 1 << j; dst->type = BRW_REGISTER_TYPE_F; - emit(MOV(*dst, - src_reg(ir->value.f[i * ir->type->vector_elements + j]))); + emit(MOV(*dst, src_reg(vec[j]))); } dst->reg_offset++; } return; } + int remaining_writemask = (1 << ir->type->vector_elements) - 1; + for (int i = 0; i < ir->type->vector_elements; i++) { + if (!(remaining_writemask & (1 << i))) + continue; + dst->writemask = 1 << i; dst->type = brw_type_for_base_type(ir->type); + /* Find other components that match the one we're about to + * write. Emits fewer instructions for things like vec4(0.5, + * 1.5, 1.5, 1.5). 
+ */ + for (int j = i + 1; j < ir->type->vector_elements; j++) { + if (ir->type->base_type == GLSL_TYPE_BOOL) { + if (ir->value.b[i] == ir->value.b[j]) + dst->writemask |= (1 << j); + } else { + /* u, i, and f storage all line up, so no need for a + * switch case for comparing each type. + */ + if (ir->value.u[i] == ir->value.u[j]) + dst->writemask |= (1 << j); + } + } + switch (ir->type->base_type) { case GLSL_TYPE_FLOAT: emit(MOV(*dst, src_reg(ir->value.f[i]))); @@ -1734,6 +1757,8 @@ vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir) assert(!"Non-float/uint/int/bool constant"); break; } + + remaining_writemask &= ~dst->writemask; } dst->reg_offset++; }