From c1da15709a0c0c2775bd9e534f67c60f7dc95ce8 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Sun, 12 Jul 2015 00:13:45 -0700 Subject: [PATCH] i965: Use float calculations when double is unnecessary. Literals without an f/F suffix are of type double, and implicit conversion rules specify that the float in (float op double) be converted to a double before the operation is performed. I believe float execution was intended (in nearly all cases) or is sufficient (in the case of gen7_urb.c). Removes a lot of float <-> double conversion instructions and replaces many double instructions with float instructions which are cheaper. text data bss dec hex filename 4928659 195160 26192 5150011 4e953b i965_dri.so before 4928315 195152 26192 5149659 4e93db i965_dri.so after Reviewed-by: Iago Toral Quiroga --- src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 22 +++++++++---------- src/mesa/drivers/dri/i965/brw_fs.cpp | 4 ++-- .../drivers/dri/i965/brw_meta_fast_clear.c | 4 ++-- .../drivers/dri/i965/brw_meta_stencil_blit.c | 4 ++-- src/mesa/drivers/dri/i965/brw_misc_state.c | 4 ++-- src/mesa/drivers/dri/i965/brw_sampler_state.c | 4 ++-- src/mesa/drivers/dri/i965/brw_sf_state.c | 9 ++++---- src/mesa/drivers/dri/i965/brw_state_cache.c | 2 +- src/mesa/drivers/dri/i965/brw_util.h | 4 ++-- .../drivers/dri/i965/gen6_multisample_state.c | 4 ++-- src/mesa/drivers/dri/i965/gen6_sf_state.c | 2 +- src/mesa/drivers/dri/i965/gen7_sf_state.c | 2 +- src/mesa/drivers/dri/i965/gen7_urb.c | 2 +- src/mesa/drivers/dri/i965/gen8_sf_state.c | 2 +- 14 files changed, 35 insertions(+), 34 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp index 1561b593969..205c905b447 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp @@ -1285,8 +1285,8 @@ brw_blorp_blit_program::translate_dst_to_src() /* Round the float coordinates down to nearest integer */ emit_rndd(Xp_f, X_f); emit_rndd(Yp_f, Y_f); - emit_mul(X_f, Xp_f, brw_imm_f(1 / key->x_scale)); - emit_mul(Y_f, Yp_f, brw_imm_f(1 / key->y_scale)); + emit_mul(X_f, Xp_f, brw_imm_f(1.0f / key->x_scale)); + emit_mul(Y_f, Yp_f, brw_imm_f(1.0f / key->y_scale)); SWAP_XY_AND_XPYP(); } else if (!key->bilinear_filter) { /* Round the float coordinates down to nearest integer by moving to @@ -1442,7 +1442,7 @@ brw_blorp_blit_program::manual_blend_average(unsigned num_samples) for (int j = 0; j < 4; ++j) { emit_mul(offset(texture_data[0], 2*j), offset(vec8(texture_data[0]), 2*j), - brw_imm_f(1.0/num_samples)); + brw_imm_f(1.0f / num_samples)); } } @@ -1475,9 +1475,9 @@ brw_blorp_blit_program::manual_blend_bilinear(unsigned num_samples) /* Compute pixel coordinates */ emit_add(vec16(x_sample_coords), Xp_f, - brw_imm_f((float)(i & 0x1) * (1.0 / key->x_scale))); + brw_imm_f((float)(i & 0x1) * (1.0f / key->x_scale))); emit_add(vec16(y_sample_coords), Yp_f, - brw_imm_f((float)((i >> 1) & 0x1) * (1.0 / key->y_scale))); + brw_imm_f((float)((i >> 1) & 0x1) * (1.0f / key->y_scale))); emit_mov(vec16(X), x_sample_coords); emit_mov(vec16(Y), y_sample_coords); @@ -1789,7 +1789,7 @@ brw_blorp_coord_transform_params::setup(GLfloat src0, GLfloat src1, * so 0.5 provides the necessary correction. */ multiplier = scale; - offset = src0 + (-dst0 + 0.5) * scale; + offset = src0 + (-dst0 + 0.5f) * scale; } else { /* When mirroring X we need: * src_x - src_x0 = dst_x1 - dst_x - 0.5 @@ -1797,7 +1797,7 @@ brw_blorp_coord_transform_params::setup(GLfloat src0, GLfloat src1, * src_x = src_x0 + (dst_x1 -dst_x - 0.5) * scale */ multiplier = -scale; - offset = src0 + (dst1 - 0.5) * scale; + offset = src0 + (dst1 - 0.5f) * scale; } } @@ -1952,8 +1952,8 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct brw_context *brw, /* Scaling factors used for bilinear filtering in multisample scaled * blits. */ - wm_prog_key.x_scale = 2.0; - wm_prog_key.y_scale = src_mt->num_samples / 2.0; + wm_prog_key.x_scale = 2.0f; + wm_prog_key.y_scale = src_mt->num_samples / 2.0f; if (filter == GL_LINEAR && src.num_samples <= 1 && dst.num_samples <= 1) wm_prog_key.bilinear_filter = true; @@ -2000,9 +2000,9 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct brw_context *brw, x1 = wm_push_consts.dst_x1 = roundf(dst_x1); y1 = wm_push_consts.dst_y1 = roundf(dst_y1); wm_push_consts.rect_grid_x1 = (minify(src_mt->logical_width0, src_level) * - wm_prog_key.x_scale - 1.0); + wm_prog_key.x_scale - 1.0f); wm_push_consts.rect_grid_y1 = (minify(src_mt->logical_height0, src_level) * - wm_prog_key.y_scale - 1.0); + wm_prog_key.y_scale - 1.0f); wm_push_consts.x_transform.setup(src_x0, src_x1, dst_x0, dst_x1, mirror_x); wm_push_consts.y_transform.setup(src_y0, src_y1, dst_y0, dst_y1, mirror_y); diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 565edeb401e..15fe3648af8 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -975,11 +975,11 @@ fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer, bld.MOV(wpos, this->pixel_y); } else { fs_reg pixel_y = this->pixel_y; - float offset = (pixel_center_integer ? 0.0 : 0.5); + float offset = (pixel_center_integer ? 0.0f : 0.5f); if (flip) { pixel_y.negate = true; - offset += key->drawable_height - 1.0; + offset += key->drawable_height - 1.0f; } bld.ADD(wpos, pixel_y, fs_reg(offset)); diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c index c5e556ee9eb..e7e8df5bded 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c +++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c @@ -348,7 +348,7 @@ is_color_fast_clear_compatible(struct brw_context *brw, } for (int i = 0; i < 4; i++) { - if (color->f[i] != 0.0 && color->f[i] != 1.0 && + if (color->f[i] != 0.0f && color->f[i] != 1.0f && _mesa_format_has_color_component(format, i)) { return false; } @@ -366,7 +366,7 @@ compute_fast_clear_color_bits(const union gl_color_union *color) uint32_t bits = 0; for (int i = 0; i < 4; i++) { /* Testing for non-0 works for integer and float colors */ - if (color->f[i] != 0.0) + if (color->f[i] != 0.0f) bits |= 1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + (3 - i)); } return bits; diff --git a/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c b/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c index d4abfe63de7..aa6df16eb04 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c +++ b/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c @@ -239,10 +239,10 @@ setup_coord_coeff(GLuint prog, GLuint multiplier, GLuint offset, if (mirror) { _mesa_Uniform1f(multiplier, -scale); - _mesa_Uniform1f(offset, src_0 + (dst_1 - 0.5) * scale); + _mesa_Uniform1f(offset, src_0 + (dst_1 - 0.5f) * scale); } else { _mesa_Uniform1f(multiplier, scale); - _mesa_Uniform1f(offset, src_0 + (-dst_0 + 0.5) * scale); + _mesa_Uniform1f(offset, src_0 + (-dst_0 + 0.5f) * scale); } } diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 1bbb16cf697..16b0ed28d0d 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -834,13 +834,13 @@ static void upload_line_stipple(struct brw_context *brw) if (brw->gen >= 7) { /* in U1.16 */ - tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor; + tmp = 1.0f / ctx->Line.StippleFactor; tmpi = tmp * (1<<16); OUT_BATCH(tmpi << 15 | ctx->Line.StippleFactor); } else { /* in U1.13 */ - tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor; + tmp = 1.0f / ctx->Line.StippleFactor; tmpi = tmp * (1<<13); OUT_BATCH(tmpi << 16 | ctx->Line.StippleFactor); } diff --git a/src/mesa/drivers/dri/i965/brw_sampler_state.c b/src/mesa/drivers/dri/i965/brw_sampler_state.c index 22ccbfe8461..2021bb3b460 100644 --- a/src/mesa/drivers/dri/i965/brw_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_sampler_state.c @@ -425,11 +425,11 @@ brw_update_sampler_state(struct brw_context *brw, /* Enable anisotropic filtering if desired. */ unsigned max_anisotropy = BRW_ANISORATIO_2; - if (sampler->MaxAnisotropy > 1.0) { + if (sampler->MaxAnisotropy > 1.0f) { min_filter = BRW_MAPFILTER_ANISOTROPIC; mag_filter = BRW_MAPFILTER_ANISOTROPIC; - if (sampler->MaxAnisotropy > 2.0) { + if (sampler->MaxAnisotropy > 2.0f) { max_anisotropy = MIN2((sampler->MaxAnisotropy - 2) / 2, BRW_ANISORATIO_16); } diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index 3be6e4ab8e2..b126f82ebbf 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -220,7 +220,7 @@ static void upload_sf_unit( struct brw_context *brw ) /* _NEW_LINE */ sf->sf6.line_width = - CLAMP(ctx->Line.Width, 1.0, ctx->Const.MaxLineWidth) * (1<<1); + CLAMP(ctx->Line.Width, 1.0f, ctx->Const.MaxLineWidth) * (1<<1); sf->sf6.line_endcap_aa_region_width = 1; if (ctx->Line.SmoothFlag) @@ -259,9 +259,10 @@ static void upload_sf_unit( struct brw_context *brw ) /* _NEW_POINT */ sf->sf7.sprite_point = ctx->Point.PointSprite; - sf->sf7.point_size = CLAMP(rint(CLAMP(ctx->Point.Size, - ctx->Point.MinSize, - ctx->Point.MaxSize)), 1, 255) * (1<<3); + sf->sf7.point_size = CLAMP(rintf(CLAMP(ctx->Point.Size, + ctx->Point.MinSize, + ctx->Point.MaxSize)), 1.0f, 255.0f) * + (1<<3); /* _NEW_PROGRAM | _NEW_POINT */ sf->sf7.use_point_size_state = !(ctx->VertexProgram.PointSizeEnabled || ctx->Point._Attenuated); diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index 157b33d4f4c..693441c6f49 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -323,7 +323,7 @@ brw_upload_cache(struct brw_cache *cache, item->key = tmp; - if (cache->n_items > cache->size * 1.5) + if (cache->n_items > cache->size * 1.5f) rehash(cache); hash %= cache->size; diff --git a/src/mesa/drivers/dri/i965/brw_util.h b/src/mesa/drivers/dri/i965/brw_util.h index 04e4e944118..68f4318d371 100644 --- a/src/mesa/drivers/dri/i965/brw_util.h +++ b/src/mesa/drivers/dri/i965/brw_util.h @@ -53,14 +53,14 @@ brw_get_line_width(struct brw_context *brw) float line_width = CLAMP(!brw->ctx.Multisample._Enabled && !brw->ctx.Line.SmoothFlag ? roundf(brw->ctx.Line.Width) : brw->ctx.Line.Width, - 0.0, brw->ctx.Const.MaxLineWidth); + 0.0f, brw->ctx.Const.MaxLineWidth); uint32_t line_width_u3_7 = U_FIXED(line_width, 7); /* Line width of 0 is not allowed when MSAA enabled */ if (brw->ctx.Multisample._Enabled) { if (line_width_u3_7 == 0) line_width_u3_7 = 1; - } else if (brw->ctx.Line.SmoothFlag && line_width < 1.5) { + } else if (brw->ctx.Line.SmoothFlag && line_width < 1.5f) { /* For 1 pixel line thickness or less, the general * anti-aliasing algorithm gives up, and a garbage line is * generated. Setting a Line Width of 0.0 specifies the diff --git a/src/mesa/drivers/dri/i965/gen6_multisample_state.c b/src/mesa/drivers/dri/i965/gen6_multisample_state.c index 36734f598fe..cf1421e5e9f 100644 --- a/src/mesa/drivers/dri/i965/gen6_multisample_state.c +++ b/src/mesa/drivers/dri/i965/gen6_multisample_state.c @@ -148,7 +148,7 @@ unsigned gen6_determine_sample_mask(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; - float coverage = 1.0; + float coverage = 1.0f; float coverage_invert = false; unsigned sample_mask = ~0u; @@ -166,7 +166,7 @@ gen6_determine_sample_mask(struct brw_context *brw) } if (num_samples > 1) { - int coverage_int = (int) (num_samples * coverage + 0.5); + int coverage_int = (int) (num_samples * coverage + 0.5f); uint32_t coverage_bits = (1 << coverage_int) - 1; if (coverage_invert) coverage_bits ^= (1 << num_samples) - 1; diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index b00517ed81e..4068f2844a2 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -383,7 +383,7 @@ upload_sf_state(struct brw_context *brw) point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize); /* Clamp to the hardware limits and convert to fixed point */ - dw4 |= U_FIXED(CLAMP(point_size, 0.125, 255.875), 3); + dw4 |= U_FIXED(CLAMP(point_size, 0.125f, 255.875f), 3); /* * Window coordinates in an FBO are inverted, which means point diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c index 4fa46a8eb97..698b3d491bc 100644 --- a/src/mesa/drivers/dri/i965/gen7_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c @@ -220,7 +220,7 @@ upload_sf_state(struct brw_context *brw) point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize); /* Clamp to the hardware limits and convert to fixed point */ - dw3 |= U_FIXED(CLAMP(point_size, 0.125, 255.875), 3); + dw3 |= U_FIXED(CLAMP(point_size, 0.125f, 255.875f), 3); /* _NEW_LIGHT */ if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) { diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c b/src/mesa/drivers/dri/i965/gen7_urb.c index d371c193577..69162171c4e 100644 --- a/src/mesa/drivers/dri/i965/gen7_urb.c +++ b/src/mesa/drivers/dri/i965/gen7_urb.c @@ -228,7 +228,7 @@ gen7_upload_urb(struct brw_context *brw) remaining_space = total_wants; if (remaining_space > 0) { unsigned vs_additional = (unsigned) - round(vs_wants * (((double) remaining_space) / total_wants)); + roundf(vs_wants * (((float) remaining_space) / total_wants)); vs_chunks += vs_additional; remaining_space -= vs_additional; gs_chunks += remaining_space; diff --git a/src/mesa/drivers/dri/i965/gen8_sf_state.c b/src/mesa/drivers/dri/i965/gen8_sf_state.c index c2b585d0001..6b655ee493e 100644 --- a/src/mesa/drivers/dri/i965/gen8_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen8_sf_state.c @@ -169,7 +169,7 @@ upload_sf(struct brw_context *brw) point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize); /* Clamp to the hardware limits and convert to fixed point */ - dw3 |= U_FIXED(CLAMP(point_size, 0.125, 255.875), 3); + dw3 |= U_FIXED(CLAMP(point_size, 0.125f, 255.875f), 3); /* _NEW_PROGRAM | _NEW_POINT */ if (!(ctx->VertexProgram.PointSizeEnabled || ctx->Point._Attenuated)) -- 2.30.2