Fix promotion of floats to doubles
author Albert Astals Cid <aacid@kde.org>
Wed, 26 Feb 2020 22:05:51 +0000 (23:05 +0100)
committer Albert Astals Cid <tsdgeos@yahoo.es>
Sat, 18 Apr 2020 19:55:45 +0000 (19:55 +0000)
Use the f variants of the math functions when the input parameter is a
float. This avoids converting from float to double and running the double
variant of the math function, which gains no precision at all.

Reviewed-by: Matt Turner <mattst88@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3969>

12 files changed:
src/amd/vulkan/radv_cmd_buffer.c
src/amd/vulkan/radv_pipeline.c
src/amd/vulkan/si_cmd_buffer.c
src/gallium/drivers/llvmpipe/lp_setup_line.c
src/gallium/drivers/r600/sb/sb_expr.cpp
src/gallium/drivers/softpipe/sp_tex_sample.c
src/gallium/state_trackers/xa/xa_renderer.c
src/intel/common/gen_l3_config.c
src/mesa/drivers/dri/nouveau/nv10_state_tnl.c
src/mesa/program/program.c
src/mesa/state_tracker/st_cb_bitmap.c
src/mesa/swrast/s_texfilter.c

index abc1cfbbd04f0055f44e9137f424c6b4d8c92ce4..6e5809ac492aba5a48cdb53622d23ce292fdaec8 100644 (file)
@@ -700,8 +700,8 @@ radv_convert_user_sample_locs(struct radv_sample_locations_state *state,
                float shifted_pos_x = user_locs[i].x - 0.5;
                float shifted_pos_y = user_locs[i].y - 0.5;
 
-               int32_t scaled_pos_x = floor(shifted_pos_x * 16);
-               int32_t scaled_pos_y = floor(shifted_pos_y * 16);
+               int32_t scaled_pos_x = floorf(shifted_pos_x * 16);
+               int32_t scaled_pos_y = floorf(shifted_pos_y * 16);
 
                sample_locs[i].x = CLAMP(scaled_pos_x, -8, 7);
                sample_locs[i].y = CLAMP(scaled_pos_y, -8, 7);
index b10ee86ac7c1d16f654c4f42fda73c0a84d0b8d9..f470432d20bf0b2e477048e977da614ee263ecdb 100644 (file)
@@ -988,7 +988,7 @@ static uint8_t radv_pipeline_get_ps_iter_samples(const VkGraphicsPipelineCreateI
        }
 
        if (vkms->sampleShadingEnable) {
-               ps_iter_samples = ceil(vkms->minSampleShading * num_samples);
+               ps_iter_samples = ceilf(vkms->minSampleShading * num_samples);
                ps_iter_samples = util_next_power_of_two(ps_iter_samples);
        }
        return ps_iter_samples;
index c0fd636de608a36dac97d7cf434bd48b64e6dc9a..43d288145d75b4f91a84008c3adde47fb56b4b87 100644 (file)
@@ -595,10 +595,10 @@ static VkRect2D si_scissor_from_viewport(const VkViewport *viewport)
 
        get_viewport_xform(viewport, scale, translate);
 
-       rect.offset.x = translate[0] - fabs(scale[0]);
-       rect.offset.y = translate[1] - fabs(scale[1]);
-       rect.extent.width = ceilf(translate[0] + fabs(scale[0])) - rect.offset.x;
-       rect.extent.height = ceilf(translate[1] + fabs(scale[1])) - rect.offset.y;
+       rect.offset.x = translate[0] - fabsf(scale[0]);
+       rect.offset.y = translate[1] - fabsf(scale[1]);
+       rect.extent.width = ceilf(translate[0] + fabsf(scale[0])) - rect.offset.x;
+       rect.extent.height = ceilf(translate[1] + fabsf(scale[1])) - rect.offset.y;
 
        return rect;
 }
index 3a7212326e304cc9e9fe639a045dd1d7ec3029b6..1357d026dfeaa9a0983e4d153857d31884c06d36 100644 (file)
@@ -361,10 +361,10 @@ try_setup_line( struct lp_setup_context *setup,
    if (fabsf(dx) >= fabsf(dy)) {
       float dydx = dy / dx;
 
-      x1diff = v1[0][0] - (float) floor(v1[0][0]) - 0.5;
-      y1diff = v1[0][1] - (float) floor(v1[0][1]) - 0.5;
-      x2diff = v2[0][0] - (float) floor(v2[0][0]) - 0.5;
-      y2diff = v2[0][1] - (float) floor(v2[0][1]) - 0.5;
+      x1diff = v1[0][0] - floorf(v1[0][0]) - 0.5f;
+      y1diff = v1[0][1] - floorf(v1[0][1]) - 0.5f;
+      x2diff = v2[0][0] - floorf(v2[0][0]) - 0.5f;
+      y2diff = v2[0][1] - floorf(v2[0][1]) - 0.5f;
 
       if (y2diff==-0.5 && dy<0){
          y2diff = 0.5;
@@ -459,10 +459,10 @@ try_setup_line( struct lp_setup_context *setup,
       const float dxdy = dx / dy;
 
       /* Y-MAJOR LINE */      
-      x1diff = v1[0][0] - (float) floor(v1[0][0]) - 0.5;
-      y1diff = v1[0][1] - (float) floor(v1[0][1]) - 0.5;
-      x2diff = v2[0][0] - (float) floor(v2[0][0]) - 0.5;
-      y2diff = v2[0][1] - (float) floor(v2[0][1]) - 0.5;
+      x1diff = v1[0][0] - floorf(v1[0][0]) - 0.5f;
+      y1diff = v1[0][1] - floorf(v1[0][1]) - 0.5f;
+      x2diff = v2[0][0] - floorf(v2[0][0]) - 0.5f;
+      y2diff = v2[0][1] - floorf(v2[0][1]) - 0.5f;
 
       if (x2diff==-0.5 && dx<0) {
          x2diff = 0.5;
index 05674ff24b88eaaae5235815050a475377c5c668..73287f126b06c5ac842cf7f7f3568a1de73499f4 100644 (file)
@@ -326,7 +326,7 @@ void expr_handler::apply_alu_src_mod(const bc_alu &bc, unsigned src,
        const bc_alu_src &s = bc.src[src];
 
        if (s.abs)
-               v = fabs(v.f);
+               v = fabsf(v.f);
        if (s.neg)
                v = -v.f;
 }
@@ -424,21 +424,21 @@ bool expr_handler::fold_alu_op1(alu_node& n) {
        apply_alu_src_mod(n.bc, 0, cv);
 
        switch (n.bc.op) {
-       case ALU_OP1_CEIL: dv = ceil(cv.f); break;
+       case ALU_OP1_CEIL: dv = ceilf(cv.f); break;
        case ALU_OP1_COS: dv = cos(cv.f * 2.0f * M_PI); break;
-       case ALU_OP1_EXP_IEEE: dv = exp2(cv.f); break;
-       case ALU_OP1_FLOOR: dv = floor(cv.f); break;
+       case ALU_OP1_EXP_IEEE: dv = exp2f(cv.f); break;
+       case ALU_OP1_FLOOR: dv = floorf(cv.f); break;
        case ALU_OP1_FLT_TO_INT: dv = (int)cv.f; break; // FIXME: round modes ????
-       case ALU_OP1_FLT_TO_INT_FLOOR: dv = (int32_t)floor(cv.f); break;
-       case ALU_OP1_FLT_TO_INT_RPI: dv = (int32_t)floor(cv.f + 0.5f); break;
-       case ALU_OP1_FLT_TO_INT_TRUNC: dv = (int32_t)trunc(cv.f); break;
+       case ALU_OP1_FLT_TO_INT_FLOOR: dv = (int32_t)floorf(cv.f); break;
+       case ALU_OP1_FLT_TO_INT_RPI: dv = (int32_t)floorf(cv.f + 0.5f); break;
+       case ALU_OP1_FLT_TO_INT_TRUNC: dv = (int32_t)truncf(cv.f); break;
        case ALU_OP1_FLT_TO_UINT: dv = (uint32_t)cv.f; break;
-       case ALU_OP1_FRACT: dv = cv.f - floor(cv.f); break;
+       case ALU_OP1_FRACT: dv = cv.f - floorf(cv.f); break;
        case ALU_OP1_INT_TO_FLT: dv = (float)cv.i; break;
        case ALU_OP1_LOG_CLAMPED:
        case ALU_OP1_LOG_IEEE:
                if (cv.f != 0.0f)
-                       dv = log2(cv.f);
+                       dv = log2f(cv.f);
                else
                        // don't fold to NAN, let the GPU handle it for now
                        // (prevents degenerate LIT tests from failing)
@@ -454,7 +454,7 @@ bool expr_handler::fold_alu_op1(alu_node& n) {
        case ALU_OP1_PRED_SET_RESTORE: dv = cv; break;
        case ALU_OP1_RECIPSQRT_CLAMPED:
        case ALU_OP1_RECIPSQRT_FF:
-       case ALU_OP1_RECIPSQRT_IEEE: dv = 1.0f / sqrt(cv.f); break;
+       case ALU_OP1_RECIPSQRT_IEEE: dv = 1.0f / sqrtf(cv.f); break;
        case ALU_OP1_RECIP_CLAMPED:
        case ALU_OP1_RECIP_FF:
        case ALU_OP1_RECIP_IEEE: dv = 1.0f / cv.f; break;
@@ -462,8 +462,8 @@ bool expr_handler::fold_alu_op1(alu_node& n) {
        case ALU_OP1_RECIP_UINT: dv.u = (1ull << 32) / cv.u; break;
 //     case ALU_OP1_RNDNE: dv = floor(cv.f + 0.5f); break;
        case ALU_OP1_SIN: dv = sin(cv.f * 2.0f * M_PI); break;
-       case ALU_OP1_SQRT_IEEE: dv = sqrt(cv.f); break;
-       case ALU_OP1_TRUNC: dv = trunc(cv.f); break;
+       case ALU_OP1_SQRT_IEEE: dv = sqrtf(cv.f); break;
+       case ALU_OP1_TRUNC: dv = truncf(cv.f); break;
 
        default:
                return false;
index d3f67c6426c5619db36980f68df918725651d62c..5a0ec40d9c8632e2ecc8f25e6a6075b902d2a9fa 100644 (file)
@@ -2320,7 +2320,7 @@ create_filter_table(void)
       for (i = 0; i < WEIGHT_LUT_SIZE; ++i) {
          const float alpha = 2;
          const float r2 = (float) i / (float) (WEIGHT_LUT_SIZE - 1);
-         const float weight = (float) exp(-alpha * r2);
+         const float weight = (float) expf(-alpha * r2);
          lut[i] = weight;
       }
       weightLut = lut;
index c1aeacd22ced538fcaf3cc96be7ffed864f10253..89548ad7019a6b9ef3a0cb9cd44f62d00f80570b 100644 (file)
@@ -34,7 +34,7 @@
 #include "util/u_sampler.h"
 #include "util/u_draw_quad.h"
 
-#define floatsEqual(x, y) (fabs(x - y) <= 0.00001f * MIN2(fabs(x), fabs(y)))
+#define floatsEqual(x, y) (fabsf(x - y) <= 0.00001f * MIN2(fabsf(x), fabsf(y)))
 #define floatIsZero(x) (floatsEqual((x) + 1, 1))
 
 #define NUM_COMPONENTS 4
index 46926f8bdf4952a435bddf9ad3fcddeff5c96d98..d54368556d37877349a906af09240a0d70577629 100644 (file)
@@ -245,7 +245,7 @@ gen_diff_l3_weights(struct gen_l3_weights w0, struct gen_l3_weights w1)
       float dw = 0;
 
       for (unsigned i = 0; i < GEN_NUM_L3P; i++)
-         dw += fabs(w0.w[i] - w1.w[i]);
+         dw += fabsf(w0.w[i] - w1.w[i]);
 
       return dw;
    }
index 9f80e413577f1746e351b6605952973bd0999bb4..247cad4a38b6832741bea795eebfefc1304cd1f2 100644 (file)
@@ -244,7 +244,7 @@ nv10_get_spot_coeff(struct gl_light *l, float k[7])
        float a0, b0, a1, a2, b2, a3;
 
        if (e > 0)
-               a0 = -1 - 5.36e-3 / sqrt(e);
+               a0 = -1 - 5.36e-3 / sqrtf(e);
        else
                a0 = -1;
        b0 = 1 / (1 + 0.273 * e);
index 6ab1bf50177b9919035740d622ac347cdc9fb381..754e3d480b81fec49fa7e53a6284fe4cd4fca9e8 100644 (file)
@@ -539,7 +539,7 @@ _mesa_get_min_invocations_per_fragment(struct gl_context *ctx,
                                             SYSTEM_BIT_SAMPLE_POS)))
          return MAX2(_mesa_geometric_samples(ctx->DrawBuffer), 1);
       else if (ctx->Multisample.SampleShading)
-         return MAX2(ceil(ctx->Multisample.MinSampleShadingValue *
+         return MAX2(ceilf(ctx->Multisample.MinSampleShadingValue *
                           _mesa_geometric_samples(ctx->DrawBuffer)), 1);
       else
          return 1;
index 44ad533417511a66e7fde6cba6716e9bbd09815d..475ab2fc8cd5f45ea94674029d5330d3a589a086 100644 (file)
@@ -479,7 +479,7 @@ accum_bitmap(struct gl_context *ctx,
       if (px < 0 || px + width > BITMAP_CACHE_WIDTH ||
           py < 0 || py + height > BITMAP_CACHE_HEIGHT ||
           !TEST_EQ_4V(ctx->Current.RasterColor, cache->color) ||
-          ((fabs(z - cache->zpos) > Z_EPSILON))) {
+          ((fabsf(z - cache->zpos) > Z_EPSILON))) {
          /* This bitmap would extend beyond cache bounds, or the bitmap
           * color is changing
           * so flush and continue.
index f73037791de1bd6dae6d0e5c0791c795b32d396a..cb91b81260e88d3f4b4237e31512778bfbf4cb4d 100644 (file)
@@ -1628,7 +1628,7 @@ create_filter_table(void)
       for (i = 0; i < WEIGHT_LUT_SIZE; ++i) {
          GLfloat alpha = 2;
          GLfloat r2 = (GLfloat) i / (GLfloat) (WEIGHT_LUT_SIZE - 1);
-         GLfloat weight = (GLfloat) exp(-alpha * r2);
+         GLfloat weight = expf(-alpha * r2);
          weightLut[i] = weight;
       }
    }