From 762a6333f21fd8606f69db6060027c4522d46678 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 7 Mar 2017 19:54:37 -0800 Subject: [PATCH] nir: Rework conversion opcodes The NIR story on conversion opcodes is a mess. We've had way too many of them, naming is inconsistent, and which ones have explicit sizes was sort-of random. This commit re-organizes things and makes them all consistent: - All non-bool conversion opcodes now have the explicit size in the destination and are named <src_type>2<dst_type><size>. - Integer <-> integer conversion opcodes now only come in i2i and u2u forms (i2u and u2i have been removed) since the only difference between the different integer conversions is whether or not they sign-extend when up-converting. - Boolean conversion opcodes all have the explicit size on the bool and are named <src_type>2<dst_type>. Making things consistent also allows nir_type_conversion_op to be moved to nir_opcodes.c and auto-generated using mako. This will make adding int8, int16, and float16 versions much easier when the time comes. 
Reviewed-by: Eric Anholt --- src/amd/common/ac_nir_to_llvm.c | 24 ++-- src/amd/vulkan/radv_meta_blit2d.c | 6 +- src/compiler/glsl/glsl_to_nir.cpp | 8 +- src/compiler/nir/nir.c | 122 ------------------ src/compiler/nir/nir_builder.h | 4 + src/compiler/nir/nir_lower_double_ops.c | 4 +- src/compiler/nir/nir_lower_idiv.c | 16 +-- src/compiler/nir/nir_lower_tex.c | 4 +- src/compiler/nir/nir_opcodes.py | 54 +++----- src/compiler/nir/nir_opcodes_c.py | 66 ++++++++++ src/compiler/nir/nir_opt_algebraic.py | 40 +++--- src/gallium/auxiliary/nir/tgsi_to_nir.c | 10 +- .../drivers/freedreno/ir3/ir3_compiler_nir.c | 8 +- src/gallium/drivers/vc4/vc4_nir_lower_blend.c | 2 +- src/gallium/drivers/vc4/vc4_nir_lower_io.c | 16 +-- src/gallium/drivers/vc4/vc4_program.c | 8 +- src/intel/blorp/blorp_blit.c | 18 +-- src/intel/compiler/brw_fs_nir.cpp | 35 ++--- .../compiler/brw_nir_attribute_workarounds.c | 10 +- src/intel/compiler/brw_vec4_nir.cpp | 67 +++++----- .../vulkan/anv_nir_lower_input_attachments.c | 2 +- src/mesa/program/prog_to_nir.c | 2 +- 22 files changed, 218 insertions(+), 308 deletions(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 0aba6797e7c..58f512ea997 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -1449,41 +1449,37 @@ static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr) src[i] = to_integer(ctx, src[i]); result = ac_build_gather_values(&ctx->ac, src, num_components); break; - case nir_op_d2i: - case nir_op_f2i: + case nir_op_f2i32: + case nir_op_f2i64: src[0] = to_float(ctx, src[0]); result = LLVMBuildFPToSI(ctx->builder, src[0], def_type, ""); break; - case nir_op_d2u: - case nir_op_f2u: + case nir_op_f2u32: + case nir_op_f2u64: src[0] = to_float(ctx, src[0]); result = LLVMBuildFPToUI(ctx->builder, src[0], def_type, ""); break; - case nir_op_i2d: - case nir_op_i2f: + case nir_op_i2f32: + case nir_op_i2f64: result = LLVMBuildSIToFP(ctx->builder, src[0], 
to_float_type(ctx, def_type), ""); break; - case nir_op_u2d: - case nir_op_u2f: + case nir_op_u2f32: + case nir_op_u2f64: result = LLVMBuildUIToFP(ctx->builder, src[0], to_float_type(ctx, def_type), ""); break; - case nir_op_f2d: + case nir_op_f2f64: result = LLVMBuildFPExt(ctx->builder, src[0], to_float_type(ctx, def_type), ""); break; - case nir_op_d2f: + case nir_op_f2f32: result = LLVMBuildFPTrunc(ctx->builder, src[0], to_float_type(ctx, def_type), ""); break; case nir_op_u2u32: case nir_op_u2u64: - case nir_op_u2i32: - case nir_op_u2i64: if (get_elem_bits(ctx, LLVMTypeOf(src[0])) < get_elem_bits(ctx, def_type)) result = LLVMBuildZExt(ctx->builder, src[0], def_type, ""); else result = LLVMBuildTrunc(ctx->builder, src[0], def_type, ""); break; - case nir_op_i2u32: - case nir_op_i2u64: case nir_op_i2i32: case nir_op_i2i64: if (get_elem_bits(ctx, LLVMTypeOf(src[0])) < get_elem_bits(ctx, def_type)) diff --git a/src/amd/vulkan/radv_meta_blit2d.c b/src/amd/vulkan/radv_meta_blit2d.c index 225b4b2d412..d0cf5885bdb 100644 --- a/src/amd/vulkan/radv_meta_blit2d.c +++ b/src/amd/vulkan/radv_meta_blit2d.c @@ -586,7 +586,7 @@ build_nir_copy_fragment_shader(struct radv_device *device, vec4, "f_color"); color_out->data.location = FRAG_RESULT_DATA0; - nir_ssa_def *pos_int = nir_f2i(&b, nir_load_var(&b, tex_pos_in)); + nir_ssa_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in)); unsigned swiz[4] = { 0, 1 }; nir_ssa_def *tex_pos = nir_swizzle(&b, pos_int, swiz, 2, false); @@ -615,7 +615,7 @@ build_nir_copy_fragment_shader_depth(struct radv_device *device, vec4, "f_color"); color_out->data.location = FRAG_RESULT_DEPTH; - nir_ssa_def *pos_int = nir_f2i(&b, nir_load_var(&b, tex_pos_in)); + nir_ssa_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in)); unsigned swiz[4] = { 0, 1 }; nir_ssa_def *tex_pos = nir_swizzle(&b, pos_int, swiz, 2, false); @@ -644,7 +644,7 @@ build_nir_copy_fragment_shader_stencil(struct radv_device *device, vec4, "f_color"); color_out->data.location 
= FRAG_RESULT_STENCIL; - nir_ssa_def *pos_int = nir_f2i(&b, nir_load_var(&b, tex_pos_in)); + nir_ssa_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in)); unsigned swiz[4] = { 0, 1 }; nir_ssa_def *tex_pos = nir_swizzle(&b, pos_int, swiz, 2, false); diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp index e67688c50d0..f0557f985bf 100644 --- a/src/compiler/glsl/glsl_to_nir.cpp +++ b/src/compiler/glsl/glsl_to_nir.cpp @@ -1456,10 +1456,10 @@ nir_visitor::visit(ir_expression *ir) case ir_unop_exp2: result = nir_fexp2(&b, srcs[0]); break; case ir_unop_log2: result = nir_flog2(&b, srcs[0]); break; case ir_unop_i2f: - result = supports_ints ? nir_i2f(&b, srcs[0]) : nir_fmov(&b, srcs[0]); + result = supports_ints ? nir_i2f32(&b, srcs[0]) : nir_fmov(&b, srcs[0]); break; case ir_unop_u2f: - result = supports_ints ? nir_u2f(&b, srcs[0]) : nir_fmov(&b, srcs[0]); + result = supports_ints ? nir_u2f32(&b, srcs[0]) : nir_fmov(&b, srcs[0]); break; case ir_unop_b2f: result = supports_ints ? nir_b2f(&b, srcs[0]) : nir_fmov(&b, srcs[0]); @@ -1502,6 +1502,10 @@ nir_visitor::visit(ir_expression *ir) nir_alu_type dst_type = nir_get_nir_type_for_glsl_base_type(out_type); result = nir_build_alu(&b, nir_type_conversion_op(src_type, dst_type), srcs[0], NULL, NULL, NULL); + /* b2i and b2f don't have fixed bit-size versions so the builder will + * just assume 32 and we have to fix it up here. 
+ */ + result->bit_size = nir_alu_type_get_type_size(dst_type); break; } diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c index 37fd9cb5c56..937b6300624 100644 --- a/src/compiler/nir/nir.c +++ b/src/compiler/nir/nir.c @@ -1958,125 +1958,3 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin) unreachable("intrinsic doesn't produce a system value"); } } - -nir_op -nir_type_conversion_op(nir_alu_type src, nir_alu_type dst) -{ - nir_alu_type src_base_type = (nir_alu_type) nir_alu_type_get_base_type(src); - nir_alu_type dst_base_type = (nir_alu_type) nir_alu_type_get_base_type(dst); - unsigned src_bitsize = nir_alu_type_get_type_size(src); - unsigned dst_bitsize = nir_alu_type_get_type_size(dst); - - if (src_bitsize == dst_bitsize) { - switch (src_base_type) { - case nir_type_int: - case nir_type_uint: - if (dst_base_type == nir_type_uint || dst_base_type == nir_type_int) - return nir_op_imov; - break; - case nir_type_float: - if (dst_base_type == nir_type_float) - return nir_op_fmov; - break; - case nir_type_bool: - if (dst_base_type == nir_type_bool) - return nir_op_imov; - break; - default: - unreachable("Invalid conversion"); - } - } - - switch (src_base_type) { - case nir_type_int: - switch (dst_base_type) { - case nir_type_int: - assert(src_bitsize != dst_bitsize); - return (dst_bitsize == 32) ? nir_op_i2i32 : nir_op_i2i64; - case nir_type_uint: - assert(src_bitsize != dst_bitsize); - return (dst_bitsize == 32) ? nir_op_i2u32 : nir_op_i2u64; - case nir_type_float: - switch (src_bitsize) { - case 32: - return (dst_bitsize == 32) ? nir_op_i2f : nir_op_i2d; - case 64: - return (dst_bitsize == 32) ? nir_op_i642f : nir_op_i642d; - default: - unreachable("Invalid conversion"); - } - case nir_type_bool: - return (src_bitsize == 32) ? nir_op_i2b : nir_op_i642b; - default: - unreachable("Invalid conversion"); - } - - case nir_type_uint: - switch (dst_base_type) { - case nir_type_int: - assert(src_bitsize != dst_bitsize); - return (dst_bitsize == 32) ? 
nir_op_u2i32 : nir_op_u2i64; - case nir_type_uint: - assert(src_bitsize != dst_bitsize); - return (dst_bitsize == 32) ? nir_op_u2u32 : nir_op_u2u64; - case nir_type_float: - switch (src_bitsize) { - case 32: - return (dst_bitsize == 32) ? nir_op_u2f : nir_op_u2d; - case 64: - return (dst_bitsize == 32) ? nir_op_u642f : nir_op_u642d; - default: - unreachable("Invalid conversion"); - } - case nir_type_bool: - return (src_bitsize == 32) ? nir_op_i2b : nir_op_i642b; - default: - unreachable("Invalid conversion"); - } - - case nir_type_float: - switch (dst_base_type) { - case nir_type_int: - switch (src_bitsize) { - case 32: - return (dst_bitsize == 32) ? nir_op_f2i : nir_op_f2i64; - case 64: - return (dst_bitsize == 32) ? nir_op_d2i : nir_op_f2i64; - default: - unreachable("Invalid conversion"); - } - case nir_type_uint: - switch (src_bitsize) { - case 32: - return (dst_bitsize == 32) ? nir_op_f2u : nir_op_f2u64; - case 64: - return (dst_bitsize == 32) ? nir_op_d2u : nir_op_f2u64; - default: - unreachable("Invalid conversion"); - } - case nir_type_float: - assert(src_bitsize != dst_bitsize); - return (dst_bitsize == 32) ? nir_op_d2f : nir_op_f2d; - case nir_type_bool: - return (src_bitsize == 32) ? nir_op_f2b : nir_op_d2b; - default: - unreachable("Invalid conversion"); - } - - case nir_type_bool: - switch (dst_base_type) { - case nir_type_int: - case nir_type_uint: - return (dst_bitsize == 32) ? nir_op_b2i : nir_op_b2i64; - case nir_type_float: - /* GLSL just emits f2d(b2f(x)) for b2d */ - assert(dst_bitsize == 32); - return nir_op_b2f; - default: - unreachable("Invalid conversion"); - } - - default: - unreachable("Invalid conversion"); - } -} diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h index 1dc56ebf53d..a4f15b6d335 100644 --- a/src/compiler/nir/nir_builder.h +++ b/src/compiler/nir/nir_builder.h @@ -328,6 +328,10 @@ nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0, } } + /* When in doubt, assume 32. 
*/ + if (bit_size == 0) + bit_size = 32; + /* Make sure we don't swizzle from outside of our source vector (like if a * scalar value was passed into a multiply with a vector). */ diff --git a/src/compiler/nir/nir_lower_double_ops.c b/src/compiler/nir/nir_lower_double_ops.c index ad9631327b4..00eeb89b1bd 100644 --- a/src/compiler/nir/nir_lower_double_ops.c +++ b/src/compiler/nir/nir_lower_double_ops.c @@ -116,7 +116,7 @@ lower_rcp(nir_builder *b, nir_ssa_def *src) /* cast to float, do an rcp, and then cast back to get an approximate * result */ - nir_ssa_def *ra = nir_f2d(b, nir_frcp(b, nir_d2f(b, src_norm))); + nir_ssa_def *ra = nir_f2f64(b, nir_frcp(b, nir_f2f32(b, src_norm))); /* Fixup the exponent of the result - note that we check if this is too * small below. @@ -180,7 +180,7 @@ lower_sqrt_rsq(nir_builder *b, nir_ssa_def *src, bool sqrt) nir_iadd(b, nir_imm_int(b, 1023), even)); - nir_ssa_def *ra = nir_f2d(b, nir_frsq(b, nir_d2f(b, src_norm))); + nir_ssa_def *ra = nir_f2f64(b, nir_frsq(b, nir_f2f32(b, src_norm))); nir_ssa_def *new_exp = nir_isub(b, get_exponent(b, ra), half); ra = set_exponent(b, ra, new_exp); diff --git a/src/compiler/nir/nir_lower_idiv.c b/src/compiler/nir/nir_lower_idiv.c index 6726b718aaa..194ca5a75a8 100644 --- a/src/compiler/nir/nir_lower_idiv.c +++ b/src/compiler/nir/nir_lower_idiv.c @@ -56,15 +56,15 @@ convert_instr(nir_builder *bld, nir_alu_instr *alu) denom = nir_ssa_for_alu_src(bld, alu, 1); if (is_signed) { - af = nir_i2f(bld, numer); - bf = nir_i2f(bld, denom); + af = nir_i2f32(bld, numer); + bf = nir_i2f32(bld, denom); af = nir_fabs(bld, af); bf = nir_fabs(bld, bf); a = nir_iabs(bld, numer); b = nir_iabs(bld, denom); } else { - af = nir_u2f(bld, numer); - bf = nir_u2f(bld, denom); + af = nir_u2f32(bld, numer); + bf = nir_u2f32(bld, denom); a = numer; b = denom; } @@ -75,17 +75,17 @@ convert_instr(nir_builder *bld, nir_alu_instr *alu) q = nir_fmul(bld, af, bf); if (is_signed) { - q = nir_f2i(bld, q); + q = nir_f2i32(bld, q); } 
else { - q = nir_f2u(bld, q); + q = nir_f2u32(bld, q); } /* get error of first result: */ r = nir_imul(bld, q, b); r = nir_isub(bld, a, r); - r = nir_u2f(bld, r); + r = nir_u2f32(bld, r); r = nir_fmul(bld, r, bf); - r = nir_f2u(bld, r); + r = nir_f2u32(bld, r); /* add quotients: */ q = nir_iadd(bld, q, r); diff --git a/src/compiler/nir/nir_lower_tex.c b/src/compiler/nir/nir_lower_tex.c index 213406aaa98..70054679955 100644 --- a/src/compiler/nir/nir_lower_tex.c +++ b/src/compiler/nir/nir_lower_tex.c @@ -121,7 +121,7 @@ lower_offset(nir_builder *b, nir_tex_instr *tex) nir_ssa_def *offset_coord; if (nir_tex_instr_src_type(tex, coord_index) == nir_type_float) { assert(tex->sampler_dim == GLSL_SAMPLER_DIM_RECT); - offset_coord = nir_fadd(b, coord, nir_i2f(b, offset)); + offset_coord = nir_fadd(b, coord, nir_i2f32(b, offset)); } else { offset_coord = nir_iadd(b, coord, offset); } @@ -176,7 +176,7 @@ get_texture_size(nir_builder *b, nir_tex_instr *tex) nir_tex_instr_dest_size(txs), 32, NULL); nir_builder_instr_insert(b, &txs->instr); - return nir_i2f(b, &txs->dest.ssa); + return nir_i2f32(b, &txs->dest.ssa); } static void diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py index 8cad74832a6..52868d5f5a4 100644 --- a/src/compiler/nir/nir_opcodes.py +++ b/src/compiler/nir/nir_opcodes.py @@ -165,42 +165,26 @@ unop("frsq", tfloat, "bit_size == 64 ? 1.0 / sqrt(src0) : 1.0f / sqrtf(src0)") unop("fsqrt", tfloat, "bit_size == 64 ? sqrt(src0) : sqrtf(src0)") unop("fexp2", tfloat, "exp2f(src0)") unop("flog2", tfloat, "log2f(src0)") -unop_convert("f2i", tint32, tfloat32, "src0") # Float-to-integer conversion. -unop_convert("f2u", tuint32, tfloat32, "src0") # Float-to-unsigned conversion -unop_convert("d2i", tint32, tfloat64, "src0") # Double-to-integer conversion. -unop_convert("d2u", tuint32, tfloat64, "src0") # Double-to-unsigned conversion. -unop_convert("i2f", tfloat32, tint32, "src0") # Integer-to-float conversion. 
-unop_convert("i2d", tfloat64, tint32, "src0") # Integer-to-double conversion. -unop_convert("i2i32", tint32, tint, "src0") # General int (int8_t, int64_t, etc.) to int32_t conversion -unop_convert("u2i32", tint32, tuint, "src0") # General uint (uint8_t, uint64_t, etc.) to int32_t conversion -unop_convert("i2u32", tuint32, tint, "src0") # General int (int8_t, int64_t, etc.) to uint32_t conversion -unop_convert("u2u32", tuint32, tuint, "src0") # General uint (uint8_t, uint64_t, etc.) to uint32_t conversion -unop_convert("i2i64", tint64, tint, "src0") # General int (int8_t, int32_t, etc.) to int64_t conversion -unop_convert("u2i64", tint64, tuint, "src0") # General uint (uint8_t, uint64_t, etc.) to int64_t conversion -unop_convert("f2i64", tint64, tfloat, "src0") # General float (float or double) to int64_t conversion -unop_convert("i2u64", tuint64, tint, "src0") # General int (int8_t, int64_t, etc.) to uint64_t conversion -unop_convert("u2u64", tuint64, tuint, "src0") # General uint (uint8_t, uint32_t, etc.) to uint64_t conversion -unop_convert("f2u64", tuint64, tfloat, "src0") # General float (float or double) to uint64_t conversion -unop_convert("i642f", tfloat32, tint64, "src0") # int64_t-to-float conversion. -unop_convert("i642b", tbool, tint64, "src0") # int64_t-to-bool conversion. -unop_convert("i642d", tfloat64, tint64, "src0") # int64_t-to-double conversion. -unop_convert("u642f", tfloat32, tuint64, "src0") # uint64_t-to-float conversion. -unop_convert("u642d", tfloat64, tuint64, "src0") # uint64_t-to-double conversion. - -# Float-to-boolean conversion -unop_convert("f2b", tbool, tfloat32, "src0 != 0.0f") -unop_convert("d2b", tbool, tfloat64, "src0 != 0.0") -# Boolean-to-float conversion -unop_convert("b2f", tfloat32, tbool, "src0 ? 
1.0f : 0.0f") -# Int-to-boolean conversion + +# Generate all of the numeric conversion opcodes +for src_t in [tint, tuint, tfloat]: + if src_t in (tint, tuint): + dst_types = [tfloat, src_t] + elif src_t == tfloat: + dst_types = [tint, tuint, tfloat] + + for dst_t in dst_types: + for bit_size in [32, 64]: + unop_convert("{}2{}{}".format(src_t[0], dst_t[0], bit_size), + dst_t + str(bit_size), src_t, "src0") + +# We'll hand-code the to/from bool conversion opcodes. Because bool doesn't +# have multiple bit-sizes, we can always infer the size from the other type. +unop_convert("f2b", tbool, tfloat, "src0 != 0.0") unop_convert("i2b", tbool, tint, "src0 != 0") -unop_convert("b2i", tint32, tbool, "src0 ? 1 : 0") # Boolean-to-int conversion -unop_convert("b2i64", tint64, tbool, "src0 ? 1 : 0") # Boolean-to-int64_t conversion. -unop_convert("u2f", tfloat32, tuint32, "src0") # Unsigned-to-float conversion. -unop_convert("u2d", tfloat64, tuint32, "src0") # Unsigned-to-double conversion. -# double-to-float conversion -unop_convert("d2f", tfloat32, tfloat64, "src0") # Double to single precision -unop_convert("f2d", tfloat64, tfloat32, "src0") # Single to double precision +unop_convert("b2f", tfloat, tbool, "src0 ? 1.0 : 0.0") +unop_convert("b2i", tint, tbool, "src0 ? 1 : 0") + # Unary floating-point rounding operations. 
diff --git a/src/compiler/nir/nir_opcodes_c.py b/src/compiler/nir/nir_opcodes_c.py index 5f8bdc12a07..c66f3bc7ad4 100644 --- a/src/compiler/nir/nir_opcodes_c.py +++ b/src/compiler/nir/nir_opcodes_c.py @@ -29,6 +29,72 @@ from mako.template import Template template = Template(""" #include "nir.h" +nir_op +nir_type_conversion_op(nir_alu_type src, nir_alu_type dst) +{ + nir_alu_type src_base = (nir_alu_type) nir_alu_type_get_base_type(src); + nir_alu_type dst_base = (nir_alu_type) nir_alu_type_get_base_type(dst); + unsigned src_bit_size = nir_alu_type_get_type_size(src); + unsigned dst_bit_size = nir_alu_type_get_type_size(dst); + + if (src == dst && src_base == nir_type_float) { + return nir_op_fmov; + } else if ((src_base == nir_type_int || src_base == nir_type_uint) && + (dst_base == nir_type_int || dst_base == nir_type_uint) && + src_bit_size == dst_bit_size) { + /* Integer <-> integer conversions with the same bit-size on both + * ends are just no-op moves. + */ + return nir_op_imov; + } + + switch (src_base) { +% for src_t in ['int', 'uint', 'float']: + case nir_type_${src_t}: + switch (dst_base) { +% for dst_t in ['int', 'uint', 'float']: + case nir_type_${dst_t}: +% if src_t in ['int', 'uint'] and dst_t in ['int', 'uint']: +% if dst_t == 'int': +<% continue %> +% else: +<% dst_t = src_t %> +% endif +% endif + switch (dst_bit_size) { +% for dst_bits in [32, 64]: + case ${dst_bits}: + return ${'nir_op_{}2{}{}'.format(src_t[0], dst_t[0], dst_bits)}; +% endfor + default: + unreachable("Invalid nir alu bit size"); + } +% endfor + case nir_type_bool: +% if src_t == 'float': + return nir_op_f2b; +% else: + return nir_op_i2b; +% endif + default: + unreachable("Invalid nir alu base type"); + } +% endfor + case nir_type_bool: + switch (dst_base) { + case nir_type_int: + case nir_type_uint: + return nir_op_b2i; + case nir_type_float: + return nir_op_b2f; + default: + unreachable("Invalid nir alu base type"); + } + default: + unreachable("Invalid nir alu base type"); + } 
+} + const nir_op_info nir_op_infos[nir_num_opcodes] = { % for name, opcode in sorted(opcodes.iteritems()): { diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index f60c338b624..49c1460e25a 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -78,7 +78,7 @@ optimizations = [ (('ineg', ('ineg', a)), a), (('fabs', ('fabs', a)), ('fabs', a)), (('fabs', ('fneg', a)), ('fabs', a)), - (('fabs', ('u2f', a)), ('u2f', a)), + (('fabs', ('u2f32', a)), ('u2f32', a)), (('iabs', ('iabs', a)), ('iabs', a)), (('iabs', ('ineg', a)), ('iabs', a)), (('~fadd', a, 0.0), a), @@ -212,7 +212,7 @@ optimizations = [ (('fsat', ('fadd', ('b2f', a), ('b2f', b))), ('b2f', ('ior', a, b))), (('iand', 'a@bool', 1.0), ('b2f', a)), # True/False are ~0 and 0 in NIR. b2i of True is 1, and -1 is ~0 (True). - (('ineg', ('b2i', a)), a), + (('ineg', ('b2i@32', a)), a), (('flt', ('fneg', ('b2f', a)), 0), a), # Generated by TGSI KILL_IF. (('flt', ('fsub', 0.0, ('b2f', a)), 0), a), # Generated by TGSI KILL_IF. # Comparison with the same args. 
Note that these are not done for @@ -298,8 +298,8 @@ optimizations = [ # Conversions (('i2b', ('b2i', a)), a), - (('f2i', ('ftrunc', a)), ('f2i', a)), - (('f2u', ('ftrunc', a)), ('f2u', a)), + (('f2i32', ('ftrunc', a)), ('f2i32', a)), + (('f2u32', ('ftrunc', a)), ('f2u32', a)), (('i2b', ('ineg', a)), ('i2b', a)), (('i2b', ('iabs', a)), ('i2b', a)), (('fabs', ('b2f', a)), ('b2f', a)), @@ -387,49 +387,49 @@ optimizations = [ (('pack_unorm_2x16', 'v'), ('pack_uvec2_to_uint', - ('f2u', ('fround_even', ('fmul', ('fsat', 'v'), 65535.0)))), + ('f2u32', ('fround_even', ('fmul', ('fsat', 'v'), 65535.0)))), 'options->lower_pack_unorm_2x16'), (('pack_unorm_4x8', 'v'), ('pack_uvec4_to_uint', - ('f2u', ('fround_even', ('fmul', ('fsat', 'v'), 255.0)))), + ('f2u32', ('fround_even', ('fmul', ('fsat', 'v'), 255.0)))), 'options->lower_pack_unorm_4x8'), (('pack_snorm_2x16', 'v'), ('pack_uvec2_to_uint', - ('f2i', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), 32767.0)))), + ('f2i32', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), 32767.0)))), 'options->lower_pack_snorm_2x16'), (('pack_snorm_4x8', 'v'), ('pack_uvec4_to_uint', - ('f2i', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), 127.0)))), + ('f2i32', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), 127.0)))), 'options->lower_pack_snorm_4x8'), (('unpack_unorm_2x16', 'v'), - ('fdiv', ('u2f', ('vec2', ('extract_u16', 'v', 0), - ('extract_u16', 'v', 1))), + ('fdiv', ('u2f32', ('vec2', ('extract_u16', 'v', 0), + ('extract_u16', 'v', 1))), 65535.0), 'options->lower_unpack_unorm_2x16'), (('unpack_unorm_4x8', 'v'), - ('fdiv', ('u2f', ('vec4', ('extract_u8', 'v', 0), - ('extract_u8', 'v', 1), - ('extract_u8', 'v', 2), - ('extract_u8', 'v', 3))), + ('fdiv', ('u2f32', ('vec4', ('extract_u8', 'v', 0), + ('extract_u8', 'v', 1), + ('extract_u8', 'v', 2), + ('extract_u8', 'v', 3))), 255.0), 'options->lower_unpack_unorm_4x8'), (('unpack_snorm_2x16', 'v'), - ('fmin', 1.0, ('fmax', -1.0, ('fdiv', 
('i2f', ('vec2', ('extract_i16', 'v', 0), - ('extract_i16', 'v', 1))), + ('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f32', ('vec2', ('extract_i16', 'v', 0), + ('extract_i16', 'v', 1))), 32767.0))), 'options->lower_unpack_snorm_2x16'), (('unpack_snorm_4x8', 'v'), - ('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec4', ('extract_i8', 'v', 0), - ('extract_i8', 'v', 1), - ('extract_i8', 'v', 2), - ('extract_i8', 'v', 3))), + ('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f32', ('vec4', ('extract_i8', 'v', 0), + ('extract_i8', 'v', 1), + ('extract_i8', 'v', 2), + ('extract_i8', 'v', 3))), 127.0))), 'options->lower_unpack_snorm_4x8'), ] diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index 5de74e0c3c8..de33375cb52 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -866,7 +866,7 @@ ttn_move_dest(nir_builder *b, nir_alu_dest dest, nir_ssa_def *def) static void ttn_arl(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src) { - ttn_move_dest(b, dest, nir_f2i(b, nir_ffloor(b, src[0]))); + ttn_move_dest(b, dest, nir_f2i32(b, nir_ffloor(b, src[0]))); } /* EXP - Approximate Exponential Base 2 @@ -1587,7 +1587,7 @@ static const nir_op op_trans[TGSI_OPCODE_LAST] = { [TGSI_OPCODE_POPA] = 0, /* XXX */ [TGSI_OPCODE_CEIL] = nir_op_fceil, - [TGSI_OPCODE_I2F] = nir_op_i2f, + [TGSI_OPCODE_I2F] = nir_op_i2f32, [TGSI_OPCODE_NOT] = nir_op_inot, [TGSI_OPCODE_TRUNC] = nir_op_ftrunc, [TGSI_OPCODE_SHL] = nir_op_ishl, @@ -1624,7 +1624,7 @@ static const nir_op op_trans[TGSI_OPCODE_LAST] = { [TGSI_OPCODE_END] = 0, - [TGSI_OPCODE_F2I] = nir_op_f2i, + [TGSI_OPCODE_F2I] = nir_op_f2i32, [TGSI_OPCODE_IDIV] = nir_op_idiv, [TGSI_OPCODE_IMAX] = nir_op_imax, [TGSI_OPCODE_IMIN] = nir_op_imin, @@ -1632,8 +1632,8 @@ static const nir_op op_trans[TGSI_OPCODE_LAST] = { [TGSI_OPCODE_ISGE] = nir_op_ige, [TGSI_OPCODE_ISHR] = nir_op_ishr, [TGSI_OPCODE_ISLT] = nir_op_ilt, - [TGSI_OPCODE_F2U] = nir_op_f2u, - 
[TGSI_OPCODE_U2F] = nir_op_u2f, + [TGSI_OPCODE_F2U] = nir_op_f2u32, + [TGSI_OPCODE_U2F] = nir_op_u2f32, [TGSI_OPCODE_UADD] = nir_op_iadd, [TGSI_OPCODE_UDIV] = nir_op_udiv, [TGSI_OPCODE_UMAD] = 0, diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index bb3f66c2300..fd4a1d6ecce 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -722,16 +722,16 @@ emit_alu(struct ir3_compile *ctx, nir_alu_instr *alu) } switch (alu->op) { - case nir_op_f2i: + case nir_op_f2i32: dst[0] = ir3_COV(b, src[0], TYPE_F32, TYPE_S32); break; - case nir_op_f2u: + case nir_op_f2u32: dst[0] = ir3_COV(b, src[0], TYPE_F32, TYPE_U32); break; - case nir_op_i2f: + case nir_op_i2f32: dst[0] = ir3_COV(b, src[0], TYPE_S32, TYPE_F32); break; - case nir_op_u2f: + case nir_op_u2f32: dst[0] = ir3_COV(b, src[0], TYPE_U32, TYPE_F32); break; case nir_op_imov: diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c index 56f6c3bd0e9..2ed89ead55b 100644 --- a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c +++ b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c @@ -637,7 +637,7 @@ vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b, * coordinate, instead. 
*/ nir_ssa_def *num_samples = nir_imm_float(b, VC4_MAX_SAMPLES); - nir_ssa_def *num_bits = nir_f2i(b, nir_fmul(b, a, num_samples)); + nir_ssa_def *num_bits = nir_f2i32(b, nir_fmul(b, a, num_samples)); nir_ssa_def *bitmask = nir_isub(b, nir_ishl(b, nir_imm_int(b, 1), diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_io.c b/src/gallium/drivers/vc4/vc4_nir_lower_io.c index 4a795f8da0f..b7969a562a6 100644 --- a/src/gallium/drivers/vc4/vc4_nir_lower_io.c +++ b/src/gallium/drivers/vc4/vc4_nir_lower_io.c @@ -106,11 +106,11 @@ vc4_nir_get_vattr_channel_vpm(struct vc4_compile *c, } else if (chan->size == 32 && chan->type == UTIL_FORMAT_TYPE_SIGNED) { if (chan->normalized) { return nir_fmul(b, - nir_i2f(b, vpm_reads[swiz]), + nir_i2f32(b, vpm_reads[swiz]), nir_imm_float(b, 1.0 / 0x7fffffff)); } else { - return nir_i2f(b, vpm_reads[swiz]); + return nir_i2f32(b, vpm_reads[swiz]); } } else if (chan->size == 8 && (chan->type == UTIL_FORMAT_TYPE_UNSIGNED || @@ -125,16 +125,16 @@ vc4_nir_get_vattr_channel_vpm(struct vc4_compile *c, nir_imm_float(b, 1.0)); } else { return nir_fadd(b, - nir_i2f(b, - vc4_nir_unpack_8i(b, temp, - swiz)), + nir_i2f32(b, + vc4_nir_unpack_8i(b, temp, + swiz)), nir_imm_float(b, -128.0)); } } else { if (chan->normalized) { return vc4_nir_unpack_8f(b, vpm, swiz); } else { - return nir_i2f(b, vc4_nir_unpack_8i(b, vpm, swiz)); + return nir_i2f32(b, vc4_nir_unpack_8i(b, vpm, swiz)); } } } else if (chan->size == 16 && @@ -146,7 +146,7 @@ vc4_nir_get_vattr_channel_vpm(struct vc4_compile *c, * UNPACK_16_I for all of these. 
*/ if (chan->type == UTIL_FORMAT_TYPE_SIGNED) { - temp = nir_i2f(b, vc4_nir_unpack_16i(b, vpm, swiz & 1)); + temp = nir_i2f32(b, vc4_nir_unpack_16i(b, vpm, swiz & 1)); if (chan->normalized) { return nir_fmul(b, temp, nir_imm_float(b, 1/32768.0f)); @@ -154,7 +154,7 @@ vc4_nir_get_vattr_channel_vpm(struct vc4_compile *c, return temp; } } else { - temp = nir_i2f(b, vc4_nir_unpack_16u(b, vpm, swiz & 1)); + temp = nir_i2f32(b, vc4_nir_unpack_16u(b, vpm, swiz & 1)); if (chan->normalized) { return nir_fmul(b, temp, nir_imm_float(b, 1 / 65535.0)); diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 3708b1bbb51..59368734d08 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -1150,12 +1150,12 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr) result = qir_FMAX(c, src[0], src[1]); break; - case nir_op_f2i: - case nir_op_f2u: + case nir_op_f2i32: + case nir_op_f2u32: result = qir_FTOI(c, src[0]); break; - case nir_op_i2f: - case nir_op_u2f: + case nir_op_i2f32: + case nir_op_u2f32: result = qir_ITOF(c, src[0]); break; case nir_op_b2f: diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c index 0cc5a840338..e650d5e7bf1 100644 --- a/src/intel/blorp/blorp_blit.c +++ b/src/intel/blorp/blorp_blit.c @@ -97,7 +97,7 @@ blorp_blit_get_frag_coords(nir_builder *b, const struct brw_blorp_blit_prog_key *key, struct brw_blorp_blit_vars *v) { - nir_ssa_def *coord = nir_f2i(b, nir_load_var(b, v->frag_coord)); + nir_ssa_def *coord = nir_f2i32(b, nir_load_var(b, v->frag_coord)); /* Account for destination surface intratile offset * @@ -764,7 +764,7 @@ blorp_nir_manual_blend_bilinear(nir_builder *b, nir_ssa_def *pos, nir_ssa_def *sample_off = nir_imm_vec2(b, sample_off_x, sample_off_y); nir_ssa_def *sample_coords = nir_fadd(b, pos_xy, sample_off); - nir_ssa_def *sample_coords_int = nir_f2i(b, sample_coords); + nir_ssa_def *sample_coords_int = nir_f2i32(b, sample_coords); /* 
The MCS value we fetch has to match up with the pixel that we're * sampling from. Since we sample from different pixels in each @@ -821,7 +821,7 @@ blorp_nir_manual_blend_bilinear(nir_builder *b, nir_ssa_def *pos, nir_ssa_def *sample = nir_fdot2(b, frac, nir_imm_vec2(b, key->x_scale, key->x_scale * key->y_scale)); - sample = nir_f2i(b, sample); + sample = nir_f2i32(b, sample); if (tex_samples == 8) { sample = nir_iand(b, nir_ishr(b, nir_imm_int(b, 0x64210573), @@ -1150,7 +1150,7 @@ brw_blorp_build_nir_shader(struct blorp_context *blorp, void *mem_ctx, blorp_nir_discard_if_outside_rect(&b, dst_pos, &v); } - src_pos = blorp_blit_apply_transform(&b, nir_i2f(&b, dst_pos), &v); + src_pos = blorp_blit_apply_transform(&b, nir_i2f32(&b, dst_pos), &v); if (dst_pos->num_components == 3) { /* The sample coordinate is an integer that we want left alone but * blorp_blit_apply_transform() blindly applies the transform to all @@ -1175,7 +1175,7 @@ brw_blorp_build_nir_shader(struct blorp_context *blorp, void *mem_ctx, /* Resolves (effecively) use texelFetch, so we need integers and we * don't care about the sample index if we got one. */ - src_pos = nir_f2i(&b, nir_channels(&b, src_pos, 0x3)); + src_pos = nir_f2i32(&b, nir_channels(&b, src_pos, 0x3)); if (devinfo->gen == 6) { /* Because gen6 only supports 4x interleved MSAA, we can do all the @@ -1187,7 +1187,7 @@ brw_blorp_build_nir_shader(struct blorp_context *blorp, void *mem_ctx, */ src_pos = nir_ishl(&b, src_pos, nir_imm_int(&b, 1)); src_pos = nir_iadd(&b, src_pos, nir_imm_int(&b, 1)); - src_pos = nir_i2f(&b, src_pos); + src_pos = nir_i2f32(&b, src_pos); color = blorp_nir_tex(&b, &v, src_pos, key->texture_data_type); } else { /* Gen7+ hardware doesn't automaticaly blend. 
*/ @@ -1204,11 +1204,11 @@ brw_blorp_build_nir_shader(struct blorp_context *blorp, void *mem_ctx, } else { /* We're going to use texelFetch, so we need integers */ if (src_pos->num_components == 2) { - src_pos = nir_f2i(&b, src_pos); + src_pos = nir_f2i32(&b, src_pos); } else { assert(src_pos->num_components == 3); - src_pos = nir_vec3(&b, nir_channel(&b, nir_f2i(&b, src_pos), 0), - nir_channel(&b, nir_f2i(&b, src_pos), 1), + src_pos = nir_vec3(&b, nir_channel(&b, nir_f2i32(&b, src_pos), 0), + nir_channel(&b, nir_f2i32(&b, src_pos), 1), nir_channel(&b, src_pos, 2)); } diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index ef569b5588c..bc1ccfba3d2 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -641,17 +641,17 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) } switch (instr->op) { - case nir_op_i2f: - case nir_op_u2f: + case nir_op_i2f32: + case nir_op_u2f32: if (optimize_extract_to_float(instr, result)) return; inst = bld.MOV(result, op[0]); inst->saturate = instr->dest.saturate; break; - case nir_op_f2d: - case nir_op_i2d: - case nir_op_u2d: + case nir_op_f2f64: + case nir_op_i2f64: + case nir_op_u2f64: /* CHV PRM, vol07, 3D Media GPGPU Engine, Register Region Restrictions: * * "When source or destination is 64b (...), regioning in Align1 @@ -676,25 +676,15 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) break; } /* fallthrough */ - case nir_op_i642d: - case nir_op_u642d: + case nir_op_f2f32: + case nir_op_f2i32: + case nir_op_f2u32: case nir_op_f2i64: case nir_op_f2u64: - case nir_op_i2i64: - case nir_op_i2u64: - case nir_op_u2i64: - case nir_op_u2u64: - case nir_op_d2f: - case nir_op_d2i: - case nir_op_d2u: - case nir_op_i642f: - case nir_op_u642f: - case nir_op_u2i32: case nir_op_i2i32: + case nir_op_i2i64: case nir_op_u2u32: - case nir_op_i2u32: - case nir_op_f2i: - case nir_op_f2u: + case nir_op_u2u64: inst = bld.MOV(result, op[0]); 
inst->saturate = instr->dest.saturate; break; @@ -1077,7 +1067,6 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) inst->saturate = instr->dest.saturate; break; - case nir_op_b2i64: case nir_op_b2i: case nir_op_b2f: bld.MOV(result, negate(op[0])); @@ -1085,14 +1074,12 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) case nir_op_i2b: case nir_op_f2b: - case nir_op_i642b: - case nir_op_d2b: if (nir_src_bit_size(instr->src[0].src) == 64) { /* two-argument instructions can't take 64-bit immediates */ fs_reg zero; fs_reg tmp; - if (instr->op == nir_op_d2b) { + if (instr->op == nir_op_f2b) { zero = vgrf(glsl_type::double_type); tmp = vgrf(glsl_type::double_type); } else { diff --git a/src/intel/compiler/brw_nir_attribute_workarounds.c b/src/intel/compiler/brw_nir_attribute_workarounds.c index d695771f04a..c719371ddf1 100644 --- a/src/intel/compiler/brw_nir_attribute_workarounds.c +++ b/src/intel/compiler/brw_nir_attribute_workarounds.c @@ -99,7 +99,7 @@ apply_attr_wa_block(nir_block *block, struct attr_wa_state *state) nir_imm_vec4(b, 1.0f / ((1 << 9) - 1), 1.0f / ((1 << 9) - 1), 1.0f / ((1 << 9) - 1), 1.0f / ((1 << 1) - 1)); val = nir_fmax(b, - nir_fmul(b, nir_i2f(b, val), es3_normalize_factor), + nir_fmul(b, nir_i2f32(b, val), es3_normalize_factor), nir_imm_float(b, -1.0f)); } else { /* The following equations are from the OpenGL 3.2 specification: @@ -121,18 +121,18 @@ apply_attr_wa_block(nir_block *block, struct attr_wa_state *state) /* For signed normalization, the numerator is 2c+1. */ nir_ssa_def *two = nir_imm_float(b, 2.0f); nir_ssa_def *one = nir_imm_float(b, 1.0f); - val = nir_fadd(b, nir_fmul(b, nir_i2f(b, val), two), one); + val = nir_fadd(b, nir_fmul(b, nir_i2f32(b, val), two), one); } else { /* For unsigned normalization, the numerator is just c. 
*/ - val = nir_u2f(b, val); + val = nir_u2f32(b, val); } val = nir_fmul(b, val, normalize_factor); } } if (wa_flags & BRW_ATTRIB_WA_SCALE) { - val = (wa_flags & BRW_ATTRIB_WA_SIGN) ? nir_i2f(b, val) - : nir_u2f(b, val); + val = (wa_flags & BRW_ATTRIB_WA_SIGN) ? nir_i2f32(b, val) + : nir_u2f32(b, val); } nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa, nir_src_for_ssa(val), diff --git a/src/intel/compiler/brw_vec4_nir.cpp b/src/intel/compiler/brw_vec4_nir.cpp index ca2e5dd05eb..23842653997 100644 --- a/src/intel/compiler/brw_vec4_nir.cpp +++ b/src/intel/compiler/brw_vec4_nir.cpp @@ -1287,32 +1287,24 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) case nir_op_vec4: unreachable("not reached: should be handled by lower_vec_to_movs()"); - case nir_op_i2f: - case nir_op_u2f: + case nir_op_i2f32: + case nir_op_u2f32: inst = emit(MOV(dst, op[0])); inst->saturate = instr->dest.saturate; break; - case nir_op_f2i: - case nir_op_f2u: - inst = emit(MOV(dst, op[0])); - break; - - case nir_op_d2f: - emit_conversion_from_double(dst, op[0], instr->dest.saturate); - break; - - case nir_op_f2d: - emit_conversion_to_double(dst, op[0], instr->dest.saturate); - break; - - case nir_op_d2i: - case nir_op_d2u: - emit_conversion_from_double(dst, op[0], instr->dest.saturate); + case nir_op_f2f32: + case nir_op_f2i32: + case nir_op_f2u32: + if (nir_src_bit_size(instr->src[0].src) == 64) + emit_conversion_from_double(dst, op[0], instr->dest.saturate); + else + inst = emit(MOV(dst, op[0])); break; - case nir_op_i2d: - case nir_op_u2d: + case nir_op_f2f64: + case nir_op_i2f64: + case nir_op_u2f64: emit_conversion_to_double(dst, op[0], instr->dest.saturate); break; @@ -1681,26 +1673,25 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) break; case nir_op_f2b: - emit(CMP(dst, op[0], brw_imm_f(0.0f), BRW_CONDITIONAL_NZ)); - break; - - case nir_op_d2b: { - /* We use a MOV with conditional_mod to check if the provided value is - * 0.0. 
We want this to flush denormalized numbers to zero, so we set a - * source modifier on the source operand to trigger this, as source - * modifiers don't affect the result of the testing against 0.0. - */ - src_reg value = op[0]; - value.abs = true; - vec4_instruction *inst = emit(MOV(dst_null_df(), value)); - inst->conditional_mod = BRW_CONDITIONAL_NZ; + if (nir_src_bit_size(instr->src[0].src) == 64) { + /* We use a MOV with conditional_mod to check if the provided value is + * 0.0. We want this to flush denormalized numbers to zero, so we set a + * source modifier on the source operand to trigger this, as source + * modifiers don't affect the result of the testing against 0.0. + */ + src_reg value = op[0]; + value.abs = true; + vec4_instruction *inst = emit(MOV(dst_null_df(), value)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; - src_reg one = src_reg(this, glsl_type::ivec4_type); - emit(MOV(dst_reg(one), brw_imm_d(~0))); - inst = emit(BRW_OPCODE_SEL, dst, one, brw_imm_d(0)); - inst->predicate = BRW_PREDICATE_NORMAL; + src_reg one = src_reg(this, glsl_type::ivec4_type); + emit(MOV(dst_reg(one), brw_imm_d(~0))); + inst = emit(BRW_OPCODE_SEL, dst, one, brw_imm_d(0)); + inst->predicate = BRW_PREDICATE_NORMAL; + } else { + emit(CMP(dst, op[0], brw_imm_f(0.0f), BRW_CONDITIONAL_NZ)); + } break; - } case nir_op_i2b: emit(CMP(dst, op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ)); diff --git a/src/intel/vulkan/anv_nir_lower_input_attachments.c b/src/intel/vulkan/anv_nir_lower_input_attachments.c index 244e7ff7ae1..6ed7fafbf01 100644 --- a/src/intel/vulkan/anv_nir_lower_input_attachments.c +++ b/src/intel/vulkan/anv_nir_lower_input_attachments.c @@ -57,7 +57,7 @@ try_lower_input_load(nir_function_impl *impl, nir_intrinsic_instr *load) nir_builder_init(&b, impl); b.cursor = nir_before_instr(&load->instr); - nir_ssa_def *frag_coord = nir_f2i(&b, load_frag_coord(&b)); + nir_ssa_def *frag_coord = nir_f2i32(&b, load_frag_coord(&b)); nir_ssa_def *offset = nir_ssa_for_src(&b, 
load->src[0], 2); nir_ssa_def *pos = nir_iadd(&b, frag_coord, offset); diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c index ed80d46dc4d..a1c5ba6ac20 100644 --- a/src/mesa/program/prog_to_nir.c +++ b/src/mesa/program/prog_to_nir.c @@ -306,7 +306,7 @@ ptn_move_dest(nir_builder *b, nir_alu_dest dest, nir_ssa_def *def) static void ptn_arl(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src) { - ptn_move_dest(b, dest, nir_f2i(b, nir_ffloor(b, src[0]))); + ptn_move_dest(b, dest, nir_f2i32(b, nir_ffloor(b, src[0]))); } /* EXP - Approximate Exponential Base 2 -- 2.30.2