X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;ds=sidebyside;f=src%2Fglsl%2Fbuiltin_functions.cpp;h=bb7fbcdc17dbf1b147c0e126e5c4e0cfa86caa23;hb=5690c2b54cd40c4444ccd8ece4a4af917f64bc61;hp=94b57810720bbad657b5fbff5b282ca02176ae42;hpb=e877aadde065de567da53f67ea8d60e19825693d;p=mesa.git diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp index 94b57810720..bb7fbcdc17d 100644 --- a/src/glsl/builtin_functions.cpp +++ b/src/glsl/builtin_functions.cpp @@ -225,6 +225,14 @@ shader_packing_or_gpu_shader5(const _mesa_glsl_parse_state *state) gpu_shader5(state); } +static bool +fs_gpu_shader5(const _mesa_glsl_parse_state *state) +{ + return state->stage == MESA_SHADER_FRAGMENT && + (state->is_version(400, 0) || state->ARB_gpu_shader5_enable); +} + + static bool texture_array_lod(const _mesa_glsl_parse_state *state) { @@ -309,6 +317,14 @@ fs_oes_derivatives(const _mesa_glsl_parse_state *state) state->OES_standard_derivatives_enable); } +static bool +fs_derivative_control(const _mesa_glsl_parse_state *state) +{ + return state->stage == MESA_SHADER_FRAGMENT && + (state->is_version(450, 0) || + state->ARB_derivative_control_enable); +} + static bool tex1d_lod(const _mesa_glsl_parse_state *state) { @@ -426,6 +442,7 @@ private: ir_swizzle *matrix_elt(ir_variable *var, int col, int row); ir_expression *asin_expr(ir_variable *x); + void do_atan(ir_factory &body, const glsl_type *type, ir_variable *res, operand y_over_x); /** * Call function \param f with parameters specified as the linked @@ -610,6 +627,12 @@ private: B1(dFdx); B1(dFdy); B1(fwidth); + B1(dFdxCoarse); + B1(dFdyCoarse); + B1(fwidthCoarse); + B1(dFdxFine); + B1(dFdyFine); + B1(fwidthFine); B1(noise1); B1(noise2); B1(noise3); @@ -627,6 +650,9 @@ private: B1(uaddCarry) B1(usubBorrow) B1(mulExtended) + B1(interpolateAtCentroid) + B1(interpolateAtOffset) + B1(interpolateAtSample) ir_function_signature *_atomic_intrinsic(builtin_available_predicate avail); ir_function_signature *_atomic_op(const char *intrinsic, @@ -695,7 +721,8 @@ builtin_builder::find(_mesa_glsl_parse_state *state, if (f == NULL) return NULL; - ir_function_signature *sig = f->matching_signature(state, actual_parameters); + ir_function_signature *sig = + f->matching_signature(state, actual_parameters, true); if (sig == NULL) return NULL; @@ -1856,8 +1883,8 @@ builtin_builder::create_builtins() NULL); add_function("texture2DProjLod", - _texture(ir_txl, v110_lod, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT), - _texture(ir_txl, v110_lod, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txl, lod_exists_in_stage, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_txl, lod_exists_in_stage, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT), NULL); add_function("texture3D", @@ -1884,7 +1911,7 @@ builtin_builder::create_builtins() NULL); add_function("textureCubeLod", - _texture(ir_txl, v110_lod, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type), + _texture(ir_txl, lod_exists_in_stage, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type), NULL); add_function("texture2DRect", @@ -2136,6 +2163,12 @@ builtin_builder::create_builtins() F(dFdx) F(dFdy) F(fwidth) + F(dFdxCoarse) + F(dFdyCoarse) + F(fwidthCoarse) + F(dFdxFine) + F(dFdyFine) + F(fwidthFine) F(noise1) F(noise2) F(noise3) @@ -2186,6 +2219,24 @@ builtin_builder::create_builtins() _mulExtended(glsl_type::uvec3_type), _mulExtended(glsl_type::uvec4_type), NULL); + add_function("interpolateAtCentroid", + _interpolateAtCentroid(glsl_type::float_type), + _interpolateAtCentroid(glsl_type::vec2_type), + _interpolateAtCentroid(glsl_type::vec3_type), + _interpolateAtCentroid(glsl_type::vec4_type), + NULL); + add_function("interpolateAtOffset", + _interpolateAtOffset(glsl_type::float_type), + _interpolateAtOffset(glsl_type::vec2_type), + _interpolateAtOffset(glsl_type::vec3_type), + _interpolateAtOffset(glsl_type::vec4_type), + NULL); + add_function("interpolateAtSample", + _interpolateAtSample(glsl_type::float_type), + _interpolateAtSample(glsl_type::vec2_type), + _interpolateAtSample(glsl_type::vec3_type), + _interpolateAtSample(glsl_type::vec4_type), + NULL); add_function("atomicCounter", _atomic_op("__intrinsic_atomic_read", @@ -2575,8 +2626,7 @@ builtin_builder::call(ir_function *f, ir_variable *ret, exec_list params) { exec_list actual_params; - foreach_list(node, ¶ms) { - ir_variable *var = (ir_variable *) node; + foreach_in_list(ir_variable, var, ¶ms) { actual_params.push_tail(var_ref(var)); } @@ -2635,11 +2685,7 @@ builtin_builder::_atan2(const glsl_type *type) ir_factory outer_then(&outer_if->then_instructions, mem_ctx); /* Then...call atan(y/x) */ - ir_variable *y_over_x = outer_then.make_temp(glsl_type::float_type, "y_over_x"); - outer_then.emit(assign(y_over_x, div(y, x))); - outer_then.emit(assign(r, mul(y_over_x, rsq(add(mul(y_over_x, y_over_x), - imm(1.0f)))))); - outer_then.emit(assign(r, asin_expr(r))); + do_atan(body, glsl_type::float_type, r, div(y, x)); /* ...and fix it up: */ ir_if *inner_if = new(mem_ctx) ir_if(less(x, imm(0.0f))); @@ -2662,17 +2708,65 @@ builtin_builder::_atan2(const glsl_type *type) return sig; } +void +builtin_builder::do_atan(ir_factory &body, const glsl_type *type, ir_variable *res, operand y_over_x) +{ + /* + * range-reduction, first step: + * + * / y_over_x if |y_over_x| <= 1.0; + * x = < + * \ 1.0 / y_over_x otherwise + */ + ir_variable *x = body.make_temp(type, "atan_x"); + body.emit(assign(x, div(min2(abs(y_over_x), + imm(1.0f)), + max2(abs(y_over_x), + imm(1.0f))))); + + /* + * approximate atan by evaluating polynomial: + * + * x * 0.9999793128310355 - x^3 * 0.3326756418091246 + + * x^5 * 0.1938924977115610 - x^7 * 0.1173503194786851 + + * x^9 * 0.0536813784310406 - x^11 * 0.0121323213173444 + */ + ir_variable *tmp = body.make_temp(type, "atan_tmp"); + body.emit(assign(tmp, mul(x, x))); + body.emit(assign(tmp, mul(add(mul(sub(mul(add(mul(sub(mul(add(mul(imm(-0.0121323213173444f), + tmp), + imm(0.0536813784310406f)), + tmp), + imm(0.1173503194786851f)), + tmp), + imm(0.1938924977115610f)), + tmp), + imm(0.3326756418091246f)), + tmp), + imm(0.9999793128310355f)), + x))); + + /* range-reduction fixup */ + body.emit(assign(tmp, add(tmp, + mul(b2f(greater(abs(y_over_x), + imm(1.0f, type->components()))), + add(mul(tmp, + imm(-2.0f)), + imm(M_PI_2f)))))); + + /* sign fixup */ + body.emit(assign(res, mul(tmp, sign(y_over_x)))); +} + ir_function_signature * builtin_builder::_atan(const glsl_type *type) { ir_variable *y_over_x = in_var(type, "y_over_x"); MAKE_SIG(type, always_available, 1, y_over_x); - ir_variable *t = body.make_temp(type, "t"); - body.emit(assign(t, mul(y_over_x, rsq(add(mul(y_over_x, y_over_x), - imm(1.0f)))))); - - body.emit(ret(asin_expr(t))); + ir_variable *tmp = body.make_temp(type, "tmp"); + do_atan(body, type, tmp, y_over_x); + body.emit(ret(tmp)); return sig; } @@ -3981,7 +4075,11 @@ builtin_builder::_textureQueryLevels(const glsl_type *sampler_type) } UNOP(dFdx, ir_unop_dFdx, fs_oes_derivatives) +UNOP(dFdxCoarse, ir_unop_dFdx_coarse, fs_derivative_control) +UNOP(dFdxFine, ir_unop_dFdx_fine, fs_derivative_control) UNOP(dFdy, ir_unop_dFdy, fs_oes_derivatives) +UNOP(dFdyCoarse, ir_unop_dFdy_coarse, fs_derivative_control) +UNOP(dFdyFine, ir_unop_dFdy_fine, fs_derivative_control) ir_function_signature * builtin_builder::_fwidth(const glsl_type *type) @@ -3994,6 +4092,30 @@ builtin_builder::_fwidth(const glsl_type *type) return sig; } +ir_function_signature * +builtin_builder::_fwidthCoarse(const glsl_type *type) +{ + ir_variable *p = in_var(type, "p"); + MAKE_SIG(type, fs_derivative_control, 1, p); + + body.emit(ret(add(abs(expr(ir_unop_dFdx_coarse, p)), + abs(expr(ir_unop_dFdy_coarse, p))))); + + return sig; +} + +ir_function_signature * +builtin_builder::_fwidthFine(const glsl_type *type) +{ + ir_variable *p = in_var(type, "p"); + MAKE_SIG(type, fs_derivative_control, 1, p); + + body.emit(ret(add(abs(expr(ir_unop_dFdx_fine, p)), + abs(expr(ir_unop_dFdy_fine, p))))); + + return sig; +} + ir_function_signature * builtin_builder::_noise1(const glsl_type *type) { @@ -4260,6 +4382,44 @@ builtin_builder::_mulExtended(const glsl_type *type) return sig; } +ir_function_signature * +builtin_builder::_interpolateAtCentroid(const glsl_type *type) +{ + ir_variable *interpolant = in_var(type, "interpolant"); + interpolant->data.must_be_shader_input = 1; + MAKE_SIG(type, fs_gpu_shader5, 1, interpolant); + + body.emit(ret(interpolate_at_centroid(interpolant))); + + return sig; +} + +ir_function_signature * +builtin_builder::_interpolateAtOffset(const glsl_type *type) +{ + ir_variable *interpolant = in_var(type, "interpolant"); + interpolant->data.must_be_shader_input = 1; + ir_variable *offset = in_var(glsl_type::vec2_type, "offset"); + MAKE_SIG(type, fs_gpu_shader5, 2, interpolant, offset); + + body.emit(ret(interpolate_at_offset(interpolant, offset))); + + return sig; +} + +ir_function_signature * +builtin_builder::_interpolateAtSample(const glsl_type *type) +{ + ir_variable *interpolant = in_var(type, "interpolant"); + interpolant->data.must_be_shader_input = 1; + ir_variable *sample_num = in_var(glsl_type::int_type, "sample_num"); + MAKE_SIG(type, fs_gpu_shader5, 2, interpolant, sample_num); + + body.emit(ret(interpolate_at_sample(interpolant, sample_num))); + + return sig; +} + ir_function_signature * builtin_builder::_atomic_intrinsic(builtin_available_predicate avail) { @@ -4350,9 +4510,11 @@ builtin_builder::_image_prototype(const glsl_type *image_type, sig->parameters.push_tail(in_var(glsl_type::int_type, "sample")); /* Data arguments. */ - for (unsigned i = 0; i < num_arguments; ++i) - sig->parameters.push_tail(in_var(data_type, - ralloc_asprintf(NULL, "arg%d", i))); + for (unsigned i = 0; i < num_arguments; ++i) { + char *arg_name = ralloc_asprintf(NULL, "arg%d", i); + sig->parameters.push_tail(in_var(data_type, arg_name)); + ralloc_free(arg_name); + } /* Set the maximal set of qualifiers allowed for this image * built-in. Function calls with arguments having fewer @@ -4361,11 +4523,11 @@ builtin_builder::_image_prototype(const glsl_type *image_type, * accept everything that needs to be accepted, and reject cases * like loads from write-only or stores to read-only images. */ - image->data.image.read_only = flags & IMAGE_FUNCTION_READ_ONLY; - image->data.image.write_only = flags & IMAGE_FUNCTION_WRITE_ONLY; - image->data.image.coherent = true; - image->data.image._volatile = true; - image->data.image.restrict_flag = true; + image->data.image_read_only = (flags & IMAGE_FUNCTION_READ_ONLY) != 0; + image->data.image_write_only = (flags & IMAGE_FUNCTION_WRITE_ONLY) != 0; + image->data.image_coherent = true; + image->data.image_volatile = true; + image->data.image_restrict = true; return sig; }