X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;ds=inline;f=src%2Fglsl%2Fbuiltin_functions.cpp;h=1bc3de4aec59ae87deae54efa2fa7e80ed99ef49;hb=9b387b5d3f4103c51079ea5298d33086af6da433;hp=c882ec8dd0d2adc0a5952d1fdcaa00d921e3b96d;hpb=f80c6847e969016596b5ce3202236c15663cc61a;p=mesa.git diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp index c882ec8dd0d..1bc3de4aec5 100644 --- a/src/glsl/builtin_functions.cpp +++ b/src/glsl/builtin_functions.cpp @@ -60,7 +60,7 @@ #include "ir_builder.h" #include "glsl_parser_extras.h" #include "program/prog_instruction.h" -#include +#include #define M_PIf ((float) M_PI) #define M_PI_2f ((float) M_PI_2) @@ -135,6 +135,19 @@ v140(const _mesa_glsl_parse_state *state) return state->is_version(140, 0); } +static bool +v400_fs_only(const _mesa_glsl_parse_state *state) +{ + return state->is_version(400, 0) && + state->stage == MESA_SHADER_FRAGMENT; +} + +static bool +es31(const _mesa_glsl_parse_state *state) +{ + return state->is_version(0, 310); +} + static bool texture_rectangle(const _mesa_glsl_parse_state *state) { @@ -194,14 +207,15 @@ shader_bit_encoding(const _mesa_glsl_parse_state *state) static bool shader_integer_mix(const _mesa_glsl_parse_state *state) { - return v130(state) && state->EXT_shader_integer_mix_enable; + return state->is_version(450, 310) || + (v130(state) && state->EXT_shader_integer_mix_enable); } static bool shader_packing_or_es3(const _mesa_glsl_parse_state *state) { return state->ARB_shading_language_packing_enable || - state->is_version(400, 300); + state->is_version(420, 300); } static bool @@ -219,10 +233,17 @@ gpu_shader5(const _mesa_glsl_parse_state *state) } static bool -shader_packing_or_gpu_shader5(const _mesa_glsl_parse_state *state) +gpu_shader5_or_es31(const _mesa_glsl_parse_state *state) +{ + return state->is_version(400, 310) || state->ARB_gpu_shader5_enable; +} + +static bool +shader_packing_or_es31_or_gpu_shader5(const _mesa_glsl_parse_state *state) { return state->ARB_shading_language_packing_enable || - gpu_shader5(state); + state->ARB_gpu_shader5_enable || + state->is_version(400, 310); } static bool @@ -255,6 +276,13 @@ texture_array(const _mesa_glsl_parse_state *state) static bool texture_multisample(const _mesa_glsl_parse_state *state) +{ + return state->is_version(150, 310) || + state->ARB_texture_multisample_enable; +} + +static bool +texture_multisample_array(const _mesa_glsl_parse_state *state) { return state->is_version(150, 0) || state->ARB_texture_multisample_enable; @@ -297,15 +325,24 @@ texture_gather(const _mesa_glsl_parse_state *state) state->ARB_gpu_shader5_enable; } +static bool +texture_gather_or_es31(const _mesa_glsl_parse_state *state) +{ + return state->is_version(400, 310) || + state->ARB_texture_gather_enable || + state->ARB_gpu_shader5_enable; +} + /* Only ARB_texture_gather but not GLSL 4.0 or ARB_gpu_shader5. * used for relaxation of const offset requirements. */ static bool -texture_gather_only(const _mesa_glsl_parse_state *state) +texture_gather_only_or_es31(const _mesa_glsl_parse_state *state) { return !state->is_version(400, 0) && !state->ARB_gpu_shader5_enable && - state->ARB_texture_gather_enable; + (state->ARB_texture_gather_enable || + state->is_version(0, 310)); } /* Desktop GL or OES_standard_derivatives + fragment shader only */ @@ -359,7 +396,7 @@ tex3d_lod(const _mesa_glsl_parse_state *state) static bool shader_atomic_counters(const _mesa_glsl_parse_state *state) { - return state->ARB_shader_atomic_counters_enable; + return state->has_atomic_counters(); } static bool @@ -370,17 +407,44 @@ shader_trinary_minmax(const _mesa_glsl_parse_state *state) static bool shader_image_load_store(const _mesa_glsl_parse_state *state) +{ + return (state->is_version(420, 310) || + state->ARB_shader_image_load_store_enable); +} + +static bool +shader_image_atomic(const _mesa_glsl_parse_state *state) { return (state->is_version(420, 0) || state->ARB_shader_image_load_store_enable); } +static bool +shader_image_size(const _mesa_glsl_parse_state *state) +{ + return state->is_version(430, 310) || + state->ARB_shader_image_size_enable; +} + static bool gs_streams(const _mesa_glsl_parse_state *state) { return gpu_shader5(state) && gs_only(state); } +static bool +fp64(const _mesa_glsl_parse_state *state) +{ + return state->has_double(); +} + +static bool +barrier_supported(const _mesa_glsl_parse_state *state) +{ + return state->stage == MESA_SHADER_COMPUTE || + state->stage == MESA_SHADER_TESS_CTRL; +} + /** @} */ /******************************************************************************/ @@ -436,12 +500,14 @@ private: ir_constant *imm(float f, unsigned vector_elements=1); ir_constant *imm(int i, unsigned vector_elements=1); ir_constant *imm(unsigned u, unsigned vector_elements=1); + ir_constant *imm(double d, unsigned vector_elements=1); ir_constant *imm(const glsl_type *type, const ir_constant_data &); ir_dereference_variable *var_ref(ir_variable *var); ir_dereference_array *array_ref(ir_variable *var, int i); ir_swizzle *matrix_elt(ir_variable *var, int col, int row); ir_expression *asin_expr(ir_variable *x); + void do_atan(ir_factory &body, const glsl_type *type, ir_variable *res, operand y_over_x); /** * Call function \param f with parameters specified as the linked @@ -454,13 +520,19 @@ private: /** Create a new function and add the given signatures. */ void add_function(const char *name, ...); + typedef ir_function_signature *(builtin_builder::*image_prototype_ctr)(const glsl_type *image_type, + const char *intrinsic_name, + unsigned num_arguments, + unsigned flags); + enum image_function_flags { IMAGE_FUNCTION_EMIT_STUB = (1 << 0), IMAGE_FUNCTION_RETURNS_VOID = (1 << 1), IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE = (1 << 2), IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE = (1 << 3), IMAGE_FUNCTION_READ_ONLY = (1 << 4), - IMAGE_FUNCTION_WRITE_ONLY = (1 << 5) + IMAGE_FUNCTION_WRITE_ONLY = (1 << 5), + IMAGE_FUNCTION_AVAIL_ATOMIC = (1 << 6) }; /** @@ -469,6 +541,7 @@ private: */ void add_image_function(const char *name, const char *intrinsic_name, + image_prototype_ctr prototype, unsigned num_arguments, unsigned flags); @@ -525,29 +598,29 @@ private: B1(log) B1(exp2) B1(log2) - B1(sqrt) - B1(inversesqrt) - B1(abs) - B1(sign) - B1(floor) - B1(trunc) - B1(round) - B1(roundEven) - B1(ceil) - B1(fract) + BA1(sqrt) + BA1(inversesqrt) + BA1(abs) + BA1(sign) + BA1(floor) + BA1(trunc) + BA1(round) + BA1(roundEven) + BA1(ceil) + BA1(fract) B2(mod) - B1(modf) + BA1(modf) BA2(min) BA2(max) BA2(clamp) - B2(mix_lrp) + BA2(mix_lrp) ir_function_signature *_mix_sel(builtin_available_predicate avail, const glsl_type *val_type, const glsl_type *blend_type); - B2(step) - B2(smoothstep) - B1(isnan) - B1(isinf) + BA2(step) + BA2(smoothstep) + BA1(isnan) + BA1(isinf) B1(floatBitsToInt) B1(floatBitsToUint) B1(intBitsToFloat) @@ -562,24 +635,27 @@ private: ir_function_signature *_unpackSnorm4x8(builtin_available_predicate avail); ir_function_signature *_packHalf2x16(builtin_available_predicate avail); ir_function_signature *_unpackHalf2x16(builtin_available_predicate avail); - B1(length) - B1(distance); - B1(dot); - B1(cross); - B1(normalize); + ir_function_signature *_packDouble2x32(builtin_available_predicate avail); + ir_function_signature *_unpackDouble2x32(builtin_available_predicate avail); + + BA1(length) + BA1(distance); + BA1(dot); + BA1(cross); + BA1(normalize); B0(ftransform); - B1(faceforward); - B1(reflect); - B1(refract); - B1(matrixCompMult); - B1(outerProduct); - B0(determinant_mat2); - B0(determinant_mat3); - B0(determinant_mat4); - B0(inverse_mat2); - B0(inverse_mat3); - B0(inverse_mat4); - B1(transpose); + BA1(faceforward); + BA1(reflect); + BA1(refract); + BA1(matrixCompMult); + BA1(outerProduct); + BA1(determinant_mat2); + BA1(determinant_mat3); + BA1(determinant_mat4); + BA1(inverse_mat2); + BA1(inverse_mat3); + BA1(inverse_mat4); + BA1(transpose); BA1(lessThan); BA1(lessThanEqual); BA1(greaterThan); @@ -620,8 +696,9 @@ private: const glsl_type *stream_type); ir_function_signature *_EndStreamPrimitive(builtin_available_predicate avail, const glsl_type *stream_type); + B0(barrier) - B2(textureQueryLod); + BA2(textureQueryLod); B1(textureQueryLevels); B1(dFdx); B1(dFdy); @@ -643,9 +720,10 @@ private: B1(bitCount) B1(findLSB) B1(findMSB) - B1(fma) + BA1(fma) B2(ldexp) B2(frexp) + B2(dfrexp) B1(uaddCarry) B1(usubBorrow) B1(mulExtended) @@ -665,7 +743,12 @@ private: const char *intrinsic_name, unsigned num_arguments, unsigned flags); - ir_function_signature *_image(const glsl_type *image_type, + ir_function_signature *_image_size_prototype(const glsl_type *image_type, + const char *intrinsic_name, + unsigned num_arguments, + unsigned flags); + ir_function_signature *_image(image_prototype_ctr prototype, + const glsl_type *image_type, const char *intrinsic_name, unsigned num_arguments, unsigned flags); @@ -814,6 +897,42 @@ builtin_builder::create_builtins() _##NAME(glsl_type::vec4_type), \ NULL); +#define FD(NAME) \ + add_function(#NAME, \ + _##NAME(always_available, glsl_type::float_type), \ + _##NAME(always_available, glsl_type::vec2_type), \ + _##NAME(always_available, glsl_type::vec3_type), \ + _##NAME(always_available, glsl_type::vec4_type), \ + _##NAME(fp64, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec2_type), \ + _##NAME(fp64, glsl_type::dvec3_type), \ + _##NAME(fp64, glsl_type::dvec4_type), \ + NULL); + +#define FD130(NAME) \ + add_function(#NAME, \ + _##NAME(v130, glsl_type::float_type), \ + _##NAME(v130, glsl_type::vec2_type), \ + _##NAME(v130, glsl_type::vec3_type), \ + _##NAME(v130, glsl_type::vec4_type), \ + _##NAME(fp64, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec2_type), \ + _##NAME(fp64, glsl_type::dvec3_type), \ + _##NAME(fp64, glsl_type::dvec4_type), \ + NULL); + +#define FDGS5(NAME) \ + add_function(#NAME, \ + _##NAME(gpu_shader5, glsl_type::float_type), \ + _##NAME(gpu_shader5, glsl_type::vec2_type), \ + _##NAME(gpu_shader5, glsl_type::vec3_type), \ + _##NAME(gpu_shader5, glsl_type::vec4_type), \ + _##NAME(fp64, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec2_type), \ + _##NAME(fp64, glsl_type::dvec3_type), \ + _##NAME(fp64, glsl_type::dvec4_type), \ + NULL); + #define FI(NAME) \ add_function(#NAME, \ _##NAME(glsl_type::float_type), \ @@ -826,7 +945,23 @@ builtin_builder::create_builtins() _##NAME(glsl_type::ivec4_type), \ NULL); -#define FIU(NAME) \ +#define FID(NAME) \ + add_function(#NAME, \ + _##NAME(always_available, glsl_type::float_type), \ + _##NAME(always_available, glsl_type::vec2_type), \ + _##NAME(always_available, glsl_type::vec3_type), \ + _##NAME(always_available, glsl_type::vec4_type), \ + _##NAME(always_available, glsl_type::int_type), \ + _##NAME(always_available, glsl_type::ivec2_type), \ + _##NAME(always_available, glsl_type::ivec3_type), \ + _##NAME(always_available, glsl_type::ivec4_type), \ + _##NAME(fp64, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec2_type), \ + _##NAME(fp64, glsl_type::dvec3_type), \ + _##NAME(fp64, glsl_type::dvec4_type), \ + NULL); + +#define FIUD(NAME) \ add_function(#NAME, \ _##NAME(always_available, glsl_type::float_type), \ _##NAME(always_available, glsl_type::vec2_type), \ @@ -842,6 +977,10 @@ builtin_builder::create_builtins() _##NAME(v130, glsl_type::uvec2_type), \ _##NAME(v130, glsl_type::uvec3_type), \ _##NAME(v130, glsl_type::uvec4_type), \ + _##NAME(fp64, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec2_type), \ + _##NAME(fp64, glsl_type::dvec3_type), \ + _##NAME(fp64, glsl_type::dvec4_type), \ NULL); #define IU(NAME) \ @@ -857,7 +996,7 @@ builtin_builder::create_builtins() _##NAME(glsl_type::uvec4_type), \ NULL); -#define FIUB(NAME) \ +#define FIUBD(NAME) \ add_function(#NAME, \ _##NAME(always_available, glsl_type::float_type), \ _##NAME(always_available, glsl_type::vec2_type), \ @@ -878,9 +1017,14 @@ builtin_builder::create_builtins() _##NAME(always_available, glsl_type::bvec2_type), \ _##NAME(always_available, glsl_type::bvec3_type), \ _##NAME(always_available, glsl_type::bvec4_type), \ + \ + _##NAME(fp64, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec2_type), \ + _##NAME(fp64, glsl_type::dvec3_type), \ + _##NAME(fp64, glsl_type::dvec4_type), \ NULL); -#define FIU2_MIXED(NAME) \ +#define FIUD2_MIXED(NAME) \ add_function(#NAME, \ _##NAME(always_available, glsl_type::float_type, glsl_type::float_type), \ _##NAME(always_available, glsl_type::vec2_type, glsl_type::float_type), \ @@ -908,6 +1052,14 @@ builtin_builder::create_builtins() _##NAME(v130, glsl_type::uvec2_type, glsl_type::uvec2_type), \ _##NAME(v130, glsl_type::uvec3_type, glsl_type::uvec3_type), \ _##NAME(v130, glsl_type::uvec4_type, glsl_type::uvec4_type), \ + \ + _##NAME(fp64, glsl_type::double_type, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec2_type, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec3_type, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec4_type, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec2_type, glsl_type::dvec2_type), \ + _##NAME(fp64, glsl_type::dvec3_type, glsl_type::dvec3_type), \ + _##NAME(fp64, glsl_type::dvec4_type, glsl_type::dvec4_type), \ NULL); F(radians) @@ -940,16 +1092,16 @@ builtin_builder::create_builtins() F(log) F(exp2) F(log2) - F(sqrt) - F(inversesqrt) - FI(abs) - FI(sign) - F(floor) - F(trunc) - F(round) - F(roundEven) - F(ceil) - F(fract) + FD(sqrt) + FD(inversesqrt) + FID(abs) + FID(sign) + FD(floor) + FD(trunc) + FD(round) + FD(roundEven) + FD(ceil) + FD(fract) add_function("mod", _mod(glsl_type::float_type, glsl_type::float_type), @@ -960,29 +1112,52 @@ builtin_builder::create_builtins() _mod(glsl_type::vec2_type, glsl_type::vec2_type), _mod(glsl_type::vec3_type, glsl_type::vec3_type), _mod(glsl_type::vec4_type, glsl_type::vec4_type), + + _mod(glsl_type::double_type, glsl_type::double_type), + _mod(glsl_type::dvec2_type, glsl_type::double_type), + _mod(glsl_type::dvec3_type, glsl_type::double_type), + _mod(glsl_type::dvec4_type, glsl_type::double_type), + + _mod(glsl_type::dvec2_type, glsl_type::dvec2_type), + _mod(glsl_type::dvec3_type, glsl_type::dvec3_type), + _mod(glsl_type::dvec4_type, glsl_type::dvec4_type), NULL); - F(modf) + FD(modf) - FIU2_MIXED(min) - FIU2_MIXED(max) - FIU2_MIXED(clamp) + FIUD2_MIXED(min) + FIUD2_MIXED(max) + FIUD2_MIXED(clamp) add_function("mix", - _mix_lrp(glsl_type::float_type, glsl_type::float_type), - _mix_lrp(glsl_type::vec2_type, glsl_type::float_type), - _mix_lrp(glsl_type::vec3_type, glsl_type::float_type), - _mix_lrp(glsl_type::vec4_type, glsl_type::float_type), + _mix_lrp(always_available, glsl_type::float_type, glsl_type::float_type), + _mix_lrp(always_available, glsl_type::vec2_type, glsl_type::float_type), + _mix_lrp(always_available, glsl_type::vec3_type, glsl_type::float_type), + _mix_lrp(always_available, glsl_type::vec4_type, glsl_type::float_type), - _mix_lrp(glsl_type::vec2_type, glsl_type::vec2_type), - _mix_lrp(glsl_type::vec3_type, glsl_type::vec3_type), - _mix_lrp(glsl_type::vec4_type, glsl_type::vec4_type), + _mix_lrp(always_available, glsl_type::vec2_type, glsl_type::vec2_type), + _mix_lrp(always_available, glsl_type::vec3_type, glsl_type::vec3_type), + _mix_lrp(always_available, glsl_type::vec4_type, glsl_type::vec4_type), + + _mix_lrp(fp64, glsl_type::double_type, glsl_type::double_type), + _mix_lrp(fp64, glsl_type::dvec2_type, glsl_type::double_type), + _mix_lrp(fp64, glsl_type::dvec3_type, glsl_type::double_type), + _mix_lrp(fp64, glsl_type::dvec4_type, glsl_type::double_type), + + _mix_lrp(fp64, glsl_type::dvec2_type, glsl_type::dvec2_type), + _mix_lrp(fp64, glsl_type::dvec3_type, glsl_type::dvec3_type), + _mix_lrp(fp64, glsl_type::dvec4_type, glsl_type::dvec4_type), _mix_sel(v130, glsl_type::float_type, glsl_type::bool_type), _mix_sel(v130, glsl_type::vec2_type, glsl_type::bvec2_type), _mix_sel(v130, glsl_type::vec3_type, glsl_type::bvec3_type), _mix_sel(v130, glsl_type::vec4_type, glsl_type::bvec4_type), + _mix_sel(fp64, glsl_type::double_type, glsl_type::bool_type), + _mix_sel(fp64, glsl_type::dvec2_type, glsl_type::bvec2_type), + _mix_sel(fp64, glsl_type::dvec3_type, glsl_type::bvec3_type), + _mix_sel(fp64, glsl_type::dvec4_type, glsl_type::bvec4_type), + _mix_sel(shader_integer_mix, glsl_type::int_type, glsl_type::bool_type), _mix_sel(shader_integer_mix, glsl_type::ivec2_type, glsl_type::bvec2_type), _mix_sel(shader_integer_mix, glsl_type::ivec3_type, glsl_type::bvec3_type), @@ -1000,29 +1175,45 @@ builtin_builder::create_builtins() NULL); add_function("step", - _step(glsl_type::float_type, glsl_type::float_type), - _step(glsl_type::float_type, glsl_type::vec2_type), - _step(glsl_type::float_type, glsl_type::vec3_type), - _step(glsl_type::float_type, glsl_type::vec4_type), - - _step(glsl_type::vec2_type, glsl_type::vec2_type), - _step(glsl_type::vec3_type, glsl_type::vec3_type), - _step(glsl_type::vec4_type, glsl_type::vec4_type), + _step(always_available, glsl_type::float_type, glsl_type::float_type), + _step(always_available, glsl_type::float_type, glsl_type::vec2_type), + _step(always_available, glsl_type::float_type, glsl_type::vec3_type), + _step(always_available, glsl_type::float_type, glsl_type::vec4_type), + + _step(always_available, glsl_type::vec2_type, glsl_type::vec2_type), + _step(always_available, glsl_type::vec3_type, glsl_type::vec3_type), + _step(always_available, glsl_type::vec4_type, glsl_type::vec4_type), + _step(fp64, glsl_type::double_type, glsl_type::double_type), + _step(fp64, glsl_type::double_type, glsl_type::dvec2_type), + _step(fp64, glsl_type::double_type, glsl_type::dvec3_type), + _step(fp64, glsl_type::double_type, glsl_type::dvec4_type), + + _step(fp64, glsl_type::dvec2_type, glsl_type::dvec2_type), + _step(fp64, glsl_type::dvec3_type, glsl_type::dvec3_type), + _step(fp64, glsl_type::dvec4_type, glsl_type::dvec4_type), NULL); add_function("smoothstep", - _smoothstep(glsl_type::float_type, glsl_type::float_type), - _smoothstep(glsl_type::float_type, glsl_type::vec2_type), - _smoothstep(glsl_type::float_type, glsl_type::vec3_type), - _smoothstep(glsl_type::float_type, glsl_type::vec4_type), - - _smoothstep(glsl_type::vec2_type, glsl_type::vec2_type), - _smoothstep(glsl_type::vec3_type, glsl_type::vec3_type), - _smoothstep(glsl_type::vec4_type, glsl_type::vec4_type), + _smoothstep(always_available, glsl_type::float_type, glsl_type::float_type), + _smoothstep(always_available, glsl_type::float_type, glsl_type::vec2_type), + _smoothstep(always_available, glsl_type::float_type, glsl_type::vec3_type), + _smoothstep(always_available, glsl_type::float_type, glsl_type::vec4_type), + + _smoothstep(always_available, glsl_type::vec2_type, glsl_type::vec2_type), + _smoothstep(always_available, glsl_type::vec3_type, glsl_type::vec3_type), + _smoothstep(always_available, glsl_type::vec4_type, glsl_type::vec4_type), + _smoothstep(fp64, glsl_type::double_type, glsl_type::double_type), + _smoothstep(fp64, glsl_type::double_type, glsl_type::dvec2_type), + _smoothstep(fp64, glsl_type::double_type, glsl_type::dvec3_type), + _smoothstep(fp64, glsl_type::double_type, glsl_type::dvec4_type), + + _smoothstep(fp64, glsl_type::dvec2_type, glsl_type::dvec2_type), + _smoothstep(fp64, glsl_type::dvec3_type, glsl_type::dvec3_type), + _smoothstep(fp64, glsl_type::dvec4_type, glsl_type::dvec4_type), NULL); - F(isnan) - F(isinf) + FD130(isnan) + FD130(isinf) F(floatBitsToInt) F(floatBitsToUint) @@ -1041,76 +1232,114 @@ builtin_builder::create_builtins() add_function("packUnorm2x16", _packUnorm2x16(shader_packing_or_es3_or_gpu_shader5), NULL); add_function("packSnorm2x16", _packSnorm2x16(shader_packing_or_es3), NULL); - add_function("packUnorm4x8", _packUnorm4x8(shader_packing_or_gpu_shader5), NULL); - add_function("packSnorm4x8", _packSnorm4x8(shader_packing_or_gpu_shader5), NULL); + add_function("packUnorm4x8", _packUnorm4x8(shader_packing_or_es31_or_gpu_shader5), NULL); + add_function("packSnorm4x8", _packSnorm4x8(shader_packing_or_es31_or_gpu_shader5), NULL); add_function("unpackUnorm2x16", _unpackUnorm2x16(shader_packing_or_es3_or_gpu_shader5), NULL); add_function("unpackSnorm2x16", _unpackSnorm2x16(shader_packing_or_es3), NULL); - add_function("unpackUnorm4x8", _unpackUnorm4x8(shader_packing_or_gpu_shader5), NULL); - add_function("unpackSnorm4x8", _unpackSnorm4x8(shader_packing_or_gpu_shader5), NULL); + add_function("unpackUnorm4x8", _unpackUnorm4x8(shader_packing_or_es31_or_gpu_shader5), NULL); + add_function("unpackSnorm4x8", _unpackSnorm4x8(shader_packing_or_es31_or_gpu_shader5), NULL); add_function("packHalf2x16", _packHalf2x16(shader_packing_or_es3), NULL); add_function("unpackHalf2x16", _unpackHalf2x16(shader_packing_or_es3), NULL); + add_function("packDouble2x32", _packDouble2x32(fp64), NULL); + add_function("unpackDouble2x32", _unpackDouble2x32(fp64), NULL); - F(length) - F(distance) - F(dot) - add_function("cross", _cross(glsl_type::vec3_type), NULL); + FD(length) + FD(distance) + FD(dot) - F(normalize) + add_function("cross", _cross(always_available, glsl_type::vec3_type), + _cross(fp64, glsl_type::dvec3_type), NULL); + + FD(normalize) add_function("ftransform", _ftransform(), NULL); - F(faceforward) - F(reflect) - F(refract) + FD(faceforward) + FD(reflect) + FD(refract) // ... add_function("matrixCompMult", - _matrixCompMult(glsl_type::mat2_type), - _matrixCompMult(glsl_type::mat3_type), - _matrixCompMult(glsl_type::mat4_type), - _matrixCompMult(glsl_type::mat2x3_type), - _matrixCompMult(glsl_type::mat2x4_type), - _matrixCompMult(glsl_type::mat3x2_type), - _matrixCompMult(glsl_type::mat3x4_type), - _matrixCompMult(glsl_type::mat4x2_type), - _matrixCompMult(glsl_type::mat4x3_type), + _matrixCompMult(always_available, glsl_type::mat2_type), + _matrixCompMult(always_available, glsl_type::mat3_type), + _matrixCompMult(always_available, glsl_type::mat4_type), + _matrixCompMult(always_available, glsl_type::mat2x3_type), + _matrixCompMult(always_available, glsl_type::mat2x4_type), + _matrixCompMult(always_available, glsl_type::mat3x2_type), + _matrixCompMult(always_available, glsl_type::mat3x4_type), + _matrixCompMult(always_available, glsl_type::mat4x2_type), + _matrixCompMult(always_available, glsl_type::mat4x3_type), + _matrixCompMult(fp64, glsl_type::dmat2_type), + _matrixCompMult(fp64, glsl_type::dmat3_type), + _matrixCompMult(fp64, glsl_type::dmat4_type), + _matrixCompMult(fp64, glsl_type::dmat2x3_type), + _matrixCompMult(fp64, glsl_type::dmat2x4_type), + _matrixCompMult(fp64, glsl_type::dmat3x2_type), + _matrixCompMult(fp64, glsl_type::dmat3x4_type), + _matrixCompMult(fp64, glsl_type::dmat4x2_type), + _matrixCompMult(fp64, glsl_type::dmat4x3_type), NULL); add_function("outerProduct", - _outerProduct(glsl_type::mat2_type), - _outerProduct(glsl_type::mat3_type), - _outerProduct(glsl_type::mat4_type), - _outerProduct(glsl_type::mat2x3_type), - _outerProduct(glsl_type::mat2x4_type), - _outerProduct(glsl_type::mat3x2_type), - _outerProduct(glsl_type::mat3x4_type), - _outerProduct(glsl_type::mat4x2_type), - _outerProduct(glsl_type::mat4x3_type), + _outerProduct(v120, glsl_type::mat2_type), + _outerProduct(v120, glsl_type::mat3_type), + _outerProduct(v120, glsl_type::mat4_type), + _outerProduct(v120, glsl_type::mat2x3_type), + _outerProduct(v120, glsl_type::mat2x4_type), + _outerProduct(v120, glsl_type::mat3x2_type), + _outerProduct(v120, glsl_type::mat3x4_type), + _outerProduct(v120, glsl_type::mat4x2_type), + _outerProduct(v120, glsl_type::mat4x3_type), + _outerProduct(fp64, glsl_type::dmat2_type), + _outerProduct(fp64, glsl_type::dmat3_type), + _outerProduct(fp64, glsl_type::dmat4_type), + _outerProduct(fp64, glsl_type::dmat2x3_type), + _outerProduct(fp64, glsl_type::dmat2x4_type), + _outerProduct(fp64, glsl_type::dmat3x2_type), + _outerProduct(fp64, glsl_type::dmat3x4_type), + _outerProduct(fp64, glsl_type::dmat4x2_type), + _outerProduct(fp64, glsl_type::dmat4x3_type), NULL); add_function("determinant", - _determinant_mat2(), - _determinant_mat3(), - _determinant_mat4(), + _determinant_mat2(v120, glsl_type::mat2_type), + _determinant_mat3(v120, glsl_type::mat3_type), + _determinant_mat4(v120, glsl_type::mat4_type), + _determinant_mat2(fp64, glsl_type::dmat2_type), + _determinant_mat3(fp64, glsl_type::dmat3_type), + _determinant_mat4(fp64, glsl_type::dmat4_type), + NULL); add_function("inverse", - _inverse_mat2(), - _inverse_mat3(), - _inverse_mat4(), + _inverse_mat2(v120, glsl_type::mat2_type), + _inverse_mat3(v120, glsl_type::mat3_type), + _inverse_mat4(v120, glsl_type::mat4_type), + _inverse_mat2(fp64, glsl_type::dmat2_type), + _inverse_mat3(fp64, glsl_type::dmat3_type), + _inverse_mat4(fp64, glsl_type::dmat4_type), NULL); add_function("transpose", - _transpose(glsl_type::mat2_type), - _transpose(glsl_type::mat3_type), - _transpose(glsl_type::mat4_type), - _transpose(glsl_type::mat2x3_type), - _transpose(glsl_type::mat2x4_type), - _transpose(glsl_type::mat3x2_type), - _transpose(glsl_type::mat3x4_type), - _transpose(glsl_type::mat4x2_type), - _transpose(glsl_type::mat4x3_type), + _transpose(v120, glsl_type::mat2_type), + _transpose(v120, glsl_type::mat3_type), + _transpose(v120, glsl_type::mat4_type), + _transpose(v120, glsl_type::mat2x3_type), + _transpose(v120, glsl_type::mat2x4_type), + _transpose(v120, glsl_type::mat3x2_type), + _transpose(v120, glsl_type::mat3x4_type), + _transpose(v120, glsl_type::mat4x2_type), + _transpose(v120, glsl_type::mat4x3_type), + _transpose(fp64, glsl_type::dmat2_type), + _transpose(fp64, glsl_type::dmat3_type), + _transpose(fp64, glsl_type::dmat4_type), + _transpose(fp64, glsl_type::dmat2x3_type), + _transpose(fp64, glsl_type::dmat2x4_type), + _transpose(fp64, glsl_type::dmat3x2_type), + _transpose(fp64, glsl_type::dmat3x4_type), + _transpose(fp64, glsl_type::dmat4x2_type), + _transpose(fp64, glsl_type::dmat4x3_type), NULL); - FIU(lessThan) - FIU(lessThanEqual) - FIU(greaterThan) - FIU(greaterThanEqual) - FIUB(notEqual) - FIUB(equal) + FIUD(lessThan) + FIUD(lessThanEqual) + FIUD(greaterThan) + FIUD(greaterThanEqual) + FIUBD(notEqual) + FIUBD(equal) add_function("any", _any(glsl_type::bvec2_type), @@ -1178,9 +1407,9 @@ builtin_builder::create_builtins() _textureSize(texture_multisample, glsl_type::ivec2_type, glsl_type::isampler2DMS_type), _textureSize(texture_multisample, glsl_type::ivec2_type, glsl_type::usampler2DMS_type), - _textureSize(texture_multisample, glsl_type::ivec3_type, glsl_type::sampler2DMSArray_type), - _textureSize(texture_multisample, glsl_type::ivec3_type, glsl_type::isampler2DMSArray_type), - _textureSize(texture_multisample, glsl_type::ivec3_type, glsl_type::usampler2DMSArray_type), + _textureSize(texture_multisample_array, glsl_type::ivec3_type, glsl_type::sampler2DMSArray_type), + _textureSize(texture_multisample_array, glsl_type::ivec3_type, glsl_type::isampler2DMSArray_type), + _textureSize(texture_multisample_array, glsl_type::ivec3_type, glsl_type::usampler2DMSArray_type), NULL); add_function("texture", @@ -1443,9 +1672,9 @@ builtin_builder::create_builtins() _texelFetch(texture_multisample, glsl_type::ivec4_type, glsl_type::isampler2DMS_type, glsl_type::ivec2_type), _texelFetch(texture_multisample, glsl_type::uvec4_type, glsl_type::usampler2DMS_type, glsl_type::ivec2_type), - _texelFetch(texture_multisample, glsl_type::vec4_type, glsl_type::sampler2DMSArray_type, glsl_type::ivec3_type), - _texelFetch(texture_multisample, glsl_type::ivec4_type, glsl_type::isampler2DMSArray_type, glsl_type::ivec3_type), - _texelFetch(texture_multisample, glsl_type::uvec4_type, glsl_type::usampler2DMSArray_type, glsl_type::ivec3_type), + _texelFetch(texture_multisample_array, glsl_type::vec4_type, glsl_type::sampler2DMSArray_type, glsl_type::ivec3_type), + _texelFetch(texture_multisample_array, glsl_type::ivec4_type, glsl_type::isampler2DMSArray_type, glsl_type::ivec3_type), + _texelFetch(texture_multisample_array, glsl_type::uvec4_type, glsl_type::usampler2DMSArray_type, glsl_type::ivec3_type), NULL); add_function("texelFetchOffset", @@ -1752,42 +1981,80 @@ builtin_builder::create_builtins() _EndStreamPrimitive(gs_streams, glsl_type::uint_type), _EndStreamPrimitive(gs_streams, glsl_type::int_type), NULL); + add_function("barrier", _barrier(), NULL); add_function("textureQueryLOD", - _textureQueryLod(glsl_type::sampler1D_type, glsl_type::float_type), - _textureQueryLod(glsl_type::isampler1D_type, glsl_type::float_type), - _textureQueryLod(glsl_type::usampler1D_type, glsl_type::float_type), - - _textureQueryLod(glsl_type::sampler2D_type, glsl_type::vec2_type), - _textureQueryLod(glsl_type::isampler2D_type, glsl_type::vec2_type), - _textureQueryLod(glsl_type::usampler2D_type, glsl_type::vec2_type), - - _textureQueryLod(glsl_type::sampler3D_type, glsl_type::vec3_type), - _textureQueryLod(glsl_type::isampler3D_type, glsl_type::vec3_type), - _textureQueryLod(glsl_type::usampler3D_type, glsl_type::vec3_type), - - _textureQueryLod(glsl_type::samplerCube_type, glsl_type::vec3_type), - _textureQueryLod(glsl_type::isamplerCube_type, glsl_type::vec3_type), - _textureQueryLod(glsl_type::usamplerCube_type, glsl_type::vec3_type), - - _textureQueryLod(glsl_type::sampler1DArray_type, glsl_type::float_type), - _textureQueryLod(glsl_type::isampler1DArray_type, glsl_type::float_type), - _textureQueryLod(glsl_type::usampler1DArray_type, glsl_type::float_type), - - _textureQueryLod(glsl_type::sampler2DArray_type, glsl_type::vec2_type), - _textureQueryLod(glsl_type::isampler2DArray_type, glsl_type::vec2_type), - _textureQueryLod(glsl_type::usampler2DArray_type, glsl_type::vec2_type), - - _textureQueryLod(glsl_type::samplerCubeArray_type, glsl_type::vec3_type), - _textureQueryLod(glsl_type::isamplerCubeArray_type, glsl_type::vec3_type), - _textureQueryLod(glsl_type::usamplerCubeArray_type, glsl_type::vec3_type), - - _textureQueryLod(glsl_type::sampler1DShadow_type, glsl_type::float_type), - _textureQueryLod(glsl_type::sampler2DShadow_type, glsl_type::vec2_type), - _textureQueryLod(glsl_type::samplerCubeShadow_type, glsl_type::vec3_type), - _textureQueryLod(glsl_type::sampler1DArrayShadow_type, glsl_type::float_type), - _textureQueryLod(glsl_type::sampler2DArrayShadow_type, glsl_type::vec2_type), - _textureQueryLod(glsl_type::samplerCubeArrayShadow_type, glsl_type::vec3_type), + _textureQueryLod(texture_query_lod, glsl_type::sampler1D_type, glsl_type::float_type), + _textureQueryLod(texture_query_lod, glsl_type::isampler1D_type, glsl_type::float_type), + _textureQueryLod(texture_query_lod, glsl_type::usampler1D_type, glsl_type::float_type), + + _textureQueryLod(texture_query_lod, glsl_type::sampler2D_type, glsl_type::vec2_type), + _textureQueryLod(texture_query_lod, glsl_type::isampler2D_type, glsl_type::vec2_type), + _textureQueryLod(texture_query_lod, glsl_type::usampler2D_type, glsl_type::vec2_type), + + _textureQueryLod(texture_query_lod, glsl_type::sampler3D_type, glsl_type::vec3_type), + _textureQueryLod(texture_query_lod, glsl_type::isampler3D_type, glsl_type::vec3_type), + _textureQueryLod(texture_query_lod, glsl_type::usampler3D_type, glsl_type::vec3_type), + + _textureQueryLod(texture_query_lod, glsl_type::samplerCube_type, glsl_type::vec3_type), + _textureQueryLod(texture_query_lod, glsl_type::isamplerCube_type, glsl_type::vec3_type), + _textureQueryLod(texture_query_lod, glsl_type::usamplerCube_type, glsl_type::vec3_type), + + _textureQueryLod(texture_query_lod, glsl_type::sampler1DArray_type, glsl_type::float_type), + _textureQueryLod(texture_query_lod, glsl_type::isampler1DArray_type, glsl_type::float_type), + _textureQueryLod(texture_query_lod, glsl_type::usampler1DArray_type, glsl_type::float_type), + + _textureQueryLod(texture_query_lod, glsl_type::sampler2DArray_type, glsl_type::vec2_type), + _textureQueryLod(texture_query_lod, glsl_type::isampler2DArray_type, glsl_type::vec2_type), + _textureQueryLod(texture_query_lod, glsl_type::usampler2DArray_type, glsl_type::vec2_type), + + _textureQueryLod(texture_query_lod, glsl_type::samplerCubeArray_type, glsl_type::vec3_type), + _textureQueryLod(texture_query_lod, glsl_type::isamplerCubeArray_type, glsl_type::vec3_type), + _textureQueryLod(texture_query_lod, glsl_type::usamplerCubeArray_type, glsl_type::vec3_type), + + _textureQueryLod(texture_query_lod, glsl_type::sampler1DShadow_type, glsl_type::float_type), + _textureQueryLod(texture_query_lod, glsl_type::sampler2DShadow_type, glsl_type::vec2_type), + _textureQueryLod(texture_query_lod, glsl_type::samplerCubeShadow_type, glsl_type::vec3_type), + _textureQueryLod(texture_query_lod, glsl_type::sampler1DArrayShadow_type, glsl_type::float_type), + _textureQueryLod(texture_query_lod, glsl_type::sampler2DArrayShadow_type, glsl_type::vec2_type), + _textureQueryLod(texture_query_lod, glsl_type::samplerCubeArrayShadow_type, glsl_type::vec3_type), + NULL); + + add_function("textureQueryLod", + _textureQueryLod(v400_fs_only, glsl_type::sampler1D_type, glsl_type::float_type), + _textureQueryLod(v400_fs_only, glsl_type::isampler1D_type, glsl_type::float_type), + _textureQueryLod(v400_fs_only, glsl_type::usampler1D_type, glsl_type::float_type), + + _textureQueryLod(v400_fs_only, glsl_type::sampler2D_type, glsl_type::vec2_type), + _textureQueryLod(v400_fs_only, glsl_type::isampler2D_type, glsl_type::vec2_type), + _textureQueryLod(v400_fs_only, glsl_type::usampler2D_type, glsl_type::vec2_type), + + _textureQueryLod(v400_fs_only, glsl_type::sampler3D_type, glsl_type::vec3_type), + _textureQueryLod(v400_fs_only, glsl_type::isampler3D_type, glsl_type::vec3_type), + _textureQueryLod(v400_fs_only, glsl_type::usampler3D_type, glsl_type::vec3_type), + + _textureQueryLod(v400_fs_only, glsl_type::samplerCube_type, glsl_type::vec3_type), + _textureQueryLod(v400_fs_only, glsl_type::isamplerCube_type, glsl_type::vec3_type), + _textureQueryLod(v400_fs_only, glsl_type::usamplerCube_type, glsl_type::vec3_type), + + _textureQueryLod(v400_fs_only, glsl_type::sampler1DArray_type, glsl_type::float_type), + _textureQueryLod(v400_fs_only, glsl_type::isampler1DArray_type, glsl_type::float_type), + _textureQueryLod(v400_fs_only, glsl_type::usampler1DArray_type, glsl_type::float_type), + + _textureQueryLod(v400_fs_only, glsl_type::sampler2DArray_type, glsl_type::vec2_type), + _textureQueryLod(v400_fs_only, glsl_type::isampler2DArray_type, glsl_type::vec2_type), + _textureQueryLod(v400_fs_only, glsl_type::usampler2DArray_type, glsl_type::vec2_type), + + _textureQueryLod(v400_fs_only, glsl_type::samplerCubeArray_type, glsl_type::vec3_type), + _textureQueryLod(v400_fs_only, glsl_type::isamplerCubeArray_type, glsl_type::vec3_type), + _textureQueryLod(v400_fs_only, glsl_type::usamplerCubeArray_type, glsl_type::vec3_type), + + _textureQueryLod(v400_fs_only, glsl_type::sampler1DShadow_type, glsl_type::float_type), + _textureQueryLod(v400_fs_only, glsl_type::sampler2DShadow_type, glsl_type::vec2_type), + _textureQueryLod(v400_fs_only, glsl_type::samplerCubeShadow_type, glsl_type::vec3_type), + _textureQueryLod(v400_fs_only, glsl_type::sampler1DArrayShadow_type, glsl_type::float_type), + _textureQueryLod(v400_fs_only, glsl_type::sampler2DArrayShadow_type, glsl_type::vec2_type), + _textureQueryLod(v400_fs_only, glsl_type::samplerCubeArrayShadow_type, glsl_type::vec3_type), NULL); add_function("textureQueryLevels", @@ -1882,8 +2149,8 @@ builtin_builder::create_builtins() NULL); add_function("texture2DProjLod", - _texture(ir_txl, v110_lod, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT), - _texture(ir_txl, v110_lod, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txl, lod_exists_in_stage, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_txl, lod_exists_in_stage, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT), NULL); add_function("texture3D", @@ -1910,7 +2177,7 @@ builtin_builder::create_builtins() NULL); add_function("textureCubeLod", - _texture(ir_txl, v110_lod, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type), + _texture(ir_txl, lod_exists_in_stage, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type), NULL); add_function("texture2DRect", @@ -2044,61 +2311,69 @@ builtin_builder::create_builtins() NULL); add_function("textureGather", - _texture(ir_tg4, texture_gather, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type), - _texture(ir_tg4, texture_gather, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type), - _texture(ir_tg4, texture_gather, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type), + _texture(ir_tg4, texture_gather_or_es31, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type), + _texture(ir_tg4, texture_gather_or_es31, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type), + _texture(ir_tg4, texture_gather_or_es31, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type), _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type), _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type), _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type), - _texture(ir_tg4, texture_gather, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type), - _texture(ir_tg4, texture_gather, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type), - _texture(ir_tg4, texture_gather, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type), + _texture(ir_tg4, texture_gather_or_es31, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type), + _texture(ir_tg4, texture_gather_or_es31, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type), + _texture(ir_tg4, texture_gather_or_es31, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type), - _texture(ir_tg4, texture_gather, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type), - _texture(ir_tg4, texture_gather, glsl_type::ivec4_type, glsl_type::isamplerCube_type, glsl_type::vec3_type), - _texture(ir_tg4, texture_gather, glsl_type::uvec4_type, glsl_type::usamplerCube_type, glsl_type::vec3_type), + _texture(ir_tg4, texture_gather_or_es31, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type), + _texture(ir_tg4, texture_gather_or_es31, glsl_type::ivec4_type, glsl_type::isamplerCube_type, glsl_type::vec3_type), + _texture(ir_tg4, texture_gather_or_es31, glsl_type::uvec4_type, glsl_type::usamplerCube_type, glsl_type::vec3_type), _texture(ir_tg4, texture_gather, glsl_type::vec4_type, glsl_type::samplerCubeArray_type, glsl_type::vec4_type), _texture(ir_tg4, texture_gather, glsl_type::ivec4_type, glsl_type::isamplerCubeArray_type, glsl_type::vec4_type), _texture(ir_tg4, texture_gather, glsl_type::uvec4_type, glsl_type::usamplerCubeArray_type, glsl_type::vec4_type), - _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_COMPONENT), _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type, TEX_COMPONENT), _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type, TEX_COMPONENT), _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type, TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type, TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isamplerCube_type, glsl_type::vec3_type, TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usamplerCube_type, glsl_type::vec3_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::ivec4_type, glsl_type::isamplerCube_type, glsl_type::vec3_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::uvec4_type, glsl_type::usamplerCube_type, glsl_type::vec3_type, TEX_COMPONENT), _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::samplerCubeArray_type, glsl_type::vec4_type, TEX_COMPONENT), _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isamplerCubeArray_type, glsl_type::vec4_type, TEX_COMPONENT), _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usamplerCubeArray_type, glsl_type::vec4_type, TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec2_type), - _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec3_type), - _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::samplerCubeShadow_type, glsl_type::vec3_type), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec2_type), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::vec4_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec3_type), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::vec4_type, glsl_type::samplerCubeShadow_type, glsl_type::vec3_type), _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::samplerCubeArrayShadow_type, glsl_type::vec4_type), _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec2_type), NULL); add_function("textureGatherOffset", - _texture(ir_tg4, texture_gather_only, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET), - _texture(ir_tg4, texture_gather_only, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET), - _texture(ir_tg4, texture_gather_only, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_tg4, texture_gather_only_or_es31, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_tg4, texture_gather_only_or_es31, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_tg4, texture_gather_only_or_es31, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + + _texture(ir_tg4, texture_gather_only_or_es31, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_tg4, texture_gather_only_or_es31, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_tg4, texture_gather_only_or_es31, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), - _texture(ir_tg4, texture_gather_only, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), - _texture(ir_tg4, texture_gather_only, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), - _texture(ir_tg4, texture_gather_only, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_tg4, es31, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET | TEX_COMPONENT), + _texture(ir_tg4, es31, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET | TEX_COMPONENT), + _texture(ir_tg4, es31, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET | TEX_COMPONENT), + + _texture(ir_tg4, es31, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET | TEX_COMPONENT), + _texture(ir_tg4, es31, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET | TEX_COMPONENT), + _texture(ir_tg4, es31, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET | TEX_COMPONENT), _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), @@ -2127,6 +2402,9 @@ builtin_builder::create_builtins() _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST), _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), + + _texture(ir_tg4, es31, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_tg4, es31, glsl_type::vec4_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec3_type, TEX_OFFSET), NULL); add_function("textureGatherOffsets", @@ -2179,13 +2457,17 @@ builtin_builder::create_builtins() IU(bitCount) IU(findLSB) IU(findMSB) - F(fma) + FDGS5(fma) add_function("ldexp", _ldexp(glsl_type::float_type, glsl_type::int_type), _ldexp(glsl_type::vec2_type, glsl_type::ivec2_type), _ldexp(glsl_type::vec3_type, glsl_type::ivec3_type), _ldexp(glsl_type::vec4_type, glsl_type::ivec4_type), + _ldexp(glsl_type::double_type, glsl_type::int_type), + _ldexp(glsl_type::dvec2_type, glsl_type::ivec2_type), + _ldexp(glsl_type::dvec3_type, glsl_type::ivec3_type), + _ldexp(glsl_type::dvec4_type, glsl_type::ivec4_type), NULL); add_function("frexp", @@ -2193,6 +2475,10 @@ builtin_builder::create_builtins() _frexp(glsl_type::vec2_type, glsl_type::ivec2_type), _frexp(glsl_type::vec3_type, glsl_type::ivec3_type), _frexp(glsl_type::vec4_type, glsl_type::ivec4_type), + _dfrexp(glsl_type::double_type, glsl_type::int_type), + _dfrexp(glsl_type::dvec2_type, glsl_type::ivec2_type), + _dfrexp(glsl_type::dvec3_type, glsl_type::ivec3_type), + _dfrexp(glsl_type::dvec4_type, glsl_type::ivec4_type), NULL); add_function("uaddCarry", _uaddCarry(glsl_type::uint_type), @@ -2309,8 +2595,8 @@ builtin_builder::create_builtins() #undef F #undef FI -#undef FIU -#undef FIUB +#undef FIUD +#undef FIUBD #undef FIU2_MIXED } @@ -2343,6 +2629,7 @@ builtin_builder::add_function(const char *name, ...) void builtin_builder::add_image_function(const char *name, const char *intrinsic_name, + image_prototype_ctr prototype, unsigned num_arguments, unsigned flags) { @@ -2381,12 +2668,13 @@ builtin_builder::add_image_function(const char *name, glsl_type::uimage2DMS_type, glsl_type::uimage2DMSArray_type }; + ir_function *f = new(mem_ctx) ir_function(name); - for (unsigned i = 0; i < Elements(types); ++i) { + for (unsigned i = 0; i < ARRAY_SIZE(types); ++i) { if (types[i]->sampler_type != GLSL_TYPE_FLOAT || (flags & IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE)) - f->add_signature(_image(types[i], intrinsic_name, + f->add_signature(_image(prototype, types[i], intrinsic_name, num_arguments, flags)); } @@ -2399,43 +2687,60 @@ builtin_builder::add_image_functions(bool glsl) const unsigned flags = (glsl ? IMAGE_FUNCTION_EMIT_STUB : 0); add_image_function(glsl ? "imageLoad" : "__intrinsic_image_load", - "__intrinsic_image_load", 0, - (flags | IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE | + "__intrinsic_image_load", + &builtin_builder::_image_prototype, 0, + (flags | IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE | IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE | IMAGE_FUNCTION_READ_ONLY)); add_image_function(glsl ? "imageStore" : "__intrinsic_image_store", - "__intrinsic_image_store", 1, + "__intrinsic_image_store", + &builtin_builder::_image_prototype, 1, (flags | IMAGE_FUNCTION_RETURNS_VOID | IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE | IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE | IMAGE_FUNCTION_WRITE_ONLY)); + const unsigned atom_flags = flags | IMAGE_FUNCTION_AVAIL_ATOMIC; + add_image_function(glsl ? "imageAtomicAdd" : "__intrinsic_image_atomic_add", - "__intrinsic_image_atomic_add", 1, flags); + "__intrinsic_image_atomic_add", + &builtin_builder::_image_prototype, 1, atom_flags); add_image_function(glsl ? "imageAtomicMin" : "__intrinsic_image_atomic_min", - "__intrinsic_image_atomic_min", 1, flags); + "__intrinsic_image_atomic_min", + &builtin_builder::_image_prototype, 1, atom_flags); add_image_function(glsl ? "imageAtomicMax" : "__intrinsic_image_atomic_max", - "__intrinsic_image_atomic_max", 1, flags); + "__intrinsic_image_atomic_max", + &builtin_builder::_image_prototype, 1, atom_flags); add_image_function(glsl ? "imageAtomicAnd" : "__intrinsic_image_atomic_and", - "__intrinsic_image_atomic_and", 1, flags); + "__intrinsic_image_atomic_and", + &builtin_builder::_image_prototype, 1, atom_flags); add_image_function(glsl ? "imageAtomicOr" : "__intrinsic_image_atomic_or", - "__intrinsic_image_atomic_or", 1, flags); + "__intrinsic_image_atomic_or", + &builtin_builder::_image_prototype, 1, atom_flags); add_image_function(glsl ? "imageAtomicXor" : "__intrinsic_image_atomic_xor", - "__intrinsic_image_atomic_xor", 1, flags); + "__intrinsic_image_atomic_xor", + &builtin_builder::_image_prototype, 1, atom_flags); add_image_function((glsl ? "imageAtomicExchange" : "__intrinsic_image_atomic_exchange"), - "__intrinsic_image_atomic_exchange", 1, flags); + "__intrinsic_image_atomic_exchange", + &builtin_builder::_image_prototype, 1, atom_flags); add_image_function((glsl ? "imageAtomicCompSwap" : "__intrinsic_image_atomic_comp_swap"), - "__intrinsic_image_atomic_comp_swap", 2, flags); + "__intrinsic_image_atomic_comp_swap", + &builtin_builder::_image_prototype, 2, atom_flags); + + add_image_function(glsl ? "imageSize" : "__intrinsic_image_size", + "__intrinsic_image_size", + &builtin_builder::_image_size_prototype, 1, + flags | IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE); } ir_variable * @@ -2468,12 +2773,20 @@ builtin_builder::imm(unsigned u, unsigned vector_elements) return new(mem_ctx) ir_constant(u, vector_elements); } +ir_constant * +builtin_builder::imm(double d, unsigned vector_elements) +{ + return new(mem_ctx) ir_constant(d, vector_elements); +} + ir_constant * builtin_builder::imm(const glsl_type *type, const ir_constant_data &data) { return new(mem_ctx) ir_constant(type, &data); } +#define IMM_FP(type, val) (type->base_type == GLSL_TYPE_DOUBLE) ? imm(val) : imm((float)val) + ir_dereference_variable * builtin_builder::var_ref(ir_variable *var) { @@ -2549,6 +2862,13 @@ builtin_builder::_##NAME(const glsl_type *type) \ return unop(&AVAIL, OPCODE, type, type); \ } +#define UNOPA(NAME, OPCODE) \ +ir_function_signature * \ +builtin_builder::_##NAME(builtin_available_predicate avail, const glsl_type *type) \ +{ \ + return unop(avail, OPCODE, type, type); \ +} + ir_function_signature * builtin_builder::binop(ir_expression_operation opcode, builtin_available_predicate avail, @@ -2684,11 +3004,7 @@ builtin_builder::_atan2(const glsl_type *type) ir_factory outer_then(&outer_if->then_instructions, mem_ctx); /* Then...call atan(y/x) */ - ir_variable *y_over_x = outer_then.make_temp(glsl_type::float_type, "y_over_x"); - outer_then.emit(assign(y_over_x, div(y, x))); - outer_then.emit(assign(r, mul(y_over_x, rsq(add(mul(y_over_x, y_over_x), - imm(1.0f)))))); - outer_then.emit(assign(r, asin_expr(r))); + do_atan(body, glsl_type::float_type, r, div(y, x)); /* ...and fix it up: */ ir_if *inner_if = new(mem_ctx) ir_if(less(x, imm(0.0f))); @@ -2711,17 +3027,65 @@ builtin_builder::_atan2(const glsl_type *type) return sig; } +void +builtin_builder::do_atan(ir_factory &body, const glsl_type *type, ir_variable *res, operand y_over_x) +{ + /* + * range-reduction, first step: + * + * / y_over_x if |y_over_x| <= 1.0; + * x = < + * \ 1.0 / y_over_x otherwise + */ + ir_variable *x = body.make_temp(type, "atan_x"); + body.emit(assign(x, div(min2(abs(y_over_x), + imm(1.0f)), + max2(abs(y_over_x), + imm(1.0f))))); + + /* + * approximate atan by evaluating polynomial: + * + * x * 0.9999793128310355 - x^3 * 0.3326756418091246 + + * x^5 * 0.1938924977115610 - x^7 * 0.1173503194786851 + + * x^9 * 0.0536813784310406 - x^11 * 0.0121323213173444 + */ + ir_variable *tmp = body.make_temp(type, "atan_tmp"); + body.emit(assign(tmp, mul(x, x))); + body.emit(assign(tmp, mul(add(mul(sub(mul(add(mul(sub(mul(add(mul(imm(-0.0121323213173444f), + tmp), + imm(0.0536813784310406f)), + tmp), + imm(0.1173503194786851f)), + tmp), + imm(0.1938924977115610f)), + tmp), + imm(0.3326756418091246f)), + tmp), + imm(0.9999793128310355f)), + x))); + + /* range-reduction fixup */ + body.emit(assign(tmp, add(tmp, + mul(b2f(greater(abs(y_over_x), + imm(1.0f, type->components()))), + add(mul(tmp, + imm(-2.0f)), + imm(M_PI_2f)))))); + + /* sign fixup */ + body.emit(assign(res, mul(tmp, sign(y_over_x)))); +} + ir_function_signature * builtin_builder::_atan(const glsl_type *type) { ir_variable *y_over_x = in_var(type, "y_over_x"); MAKE_SIG(type, always_available, 1, y_over_x); - ir_variable *t = body.make_temp(type, "t"); - body.emit(assign(t, mul(y_over_x, rsq(add(mul(y_over_x, y_over_x), - imm(1.0f)))))); - - body.emit(ret(asin_expr(t))); + ir_variable *tmp = body.make_temp(type, "tmp"); + do_atan(body, type, tmp, y_over_x); + body.emit(ret(tmp)); return sig; } @@ -2810,19 +3174,19 @@ UNOP(exp, ir_unop_exp, always_available) UNOP(log, ir_unop_log, always_available) UNOP(exp2, ir_unop_exp2, always_available) UNOP(log2, ir_unop_log2, always_available) -UNOP(sqrt, ir_unop_sqrt, always_available) -UNOP(inversesqrt, ir_unop_rsq, always_available) +UNOPA(sqrt, ir_unop_sqrt) +UNOPA(inversesqrt, ir_unop_rsq) /** @} */ -UNOP(abs, ir_unop_abs, always_available) -UNOP(sign, ir_unop_sign, always_available) -UNOP(floor, ir_unop_floor, always_available) -UNOP(trunc, ir_unop_trunc, v130) -UNOP(round, ir_unop_round_even, always_available) -UNOP(roundEven, ir_unop_round_even, always_available) -UNOP(ceil, ir_unop_ceil, always_available) -UNOP(fract, ir_unop_fract, always_available) +UNOPA(abs, ir_unop_abs) +UNOPA(sign, ir_unop_sign) +UNOPA(floor, ir_unop_floor) +UNOPA(trunc, ir_unop_trunc) +UNOPA(round, ir_unop_round_even) +UNOPA(roundEven, ir_unop_round_even) +UNOPA(ceil, ir_unop_ceil) +UNOPA(fract, ir_unop_fract) ir_function_signature * builtin_builder::_mod(const glsl_type *x_type, const glsl_type *y_type) @@ -2831,11 +3195,11 @@ builtin_builder::_mod(const glsl_type *x_type, const glsl_type *y_type) } ir_function_signature * -builtin_builder::_modf(const glsl_type *type) +builtin_builder::_modf(builtin_available_predicate avail, const glsl_type *type) { ir_variable *x = in_var(type, "x"); ir_variable *i = out_var(type, "i"); - MAKE_SIG(type, v130, 2, x, i); + MAKE_SIG(type, avail, 2, x, i); ir_variable *t = body.make_temp(type, "t"); body.emit(assign(t, expr(ir_unop_trunc, x))); @@ -2874,12 +3238,12 @@ builtin_builder::_clamp(builtin_available_predicate avail, } ir_function_signature * -builtin_builder::_mix_lrp(const glsl_type *val_type, const glsl_type *blend_type) +builtin_builder::_mix_lrp(builtin_available_predicate avail, const glsl_type *val_type, const glsl_type *blend_type) { ir_variable *x = in_var(val_type, "x"); ir_variable *y = in_var(val_type, "y"); ir_variable *a = in_var(blend_type, "a"); - MAKE_SIG(val_type, always_available, 3, x, y, a); + MAKE_SIG(val_type, avail, 3, x, y, a); body.emit(ret(lrp(x, y, a))); @@ -2909,26 +3273,37 @@ builtin_builder::_mix_sel(builtin_available_predicate avail, } ir_function_signature * -builtin_builder::_step(const glsl_type *edge_type, const glsl_type *x_type) +builtin_builder::_step(builtin_available_predicate avail, const glsl_type *edge_type, const glsl_type *x_type) { ir_variable *edge = in_var(edge_type, "edge"); ir_variable *x = in_var(x_type, "x"); - MAKE_SIG(x_type, always_available, 2, edge, x); + MAKE_SIG(x_type, avail, 2, edge, x); ir_variable *t = body.make_temp(x_type, "t"); if (x_type->vector_elements == 1) { /* Both are floats */ - body.emit(assign(t, b2f(gequal(x, edge)))); + if (edge_type->base_type == GLSL_TYPE_DOUBLE) + body.emit(assign(t, f2d(b2f(gequal(x, edge))))); + else + body.emit(assign(t, b2f(gequal(x, edge)))); } else if (edge_type->vector_elements == 1) { /* x is a vector but edge is a float */ for (int i = 0; i < x_type->vector_elements; i++) { - body.emit(assign(t, b2f(gequal(swizzle(x, i, 1), edge)), 1 << i)); + if (edge_type->base_type == GLSL_TYPE_DOUBLE) + body.emit(assign(t, f2d(b2f(gequal(swizzle(x, i, 1), edge))), 1 << i)); + else + body.emit(assign(t, b2f(gequal(swizzle(x, i, 1), edge)), 1 << i)); } } else { /* Both are vectors */ for (int i = 0; i < x_type->vector_elements; i++) { - body.emit(assign(t, b2f(gequal(swizzle(x, i, 1), swizzle(edge, i, 1))), - 1 << i)); + if (edge_type->base_type == GLSL_TYPE_DOUBLE) + body.emit(assign(t, f2d(b2f(gequal(swizzle(x, i, 1), swizzle(edge, i, 1)))), + 1 << i)); + else + body.emit(assign(t, b2f(gequal(swizzle(x, i, 1), swizzle(edge, i, 1))), + 1 << i)); + } } body.emit(ret(t)); @@ -2937,12 +3312,12 @@ builtin_builder::_step(const glsl_type *edge_type, const glsl_type *x_type) } ir_function_signature * -builtin_builder::_smoothstep(const glsl_type *edge_type, const glsl_type *x_type) +builtin_builder::_smoothstep(builtin_available_predicate avail, const glsl_type *edge_type, const glsl_type *x_type) { ir_variable *edge0 = in_var(edge_type, "edge0"); ir_variable *edge1 = in_var(edge_type, "edge1"); ir_variable *x = in_var(x_type, "x"); - MAKE_SIG(x_type, always_available, 3, edge0, edge1, x); + MAKE_SIG(x_type, avail, 3, edge0, edge1, x); /* From the GLSL 1.10 specification: * @@ -2953,18 +3328,18 @@ builtin_builder::_smoothstep(const glsl_type *edge_type, const glsl_type *x_type ir_variable *t = body.make_temp(x_type, "t"); body.emit(assign(t, clamp(div(sub(x, edge0), sub(edge1, edge0)), - imm(0.0f), imm(1.0f)))); + IMM_FP(x_type, 0.0), IMM_FP(x_type, 1.0)))); - body.emit(ret(mul(t, mul(t, sub(imm(3.0f), mul(imm(2.0f), t)))))); + body.emit(ret(mul(t, mul(t, sub(IMM_FP(x_type, 3.0), mul(IMM_FP(x_type, 2.0), t)))))); return sig; } ir_function_signature * -builtin_builder::_isnan(const glsl_type *type) +builtin_builder::_isnan(builtin_available_predicate avail, const glsl_type *type) { ir_variable *x = in_var(type, "x"); - MAKE_SIG(glsl_type::bvec(type->vector_elements), v130, 1, x); + MAKE_SIG(glsl_type::bvec(type->vector_elements), avail, 1, x); body.emit(ret(nequal(x, x))); @@ -2972,14 +3347,14 @@ builtin_builder::_isnan(const glsl_type *type) } ir_function_signature * -builtin_builder::_isinf(const glsl_type *type) +builtin_builder::_isinf(builtin_available_predicate avail, const glsl_type *type) { ir_variable *x = in_var(type, "x"); - MAKE_SIG(glsl_type::bvec(type->vector_elements), v130, 1, x); + MAKE_SIG(glsl_type::bvec(type->vector_elements), avail, 1, x); ir_constant_data infinities; for (int i = 0; i < type->vector_elements; i++) { - infinities.f[i] = std::numeric_limits::infinity(); + infinities.f[i] = INFINITY; } body.emit(ret(equal(abs(x), imm(type, infinities)))); @@ -3115,10 +3490,28 @@ builtin_builder::_unpackHalf2x16(builtin_available_predicate avail) } ir_function_signature * -builtin_builder::_length(const glsl_type *type) +builtin_builder::_packDouble2x32(builtin_available_predicate avail) +{ + ir_variable *v = in_var(glsl_type::uvec2_type, "v"); + MAKE_SIG(glsl_type::double_type, avail, 1, v); + body.emit(ret(expr(ir_unop_pack_double_2x32, v))); + return sig; +} + +ir_function_signature * +builtin_builder::_unpackDouble2x32(builtin_available_predicate avail) +{ + ir_variable *p = in_var(glsl_type::double_type, "p"); + MAKE_SIG(glsl_type::uvec2_type, avail, 1, p); + body.emit(ret(expr(ir_unop_unpack_double_2x32, p))); + return sig; +} + +ir_function_signature * +builtin_builder::_length(builtin_available_predicate avail, const glsl_type *type) { ir_variable *x = in_var(type, "x"); - MAKE_SIG(glsl_type::float_type, always_available, 1, x); + MAKE_SIG(type->get_base_type(), avail, 1, x); body.emit(ret(sqrt(dot(x, x)))); @@ -3126,11 +3519,11 @@ builtin_builder::_length(const glsl_type *type) } ir_function_signature * -builtin_builder::_distance(const glsl_type *type) +builtin_builder::_distance(builtin_available_predicate avail, const glsl_type *type) { ir_variable *p0 = in_var(type, "p0"); ir_variable *p1 = in_var(type, "p1"); - MAKE_SIG(glsl_type::float_type, always_available, 2, p0, p1); + MAKE_SIG(type->get_base_type(), avail, 2, p0, p1); if (type->vector_elements == 1) { body.emit(ret(abs(sub(p0, p1)))); @@ -3144,21 +3537,21 @@ builtin_builder::_distance(const glsl_type *type) } ir_function_signature * -builtin_builder::_dot(const glsl_type *type) +builtin_builder::_dot(builtin_available_predicate avail, const glsl_type *type) { if (type->vector_elements == 1) - return binop(ir_binop_mul, always_available, type, type, type); + return binop(ir_binop_mul, avail, type, type, type); - return binop(ir_binop_dot, always_available, - glsl_type::float_type, type, type); + return binop(ir_binop_dot, avail, + type->get_base_type(), type, type); } ir_function_signature * -builtin_builder::_cross(const glsl_type *type) +builtin_builder::_cross(builtin_available_predicate avail, const glsl_type *type) { ir_variable *a = in_var(type, "a"); ir_variable *b = in_var(type, "b"); - MAKE_SIG(type, always_available, 2, a, b); + MAKE_SIG(type, avail, 2, a, b); int yzx = MAKE_SWIZZLE4(SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, 0); int zxy = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, 0); @@ -3170,10 +3563,10 @@ builtin_builder::_cross(const glsl_type *type) } ir_function_signature * -builtin_builder::_normalize(const glsl_type *type) +builtin_builder::_normalize(builtin_available_predicate avail, const glsl_type *type) { ir_variable *x = in_var(type, "x"); - MAKE_SIG(type, always_available, 1, x); + MAKE_SIG(type, avail, 1, x); if (type->vector_elements == 1) { body.emit(ret(sign(x))); @@ -3203,41 +3596,41 @@ builtin_builder::_ftransform() } ir_function_signature * -builtin_builder::_faceforward(const glsl_type *type) +builtin_builder::_faceforward(builtin_available_predicate avail, const glsl_type *type) { ir_variable *N = in_var(type, "N"); ir_variable *I = in_var(type, "I"); ir_variable *Nref = in_var(type, "Nref"); - MAKE_SIG(type, always_available, 3, N, I, Nref); + MAKE_SIG(type, avail, 3, N, I, Nref); - body.emit(if_tree(less(dot(Nref, I), imm(0.0f)), + body.emit(if_tree(less(dot(Nref, I), IMM_FP(type, 0.0)), ret(N), ret(neg(N)))); return sig; } ir_function_signature * -builtin_builder::_reflect(const glsl_type *type) +builtin_builder::_reflect(builtin_available_predicate avail, const glsl_type *type) { ir_variable *I = in_var(type, "I"); ir_variable *N = in_var(type, "N"); - MAKE_SIG(type, always_available, 2, I, N); + MAKE_SIG(type, avail, 2, I, N); /* I - 2 * dot(N, I) * N */ - body.emit(ret(sub(I, mul(imm(2.0f), mul(dot(N, I), N))))); + body.emit(ret(sub(I, mul(IMM_FP(type, 2.0), mul(dot(N, I), N))))); return sig; } ir_function_signature * -builtin_builder::_refract(const glsl_type *type) +builtin_builder::_refract(builtin_available_predicate avail, const glsl_type *type) { ir_variable *I = in_var(type, "I"); ir_variable *N = in_var(type, "N"); - ir_variable *eta = in_var(glsl_type::float_type, "eta"); - MAKE_SIG(type, always_available, 3, I, N, eta); + ir_variable *eta = in_var(type->get_base_type(), "eta"); + MAKE_SIG(type, avail, 3, I, N, eta); - ir_variable *n_dot_i = body.make_temp(glsl_type::float_type, "n_dot_i"); + ir_variable *n_dot_i = body.make_temp(type->get_base_type(), "n_dot_i"); body.emit(assign(n_dot_i, dot(N, I))); /* From the GLSL 1.10 specification: @@ -3247,11 +3640,11 @@ builtin_builder::_refract(const glsl_type *type) * else * return eta * I - (eta * dot(N, I) + sqrt(k)) * N */ - ir_variable *k = body.make_temp(glsl_type::float_type, "k"); - body.emit(assign(k, sub(imm(1.0f), - mul(eta, mul(eta, sub(imm(1.0f), + ir_variable *k = body.make_temp(type->get_base_type(), "k"); + body.emit(assign(k, sub(IMM_FP(type, 1.0), + mul(eta, mul(eta, sub(IMM_FP(type, 1.0), mul(n_dot_i, n_dot_i))))))); - body.emit(if_tree(less(k, imm(0.0f)), + body.emit(if_tree(less(k, IMM_FP(type, 0.0)), ret(ir_constant::zero(mem_ctx, type)), ret(sub(mul(eta, I), mul(add(mul(eta, n_dot_i), sqrt(k)), N))))); @@ -3260,11 +3653,11 @@ builtin_builder::_refract(const glsl_type *type) } ir_function_signature * -builtin_builder::_matrixCompMult(const glsl_type *type) +builtin_builder::_matrixCompMult(builtin_available_predicate avail, const glsl_type *type) { ir_variable *x = in_var(type, "x"); ir_variable *y = in_var(type, "y"); - MAKE_SIG(type, always_available, 2, x, y); + MAKE_SIG(type, avail, 2, x, y); ir_variable *z = body.make_temp(type, "z"); for (int i = 0; i < type->matrix_columns; i++) { @@ -3276,11 +3669,19 @@ builtin_builder::_matrixCompMult(const glsl_type *type) } ir_function_signature * -builtin_builder::_outerProduct(const glsl_type *type) +builtin_builder::_outerProduct(builtin_available_predicate avail, const glsl_type *type) { - ir_variable *c = in_var(glsl_type::vec(type->vector_elements), "c"); - ir_variable *r = in_var(glsl_type::vec(type->matrix_columns), "r"); - MAKE_SIG(type, v120, 2, c, r); + ir_variable *c; + ir_variable *r; + + if (type->base_type == GLSL_TYPE_DOUBLE) { + r = in_var(glsl_type::dvec(type->matrix_columns), "r"); + c = in_var(glsl_type::dvec(type->vector_elements), "c"); + } else { + r = in_var(glsl_type::vec(type->matrix_columns), "r"); + c = in_var(glsl_type::vec(type->vector_elements), "c"); + } + MAKE_SIG(type, avail, 2, c, r); ir_variable *m = body.make_temp(type, "m"); for (int i = 0; i < type->matrix_columns; i++) { @@ -3292,15 +3693,15 @@ builtin_builder::_outerProduct(const glsl_type *type) } ir_function_signature * -builtin_builder::_transpose(const glsl_type *orig_type) +builtin_builder::_transpose(builtin_available_predicate avail, const glsl_type *orig_type) { const glsl_type *transpose_type = - glsl_type::get_instance(GLSL_TYPE_FLOAT, + glsl_type::get_instance(orig_type->base_type, orig_type->matrix_columns, orig_type->vector_elements); ir_variable *m = in_var(orig_type, "m"); - MAKE_SIG(transpose_type, v120, 1, m); + MAKE_SIG(transpose_type, avail, 1, m); ir_variable *t = body.make_temp(transpose_type, "t"); for (int i = 0; i < orig_type->matrix_columns; i++) { @@ -3316,10 +3717,10 @@ builtin_builder::_transpose(const glsl_type *orig_type) } ir_function_signature * -builtin_builder::_determinant_mat2() +builtin_builder::_determinant_mat2(builtin_available_predicate avail, const glsl_type *type) { - ir_variable *m = in_var(glsl_type::mat2_type, "m"); - MAKE_SIG(glsl_type::float_type, v120, 1, m); + ir_variable *m = in_var(type, "m"); + MAKE_SIG(type->get_base_type(), avail, 1, m); body.emit(ret(sub(mul(matrix_elt(m, 0, 0), matrix_elt(m, 1, 1)), mul(matrix_elt(m, 1, 0), matrix_elt(m, 0, 1))))); @@ -3328,10 +3729,10 @@ builtin_builder::_determinant_mat2() } ir_function_signature * -builtin_builder::_determinant_mat3() +builtin_builder::_determinant_mat3(builtin_available_predicate avail, const glsl_type *type) { - ir_variable *m = in_var(glsl_type::mat3_type, "m"); - MAKE_SIG(glsl_type::float_type, v120, 1, m); + ir_variable *m = in_var(type, "m"); + MAKE_SIG(type->get_base_type(), avail, 1, m); ir_expression *f1 = sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 2, 2)), @@ -3353,30 +3754,31 @@ builtin_builder::_determinant_mat3() } ir_function_signature * -builtin_builder::_determinant_mat4() -{ - ir_variable *m = in_var(glsl_type::mat4_type, "m"); - MAKE_SIG(glsl_type::float_type, v120, 1, m); - - ir_variable *SubFactor00 = body.make_temp(glsl_type::float_type, "SubFactor00"); - ir_variable *SubFactor01 = body.make_temp(glsl_type::float_type, "SubFactor01"); - ir_variable *SubFactor02 = body.make_temp(glsl_type::float_type, "SubFactor02"); - ir_variable *SubFactor03 = body.make_temp(glsl_type::float_type, "SubFactor03"); - ir_variable *SubFactor04 = body.make_temp(glsl_type::float_type, "SubFactor04"); - ir_variable *SubFactor05 = body.make_temp(glsl_type::float_type, "SubFactor05"); - ir_variable *SubFactor06 = body.make_temp(glsl_type::float_type, "SubFactor06"); - ir_variable *SubFactor07 = body.make_temp(glsl_type::float_type, "SubFactor07"); - ir_variable *SubFactor08 = body.make_temp(glsl_type::float_type, "SubFactor08"); - ir_variable *SubFactor09 = body.make_temp(glsl_type::float_type, "SubFactor09"); - ir_variable *SubFactor10 = body.make_temp(glsl_type::float_type, "SubFactor10"); - ir_variable *SubFactor11 = body.make_temp(glsl_type::float_type, "SubFactor11"); - ir_variable *SubFactor12 = body.make_temp(glsl_type::float_type, "SubFactor12"); - ir_variable *SubFactor13 = body.make_temp(glsl_type::float_type, "SubFactor13"); - ir_variable *SubFactor14 = body.make_temp(glsl_type::float_type, "SubFactor14"); - ir_variable *SubFactor15 = body.make_temp(glsl_type::float_type, "SubFactor15"); - ir_variable *SubFactor16 = body.make_temp(glsl_type::float_type, "SubFactor16"); - ir_variable *SubFactor17 = body.make_temp(glsl_type::float_type, "SubFactor17"); - ir_variable *SubFactor18 = body.make_temp(glsl_type::float_type, "SubFactor18"); +builtin_builder::_determinant_mat4(builtin_available_predicate avail, const glsl_type *type) +{ + ir_variable *m = in_var(type, "m"); + const glsl_type *btype = type->get_base_type(); + MAKE_SIG(btype, avail, 1, m); + + ir_variable *SubFactor00 = body.make_temp(btype, "SubFactor00"); + ir_variable *SubFactor01 = body.make_temp(btype, "SubFactor01"); + ir_variable *SubFactor02 = body.make_temp(btype, "SubFactor02"); + ir_variable *SubFactor03 = body.make_temp(btype, "SubFactor03"); + ir_variable *SubFactor04 = body.make_temp(btype, "SubFactor04"); + ir_variable *SubFactor05 = body.make_temp(btype, "SubFactor05"); + ir_variable *SubFactor06 = body.make_temp(btype, "SubFactor06"); + ir_variable *SubFactor07 = body.make_temp(btype, "SubFactor07"); + ir_variable *SubFactor08 = body.make_temp(btype, "SubFactor08"); + ir_variable *SubFactor09 = body.make_temp(btype, "SubFactor09"); + ir_variable *SubFactor10 = body.make_temp(btype, "SubFactor10"); + ir_variable *SubFactor11 = body.make_temp(btype, "SubFactor11"); + ir_variable *SubFactor12 = body.make_temp(btype, "SubFactor12"); + ir_variable *SubFactor13 = body.make_temp(btype, "SubFactor13"); + ir_variable *SubFactor14 = body.make_temp(btype, "SubFactor14"); + ir_variable *SubFactor15 = body.make_temp(btype, "SubFactor15"); + ir_variable *SubFactor16 = body.make_temp(btype, "SubFactor16"); + ir_variable *SubFactor17 = body.make_temp(btype, "SubFactor17"); + ir_variable *SubFactor18 = body.make_temp(btype, "SubFactor18"); body.emit(assign(SubFactor00, sub(mul(matrix_elt(m, 2, 2), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 2), matrix_elt(m, 2, 3))))); body.emit(assign(SubFactor01, sub(mul(matrix_elt(m, 2, 1), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 1), matrix_elt(m, 2, 3))))); @@ -3398,7 +3800,7 @@ builtin_builder::_determinant_mat4() body.emit(assign(SubFactor17, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 2)), mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 2))))); body.emit(assign(SubFactor18, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 1)), mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 1))))); - ir_variable *adj_0 = body.make_temp(glsl_type::vec4_type, "adj_0"); + ir_variable *adj_0 = body.make_temp(btype == glsl_type::float_type ? glsl_type::vec4_type : glsl_type::dvec4_type, "adj_0"); body.emit(assign(adj_0, add(sub(mul(matrix_elt(m, 1, 1), SubFactor00), @@ -3427,12 +3829,12 @@ builtin_builder::_determinant_mat4() } ir_function_signature * -builtin_builder::_inverse_mat2() +builtin_builder::_inverse_mat2(builtin_available_predicate avail, const glsl_type *type) { - ir_variable *m = in_var(glsl_type::mat2_type, "m"); - MAKE_SIG(glsl_type::mat2_type, v120, 1, m); + ir_variable *m = in_var(type, "m"); + MAKE_SIG(type, avail, 1, m); - ir_variable *adj = body.make_temp(glsl_type::mat2_type, "adj"); + ir_variable *adj = body.make_temp(type, "adj"); body.emit(assign(array_ref(adj, 0), matrix_elt(m, 1, 1), 1 << 0)); body.emit(assign(array_ref(adj, 0), neg(matrix_elt(m, 0, 1)), 1 << 1)); body.emit(assign(array_ref(adj, 1), neg(matrix_elt(m, 1, 0)), 1 << 0)); @@ -3447,14 +3849,15 @@ builtin_builder::_inverse_mat2() } ir_function_signature * -builtin_builder::_inverse_mat3() +builtin_builder::_inverse_mat3(builtin_available_predicate avail, const glsl_type *type) { - ir_variable *m = in_var(glsl_type::mat3_type, "m"); - MAKE_SIG(glsl_type::mat3_type, v120, 1, m); + ir_variable *m = in_var(type, "m"); + const glsl_type *btype = type->get_base_type(); + MAKE_SIG(type, avail, 1, m); - ir_variable *f11_22_21_12 = body.make_temp(glsl_type::float_type, "f11_22_21_12"); - ir_variable *f10_22_20_12 = body.make_temp(glsl_type::float_type, "f10_22_20_12"); - ir_variable *f10_21_20_11 = body.make_temp(glsl_type::float_type, "f10_21_20_11"); + ir_variable *f11_22_21_12 = body.make_temp(btype, "f11_22_21_12"); + ir_variable *f10_22_20_12 = body.make_temp(btype, "f10_22_20_12"); + ir_variable *f10_21_20_11 = body.make_temp(btype, "f10_21_20_11"); body.emit(assign(f11_22_21_12, sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 2, 2)), @@ -3466,7 +3869,7 @@ builtin_builder::_inverse_mat3() sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 1)), mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 1))))); - ir_variable *adj = body.make_temp(glsl_type::mat3_type, "adj"); + ir_variable *adj = body.make_temp(type, "adj"); body.emit(assign(array_ref(adj, 0), f11_22_21_12, WRITEMASK_X)); body.emit(assign(array_ref(adj, 1), neg(f10_22_20_12), WRITEMASK_X)); body.emit(assign(array_ref(adj, 2), f10_21_20_11, WRITEMASK_X)); @@ -3508,30 +3911,31 @@ builtin_builder::_inverse_mat3() } ir_function_signature * -builtin_builder::_inverse_mat4() -{ - ir_variable *m = in_var(glsl_type::mat4_type, "m"); - MAKE_SIG(glsl_type::mat4_type, v120, 1, m); - - ir_variable *SubFactor00 = body.make_temp(glsl_type::float_type, "SubFactor00"); - ir_variable *SubFactor01 = body.make_temp(glsl_type::float_type, "SubFactor01"); - ir_variable *SubFactor02 = body.make_temp(glsl_type::float_type, "SubFactor02"); - ir_variable *SubFactor03 = body.make_temp(glsl_type::float_type, "SubFactor03"); - ir_variable *SubFactor04 = body.make_temp(glsl_type::float_type, "SubFactor04"); - ir_variable *SubFactor05 = body.make_temp(glsl_type::float_type, "SubFactor05"); - ir_variable *SubFactor06 = body.make_temp(glsl_type::float_type, "SubFactor06"); - ir_variable *SubFactor07 = body.make_temp(glsl_type::float_type, "SubFactor07"); - ir_variable *SubFactor08 = body.make_temp(glsl_type::float_type, "SubFactor08"); - ir_variable *SubFactor09 = body.make_temp(glsl_type::float_type, "SubFactor09"); - ir_variable *SubFactor10 = body.make_temp(glsl_type::float_type, "SubFactor10"); - ir_variable *SubFactor11 = body.make_temp(glsl_type::float_type, "SubFactor11"); - ir_variable *SubFactor12 = body.make_temp(glsl_type::float_type, "SubFactor12"); - ir_variable *SubFactor13 = body.make_temp(glsl_type::float_type, "SubFactor13"); - ir_variable *SubFactor14 = body.make_temp(glsl_type::float_type, "SubFactor14"); - ir_variable *SubFactor15 = body.make_temp(glsl_type::float_type, "SubFactor15"); - ir_variable *SubFactor16 = body.make_temp(glsl_type::float_type, "SubFactor16"); - ir_variable *SubFactor17 = body.make_temp(glsl_type::float_type, "SubFactor17"); - ir_variable *SubFactor18 = body.make_temp(glsl_type::float_type, "SubFactor18"); +builtin_builder::_inverse_mat4(builtin_available_predicate avail, const glsl_type *type) +{ + ir_variable *m = in_var(type, "m"); + const glsl_type *btype = type->get_base_type(); + MAKE_SIG(type, avail, 1, m); + + ir_variable *SubFactor00 = body.make_temp(btype, "SubFactor00"); + ir_variable *SubFactor01 = body.make_temp(btype, "SubFactor01"); + ir_variable *SubFactor02 = body.make_temp(btype, "SubFactor02"); + ir_variable *SubFactor03 = body.make_temp(btype, "SubFactor03"); + ir_variable *SubFactor04 = body.make_temp(btype, "SubFactor04"); + ir_variable *SubFactor05 = body.make_temp(btype, "SubFactor05"); + ir_variable *SubFactor06 = body.make_temp(btype, "SubFactor06"); + ir_variable *SubFactor07 = body.make_temp(btype, "SubFactor07"); + ir_variable *SubFactor08 = body.make_temp(btype, "SubFactor08"); + ir_variable *SubFactor09 = body.make_temp(btype, "SubFactor09"); + ir_variable *SubFactor10 = body.make_temp(btype, "SubFactor10"); + ir_variable *SubFactor11 = body.make_temp(btype, "SubFactor11"); + ir_variable *SubFactor12 = body.make_temp(btype, "SubFactor12"); + ir_variable *SubFactor13 = body.make_temp(btype, "SubFactor13"); + ir_variable *SubFactor14 = body.make_temp(btype, "SubFactor14"); + ir_variable *SubFactor15 = body.make_temp(btype, "SubFactor15"); + ir_variable *SubFactor16 = body.make_temp(btype, "SubFactor16"); + ir_variable *SubFactor17 = body.make_temp(btype, "SubFactor17"); + ir_variable *SubFactor18 = body.make_temp(btype, "SubFactor18"); body.emit(assign(SubFactor00, sub(mul(matrix_elt(m, 2, 2), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 2), matrix_elt(m, 2, 3))))); body.emit(assign(SubFactor01, sub(mul(matrix_elt(m, 2, 1), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 1), matrix_elt(m, 2, 3))))); @@ -3553,7 +3957,7 @@ builtin_builder::_inverse_mat4() body.emit(assign(SubFactor17, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 2)), mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 2))))); body.emit(assign(SubFactor18, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 1)), mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 1))))); - ir_variable *adj = body.make_temp(glsl_type::mat4_type, "adj"); + ir_variable *adj = body.make_temp(btype == glsl_type::float_type ? glsl_type::mat4_type : glsl_type::dmat4_type, "adj"); body.emit(assign(array_ref(adj, 0), add(sub(mul(matrix_elt(m, 1, 1), SubFactor00), mul(matrix_elt(m, 1, 2), SubFactor01)), @@ -3997,13 +4401,23 @@ builtin_builder::_EndStreamPrimitive(builtin_available_predicate avail, } ir_function_signature * -builtin_builder::_textureQueryLod(const glsl_type *sampler_type, +builtin_builder::_barrier() +{ + MAKE_SIG(glsl_type::void_type, barrier_supported, 0); + + body.emit(new(mem_ctx) ir_barrier()); + return sig; +} + +ir_function_signature * +builtin_builder::_textureQueryLod(builtin_available_predicate avail, + const glsl_type *sampler_type, const glsl_type *coord_type) { ir_variable *s = in_var(sampler_type, "sampler"); ir_variable *coord = in_var(coord_type, "coord"); /* The sampler and coordinate always exist; add optional parameters later. */ - MAKE_SIG(glsl_type::vec2_type, texture_query_lod, 2, s, coord); + MAKE_SIG(glsl_type::vec2_type, avail, 2, s, coord); ir_texture *tex = new(mem_ctx) ir_texture(ir_lod); tex->coordinate = var_ref(coord); @@ -4180,7 +4594,7 @@ builtin_builder::_bitfieldExtract(const glsl_type *type) ir_variable *value = in_var(type, "value"); ir_variable *offset = in_var(glsl_type::int_type, "offset"); ir_variable *bits = in_var(glsl_type::int_type, "bits"); - MAKE_SIG(type, gpu_shader5, 3, value, offset, bits); + MAKE_SIG(type, gpu_shader5_or_es31, 3, value, offset, bits); body.emit(ret(expr(ir_triop_bitfield_extract, value, offset, bits))); @@ -4194,43 +4608,43 @@ builtin_builder::_bitfieldInsert(const glsl_type *type) ir_variable *insert = in_var(type, "insert"); ir_variable *offset = in_var(glsl_type::int_type, "offset"); ir_variable *bits = in_var(glsl_type::int_type, "bits"); - MAKE_SIG(type, gpu_shader5, 4, base, insert, offset, bits); + MAKE_SIG(type, gpu_shader5_or_es31, 4, base, insert, offset, bits); body.emit(ret(bitfield_insert(base, insert, offset, bits))); return sig; } -UNOP(bitfieldReverse, ir_unop_bitfield_reverse, gpu_shader5) +UNOP(bitfieldReverse, ir_unop_bitfield_reverse, gpu_shader5_or_es31) ir_function_signature * builtin_builder::_bitCount(const glsl_type *type) { - return unop(gpu_shader5, ir_unop_bit_count, + return unop(gpu_shader5_or_es31, ir_unop_bit_count, glsl_type::ivec(type->vector_elements), type); } ir_function_signature * builtin_builder::_findLSB(const glsl_type *type) { - return unop(gpu_shader5, ir_unop_find_lsb, + return unop(gpu_shader5_or_es31, ir_unop_find_lsb, glsl_type::ivec(type->vector_elements), type); } ir_function_signature * builtin_builder::_findMSB(const glsl_type *type) { - return unop(gpu_shader5, ir_unop_find_msb, + return unop(gpu_shader5_or_es31, ir_unop_find_msb, glsl_type::ivec(type->vector_elements), type); } ir_function_signature * -builtin_builder::_fma(const glsl_type *type) +builtin_builder::_fma(builtin_available_predicate avail, const glsl_type *type) { ir_variable *a = in_var(type, "a"); ir_variable *b = in_var(type, "b"); ir_variable *c = in_var(type, "c"); - MAKE_SIG(type, gpu_shader5, 3, a, b, c); + MAKE_SIG(type, avail, 3, a, b, c); body.emit(ret(ir_builder::fma(a, b, c))); @@ -4240,7 +4654,20 @@ builtin_builder::_fma(const glsl_type *type) ir_function_signature * builtin_builder::_ldexp(const glsl_type *x_type, const glsl_type *exp_type) { - return binop(ir_binop_ldexp, gpu_shader5, x_type, x_type, exp_type); + return binop(ir_binop_ldexp, x_type->base_type == GLSL_TYPE_DOUBLE ? fp64 : gpu_shader5_or_es31, x_type, x_type, exp_type); +} + +ir_function_signature * +builtin_builder::_dfrexp(const glsl_type *x_type, const glsl_type *exp_type) +{ + ir_variable *x = in_var(x_type, "x"); + ir_variable *exponent = out_var(exp_type, "exp"); + MAKE_SIG(x_type, fp64, 2, x, exponent); + + body.emit(assign(exponent, expr(ir_unop_frexp_exp, x))); + + body.emit(ret(expr(ir_unop_frexp_sig, x))); + return sig; } ir_function_signature * @@ -4248,7 +4675,7 @@ builtin_builder::_frexp(const glsl_type *x_type, const glsl_type *exp_type) { ir_variable *x = in_var(x_type, "x"); ir_variable *exponent = out_var(exp_type, "exp"); - MAKE_SIG(x_type, gpu_shader5, 2, x, exponent); + MAKE_SIG(x_type, gpu_shader5_or_es31, 2, x, exponent); const unsigned vec_elem = x_type->vector_elements; const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1); @@ -4297,7 +4724,7 @@ builtin_builder::_uaddCarry(const glsl_type *type) ir_variable *x = in_var(type, "x"); ir_variable *y = in_var(type, "y"); ir_variable *carry = out_var(type, "carry"); - MAKE_SIG(type, gpu_shader5, 3, x, y, carry); + MAKE_SIG(type, gpu_shader5_or_es31, 3, x, y, carry); body.emit(assign(carry, ir_builder::carry(x, y))); body.emit(ret(add(x, y))); @@ -4311,7 +4738,7 @@ builtin_builder::_usubBorrow(const glsl_type *type) ir_variable *x = in_var(type, "x"); ir_variable *y = in_var(type, "y"); ir_variable *borrow = out_var(type, "borrow"); - MAKE_SIG(type, gpu_shader5, 3, x, y, borrow); + MAKE_SIG(type, gpu_shader5_or_es31, 3, x, y, borrow); body.emit(assign(borrow, ir_builder::borrow(x, y))); body.emit(ret(sub(x, y))); @@ -4329,7 +4756,7 @@ builtin_builder::_mulExtended(const glsl_type *type) ir_variable *y = in_var(type, "y"); ir_variable *msb = out_var(type, "msb"); ir_variable *lsb = out_var(type, "lsb"); - MAKE_SIG(glsl_type::void_type, gpu_shader5, 4, x, y, msb, lsb); + MAKE_SIG(glsl_type::void_type, gpu_shader5_or_es31, 4, x, y, msb, lsb); body.emit(assign(msb, imul_high(x, y))); body.emit(assign(lsb, mul(x, y))); @@ -4457,17 +4884,62 @@ builtin_builder::_image_prototype(const glsl_type *image_type, ir_variable *coord = in_var( glsl_type::ivec(image_type->coordinate_components()), "coord"); - ir_function_signature *sig = new_sig( - ret_type, shader_image_load_store, 2, image, coord); + const builtin_available_predicate avail = + (flags & IMAGE_FUNCTION_AVAIL_ATOMIC ? shader_image_atomic : + shader_image_load_store); + ir_function_signature *sig = new_sig(ret_type, avail, 2, image, coord); /* Sample index for multisample images. */ if (image_type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) sig->parameters.push_tail(in_var(glsl_type::int_type, "sample")); /* Data arguments. */ - for (unsigned i = 0; i < num_arguments; ++i) - sig->parameters.push_tail(in_var(data_type, - ralloc_asprintf(NULL, "arg%d", i))); + for (unsigned i = 0; i < num_arguments; ++i) { + char *arg_name = ralloc_asprintf(NULL, "arg%d", i); + sig->parameters.push_tail(in_var(data_type, arg_name)); + ralloc_free(arg_name); + } + + /* Set the maximal set of qualifiers allowed for this image + * built-in. Function calls with arguments having fewer + * qualifiers than present in the prototype are allowed by the + * spec, but not with more, i.e. this will make the compiler + * accept everything that needs to be accepted, and reject cases + * like loads from write-only or stores to read-only images. + */ + image->data.image_read_only = (flags & IMAGE_FUNCTION_READ_ONLY) != 0; + image->data.image_write_only = (flags & IMAGE_FUNCTION_WRITE_ONLY) != 0; + image->data.image_coherent = true; + image->data.image_volatile = true; + image->data.image_restrict = true; + + return sig; +} + +ir_function_signature * +builtin_builder::_image_size_prototype(const glsl_type *image_type, + const char *intrinsic_name, + unsigned num_arguments, + unsigned flags) +{ + const glsl_type *ret_type; + unsigned num_components = image_type->coordinate_components(); + + /* From the ARB_shader_image_size extension: + * "Cube images return the dimensions of one face." + */ + if (image_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE && + !image_type->sampler_array) { + num_components = 2; + } + + /* FIXME: Add the highp precision qualifier for GLES 3.10 when it is + * supported by mesa. + */ + ret_type = glsl_type::get_instance(GLSL_TYPE_INT, num_components, 1); + + ir_variable *image = in_var(image_type, "image"); + ir_function_signature *sig = new_sig(ret_type, shader_image_size, 1, image); /* Set the maximal set of qualifiers allowed for this image * built-in. Function calls with arguments having fewer @@ -4476,23 +4948,24 @@ builtin_builder::_image_prototype(const glsl_type *image_type, * accept everything that needs to be accepted, and reject cases * like loads from write-only or stores to read-only images. */ - image->data.image.read_only = flags & IMAGE_FUNCTION_READ_ONLY; - image->data.image.write_only = flags & IMAGE_FUNCTION_WRITE_ONLY; - image->data.image.coherent = true; - image->data.image._volatile = true; - image->data.image.restrict_flag = true; + image->data.image_read_only = true; + image->data.image_write_only = true; + image->data.image_coherent = true; + image->data.image_volatile = true; + image->data.image_restrict = true; return sig; } ir_function_signature * -builtin_builder::_image(const glsl_type *image_type, +builtin_builder::_image(image_prototype_ctr prototype, + const glsl_type *image_type, const char *intrinsic_name, unsigned num_arguments, unsigned flags) { - ir_function_signature *sig = _image_prototype(image_type, intrinsic_name, - num_arguments, flags); + ir_function_signature *sig = (this->*prototype)(image_type, intrinsic_name, + num_arguments, flags); if (flags & IMAGE_FUNCTION_EMIT_STUB) { ir_factory body(&sig->body, mem_ctx); @@ -4571,6 +5044,17 @@ _mesa_glsl_find_builtin_function(_mesa_glsl_parse_state *state, return s; } +ir_function * +_mesa_glsl_find_builtin_function_by_name(_mesa_glsl_parse_state *state, + const char *name) +{ + ir_function *f; + mtx_lock(&builtins_lock); + f = builtins.shader->symbols->get_function(name); + mtx_unlock(&builtins_lock); + return f; +} + gl_shader * _mesa_glsl_get_builtin_function_shader() {