X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fglsl%2Fbuiltin_functions.cpp;h=bb7fbcdc17dbf1b147c0e126e5c4e0cfa86caa23;hb=0abfb80dacf80c1f2d0a6a4142b9ab84695aa9e7;hp=72054e0fe93c88f5248a04c55885f5987b405e85;hpb=10ef949424809d51c627008bb2feab5a067f8e08;p=mesa.git diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp index 72054e0fe93..bb7fbcdc17d 100644 --- a/src/glsl/builtin_functions.cpp +++ b/src/glsl/builtin_functions.cpp @@ -62,6 +62,10 @@ #include "program/prog_instruction.h" #include +#define M_PIf ((float) M_PI) +#define M_PI_2f ((float) M_PI_2) +#define M_PI_4f ((float) M_PI_4) + using namespace ir_builder; /** @@ -69,7 +73,7 @@ using namespace ir_builder; * @{ */ static bool -always_available(const _mesa_glsl_parse_state *state) +always_available(const _mesa_glsl_parse_state *) { return true; } @@ -77,7 +81,7 @@ always_available(const _mesa_glsl_parse_state *state) static bool compatibility_vs_only(const _mesa_glsl_parse_state *state) { - return state->target == vertex_shader && + return state->stage == MESA_SHADER_VERTEX && state->language_version <= 130 && !state->es_shader; } @@ -85,13 +89,13 @@ compatibility_vs_only(const _mesa_glsl_parse_state *state) static bool fs_only(const _mesa_glsl_parse_state *state) { - return state->target == fragment_shader; + return state->stage == MESA_SHADER_FRAGMENT; } static bool gs_only(const _mesa_glsl_parse_state *state) { - return state->target == geometry_shader; + return state->stage == MESA_SHADER_GEOMETRY; } static bool @@ -103,7 +107,7 @@ v110(const _mesa_glsl_parse_state *state) static bool v110_fs_only(const _mesa_glsl_parse_state *state) { - return !state->es_shader && state->target == fragment_shader; + return !state->es_shader && state->stage == MESA_SHADER_FRAGMENT; } static bool @@ -122,7 +126,7 @@ static bool v130_fs_only(const _mesa_glsl_parse_state *state) { return state->is_version(130, 300) && - state->target == fragment_shader; + state->stage == MESA_SHADER_FRAGMENT; } static bool @@ -155,7 +159,7 @@ lod_exists_in_stage(const _mesa_glsl_parse_state *state) * Since ARB_shader_texture_lod can only be enabled on desktop GLSL, we * don't need to explicitly check state->es_shader. */ - return state->target == vertex_shader || + return state->stage == MESA_SHADER_VERTEX || state->is_version(130, 300) || state->ARB_shader_texture_lod_enable; } @@ -194,16 +198,17 @@ shader_integer_mix(const _mesa_glsl_parse_state *state) } static bool -shader_packing(const _mesa_glsl_parse_state *state) +shader_packing_or_es3(const _mesa_glsl_parse_state *state) { return state->ARB_shading_language_packing_enable || - state->is_version(400, 0); + state->is_version(400, 300); } static bool -shader_packing_or_es3(const _mesa_glsl_parse_state *state) +shader_packing_or_es3_or_gpu_shader5(const _mesa_glsl_parse_state *state) { return state->ARB_shading_language_packing_enable || + state->ARB_gpu_shader5_enable || state->is_version(400, 300); } @@ -213,6 +218,21 @@ gpu_shader5(const _mesa_glsl_parse_state *state) return state->is_version(400, 0) || state->ARB_gpu_shader5_enable; } +static bool +shader_packing_or_gpu_shader5(const _mesa_glsl_parse_state *state) +{ + return state->ARB_shading_language_packing_enable || + gpu_shader5(state); +} + +static bool +fs_gpu_shader5(const _mesa_glsl_parse_state *state) +{ + return state->stage == MESA_SHADER_FRAGMENT && + (state->is_version(400, 0) || state->ARB_gpu_shader5_enable); +} + + static bool texture_array_lod(const _mesa_glsl_parse_state *state) { @@ -223,7 +243,7 @@ texture_array_lod(const _mesa_glsl_parse_state *state) static bool fs_texture_array(const _mesa_glsl_parse_state *state) { - return state->target == fragment_shader && + return state->stage == MESA_SHADER_FRAGMENT && state->EXT_texture_array_enable; } @@ -243,7 +263,7 @@ texture_multisample(const _mesa_glsl_parse_state *state) static bool fs_texture_cube_map_array(const _mesa_glsl_parse_state *state) { - return state->target == fragment_shader && + return state->stage == MESA_SHADER_FRAGMENT && (state->is_version(400, 0) || state->ARB_texture_cube_map_array_enable); } @@ -255,19 +275,54 @@ texture_cube_map_array(const _mesa_glsl_parse_state *state) state->ARB_texture_cube_map_array_enable; } +static bool +texture_query_levels(const _mesa_glsl_parse_state *state) +{ + return state->is_version(430, 0) || + state->ARB_texture_query_levels_enable; +} + static bool texture_query_lod(const _mesa_glsl_parse_state *state) { - return state->target == fragment_shader && + return state->stage == MESA_SHADER_FRAGMENT && state->ARB_texture_query_lod_enable; } +static bool +texture_gather(const _mesa_glsl_parse_state *state) +{ + return state->is_version(400, 0) || + state->ARB_texture_gather_enable || + state->ARB_gpu_shader5_enable; +} + +/* Only ARB_texture_gather but not GLSL 4.0 or ARB_gpu_shader5. + * used for relaxation of const offset requirements. + */ +static bool +texture_gather_only(const _mesa_glsl_parse_state *state) +{ + return !state->is_version(400, 0) && + !state->ARB_gpu_shader5_enable && + state->ARB_texture_gather_enable; +} + /* Desktop GL or OES_standard_derivatives + fragment shader only */ static bool fs_oes_derivatives(const _mesa_glsl_parse_state *state) { - return state->target == fragment_shader && - (!state->es_shader || state->OES_standard_derivatives_enable); + return state->stage == MESA_SHADER_FRAGMENT && + (state->is_version(110, 300) || + state->OES_standard_derivatives_enable); +} + +static bool +fs_derivative_control(const _mesa_glsl_parse_state *state) +{ + return state->stage == MESA_SHADER_FRAGMENT && + (state->is_version(450, 0) || + state->ARB_derivative_control_enable); } static bool @@ -291,7 +346,7 @@ tex3d(const _mesa_glsl_parse_state *state) static bool fs_tex3d(const _mesa_glsl_parse_state *state) { - return state->target == fragment_shader && + return state->stage == MESA_SHADER_FRAGMENT && (!state->es_shader || state->OES_texture_3D_enable); } @@ -300,6 +355,32 @@ tex3d_lod(const _mesa_glsl_parse_state *state) { return tex3d(state) && lod_exists_in_stage(state); } + +static bool +shader_atomic_counters(const _mesa_glsl_parse_state *state) +{ + return state->ARB_shader_atomic_counters_enable; +} + +static bool +shader_trinary_minmax(const _mesa_glsl_parse_state *state) +{ + return state->AMD_shader_trinary_minmax_enable; +} + +static bool +shader_image_load_store(const _mesa_glsl_parse_state *state) +{ + return (state->is_version(420, 0) || + state->ARB_shader_image_load_store_enable); +} + +static bool +gs_streams(const _mesa_glsl_parse_state *state) +{ + return gpu_shader5(state) && gs_only(state); +} + /** @} */ /******************************************************************************/ @@ -323,8 +404,6 @@ public: ir_function_signature *find(_mesa_glsl_parse_state *state, const char *name, exec_list *actual_parameters); -private: - void *mem_ctx; /** * A shader to hold all the built-in signatures; created by this module. * @@ -334,11 +413,15 @@ private: */ gl_shader *shader; +private: + void *mem_ctx; + /** Global variables used by built-in functions. */ ir_variable *gl_ModelViewProjectionMatrix; ir_variable *gl_Vertex; void create_shader(); + void create_intrinsics(); void create_builtins(); /** @@ -359,10 +442,46 @@ private: ir_swizzle *matrix_elt(ir_variable *var, int col, int row); ir_expression *asin_expr(ir_variable *x); + void do_atan(ir_factory &body, const glsl_type *type, ir_variable *res, operand y_over_x); + + /** + * Call function \param f with parameters specified as the linked + * list \param params of \c ir_variable objects. \param ret should + * point to the ir_variable that will hold the function return + * value, or be \c NULL if the function has void return type. + */ + ir_call *call(ir_function *f, ir_variable *ret, exec_list params); /** Create a new function and add the given signatures. */ void add_function(const char *name, ...); + enum image_function_flags { + IMAGE_FUNCTION_EMIT_STUB = (1 << 0), + IMAGE_FUNCTION_RETURNS_VOID = (1 << 1), + IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE = (1 << 2), + IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE = (1 << 3), + IMAGE_FUNCTION_READ_ONLY = (1 << 4), + IMAGE_FUNCTION_WRITE_ONLY = (1 << 5) + }; + + /** + * Create a new image built-in function for all known image types. + * \p flags is a bitfield of \c image_function_flags flags. + */ + void add_image_function(const char *name, + const char *intrinsic_name, + unsigned num_arguments, + unsigned flags); + + /** + * Create new functions for all known image built-ins and types. + * If \p glsl is \c true, use the GLSL built-in names and emit code + * to call into the actual compiler intrinsic. If \p glsl is + * false, emit a function prototype with no body for each image + * intrinsic name. + */ + void add_image_functions(bool glsl); + ir_function_signature *new_sig(const glsl_type *return_type, builtin_available_predicate avail, int num_params, ...); @@ -479,6 +598,9 @@ private: /** Flags to _texture() */ #define TEX_PROJECT 1 #define TEX_OFFSET 2 +#define TEX_COMPONENT 4 +#define TEX_OFFSET_NONCONST 8 +#define TEX_OFFSET_ARRAY 16 ir_function_signature *_texture(ir_texture_opcode opcode, builtin_available_predicate avail, @@ -495,11 +617,22 @@ private: B0(EmitVertex) B0(EndPrimitive) + ir_function_signature *_EmitStreamVertex(builtin_available_predicate avail, + const glsl_type *stream_type); + ir_function_signature *_EndStreamPrimitive(builtin_available_predicate avail, + const glsl_type *stream_type); B2(textureQueryLod); + B1(textureQueryLevels); B1(dFdx); B1(dFdy); B1(fwidth); + B1(dFdxCoarse); + B1(dFdyCoarse); + B1(fwidthCoarse); + B1(dFdxFine); + B1(dFdyFine); + B1(fwidthFine); B1(noise1); B1(noise2); B1(noise3); @@ -514,6 +647,35 @@ private: B1(fma) B2(ldexp) B2(frexp) + B1(uaddCarry) + B1(usubBorrow) + B1(mulExtended) + B1(interpolateAtCentroid) + B1(interpolateAtOffset) + B1(interpolateAtSample) + + ir_function_signature *_atomic_intrinsic(builtin_available_predicate avail); + ir_function_signature *_atomic_op(const char *intrinsic, + builtin_available_predicate avail); + + B1(min3) + B1(max3) + B1(mid3) + + ir_function_signature *_image_prototype(const glsl_type *image_type, + const char *intrinsic_name, + unsigned num_arguments, + unsigned flags); + ir_function_signature *_image(const glsl_type *image_type, + const char *intrinsic_name, + unsigned num_arguments, + unsigned flags); + + ir_function_signature *_memory_barrier_intrinsic( + builtin_available_predicate avail); + ir_function_signature *_memory_barrier( + builtin_available_predicate avail); + #undef B0 #undef B1 #undef B2 @@ -553,14 +715,14 @@ builtin_builder::find(_mesa_glsl_parse_state *state, * that the "no matching signature" error will list potential candidates * from the available built-ins. */ - state->builtins_to_link[0] = shader; - state->num_builtins_to_link = 1; + state->uses_builtin_functions = true; ir_function *f = shader->symbols->get_function(name); if (f == NULL) return NULL; - ir_function_signature *sig = f->matching_signature(state, actual_parameters); + ir_function_signature *sig = + f->matching_signature(state, actual_parameters, true); if (sig == NULL) return NULL; @@ -576,6 +738,7 @@ builtin_builder::initialize() mem_ctx = ralloc_context(NULL); create_shader(); + create_intrinsics(); create_builtins(); } @@ -612,6 +775,30 @@ builtin_builder::create_shader() /** @} */ +/** + * Create ir_function and ir_function_signature objects for each + * intrinsic. + */ +void +builtin_builder::create_intrinsics() +{ + add_function("__intrinsic_atomic_read", + _atomic_intrinsic(shader_atomic_counters), + NULL); + add_function("__intrinsic_atomic_increment", + _atomic_intrinsic(shader_atomic_counters), + NULL); + add_function("__intrinsic_atomic_predecrement", + _atomic_intrinsic(shader_atomic_counters), + NULL); + + add_image_functions(false); + + add_function("__intrinsic_memory_barrier", + _memory_barrier_intrinsic(shader_image_load_store), + NULL); +} + /** * Create ir_function and ir_function_signature objects for each built-in. * @@ -853,16 +1040,16 @@ builtin_builder::create_builtins() _uintBitsToFloat(glsl_type::uvec4_type), NULL); - add_function("packUnorm2x16", _packUnorm2x16(shader_packing_or_es3), NULL); - add_function("packSnorm2x16", _packSnorm2x16(shader_packing_or_es3), NULL); - add_function("packUnorm4x8", _packUnorm4x8(shader_packing), NULL); - add_function("packSnorm4x8", _packSnorm4x8(shader_packing), NULL); - add_function("unpackUnorm2x16", _unpackUnorm2x16(shader_packing_or_es3), NULL); - add_function("unpackSnorm2x16", _unpackSnorm2x16(shader_packing_or_es3), NULL); - add_function("unpackUnorm4x8", _unpackUnorm4x8(shader_packing), NULL); - add_function("unpackSnorm4x8", _unpackSnorm4x8(shader_packing), NULL); - add_function("packHalf2x16", _packHalf2x16(shader_packing_or_es3), NULL); - add_function("unpackHalf2x16", _unpackHalf2x16(shader_packing_or_es3), NULL); + add_function("packUnorm2x16", _packUnorm2x16(shader_packing_or_es3_or_gpu_shader5), NULL); + add_function("packSnorm2x16", _packSnorm2x16(shader_packing_or_es3), NULL); + add_function("packUnorm4x8", _packUnorm4x8(shader_packing_or_gpu_shader5), NULL); + add_function("packSnorm4x8", _packSnorm4x8(shader_packing_or_gpu_shader5), NULL); + add_function("unpackUnorm2x16", _unpackUnorm2x16(shader_packing_or_es3_or_gpu_shader5), NULL); + add_function("unpackSnorm2x16", _unpackSnorm2x16(shader_packing_or_es3), NULL); + add_function("unpackUnorm4x8", _unpackUnorm4x8(shader_packing_or_gpu_shader5), NULL); + add_function("unpackSnorm4x8", _unpackSnorm4x8(shader_packing_or_gpu_shader5), NULL); + add_function("packHalf2x16", _packHalf2x16(shader_packing_or_es3), NULL); + add_function("unpackHalf2x16", _unpackHalf2x16(shader_packing_or_es3), NULL); F(length) F(distance) @@ -1558,6 +1745,14 @@ builtin_builder::create_builtins() add_function("EmitVertex", _EmitVertex(), NULL); add_function("EndPrimitive", _EndPrimitive(), NULL); + add_function("EmitStreamVertex", + _EmitStreamVertex(gs_streams, glsl_type::uint_type), + _EmitStreamVertex(gs_streams, glsl_type::int_type), + NULL); + add_function("EndStreamPrimitive", + _EndStreamPrimitive(gs_streams, glsl_type::uint_type), + _EndStreamPrimitive(gs_streams, glsl_type::int_type), + NULL); add_function("textureQueryLOD", _textureQueryLod(glsl_type::sampler1D_type, glsl_type::float_type), @@ -1596,6 +1791,39 @@ builtin_builder::create_builtins() _textureQueryLod(glsl_type::samplerCubeArrayShadow_type, glsl_type::vec3_type), NULL); + add_function("textureQueryLevels", + _textureQueryLevels(glsl_type::sampler1D_type), + _textureQueryLevels(glsl_type::sampler2D_type), + _textureQueryLevels(glsl_type::sampler3D_type), + _textureQueryLevels(glsl_type::samplerCube_type), + _textureQueryLevels(glsl_type::sampler1DArray_type), + _textureQueryLevels(glsl_type::sampler2DArray_type), + _textureQueryLevels(glsl_type::samplerCubeArray_type), + _textureQueryLevels(glsl_type::sampler1DShadow_type), + _textureQueryLevels(glsl_type::sampler2DShadow_type), + _textureQueryLevels(glsl_type::samplerCubeShadow_type), + _textureQueryLevels(glsl_type::sampler1DArrayShadow_type), + _textureQueryLevels(glsl_type::sampler2DArrayShadow_type), + _textureQueryLevels(glsl_type::samplerCubeArrayShadow_type), + + _textureQueryLevels(glsl_type::isampler1D_type), + _textureQueryLevels(glsl_type::isampler2D_type), + _textureQueryLevels(glsl_type::isampler3D_type), + _textureQueryLevels(glsl_type::isamplerCube_type), + _textureQueryLevels(glsl_type::isampler1DArray_type), + _textureQueryLevels(glsl_type::isampler2DArray_type), + _textureQueryLevels(glsl_type::isamplerCubeArray_type), + + _textureQueryLevels(glsl_type::usampler1D_type), + _textureQueryLevels(glsl_type::usampler2D_type), + _textureQueryLevels(glsl_type::usampler3D_type), + _textureQueryLevels(glsl_type::usamplerCube_type), + _textureQueryLevels(glsl_type::usampler1DArray_type), + _textureQueryLevels(glsl_type::usampler2DArray_type), + _textureQueryLevels(glsl_type::usamplerCubeArray_type), + + NULL); + add_function("texture1D", _texture(ir_tex, v110, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type), _texture(ir_txb, v110_fs_only, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type), @@ -1655,8 +1883,8 @@ builtin_builder::create_builtins() NULL); add_function("texture2DProjLod", - _texture(ir_txl, v110_lod, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT), - _texture(ir_txl, v110_lod, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txl, lod_exists_in_stage, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_txl, lod_exists_in_stage, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT), NULL); add_function("texture3D", @@ -1683,7 +1911,7 @@ builtin_builder::create_builtins() NULL); add_function("textureCubeLod", - _texture(ir_txl, v110_lod, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type), + _texture(ir_txl, lod_exists_in_stage, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type), NULL); add_function("texture2DRect", @@ -1816,9 +2044,131 @@ builtin_builder::create_builtins() _texture(ir_txd, shader_texture_lod_and_rect, glsl_type::vec4_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec4_type, TEX_PROJECT), NULL); + add_function("textureGather", + _texture(ir_tg4, texture_gather, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type), + _texture(ir_tg4, texture_gather, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type), + _texture(ir_tg4, texture_gather, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type), + + _texture(ir_tg4, texture_gather, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type), + _texture(ir_tg4, texture_gather, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type), + _texture(ir_tg4, texture_gather, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type), + + _texture(ir_tg4, texture_gather, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type), + _texture(ir_tg4, texture_gather, glsl_type::ivec4_type, glsl_type::isamplerCube_type, glsl_type::vec3_type), + _texture(ir_tg4, texture_gather, glsl_type::uvec4_type, glsl_type::usamplerCube_type, glsl_type::vec3_type), + + _texture(ir_tg4, texture_gather, glsl_type::vec4_type, glsl_type::samplerCubeArray_type, glsl_type::vec4_type), + _texture(ir_tg4, texture_gather, glsl_type::ivec4_type, glsl_type::isamplerCubeArray_type, glsl_type::vec4_type), + _texture(ir_tg4, texture_gather, glsl_type::uvec4_type, glsl_type::usamplerCubeArray_type, glsl_type::vec4_type), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_COMPONENT), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type, TEX_COMPONENT), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_COMPONENT), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isamplerCube_type, glsl_type::vec3_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usamplerCube_type, glsl_type::vec3_type, TEX_COMPONENT), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::samplerCubeArray_type, glsl_type::vec4_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isamplerCubeArray_type, glsl_type::vec4_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usamplerCubeArray_type, glsl_type::vec4_type, TEX_COMPONENT), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec2_type), + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec3_type), + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::samplerCubeShadow_type, glsl_type::vec3_type), + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::samplerCubeArrayShadow_type, glsl_type::vec4_type), + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec2_type), + NULL); + + add_function("textureGatherOffset", + _texture(ir_tg4, texture_gather_only, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_tg4, texture_gather_only, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_tg4, texture_gather_only, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + + _texture(ir_tg4, texture_gather_only, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_tg4, texture_gather_only, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_tg4, texture_gather_only, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST), + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), + NULL); + + add_function("textureGatherOffsets", + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY), + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY), + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY), + NULL); + F(dFdx) F(dFdy) F(fwidth) + F(dFdxCoarse) + F(dFdyCoarse) + F(fwidthCoarse) + F(dFdxFine) + F(dFdyFine) + F(fwidthFine) F(noise1) F(noise2) F(noise3) @@ -1845,6 +2195,119 @@ builtin_builder::create_builtins() _frexp(glsl_type::vec3_type, glsl_type::ivec3_type), _frexp(glsl_type::vec4_type, glsl_type::ivec4_type), NULL); + add_function("uaddCarry", + _uaddCarry(glsl_type::uint_type), + _uaddCarry(glsl_type::uvec2_type), + _uaddCarry(glsl_type::uvec3_type), + _uaddCarry(glsl_type::uvec4_type), + NULL); + add_function("usubBorrow", + _usubBorrow(glsl_type::uint_type), + _usubBorrow(glsl_type::uvec2_type), + _usubBorrow(glsl_type::uvec3_type), + _usubBorrow(glsl_type::uvec4_type), + NULL); + add_function("imulExtended", + _mulExtended(glsl_type::int_type), + _mulExtended(glsl_type::ivec2_type), + _mulExtended(glsl_type::ivec3_type), + _mulExtended(glsl_type::ivec4_type), + NULL); + add_function("umulExtended", + _mulExtended(glsl_type::uint_type), + _mulExtended(glsl_type::uvec2_type), + _mulExtended(glsl_type::uvec3_type), + _mulExtended(glsl_type::uvec4_type), + NULL); + add_function("interpolateAtCentroid", + _interpolateAtCentroid(glsl_type::float_type), + _interpolateAtCentroid(glsl_type::vec2_type), + _interpolateAtCentroid(glsl_type::vec3_type), + _interpolateAtCentroid(glsl_type::vec4_type), + NULL); + add_function("interpolateAtOffset", + _interpolateAtOffset(glsl_type::float_type), + _interpolateAtOffset(glsl_type::vec2_type), + _interpolateAtOffset(glsl_type::vec3_type), + _interpolateAtOffset(glsl_type::vec4_type), + NULL); + add_function("interpolateAtSample", + _interpolateAtSample(glsl_type::float_type), + _interpolateAtSample(glsl_type::vec2_type), + _interpolateAtSample(glsl_type::vec3_type), + _interpolateAtSample(glsl_type::vec4_type), + NULL); + + add_function("atomicCounter", + _atomic_op("__intrinsic_atomic_read", + shader_atomic_counters), + NULL); + add_function("atomicCounterIncrement", + _atomic_op("__intrinsic_atomic_increment", + shader_atomic_counters), + NULL); + add_function("atomicCounterDecrement", + _atomic_op("__intrinsic_atomic_predecrement", + shader_atomic_counters), + NULL); + + add_function("min3", + _min3(glsl_type::float_type), + _min3(glsl_type::vec2_type), + _min3(glsl_type::vec3_type), + _min3(glsl_type::vec4_type), + + _min3(glsl_type::int_type), + _min3(glsl_type::ivec2_type), + _min3(glsl_type::ivec3_type), + _min3(glsl_type::ivec4_type), + + _min3(glsl_type::uint_type), + _min3(glsl_type::uvec2_type), + _min3(glsl_type::uvec3_type), + _min3(glsl_type::uvec4_type), + NULL); + + add_function("max3", + _max3(glsl_type::float_type), + _max3(glsl_type::vec2_type), + _max3(glsl_type::vec3_type), + _max3(glsl_type::vec4_type), + + _max3(glsl_type::int_type), + _max3(glsl_type::ivec2_type), + _max3(glsl_type::ivec3_type), + _max3(glsl_type::ivec4_type), + + _max3(glsl_type::uint_type), + _max3(glsl_type::uvec2_type), + _max3(glsl_type::uvec3_type), + _max3(glsl_type::uvec4_type), + NULL); + + add_function("mid3", + _mid3(glsl_type::float_type), + _mid3(glsl_type::vec2_type), + _mid3(glsl_type::vec3_type), + _mid3(glsl_type::vec4_type), + + _mid3(glsl_type::int_type), + _mid3(glsl_type::ivec2_type), + _mid3(glsl_type::ivec3_type), + _mid3(glsl_type::ivec4_type), + + _mid3(glsl_type::uint_type), + _mid3(glsl_type::uvec2_type), + _mid3(glsl_type::uvec3_type), + _mid3(glsl_type::uvec4_type), + NULL); + + add_image_functions(true); + + add_function("memoryBarrier", + _memory_barrier(shader_image_load_store), + NULL); + #undef F #undef FI #undef FIU @@ -1865,8 +2328,6 @@ builtin_builder::add_function(const char *name, ...) if (sig == NULL) break; - sig->is_defined = true; - if (false) { exec_list stuff; stuff.push_tail(sig); @@ -1880,6 +2341,104 @@ builtin_builder::add_function(const char *name, ...) shader->symbols->add_function(f); } +void +builtin_builder::add_image_function(const char *name, + const char *intrinsic_name, + unsigned num_arguments, + unsigned flags) +{ + static const glsl_type *const types[] = { + glsl_type::image1D_type, + glsl_type::image2D_type, + glsl_type::image3D_type, + glsl_type::image2DRect_type, + glsl_type::imageCube_type, + glsl_type::imageBuffer_type, + glsl_type::image1DArray_type, + glsl_type::image2DArray_type, + glsl_type::imageCubeArray_type, + glsl_type::image2DMS_type, + glsl_type::image2DMSArray_type, + glsl_type::iimage1D_type, + glsl_type::iimage2D_type, + glsl_type::iimage3D_type, + glsl_type::iimage2DRect_type, + glsl_type::iimageCube_type, + glsl_type::iimageBuffer_type, + glsl_type::iimage1DArray_type, + glsl_type::iimage2DArray_type, + glsl_type::iimageCubeArray_type, + glsl_type::iimage2DMS_type, + glsl_type::iimage2DMSArray_type, + glsl_type::uimage1D_type, + glsl_type::uimage2D_type, + glsl_type::uimage3D_type, + glsl_type::uimage2DRect_type, + glsl_type::uimageCube_type, + glsl_type::uimageBuffer_type, + glsl_type::uimage1DArray_type, + glsl_type::uimage2DArray_type, + glsl_type::uimageCubeArray_type, + glsl_type::uimage2DMS_type, + glsl_type::uimage2DMSArray_type + }; + ir_function *f = new(mem_ctx) ir_function(name); + + for (unsigned i = 0; i < Elements(types); ++i) { + if (types[i]->sampler_type != GLSL_TYPE_FLOAT || + (flags & IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE)) + f->add_signature(_image(types[i], intrinsic_name, + num_arguments, flags)); + } + + shader->symbols->add_function(f); +} + +void +builtin_builder::add_image_functions(bool glsl) +{ + const unsigned flags = (glsl ? IMAGE_FUNCTION_EMIT_STUB : 0); + + add_image_function(glsl ? "imageLoad" : "__intrinsic_image_load", + "__intrinsic_image_load", 0, + (flags | IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE | + IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE | + IMAGE_FUNCTION_READ_ONLY)); + + add_image_function(glsl ? "imageStore" : "__intrinsic_image_store", + "__intrinsic_image_store", 1, + (flags | IMAGE_FUNCTION_RETURNS_VOID | + IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE | + IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE | + IMAGE_FUNCTION_WRITE_ONLY)); + + add_image_function(glsl ? "imageAtomicAdd" : "__intrinsic_image_atomic_add", + "__intrinsic_image_atomic_add", 1, flags); + + add_image_function(glsl ? "imageAtomicMin" : "__intrinsic_image_atomic_min", + "__intrinsic_image_atomic_min", 1, flags); + + add_image_function(glsl ? "imageAtomicMax" : "__intrinsic_image_atomic_max", + "__intrinsic_image_atomic_max", 1, flags); + + add_image_function(glsl ? "imageAtomicAnd" : "__intrinsic_image_atomic_and", + "__intrinsic_image_atomic_and", 1, flags); + + add_image_function(glsl ? "imageAtomicOr" : "__intrinsic_image_atomic_or", + "__intrinsic_image_atomic_or", 1, flags); + + add_image_function(glsl ? "imageAtomicXor" : "__intrinsic_image_atomic_xor", + "__intrinsic_image_atomic_xor", 1, flags); + + add_image_function((glsl ? "imageAtomicExchange" : + "__intrinsic_image_atomic_exchange"), + "__intrinsic_image_atomic_exchange", 1, flags); + + add_image_function((glsl ? "imageAtomicCompSwap" : + "__intrinsic_image_atomic_comp_swap"), + "__intrinsic_image_atomic_comp_swap", 2, flags); +} + ir_variable * builtin_builder::in_var(const glsl_type *type, const char *name) { @@ -1964,7 +2523,13 @@ builtin_builder::new_sig(const glsl_type *return_type, #define MAKE_SIG(return_type, avail, ...) \ ir_function_signature *sig = \ new_sig(return_type, avail, __VA_ARGS__); \ - ir_factory body(&sig->body, mem_ctx); + ir_factory body(&sig->body, mem_ctx); \ + sig->is_defined = true; + +#define MAKE_INTRINSIC(return_type, avail, ...) \ + ir_function_signature *sig = \ + new_sig(return_type, avail, __VA_ARGS__); \ + sig->is_intrinsic = true; ir_function_signature * builtin_builder::unop(builtin_available_predicate avail, @@ -2046,16 +2611,35 @@ ir_expression * builtin_builder::asin_expr(ir_variable *x) { return mul(sign(x), - sub(imm(1.5707964f), + sub(imm(M_PI_2f), mul(sqrt(sub(imm(1.0f), abs(x))), - add(imm(1.5707964f), + add(imm(M_PI_2f), mul(abs(x), - add(imm(-0.21460183f), + add(imm(M_PI_4f - 1.0f), mul(abs(x), add(imm(0.086566724f), mul(abs(x), imm(-0.03102955f)))))))))); } +ir_call * +builtin_builder::call(ir_function *f, ir_variable *ret, exec_list params) +{ + exec_list actual_params; + + foreach_in_list(ir_variable, var, ¶ms) { + actual_params.push_tail(var_ref(var)); + } + + ir_function_signature *sig = + f->exact_matching_signature(NULL, &actual_params); + if (!sig) + return NULL; + + ir_dereference_variable *deref = + (sig->return_type->is_void() ? NULL : var_ref(ret)); + + return new(mem_ctx) ir_call(sig, deref, &actual_params); +} ir_function_signature * builtin_builder::_asin(const glsl_type *type) @@ -2074,7 +2658,7 @@ builtin_builder::_acos(const glsl_type *type) ir_variable *x = in_var(type, "x"); MAKE_SIG(type, always_available, 1, x); - body.emit(ret(sub(imm(1.5707964f), asin_expr(x)))); + body.emit(ret(sub(imm(M_PI_2f), asin_expr(x)))); return sig; } @@ -2101,23 +2685,19 @@ builtin_builder::_atan2(const glsl_type *type) ir_factory outer_then(&outer_if->then_instructions, mem_ctx); /* Then...call atan(y/x) */ - ir_variable *y_over_x = outer_then.make_temp(glsl_type::float_type, "y_over_x"); - outer_then.emit(assign(y_over_x, div(y, x))); - outer_then.emit(assign(r, mul(y_over_x, rsq(add(mul(y_over_x, y_over_x), - imm(1.0f)))))); - outer_then.emit(assign(r, asin_expr(r))); + do_atan(body, glsl_type::float_type, r, div(y, x)); /* ...and fix it up: */ ir_if *inner_if = new(mem_ctx) ir_if(less(x, imm(0.0f))); inner_if->then_instructions.push_tail( if_tree(gequal(y, imm(0.0f)), - assign(r, add(r, imm(3.141593f))), - assign(r, sub(r, imm(3.141593f))))); + assign(r, add(r, imm(M_PIf))), + assign(r, sub(r, imm(M_PIf))))); outer_then.emit(inner_if); /* Else... */ outer_if->else_instructions.push_tail( - assign(r, mul(sign(y), imm(1.5707965f)))); + assign(r, mul(sign(y), imm(M_PI_2f)))); body.emit(outer_if); @@ -2128,17 +2708,65 @@ builtin_builder::_atan2(const glsl_type *type) return sig; } +void +builtin_builder::do_atan(ir_factory &body, const glsl_type *type, ir_variable *res, operand y_over_x) +{ + /* + * range-reduction, first step: + * + * / y_over_x if |y_over_x| <= 1.0; + * x = < + * \ 1.0 / y_over_x otherwise + */ + ir_variable *x = body.make_temp(type, "atan_x"); + body.emit(assign(x, div(min2(abs(y_over_x), + imm(1.0f)), + max2(abs(y_over_x), + imm(1.0f))))); + + /* + * approximate atan by evaluating polynomial: + * + * x * 0.9999793128310355 - x^3 * 0.3326756418091246 + + * x^5 * 0.1938924977115610 - x^7 * 0.1173503194786851 + + * x^9 * 0.0536813784310406 - x^11 * 0.0121323213173444 + */ + ir_variable *tmp = body.make_temp(type, "atan_tmp"); + body.emit(assign(tmp, mul(x, x))); + body.emit(assign(tmp, mul(add(mul(sub(mul(add(mul(sub(mul(add(mul(imm(-0.0121323213173444f), + tmp), + imm(0.0536813784310406f)), + tmp), + imm(0.1173503194786851f)), + tmp), + imm(0.1938924977115610f)), + tmp), + imm(0.3326756418091246f)), + tmp), + imm(0.9999793128310355f)), + x))); + + /* range-reduction fixup */ + body.emit(assign(tmp, add(tmp, + mul(b2f(greater(abs(y_over_x), + imm(1.0f, type->components()))), + add(mul(tmp, + imm(-2.0f)), + imm(M_PI_2f)))))); + + /* sign fixup */ + body.emit(assign(res, mul(tmp, sign(y_over_x)))); +} + ir_function_signature * builtin_builder::_atan(const glsl_type *type) { ir_variable *y_over_x = in_var(type, "y_over_x"); MAKE_SIG(type, always_available, 1, y_over_x); - ir_variable *t = body.make_temp(type, "t"); - body.emit(assign(t, mul(y_over_x, rsq(add(mul(y_over_x, y_over_x), - imm(1.0f)))))); - - body.emit(ret(asin_expr(t))); + ir_variable *tmp = body.make_temp(type, "tmp"); + do_atan(body, type, tmp, y_over_x); + body.emit(ret(tmp)); return sig; } @@ -2537,7 +3165,7 @@ builtin_builder::_length(const glsl_type *type) ir_variable *x = in_var(type, "x"); MAKE_SIG(glsl_type::float_type, always_available, 1, x); - body.emit(ret(sqrt(dotlike(x, x)))); + body.emit(ret(sqrt(dot(x, x)))); return sig; } @@ -2627,7 +3255,7 @@ builtin_builder::_faceforward(const glsl_type *type) ir_variable *Nref = in_var(type, "Nref"); MAKE_SIG(type, always_available, 3, N, I, Nref); - body.emit(if_tree(less(dotlike(Nref, I), imm(0.0f)), + body.emit(if_tree(less(dot(Nref, I), imm(0.0f)), ret(N), ret(neg(N)))); return sig; @@ -2641,7 +3269,7 @@ builtin_builder::_reflect(const glsl_type *type) MAKE_SIG(type, always_available, 2, I, N); /* I - 2 * dot(N, I) * N */ - body.emit(ret(sub(I, mul(imm(2.0f), mul(dotlike(N, I), N))))); + body.emit(ret(sub(I, mul(imm(2.0f), mul(dot(N, I), N))))); return sig; } @@ -2655,7 +3283,7 @@ builtin_builder::_refract(const glsl_type *type) MAKE_SIG(type, always_available, 3, I, N, eta); ir_variable *n_dot_i = body.make_temp(glsl_type::float_type, "n_dot_i"); - body.emit(assign(n_dot_i, dotlike(N, I))); + body.emit(assign(n_dot_i, dot(N, I))); /* From the GLSL 1.10 specification: * k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I)) @@ -3203,7 +3831,7 @@ builtin_builder::_texture(ir_texture_opcode opcode, ir_texture *tex = new(mem_ctx) ir_texture(opcode); tex->set_sampler(var_ref(s), return_type); - const int coord_size = sampler_type->sampler_coordinate_components(); + const int coord_size = sampler_type->coordinate_components(); if (coord_size == coord_type->vector_elements) { tex->coordinate = var_ref(P); @@ -3218,11 +3846,21 @@ builtin_builder::_texture(ir_texture_opcode opcode, if (flags & TEX_PROJECT) tex->projector = swizzle(P, coord_type->vector_elements - 1, 1); - /* The shadow comparitor is normally in the Z component, but a few types - * have sufficiently large coordinates that it's in W. - */ - if (sampler_type->sampler_shadow) - tex->shadow_comparitor = swizzle(P, MAX2(coord_size, SWIZZLE_Z), 1); + if (sampler_type->sampler_shadow) { + if (opcode == ir_tg4) { + /* gather has refz as a separate parameter, immediately after the + * coordinate + */ + ir_variable *refz = in_var(glsl_type::float_type, "refz"); + sig->parameters.push_tail(refz); + tex->shadow_comparitor = var_ref(refz); + } else { + /* The shadow comparitor is normally in the Z component, but a few types + * have sufficiently large coordinates that it's in W. + */ + tex->shadow_comparitor = swizzle(P, MAX2(coord_size, SWIZZLE_Z), 1); + } + } if (opcode == ir_txl) { ir_variable *lod = in_var(glsl_type::float_type, "lod"); @@ -3238,14 +3876,35 @@ builtin_builder::_texture(ir_texture_opcode opcode, tex->lod_info.grad.dPdy = var_ref(dPdy); } - if (flags & TEX_OFFSET) { + if (flags & (TEX_OFFSET | TEX_OFFSET_NONCONST)) { int offset_size = coord_size - (sampler_type->sampler_array ? 1 : 0); ir_variable *offset = - new(mem_ctx) ir_variable(glsl_type::ivec(offset_size), "offset", ir_var_const_in); + new(mem_ctx) ir_variable(glsl_type::ivec(offset_size), "offset", + (flags & TEX_OFFSET) ? ir_var_const_in : ir_var_function_in); sig->parameters.push_tail(offset); tex->offset = var_ref(offset); } + if (flags & TEX_OFFSET_ARRAY) { + ir_variable *offsets = + new(mem_ctx) ir_variable(glsl_type::get_array_instance(glsl_type::ivec2_type, 4), + "offsets", ir_var_const_in); + sig->parameters.push_tail(offsets); + tex->offset = var_ref(offsets); + } + + if (opcode == ir_tg4) { + if (flags & TEX_COMPONENT) { + ir_variable *component = + new(mem_ctx) ir_variable(glsl_type::int_type, "comp", ir_var_const_in); + sig->parameters.push_tail(component); + tex->lod_info.component = var_ref(component); + } + else { + tex->lod_info.component = imm(0); + } + } + /* The "bias" parameter comes /after/ the "offset" parameter, which is * inconsistent with both textureLodOffset and textureGradOffset. */ @@ -3325,7 +3984,28 @@ builtin_builder::_EmitVertex() { MAKE_SIG(glsl_type::void_type, gs_only, 0); - body.emit(new(mem_ctx) ir_emit_vertex()); + ir_rvalue *stream = new(mem_ctx) ir_constant(0, 1); + body.emit(new(mem_ctx) ir_emit_vertex(stream)); + + return sig; +} + +ir_function_signature * +builtin_builder::_EmitStreamVertex(builtin_available_predicate avail, + const glsl_type *stream_type) +{ + /* Section 8.12 (Geometry Shader Functions) of the GLSL 4.0 spec says: + * + * "Emit the current values of output variables to the current output + * primitive on stream stream. The argument to stream must be a constant + * integral expression." + */ + ir_variable *stream = + new(mem_ctx) ir_variable(stream_type, "stream", ir_var_const_in); + + MAKE_SIG(glsl_type::void_type, avail, 1, stream); + + body.emit(new(mem_ctx) ir_emit_vertex(var_ref(stream))); return sig; } @@ -3335,7 +4015,28 @@ builtin_builder::_EndPrimitive() { MAKE_SIG(glsl_type::void_type, gs_only, 0); - body.emit(new(mem_ctx) ir_end_primitive()); + ir_rvalue *stream = new(mem_ctx) ir_constant(0, 1); + body.emit(new(mem_ctx) ir_end_primitive(stream)); + + return sig; +} + +ir_function_signature * +builtin_builder::_EndStreamPrimitive(builtin_available_predicate avail, + const glsl_type *stream_type) +{ + /* Section 8.12 (Geometry Shader Functions) of the GLSL 4.0 spec says: + * + * "Completes the current output primitive on stream stream and starts + * a new one. The argument to stream must be a constant integral + * expression." + */ + ir_variable *stream = + new(mem_ctx) ir_variable(stream_type, "stream", ir_var_const_in); + + MAKE_SIG(glsl_type::void_type, avail, 1, stream); + + body.emit(new(mem_ctx) ir_end_primitive(var_ref(stream))); return sig; } @@ -3358,8 +4059,27 @@ builtin_builder::_textureQueryLod(const glsl_type *sampler_type, return sig; } +ir_function_signature * +builtin_builder::_textureQueryLevels(const glsl_type *sampler_type) +{ + ir_variable *s = in_var(sampler_type, "sampler"); + const glsl_type *return_type = glsl_type::int_type; + MAKE_SIG(return_type, texture_query_levels, 1, s); + + ir_texture *tex = new(mem_ctx) ir_texture(ir_query_levels); + tex->set_sampler(var_ref(s), return_type); + + body.emit(ret(tex)); + + return sig; +} + UNOP(dFdx, ir_unop_dFdx, fs_oes_derivatives) +UNOP(dFdxCoarse, ir_unop_dFdx_coarse, fs_derivative_control) +UNOP(dFdxFine, ir_unop_dFdx_fine, fs_derivative_control) UNOP(dFdy, ir_unop_dFdy, fs_oes_derivatives) +UNOP(dFdyCoarse, ir_unop_dFdy_coarse, fs_derivative_control) +UNOP(dFdyFine, ir_unop_dFdy_fine, fs_derivative_control) ir_function_signature * builtin_builder::_fwidth(const glsl_type *type) @@ -3372,6 +4092,30 @@ builtin_builder::_fwidth(const glsl_type *type) return sig; } +ir_function_signature * +builtin_builder::_fwidthCoarse(const glsl_type *type) +{ + ir_variable *p = in_var(type, "p"); + MAKE_SIG(type, fs_derivative_control, 1, p); + + body.emit(ret(add(abs(expr(ir_unop_dFdx_coarse, p)), + abs(expr(ir_unop_dFdy_coarse, p))))); + + return sig; +} + +ir_function_signature * +builtin_builder::_fwidthFine(const glsl_type *type) +{ + ir_variable *p = in_var(type, "p"); + MAKE_SIG(type, fs_derivative_control, 1, p); + + body.emit(ret(add(abs(expr(ir_unop_dFdx_fine, p)), + abs(expr(ir_unop_dFdy_fine, p))))); + + return sig; +} + ir_function_signature * builtin_builder::_noise1(const glsl_type *type) { @@ -3533,7 +4277,7 @@ builtin_builder::_fma(const glsl_type *type) ir_variable *c = in_var(type, "c"); MAKE_SIG(type, gpu_shader5, 3, a, b, c); - body.emit(ret(fma(a, b, c))); + body.emit(ret(ir_builder::fma(a, b, c))); return sig; } @@ -3591,12 +4335,257 @@ builtin_builder::_frexp(const glsl_type *x_type, const glsl_type *exp_type) return sig; } + +ir_function_signature * +builtin_builder::_uaddCarry(const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + ir_variable *y = in_var(type, "y"); + ir_variable *carry = out_var(type, "carry"); + MAKE_SIG(type, gpu_shader5, 3, x, y, carry); + + body.emit(assign(carry, ir_builder::carry(x, y))); + body.emit(ret(add(x, y))); + + return sig; +} + +ir_function_signature * +builtin_builder::_usubBorrow(const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + ir_variable *y = in_var(type, "y"); + ir_variable *borrow = out_var(type, "borrow"); + MAKE_SIG(type, gpu_shader5, 3, x, y, borrow); + + body.emit(assign(borrow, ir_builder::borrow(x, y))); + body.emit(ret(sub(x, y))); + + return sig; +} + +/** + * For both imulExtended() and umulExtended() built-ins. + */ +ir_function_signature * +builtin_builder::_mulExtended(const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + ir_variable *y = in_var(type, "y"); + ir_variable *msb = out_var(type, "msb"); + ir_variable *lsb = out_var(type, "lsb"); + MAKE_SIG(glsl_type::void_type, gpu_shader5, 4, x, y, msb, lsb); + + body.emit(assign(msb, imul_high(x, y))); + body.emit(assign(lsb, mul(x, y))); + + return sig; +} + +ir_function_signature * +builtin_builder::_interpolateAtCentroid(const glsl_type *type) +{ + ir_variable *interpolant = in_var(type, "interpolant"); + interpolant->data.must_be_shader_input = 1; + MAKE_SIG(type, fs_gpu_shader5, 1, interpolant); + + body.emit(ret(interpolate_at_centroid(interpolant))); + + return sig; +} + +ir_function_signature * +builtin_builder::_interpolateAtOffset(const glsl_type *type) +{ + ir_variable *interpolant = in_var(type, "interpolant"); + interpolant->data.must_be_shader_input = 1; + ir_variable *offset = in_var(glsl_type::vec2_type, "offset"); + MAKE_SIG(type, fs_gpu_shader5, 2, interpolant, offset); + + body.emit(ret(interpolate_at_offset(interpolant, offset))); + + return sig; +} + +ir_function_signature * +builtin_builder::_interpolateAtSample(const glsl_type *type) +{ + ir_variable *interpolant = in_var(type, "interpolant"); + interpolant->data.must_be_shader_input = 1; + ir_variable *sample_num = in_var(glsl_type::int_type, "sample_num"); + MAKE_SIG(type, fs_gpu_shader5, 2, interpolant, sample_num); + + body.emit(ret(interpolate_at_sample(interpolant, sample_num))); + + return sig; +} + +ir_function_signature * +builtin_builder::_atomic_intrinsic(builtin_available_predicate avail) +{ + ir_variable *counter = in_var(glsl_type::atomic_uint_type, "counter"); + MAKE_INTRINSIC(glsl_type::uint_type, avail, 1, counter); + return sig; +} + +ir_function_signature * +builtin_builder::_atomic_op(const char *intrinsic, + builtin_available_predicate avail) +{ + ir_variable *counter = in_var(glsl_type::atomic_uint_type, "atomic_counter"); + MAKE_SIG(glsl_type::uint_type, avail, 1, counter); + + ir_variable *retval = body.make_temp(glsl_type::uint_type, "atomic_retval"); + body.emit(call(shader->symbols->get_function(intrinsic), retval, + sig->parameters)); + body.emit(ret(retval)); + return sig; +} + +ir_function_signature * +builtin_builder::_min3(const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + ir_variable *y = in_var(type, "y"); + ir_variable *z = in_var(type, "z"); + MAKE_SIG(type, shader_trinary_minmax, 3, x, y, z); + + ir_expression *min3 = min2(x, min2(y,z)); + body.emit(ret(min3)); + + return sig; +} + +ir_function_signature * +builtin_builder::_max3(const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + ir_variable *y = in_var(type, "y"); + ir_variable *z = in_var(type, "z"); + MAKE_SIG(type, shader_trinary_minmax, 3, x, y, z); + + ir_expression *max3 = max2(x, max2(y,z)); + body.emit(ret(max3)); + + return sig; +} + +ir_function_signature * +builtin_builder::_mid3(const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + ir_variable *y = in_var(type, "y"); + ir_variable *z = in_var(type, "z"); + MAKE_SIG(type, shader_trinary_minmax, 3, x, y, z); + + ir_expression *mid3 = max2(min2(x, y), max2(min2(x, z), min2(y, z))); + body.emit(ret(mid3)); + + return sig; +} + +ir_function_signature * +builtin_builder::_image_prototype(const glsl_type *image_type, + const char *intrinsic_name, + unsigned num_arguments, + unsigned flags) +{ + const glsl_type *data_type = glsl_type::get_instance( + image_type->sampler_type, + (flags & IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE ? 4 : 1), + 1); + const glsl_type *ret_type = (flags & IMAGE_FUNCTION_RETURNS_VOID ? + glsl_type::void_type : data_type); + + /* Addressing arguments that are always present. */ + ir_variable *image = in_var(image_type, "image"); + ir_variable *coord = in_var( + glsl_type::ivec(image_type->coordinate_components()), "coord"); + + ir_function_signature *sig = new_sig( + ret_type, shader_image_load_store, 2, image, coord); + + /* Sample index for multisample images. */ + if (image_type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) + sig->parameters.push_tail(in_var(glsl_type::int_type, "sample")); + + /* Data arguments. */ + for (unsigned i = 0; i < num_arguments; ++i) { + char *arg_name = ralloc_asprintf(NULL, "arg%d", i); + sig->parameters.push_tail(in_var(data_type, arg_name)); + ralloc_free(arg_name); + } + + /* Set the maximal set of qualifiers allowed for this image + * built-in. Function calls with arguments having fewer + * qualifiers than present in the prototype are allowed by the + * spec, but not with more, i.e. this will make the compiler + * accept everything that needs to be accepted, and reject cases + * like loads from write-only or stores to read-only images. + */ + image->data.image_read_only = (flags & IMAGE_FUNCTION_READ_ONLY) != 0; + image->data.image_write_only = (flags & IMAGE_FUNCTION_WRITE_ONLY) != 0; + image->data.image_coherent = true; + image->data.image_volatile = true; + image->data.image_restrict = true; + + return sig; +} + +ir_function_signature * +builtin_builder::_image(const glsl_type *image_type, + const char *intrinsic_name, + unsigned num_arguments, + unsigned flags) +{ + ir_function_signature *sig = _image_prototype(image_type, intrinsic_name, + num_arguments, flags); + + if (flags & IMAGE_FUNCTION_EMIT_STUB) { + ir_factory body(&sig->body, mem_ctx); + ir_function *f = shader->symbols->get_function(intrinsic_name); + + if (flags & IMAGE_FUNCTION_RETURNS_VOID) { + body.emit(call(f, NULL, sig->parameters)); + } else { + ir_variable *ret_val = + body.make_temp(sig->return_type, "_ret_val"); + body.emit(call(f, ret_val, sig->parameters)); + body.emit(ret(ret_val)); + } + + sig->is_defined = true; + + } else { + sig->is_intrinsic = true; + } + + return sig; +} + +ir_function_signature * +builtin_builder::_memory_barrier_intrinsic(builtin_available_predicate avail) +{ + MAKE_INTRINSIC(glsl_type::void_type, avail, 0); + return sig; +} + +ir_function_signature * +builtin_builder::_memory_barrier(builtin_available_predicate avail) +{ + MAKE_SIG(glsl_type::void_type, avail, 0); + body.emit(call(shader->symbols->get_function("__intrinsic_memory_barrier"), + NULL, sig->parameters)); + return sig; +} + /** @} */ /******************************************************************************/ /* The singleton instance of builtin_builder. */ static builtin_builder builtins; +static mtx_t builtins_lock = _MTX_INITIALIZER_NP; /** * External API (exposing the built-in module to the rest of the compiler): @@ -3605,19 +4594,34 @@ static builtin_builder builtins; void _mesa_glsl_initialize_builtin_functions() { + mtx_lock(&builtins_lock); builtins.initialize(); + mtx_unlock(&builtins_lock); } void _mesa_glsl_release_builtin_functions() { + mtx_lock(&builtins_lock); builtins.release(); + mtx_unlock(&builtins_lock); } ir_function_signature * _mesa_glsl_find_builtin_function(_mesa_glsl_parse_state *state, const char *name, exec_list *actual_parameters) { - return builtins.find(state, name, actual_parameters); + ir_function_signature * s; + mtx_lock(&builtins_lock); + s = builtins.find(state, name, actual_parameters); + mtx_unlock(&builtins_lock); + return s; } + +gl_shader * +_mesa_glsl_get_builtin_function_shader() +{ + return builtins.shader; +} + /** @} */