From: Anuj Phogat Date: Fri, 5 Sep 2014 19:19:22 +0000 (-0700) Subject: meta: Implement ext_framebuffer_multisample_blit_scaled extension X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=68ee950c78a809db8df821d1c9f0a083b02fd905;p=mesa.git meta: Implement ext_framebuffer_multisample_blit_scaled extension Extension enables doing a multisample buffer resolve and buffer scaling using a single glBlitFrameBuffer() call. Currently, we have this extension implemented in BLORP which is only used by SNB and IVB. This patch implements the extension in meta path which makes it available to Broadwell. Implementation features: - Supports scaled resolves of 2X, 4X and 8X multisample buffers. - Avoids unnecessary shader compilations by storing the pre compiled shaders for each supported sample count. - Uses bilinear filtering for both GL_SCALED_RESOLVE_FASTEST_EXT and GL_SCALED_RESOLVE_NICEST_EXT filter options. This is an allowed behavior in the extension's spec. - I tried doing bicubic filtering for GL_SCALED_RESOLVE_NICEST_EXT filter. It made the edges in the image look little smoother but the image gets blurred causing no overall quality improvement. For now I have dropped the idea of doing different filtering for nicest filter. V2: - Minor changes to simplify the fragment shader. - Refactor the code to move i965 specific sample_map computation out of Meta. We now use ctx->Const.SampleMap{2,4,8}x variables initialized by the driver. - Use a simple msaa resolve shader for scaled resolves with scaling factor = 1.0. V3: - Make changes to create a string out of ctx->Const.SampleMap{2,4,8}x variables and use it in fragment shader. V4: - Make changes to use uint8_t type ctx->Const.SampleMap{2,4,8}x variables. Signed-off-by: Anuj Phogat Reviewed-by: Jordan Justen --- diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h index edc3e8c2090..2c9517b1464 100644 --- a/src/mesa/drivers/common/meta.h +++ b/src/mesa/drivers/common/meta.h @@ -279,6 +279,12 @@ enum blit_msaa_shader { BLIT_MSAA_SHADER_2D_MULTISAMPLE_ARRAY_COPY_UINT, BLIT_MSAA_SHADER_2D_MULTISAMPLE_ARRAY_DEPTH_RESOLVE, BLIT_MSAA_SHADER_2D_MULTISAMPLE_ARRAY_DEPTH_COPY, + BLIT_2X_MSAA_SHADER_2D_MULTISAMPLE_SCALED_RESOLVE, + BLIT_4X_MSAA_SHADER_2D_MULTISAMPLE_SCALED_RESOLVE, + BLIT_8X_MSAA_SHADER_2D_MULTISAMPLE_SCALED_RESOLVE, + BLIT_2X_MSAA_SHADER_2D_MULTISAMPLE_ARRAY_SCALED_RESOLVE, + BLIT_4X_MSAA_SHADER_2D_MULTISAMPLE_ARRAY_SCALED_RESOLVE, + BLIT_8X_MSAA_SHADER_2D_MULTISAMPLE_ARRAY_SCALED_RESOLVE, BLIT_MSAA_SHADER_COUNT, }; diff --git a/src/mesa/drivers/common/meta_blit.c b/src/mesa/drivers/common/meta_blit.c index 770bc4178a2..d78300b802a 100644 --- a/src/mesa/drivers/common/meta_blit.c +++ b/src/mesa/drivers/common/meta_blit.c @@ -54,6 +54,179 @@ /** Return offset in bytes of the field within a vertex struct */ #define OFFSET(FIELD) ((void *) offsetof(struct vertex, FIELD)) +static void +setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx, + struct blit_state *blit, + struct gl_renderbuffer *src_rb, + GLenum target, GLenum filter) +{ + GLint loc_src_width, loc_src_height; + int i, samples; + int shader_offset = 0; + void *mem_ctx = ralloc_context(NULL); + char *fs_source; + char *name, *sample_number; + const uint8_t *sample_map; + char *sample_map_str = rzalloc_size(mem_ctx, 1); + char *sample_map_expr = rzalloc_size(mem_ctx, 1); + char *texel_fetch_macro = rzalloc_size(mem_ctx, 1);; + const char *vs_source; + const char *sampler_array_suffix = ""; + const char *texcoord_type = "vec2"; + float y_scale; + enum blit_msaa_shader shader_index; + + assert(src_rb); + samples = MAX2(src_rb->NumSamples, 1); + y_scale = samples * 0.5; + + /* We expect only power of 2 samples in source multisample buffer. */ + assert((samples & (samples - 1)) == 0); + while (samples >> (shader_offset + 1)) { + shader_offset++; + } + /* Update the assert if we plan to support more than 8X MSAA. */ + assert(shader_offset > 0 && shader_offset < 4); + + assert(target == GL_TEXTURE_2D_MULTISAMPLE || + target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY); + + shader_index = BLIT_2X_MSAA_SHADER_2D_MULTISAMPLE_SCALED_RESOLVE + + shader_offset - 1; + + if (target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) { + shader_index += BLIT_2X_MSAA_SHADER_2D_MULTISAMPLE_ARRAY_SCALED_RESOLVE - + BLIT_2X_MSAA_SHADER_2D_MULTISAMPLE_SCALED_RESOLVE; + sampler_array_suffix = "Array"; + texcoord_type = "vec3"; + } + + if (blit->msaa_shaders[shader_index]) { + _mesa_UseProgram(blit->msaa_shaders[shader_index]); + /* Update the uniform values. */ + loc_src_width = + glGetUniformLocation(blit->msaa_shaders[shader_index], "src_width"); + loc_src_height = + glGetUniformLocation(blit->msaa_shaders[shader_index], "src_height"); + glUniform1f(loc_src_width, src_rb->Width); + glUniform1f(loc_src_height, src_rb->Height); + return; + } + + name = ralloc_asprintf(mem_ctx, "vec4 MSAA scaled resolve"); + + /* Below switch is used to setup the shader expression, which computes + * sample index and map it to to a sample number on hardware. + */ + switch(samples) { + case 2: + sample_number = "sample_map[int(2 * fract(coord.x))]"; + sample_map = ctx->Const.SampleMap2x; + break; + case 4: + sample_number = "sample_map[int(2 * fract(coord.x) + 4 * fract(coord.y))]"; + sample_map = ctx->Const.SampleMap4x; + break; + case 8: + sample_number = "sample_map[int(2 * fract(coord.x) + 8 * fract(coord.y))]"; + sample_map = ctx->Const.SampleMap8x; + break; + default: + _mesa_problem(ctx, "Unsupported sample count %d\n", samples); + } + + /* Create sample map string. */ + for (i = 0 ; i < samples - 1; i++) { + ralloc_asprintf_append(&sample_map_str, "%d, ", sample_map[i]); + } + ralloc_asprintf_append(&sample_map_str, "%d", sample_map[samples - 1]); + + /* Create sample map expression using above string. */ + ralloc_asprintf_append(&sample_map_expr, + " const int sample_map[%d] = int[%d](%s);\n", + samples, samples, sample_map_str); + + ralloc_asprintf_append(&texel_fetch_macro, + "#define TEXEL_FETCH(coord) texelFetch(texSampler, i%s(coord), %s);\n", + texcoord_type, sample_number); + + vs_source = ralloc_asprintf(mem_ctx, + "#version 130\n" + "in vec2 position;\n" + "in %s textureCoords;\n" + "out %s texCoords;\n" + "void main()\n" + "{\n" + " texCoords = textureCoords;\n" + " gl_Position = vec4(position, 0.0, 1.0);\n" + "}\n", + texcoord_type, + texcoord_type); + fs_source = ralloc_asprintf(mem_ctx, + "#version 130\n" + "#extension GL_ARB_texture_multisample : enable\n" + "uniform sampler2DMS%s texSampler;\n" + "uniform float src_width, src_height;\n" + "in %s texCoords;\n" + "out vec4 out_color;\n" + "\n" + "void main()\n" + "{\n" + "%s" + " vec2 interp;\n" + " const vec2 scale = vec2(2.0f, %ff);\n" + " const vec2 scale_inv = vec2(0.5f, %ff);\n" + " const vec2 s_0_offset = vec2(0.25f, %ff);\n" + " vec2 s_0_coord, s_1_coord, s_2_coord, s_3_coord;\n" + " vec4 s_0_color, s_1_color, s_2_color, s_3_color;\n" + " vec4 x_0_color, x_1_color;\n" + " vec2 tex_coord = texCoords - s_0_offset;\n" + "\n" + " tex_coord *= scale;\n" + " clamp(tex_coord.x, 0.0f, scale.x * src_width - 1.0f);\n" + " clamp(tex_coord.y, 0.0f, scale.y * src_height - 1.0f);\n" + " interp = fract(tex_coord);\n" + " tex_coord = ivec2(tex_coord) * scale_inv;\n" + "\n" + " /* Compute the sample coordinates used for filtering. */\n" + " s_0_coord = tex_coord;\n" + " s_1_coord = tex_coord + vec2(scale_inv.x, 0.0f);\n" + " s_2_coord = tex_coord + vec2(0.0f, scale_inv.y);\n" + " s_3_coord = tex_coord + vec2(scale_inv.x, scale_inv.y);\n" + "\n" + " /* Fetch sample color values. */\n" + "%s" + " s_0_color = TEXEL_FETCH(s_0_coord)\n" + " s_1_color = TEXEL_FETCH(s_1_coord)\n" + " s_2_color = TEXEL_FETCH(s_2_coord)\n" + " s_3_color = TEXEL_FETCH(s_3_coord)\n" + "#undef TEXEL_FETCH\n" + "\n" + " /* Do bilinear filtering on sample colors. */\n" + " x_0_color = mix(s_0_color, s_1_color, interp.x);\n" + " x_1_color = mix(s_2_color, s_3_color, interp.x);\n" + " out_color = mix(x_0_color, x_1_color, interp.y);\n" + "}\n", + sampler_array_suffix, + texcoord_type, + sample_map_expr, + y_scale, + 1.0f / y_scale, + 1.0f / samples, + texel_fetch_macro); + + _mesa_meta_compile_and_link_program(ctx, vs_source, fs_source, name, + &blit->msaa_shaders[shader_index]); + loc_src_width = + glGetUniformLocation(blit->msaa_shaders[shader_index], "src_width"); + loc_src_height = + glGetUniformLocation(blit->msaa_shaders[shader_index], "src_height"); + glUniform1f(loc_src_width, src_rb->Width); + glUniform1f(loc_src_height, src_rb->Height); + + ralloc_free(mem_ctx); +} + static void setup_glsl_msaa_blit_shader(struct gl_context *ctx, struct blit_state *blit, @@ -334,9 +507,14 @@ static void setup_glsl_blit_framebuffer(struct gl_context *ctx, struct blit_state *blit, struct gl_renderbuffer *src_rb, - GLenum target) + GLenum target, GLenum filter, + bool is_scaled_blit) { unsigned texcoord_size; + bool is_target_multisample = target == GL_TEXTURE_2D_MULTISAMPLE || + target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY; + bool is_filter_scaled_resolve = filter == GL_SCALED_RESOLVE_FASTEST_EXT || + filter == GL_SCALED_RESOLVE_NICEST_EXT; /* target = GL_TEXTURE_RECTANGLE is not supported in GLES 3.0 */ assert(_mesa_is_desktop_gl(ctx) || target == GL_TEXTURE_2D); @@ -346,8 +524,9 @@ setup_glsl_blit_framebuffer(struct gl_context *ctx, _mesa_meta_setup_vertex_objects(&blit->VAO, &blit->VBO, true, 2, texcoord_size, 0); - if (target == GL_TEXTURE_2D_MULTISAMPLE || - target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) { + if (is_target_multisample && is_filter_scaled_resolve && is_scaled_blit) { + setup_glsl_msaa_blit_scaled_shader(ctx, blit, src_rb, target, filter); + } else if (is_target_multisample) { setup_glsl_msaa_blit_shader(ctx, blit, src_rb, target); } else { _mesa_meta_setup_blit_shader(ctx, target, &blit->shaders); @@ -377,6 +556,9 @@ blitframebuffer_texture(struct gl_context *ctx, const GLint dstY = MIN2(dstY0, dstY1); const GLint dstW = abs(dstX1 - dstX0); const GLint dstH = abs(dstY1 - dstY0); + const int srcW = abs(srcX1 - srcX0); + const int srcH = abs(srcY1 - srcY0); + bool scaled_blit = false; struct gl_texture_object *texObj; GLuint srcLevel; GLenum target; @@ -386,11 +568,6 @@ blitframebuffer_texture(struct gl_context *ctx, if (rb->NumSamples && !ctx->Extensions.ARB_texture_multisample) return false; - if (filter == GL_SCALED_RESOLVE_FASTEST_EXT || - filter == GL_SCALED_RESOLVE_NICEST_EXT) { - filter = GL_LINEAR; - } - _mesa_meta_fb_tex_blit_begin(ctx, &fb_tex_blit); if (readAtt->Texture && @@ -418,8 +595,6 @@ blitframebuffer_texture(struct gl_context *ctx, } } else { GLenum tex_base_format; - int srcW = abs(srcX1 - srcX0); - int srcH = abs(srcY1 - srcY0); /* Fall back to doing a CopyTexSubImage to get the destination * renderbuffer into a texture. */ @@ -462,8 +637,10 @@ blitframebuffer_texture(struct gl_context *ctx, fb_tex_blit.maxLevelSave = texObj->MaxLevel; fb_tex_blit.stencilSamplingSave = texObj->StencilSampling; + scaled_blit = dstW != srcW || dstH != srcH; + if (glsl_version) { - setup_glsl_blit_framebuffer(ctx, blit, rb, target); + setup_glsl_blit_framebuffer(ctx, blit, rb, target, filter, scaled_blit); } else { _mesa_meta_setup_ff_tnl_for_blit(&ctx->Meta->Blit.VAO, @@ -663,14 +840,17 @@ _mesa_meta_setup_sampler(struct gl_context *ctx, GLenum target, GLenum filter, GLuint srcLevel) { GLuint sampler; + GLenum tex_filter = (filter == GL_SCALED_RESOLVE_FASTEST_EXT || + filter == GL_SCALED_RESOLVE_NICEST_EXT) ? + GL_NEAREST : filter; _mesa_GenSamplers(1, &sampler); _mesa_BindSampler(ctx->Texture.CurrentUnit, sampler); /* Prepare src texture state */ _mesa_BindTexture(target, texObj->Name); - _mesa_SamplerParameteri(sampler, GL_TEXTURE_MIN_FILTER, filter); - _mesa_SamplerParameteri(sampler, GL_TEXTURE_MAG_FILTER, filter); + _mesa_SamplerParameteri(sampler, GL_TEXTURE_MIN_FILTER, tex_filter); + _mesa_SamplerParameteri(sampler, GL_TEXTURE_MAG_FILTER, tex_filter); if (target != GL_TEXTURE_RECTANGLE_ARB) { _mesa_TexParameteri(target, GL_TEXTURE_BASE_LEVEL, srcLevel); _mesa_TexParameteri(target, GL_TEXTURE_MAX_LEVEL, srcLevel);