X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fauxiliary%2Fvl%2Fvl_zscan.c;h=769fc71ff4243219efd1ffd21fa54303ac81ef12;hb=2dad9fde505c7d8e97f57f4a5a3f495f902f94f2;hp=ec806982226b14f96850ba7ac0ae75e257da984c;hpb=828540e491d88b9b6217e6568873a78462919ae8;p=mesa.git diff --git a/src/gallium/auxiliary/vl/vl_zscan.c b/src/gallium/auxiliary/vl/vl_zscan.c index ec806982226..769fc71ff42 100644 --- a/src/gallium/auxiliary/vl/vl_zscan.c +++ b/src/gallium/auxiliary/vl/vl_zscan.c @@ -18,7 +18,7 @@ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @@ -27,25 +27,33 @@ #include -#include -#include +#include "pipe/p_screen.h" +#include "pipe/p_context.h" -#include -#include -#include +#include "util/u_draw.h" +#include "util/u_sampler.h" +#include "util/u_inlines.h" +#include "util/u_memory.h" -#include +#include "tgsi/tgsi_ureg.h" -#include -#include +#include "vl_defines.h" +#include "vl_types.h" #include "vl_zscan.h" #include "vl_vertex_buffers.h" enum VS_OUTPUT { - VS_O_VPOS, - VS_O_VTEX + VS_O_VPOS = 0, + VS_O_VTEX = 0 +}; + +const int vl_zscan_normal_16[] = +{ + /* Zig-Zag scan pattern */ + 0, 1, 4, 8, 5, 2, 3, 6, + 9,12,13,10, 7,11,14,15 }; const int vl_zscan_linear[] = @@ -87,31 +95,51 @@ const int vl_zscan_alternate[] = 38,46,54,62,39,47,55,63 }; +const int vl_zscan_h265_up_right_diagonal_16[] = +{ + /* Up-right diagonal scan order for 4x4 blocks - see H.265 section 6.5.3. */ + 0, 4, 1, 8, 5, 2, 12, 9, + 6, 3, 13, 10, 7, 14, 11, 15, +}; + +const int vl_zscan_h265_up_right_diagonal[] = +{ + /* Up-right diagonal scan order for 8x8 blocks - see H.265 section 6.5.3. */ + 0, 8, 1, 16, 9, 2, 24, 17, + 10, 3, 32, 25, 18, 11, 4, 40, + 33, 26, 19, 12, 5, 48, 41, 34, + 27, 20, 13, 6, 56, 49, 42, 35, + 28, 21, 14, 7, 57, 50, 43, 36, + 29, 22, 15, 58, 51, 44, 37, 30, + 23, 59, 52, 45, 38, 31, 60, 53, + 46, 39, 61, 54, 47, 62, 55, 63, +}; + + static void * create_vert_shader(struct vl_zscan *zscan) { struct ureg_program *shader; - - struct ureg_src scale, instance; - struct ureg_src vrect, vpos; - + struct ureg_src scale; + struct ureg_src vrect, vpos, block_num; struct ureg_dst tmp; - struct ureg_dst o_vpos, o_vtex[zscan->num_channels]; - - signed i; + struct ureg_dst o_vpos; + struct ureg_dst *o_vtex; + unsigned i; - shader = ureg_create(TGSI_PROCESSOR_VERTEX); + shader = ureg_create(PIPE_SHADER_VERTEX); if (!shader) return NULL; - scale = ureg_imm2f(shader, - (float)BLOCK_WIDTH / zscan->buffer_width, - (float)BLOCK_HEIGHT / zscan->buffer_height); + o_vtex = MALLOC(zscan->num_channels * sizeof(struct ureg_dst)); - instance = ureg_DECL_system_value(shader, 0, TGSI_SEMANTIC_INSTANCEID, 0); + scale = ureg_imm2f(shader, + (float)VL_BLOCK_WIDTH / zscan->buffer_width, + (float)VL_BLOCK_HEIGHT / zscan->buffer_height); vrect = ureg_DECL_vs_input(shader, VS_I_RECT); vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); + block_num = ureg_DECL_vs_input(shader, VS_I_BLOCK_NUM); tmp = ureg_DECL_temporary(shader); @@ -136,26 +164,30 @@ create_vert_shader(struct vl_zscan *zscan) ureg_MUL(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(tmp), scale); ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f)); - ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XZ), instance, + ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XW), ureg_scalar(block_num, TGSI_SWIZZLE_X), ureg_imm1f(shader, 1.0f / zscan->blocks_per_line)); ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X)); - ureg_FLR(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_src(tmp)); + ureg_FLR(shader, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_src(tmp)); for (i = 0; i < zscan->num_channels; ++i) { ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), - ureg_imm1f(shader, 1.0f / (zscan->blocks_per_line * BLOCK_WIDTH) * (i - (signed)zscan->num_channels / 2))); + ureg_imm1f(shader, 1.0f / (zscan->blocks_per_line * VL_BLOCK_WIDTH) + * ((signed)i - (signed)zscan->num_channels / 2))); ureg_MAD(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_X), vrect, ureg_imm1f(shader, 1.0f / zscan->blocks_per_line), ureg_src(tmp)); ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Y), vrect); - ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Z), ureg_src(tmp), + ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Z), vpos); + ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_W), ureg_src(tmp), ureg_imm1f(shader, (float)zscan->blocks_per_line / zscan->blocks_total)); } ureg_release_temporary(shader, tmp); ureg_END(shader); + FREE(o_vtex); + return ureg_create_shader_and_destroy(shader, zscan->pipe); } @@ -163,28 +195,32 @@ static void * create_frag_shader(struct vl_zscan *zscan) { struct ureg_program *shader; - struct ureg_src vtex[zscan->num_channels]; + struct ureg_src *vtex; - struct ureg_src src, scan, quant; + struct ureg_src samp_src, samp_scan, samp_quant; - struct ureg_dst tmp[zscan->num_channels]; - struct ureg_dst fragment; + struct ureg_dst *tmp; + struct ureg_dst quant, fragment; unsigned i; - shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); + shader = ureg_create(PIPE_SHADER_FRAGMENT); if (!shader) return NULL; + vtex = MALLOC(zscan->num_channels * sizeof(struct ureg_src)); + tmp = MALLOC(zscan->num_channels * sizeof(struct ureg_dst)); + for (i = 0; i < zscan->num_channels; ++i) vtex[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i, TGSI_INTERPOLATE_LINEAR); - src = ureg_DECL_sampler(shader, 0); - scan = ureg_DECL_sampler(shader, 1); - quant = ureg_DECL_sampler(shader, 2); + samp_src = ureg_DECL_sampler(shader, 0); + samp_scan = ureg_DECL_sampler(shader, 1); + samp_quant = ureg_DECL_sampler(shader, 2); for (i = 0; i < zscan->num_channels; ++i) tmp[i] = ureg_DECL_temporary(shader); + quant = ureg_DECL_temporary(shader); fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); @@ -194,22 +230,26 @@ create_frag_shader(struct vl_zscan *zscan) * fragment = tex(tmp, 0) * quant */ for (i = 0; i < zscan->num_channels; ++i) - ureg_TEX(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_X), TGSI_TEXTURE_2D, vtex[i], scan); + ureg_TEX(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_X), TGSI_TEXTURE_2D, vtex[i], samp_scan); for (i = 0; i < zscan->num_channels; ++i) - ureg_MOV(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_Y), ureg_scalar(vtex[i], TGSI_SWIZZLE_Z)); + ureg_MOV(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_Y), ureg_scalar(vtex[i], TGSI_SWIZZLE_W)); - for (i = 0; i < zscan->num_channels; ++i) - ureg_TEX(shader, tmp[i], TGSI_TEXTURE_2D, ureg_src(tmp[i]), src); + for (i = 0; i < zscan->num_channels; ++i) { + ureg_TEX(shader, ureg_writemask(tmp[0], TGSI_WRITEMASK_X << i), TGSI_TEXTURE_2D, ureg_src(tmp[i]), samp_src); + ureg_TEX(shader, ureg_writemask(quant, TGSI_WRITEMASK_X << i), TGSI_TEXTURE_3D, vtex[i], samp_quant); + } - // TODO: Fetch quant and use it - for (i = 0; i < zscan->num_channels; ++i) - ureg_MUL(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X << i), ureg_src(tmp[i]), ureg_imm1f(shader, 1.0f)); + ureg_MUL(shader, quant, ureg_src(quant), ureg_imm1f(shader, 16.0f)); + ureg_MUL(shader, fragment, ureg_src(tmp[0]), ureg_src(quant)); for (i = 0; i < zscan->num_channels; ++i) ureg_release_temporary(shader, tmp[i]); ureg_END(shader); + FREE(vtex); + FREE(tmp); + return ureg_create_shader_and_destroy(shader, zscan->pipe); } @@ -255,7 +295,11 @@ init_state(struct vl_zscan *zscan) assert(zscan); memset(&rs_state, 0, sizeof(rs_state)); - rs_state.gl_rasterization_rules = true; + rs_state.half_pixel_center = true; + rs_state.bottom_edge_rule = true; + rs_state.depth_clip_near = 1; + rs_state.depth_clip_far = 1; + zscan->rs_state = zscan->pipe->create_rasterizer_state(zscan->pipe, &rs_state); if (!zscan->rs_state) goto error_rs_state; @@ -283,7 +327,7 @@ init_state(struct vl_zscan *zscan) memset(&sampler, 0, sizeof(sampler)); sampler.wrap_s = PIPE_TEX_WRAP_REPEAT; sampler.wrap_t = PIPE_TEX_WRAP_REPEAT; - sampler.wrap_r = PIPE_TEX_WRAP_REPEAT; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; @@ -328,7 +372,7 @@ cleanup_state(struct vl_zscan *zscan) struct pipe_sampler_view * vl_zscan_layout(struct pipe_context *pipe, const int layout[64], unsigned blocks_per_line) { - const unsigned total_size = blocks_per_line * BLOCK_WIDTH * BLOCK_HEIGHT; + const unsigned total_size = blocks_per_line * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT; int patched_layout[64]; @@ -341,8 +385,8 @@ vl_zscan_layout(struct pipe_context *pipe, const int layout[64], unsigned blocks struct pipe_box rect = { 0, 0, 0, - BLOCK_WIDTH * blocks_per_line, - BLOCK_HEIGHT, + VL_BLOCK_WIDTH * blocks_per_line, + VL_BLOCK_HEIGHT, 1 }; @@ -354,8 +398,8 @@ vl_zscan_layout(struct pipe_context *pipe, const int layout[64], unsigned blocks memset(&res_tmpl, 0, sizeof(res_tmpl)); res_tmpl.target = PIPE_TEXTURE_2D; res_tmpl.format = PIPE_FORMAT_R32_FLOAT; - res_tmpl.width0 = BLOCK_WIDTH * blocks_per_line; - res_tmpl.height0 = BLOCK_HEIGHT; + res_tmpl.width0 = VL_BLOCK_WIDTH * blocks_per_line; + res_tmpl.height0 = VL_BLOCK_HEIGHT; res_tmpl.depth0 = 1; res_tmpl.array_size = 1; res_tmpl.usage = PIPE_USAGE_IMMUTABLE; @@ -365,34 +409,26 @@ vl_zscan_layout(struct pipe_context *pipe, const int layout[64], unsigned blocks if (!res) goto error_resource; - buf_transfer = pipe->get_transfer - ( - pipe, res, - 0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, - &rect - ); - if (!buf_transfer) - goto error_transfer; - - pitch = buf_transfer->stride / sizeof(float); - - f = pipe->transfer_map(pipe, buf_transfer); + f = pipe->transfer_map(pipe, res, + 0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_RANGE, + &rect, &buf_transfer); if (!f) goto error_map; + pitch = buf_transfer->stride / sizeof(float); + for (i = 0; i < blocks_per_line; ++i) - for (y = 0; y < BLOCK_HEIGHT; ++y) - for (x = 0; x < BLOCK_WIDTH; ++x) { - float addr = patched_layout[x + y * BLOCK_WIDTH] + - i * BLOCK_WIDTH * BLOCK_HEIGHT; + for (y = 0; y < VL_BLOCK_HEIGHT; ++y) + for (x = 0; x < VL_BLOCK_WIDTH; ++x) { + float addr = patched_layout[x + y * VL_BLOCK_WIDTH] + + i * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT; addr /= total_size; - f[i * BLOCK_WIDTH + y * pitch + x] = addr; + f[i * VL_BLOCK_WIDTH + y * pitch + x] = addr; } pipe->transfer_unmap(pipe, buf_transfer); - pipe->transfer_destroy(pipe, buf_transfer); memset(&sv_tmpl, 0, sizeof(sv_tmpl)); u_sampler_view_default_template(&sv_tmpl, res, res->format); @@ -404,24 +440,12 @@ vl_zscan_layout(struct pipe_context *pipe, const int layout[64], unsigned blocks return sv; error_map: - pipe->transfer_destroy(pipe, buf_transfer); - -error_transfer: pipe_resource_reference(&res, NULL); error_resource: return NULL; } -#if 0 -// TODO -struct pipe_sampler_view * -vl_zscan_normal(struct pipe_context *pipe, unsigned blocks_per_line); - -struct pipe_sampler_view * -vl_zscan_alternate(struct pipe_context *pipe, unsigned blocks_per_line); -#endif - bool vl_zscan_init(struct vl_zscan *zscan, struct pipe_context *pipe, unsigned buffer_width, unsigned buffer_height, @@ -457,38 +481,53 @@ vl_zscan_cleanup(struct vl_zscan *zscan) cleanup_state(zscan); } -#if 0 -// TODO -void -vl_zscan_upload_quant(struct vl_zscan *zscan, ...); -#endif - bool vl_zscan_init_buffer(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer, struct pipe_sampler_view *src, struct pipe_surface *dst) { + struct pipe_resource res_tmpl, *res; + struct pipe_sampler_view sv_tmpl; + assert(zscan && buffer); memset(buffer, 0, sizeof(struct vl_zscan_buffer)); - buffer->zscan = zscan; - pipe_sampler_view_reference(&buffer->src, src); buffer->viewport.scale[0] = dst->width; buffer->viewport.scale[1] = dst->height; buffer->viewport.scale[2] = 1; - buffer->viewport.scale[3] = 1; buffer->viewport.translate[0] = 0; buffer->viewport.translate[1] = 0; buffer->viewport.translate[2] = 0; - buffer->viewport.translate[3] = 0; buffer->fb_state.width = dst->width; buffer->fb_state.height = dst->height; buffer->fb_state.nr_cbufs = 1; pipe_surface_reference(&buffer->fb_state.cbufs[0], dst); + memset(&res_tmpl, 0, sizeof(res_tmpl)); + res_tmpl.target = PIPE_TEXTURE_3D; + res_tmpl.format = PIPE_FORMAT_R8_UNORM; + res_tmpl.width0 = VL_BLOCK_WIDTH * zscan->blocks_per_line; + res_tmpl.height0 = VL_BLOCK_HEIGHT; + res_tmpl.depth0 = 2; + res_tmpl.array_size = 1; + res_tmpl.usage = PIPE_USAGE_IMMUTABLE; + res_tmpl.bind = PIPE_BIND_SAMPLER_VIEW; + + res = zscan->pipe->screen->resource_create(zscan->pipe->screen, &res_tmpl); + if (!res) + return false; + + memset(&sv_tmpl, 0, sizeof(sv_tmpl)); + u_sampler_view_default_template(&sv_tmpl, res, res->format); + sv_tmpl.swizzle_r = sv_tmpl.swizzle_g = sv_tmpl.swizzle_b = sv_tmpl.swizzle_a = TGSI_SWIZZLE_X; + buffer->quant = zscan->pipe->create_sampler_view(zscan->pipe, res, &sv_tmpl); + pipe_resource_reference(&res, NULL); + if (!buffer->quant) + return false; + return true; } @@ -513,20 +552,59 @@ vl_zscan_set_layout(struct vl_zscan_buffer *buffer, struct pipe_sampler_view *la } void -vl_zscan_render(struct vl_zscan_buffer *buffer, unsigned num_instances) +vl_zscan_upload_quant(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer, + const uint8_t matrix[64], bool intra) { - struct vl_zscan *zscan; + struct pipe_context *pipe; + struct pipe_transfer *buf_transfer; + unsigned x, y, i, pitch; + uint8_t *data; + + struct pipe_box rect = + { + 0, 0, intra ? 1 : 0, + VL_BLOCK_WIDTH, + VL_BLOCK_HEIGHT, + 1 + }; assert(buffer); + assert(matrix); + + pipe = zscan->pipe; + + rect.width *= zscan->blocks_per_line; + + data = pipe->transfer_map(pipe, buffer->quant->texture, + 0, PIPE_TRANSFER_WRITE | + PIPE_TRANSFER_DISCARD_RANGE, + &rect, &buf_transfer); + if (!data) + return; - zscan = buffer->zscan; + pitch = buf_transfer->stride; + + for (i = 0; i < zscan->blocks_per_line; ++i) + for (y = 0; y < VL_BLOCK_HEIGHT; ++y) + for (x = 0; x < VL_BLOCK_WIDTH; ++x) + data[i * VL_BLOCK_WIDTH + y * pitch + x] = matrix[x + y * VL_BLOCK_WIDTH]; + + pipe->transfer_unmap(pipe, buf_transfer); +} + +void +vl_zscan_render(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer, unsigned num_instances) +{ + assert(buffer); zscan->pipe->bind_rasterizer_state(zscan->pipe, zscan->rs_state); zscan->pipe->bind_blend_state(zscan->pipe, zscan->blend); - zscan->pipe->bind_fragment_sampler_states(zscan->pipe, 2, zscan->samplers); + zscan->pipe->bind_sampler_states(zscan->pipe, PIPE_SHADER_FRAGMENT, + 0, 3, zscan->samplers); zscan->pipe->set_framebuffer_state(zscan->pipe, &buffer->fb_state); - zscan->pipe->set_viewport_state(zscan->pipe, &buffer->viewport); - zscan->pipe->set_fragment_sampler_views(zscan->pipe, 2, &buffer->src); + zscan->pipe->set_viewport_states(zscan->pipe, 0, 1, &buffer->viewport); + zscan->pipe->set_sampler_views(zscan->pipe, PIPE_SHADER_FRAGMENT, + 0, 3, &buffer->src); zscan->pipe->bind_vs_state(zscan->pipe, zscan->vs); zscan->pipe->bind_fs_state(zscan->pipe, zscan->fs); util_draw_arrays_instanced(zscan->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);