/**************************************************************************
 *
 * Copyright 2011 Christian König
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
30 #include "pipe/p_screen.h"
31 #include "pipe/p_context.h"
33 #include "util/u_draw.h"
34 #include "util/u_sampler.h"
35 #include "util/u_inlines.h"
36 #include "util/u_memory.h"
38 #include "tgsi/tgsi_ureg.h"
40 #include "vl_defines.h"
44 #include "vl_vertex_buffers.h"
/* Zig-zag scan pattern, 16-entry variant (a permutation of 0..15). */
const int vl_zscan_normal_16[] =
{
   /* Zig-Zag scan pattern */
    0, 1, 4, 8, 5, 2, 3, 6,
    9,12,13,10, 7,11,14,15
};
/* Identity scan pattern: entry k is k, for all 64 positions. */
const int vl_zscan_linear[] =
{
   /* Linear scan pattern */
    0, 1, 2, 3, 4, 5, 6, 7,
    8, 9,10,11,12,13,14,15,
   16,17,18,19,20,21,22,23,
   24,25,26,27,28,29,30,31,
   32,33,34,35,36,37,38,39,
   40,41,42,43,44,45,46,47,
   48,49,50,51,52,53,54,55,
   56,57,58,59,60,61,62,63
};
/* Zig-zag scan pattern, 64-entry variant (a permutation of 0..63). */
const int vl_zscan_normal[] =
{
   /* Zig-Zag scan pattern */
    0, 1, 8,16, 9, 2, 3,10,
   17,24,32,25,18,11, 4, 5,
   12,19,26,33,40,48,41,34,
   27,20,13, 6, 7,14,21,28,
   35,42,49,56,57,50,43,36,
   29,22,15,23,30,37,44,51,
   58,59,52,45,38,31,39,46,
   53,60,61,54,47,55,62,63
};
/* Alternate scan pattern, 64 entries (a permutation of 0..63). */
const int vl_zscan_alternate[] =
{
   /* Alternate scan pattern */
    0, 8,16,24, 1, 9, 2,10,
   17,25,32,40,48,56,57,49,
   41,33,26,18, 3,11, 4,12,
   19,27,34,42,50,58,35,43,
   51,59,20,28, 5,13, 6,14,
   21,29,36,44,52,60,37,45,
   53,61,22,30, 7,15,23,31,
   38,46,54,62,39,47,55,63
};
const int vl_zscan_h265_up_right_diagonal_16[] =
{
   /* Up-right diagonal scan order for 4x4 blocks - see H.265 section 6.5.3. */
    0,  4,  1,  8,  5,  2, 12,  9,
    6,  3, 13, 10,  7, 14, 11, 15,
};
const int vl_zscan_h265_up_right_diagonal[] =
{
   /* Up-right diagonal scan order for 8x8 blocks - see H.265 section 6.5.3. */
    0,  8,  1, 16,  9,  2, 24, 17,
   10,  3, 32, 25, 18, 11,  4, 40,
   33, 26, 19, 12,  5, 48, 41, 34,
   27, 20, 13,  6, 56, 49, 42, 35,
   28, 21, 14,  7, 57, 50, 43, 36,
   29, 22, 15, 58, 51, 44, 37, 30,
   23, 59, 52, 45, 38, 31, 60, 53,
   46, 39, 61, 54, 47, 62, 55, 63,
};
120 create_vert_shader(struct vl_zscan
*zscan
)
122 struct ureg_program
*shader
;
123 struct ureg_src scale
;
124 struct ureg_src vrect
, vpos
, block_num
;
126 struct ureg_dst o_vpos
;
127 struct ureg_dst
*o_vtex
;
130 shader
= ureg_create(PIPE_SHADER_VERTEX
);
134 o_vtex
= MALLOC(zscan
->num_channels
* sizeof(struct ureg_dst
));
136 scale
= ureg_imm2f(shader
,
137 (float)VL_BLOCK_WIDTH
/ zscan
->buffer_width
,
138 (float)VL_BLOCK_HEIGHT
/ zscan
->buffer_height
);
140 vrect
= ureg_DECL_vs_input(shader
, VS_I_RECT
);
141 vpos
= ureg_DECL_vs_input(shader
, VS_I_VPOS
);
142 block_num
= ureg_DECL_vs_input(shader
, VS_I_BLOCK_NUM
);
144 tmp
= ureg_DECL_temporary(shader
);
146 o_vpos
= ureg_DECL_output(shader
, TGSI_SEMANTIC_POSITION
, VS_O_VPOS
);
148 for (i
= 0; i
< zscan
->num_channels
; ++i
)
149 o_vtex
[i
] = ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_VTEX
+ i
);
152 * o_vpos.xy = (vpos + vrect) * scale
155 * tmp.xy = InstanceID / blocks_per_line
156 * tmp.x = frac(tmp.x)
157 * tmp.y = floor(tmp.y)
159 * o_vtex.x = vrect.x / blocks_per_line + tmp.x
161 * o_vtex.z = tmp.z * blocks_per_line / blocks_total
163 ureg_ADD(shader
, ureg_writemask(tmp
, TGSI_WRITEMASK_XY
), vpos
, vrect
);
164 ureg_MUL(shader
, ureg_writemask(o_vpos
, TGSI_WRITEMASK_XY
), ureg_src(tmp
), scale
);
165 ureg_MOV(shader
, ureg_writemask(o_vpos
, TGSI_WRITEMASK_ZW
), ureg_imm1f(shader
, 1.0f
));
167 ureg_MUL(shader
, ureg_writemask(tmp
, TGSI_WRITEMASK_XW
), ureg_scalar(block_num
, TGSI_SWIZZLE_X
),
168 ureg_imm1f(shader
, 1.0f
/ zscan
->blocks_per_line
));
170 ureg_FRC(shader
, ureg_writemask(tmp
, TGSI_WRITEMASK_Y
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
));
171 ureg_FLR(shader
, ureg_writemask(tmp
, TGSI_WRITEMASK_W
), ureg_src(tmp
));
173 for (i
= 0; i
< zscan
->num_channels
; ++i
) {
174 ureg_ADD(shader
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_Y
),
175 ureg_imm1f(shader
, 1.0f
/ (zscan
->blocks_per_line
* VL_BLOCK_WIDTH
)
176 * ((signed)i
- (signed)zscan
->num_channels
/ 2)));
178 ureg_MAD(shader
, ureg_writemask(o_vtex
[i
], TGSI_WRITEMASK_X
), vrect
,
179 ureg_imm1f(shader
, 1.0f
/ zscan
->blocks_per_line
), ureg_src(tmp
));
180 ureg_MOV(shader
, ureg_writemask(o_vtex
[i
], TGSI_WRITEMASK_Y
), vrect
);
181 ureg_MOV(shader
, ureg_writemask(o_vtex
[i
], TGSI_WRITEMASK_Z
), vpos
);
182 ureg_MUL(shader
, ureg_writemask(o_vtex
[i
], TGSI_WRITEMASK_W
), ureg_src(tmp
),
183 ureg_imm1f(shader
, (float)zscan
->blocks_per_line
/ zscan
->blocks_total
));
186 ureg_release_temporary(shader
, tmp
);
191 return ureg_create_shader_and_destroy(shader
, zscan
->pipe
);
195 create_frag_shader(struct vl_zscan
*zscan
)
197 struct ureg_program
*shader
;
198 struct ureg_src
*vtex
;
200 struct ureg_src samp_src
, samp_scan
, samp_quant
;
202 struct ureg_dst
*tmp
;
203 struct ureg_dst quant
, fragment
;
207 shader
= ureg_create(PIPE_SHADER_FRAGMENT
);
211 vtex
= MALLOC(zscan
->num_channels
* sizeof(struct ureg_src
));
212 tmp
= MALLOC(zscan
->num_channels
* sizeof(struct ureg_dst
));
214 for (i
= 0; i
< zscan
->num_channels
; ++i
)
215 vtex
[i
] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_VTEX
+ i
, TGSI_INTERPOLATE_LINEAR
);
217 samp_src
= ureg_DECL_sampler(shader
, 0);
218 samp_scan
= ureg_DECL_sampler(shader
, 1);
219 samp_quant
= ureg_DECL_sampler(shader
, 2);
221 for (i
= 0; i
< zscan
->num_channels
; ++i
)
222 tmp
[i
] = ureg_DECL_temporary(shader
);
223 quant
= ureg_DECL_temporary(shader
);
225 fragment
= ureg_DECL_output(shader
, TGSI_SEMANTIC_COLOR
, 0);
228 * tmp.x = tex(vtex, 1)
230 * fragment = tex(tmp, 0) * quant
232 for (i
= 0; i
< zscan
->num_channels
; ++i
)
233 ureg_TEX(shader
, ureg_writemask(tmp
[i
], TGSI_WRITEMASK_X
), TGSI_TEXTURE_2D
, vtex
[i
], samp_scan
);
235 for (i
= 0; i
< zscan
->num_channels
; ++i
)
236 ureg_MOV(shader
, ureg_writemask(tmp
[i
], TGSI_WRITEMASK_Y
), ureg_scalar(vtex
[i
], TGSI_SWIZZLE_W
));
238 for (i
= 0; i
< zscan
->num_channels
; ++i
) {
239 ureg_TEX(shader
, ureg_writemask(tmp
[0], TGSI_WRITEMASK_X
<< i
), TGSI_TEXTURE_2D
, ureg_src(tmp
[i
]), samp_src
);
240 ureg_TEX(shader
, ureg_writemask(quant
, TGSI_WRITEMASK_X
<< i
), TGSI_TEXTURE_3D
, vtex
[i
], samp_quant
);
243 ureg_MUL(shader
, quant
, ureg_src(quant
), ureg_imm1f(shader
, 16.0f
));
244 ureg_MUL(shader
, fragment
, ureg_src(tmp
[0]), ureg_src(quant
));
246 for (i
= 0; i
< zscan
->num_channels
; ++i
)
247 ureg_release_temporary(shader
, tmp
[i
]);
253 return ureg_create_shader_and_destroy(shader
, zscan
->pipe
);
257 init_shaders(struct vl_zscan
*zscan
)
261 zscan
->vs
= create_vert_shader(zscan
);
265 zscan
->fs
= create_frag_shader(zscan
);
272 zscan
->pipe
->delete_vs_state(zscan
->pipe
, zscan
->vs
);
279 cleanup_shaders(struct vl_zscan
*zscan
)
283 zscan
->pipe
->delete_vs_state(zscan
->pipe
, zscan
->vs
);
284 zscan
->pipe
->delete_fs_state(zscan
->pipe
, zscan
->fs
);
288 init_state(struct vl_zscan
*zscan
)
290 struct pipe_blend_state blend
;
291 struct pipe_rasterizer_state rs_state
;
292 struct pipe_sampler_state sampler
;
297 memset(&rs_state
, 0, sizeof(rs_state
));
298 rs_state
.half_pixel_center
= true;
299 rs_state
.bottom_edge_rule
= true;
300 rs_state
.depth_clip_near
= 1;
301 rs_state
.depth_clip_far
= 1;
303 zscan
->rs_state
= zscan
->pipe
->create_rasterizer_state(zscan
->pipe
, &rs_state
);
304 if (!zscan
->rs_state
)
307 memset(&blend
, 0, sizeof blend
);
309 blend
.independent_blend_enable
= 0;
310 blend
.rt
[0].blend_enable
= 0;
311 blend
.rt
[0].rgb_func
= PIPE_BLEND_ADD
;
312 blend
.rt
[0].rgb_src_factor
= PIPE_BLENDFACTOR_ONE
;
313 blend
.rt
[0].rgb_dst_factor
= PIPE_BLENDFACTOR_ONE
;
314 blend
.rt
[0].alpha_func
= PIPE_BLEND_ADD
;
315 blend
.rt
[0].alpha_src_factor
= PIPE_BLENDFACTOR_ONE
;
316 blend
.rt
[0].alpha_dst_factor
= PIPE_BLENDFACTOR_ONE
;
317 blend
.logicop_enable
= 0;
318 blend
.logicop_func
= PIPE_LOGICOP_CLEAR
;
319 /* Needed to allow color writes to FB, even if blending disabled */
320 blend
.rt
[0].colormask
= PIPE_MASK_RGBA
;
322 zscan
->blend
= zscan
->pipe
->create_blend_state(zscan
->pipe
, &blend
);
326 for (i
= 0; i
< 3; ++i
) {
327 memset(&sampler
, 0, sizeof(sampler
));
328 sampler
.wrap_s
= PIPE_TEX_WRAP_REPEAT
;
329 sampler
.wrap_t
= PIPE_TEX_WRAP_REPEAT
;
330 sampler
.wrap_r
= PIPE_TEX_WRAP_CLAMP_TO_EDGE
;
331 sampler
.min_img_filter
= PIPE_TEX_FILTER_NEAREST
;
332 sampler
.min_mip_filter
= PIPE_TEX_MIPFILTER_NONE
;
333 sampler
.mag_img_filter
= PIPE_TEX_FILTER_NEAREST
;
334 sampler
.compare_mode
= PIPE_TEX_COMPARE_NONE
;
335 sampler
.compare_func
= PIPE_FUNC_ALWAYS
;
336 sampler
.normalized_coords
= 1;
337 zscan
->samplers
[i
] = zscan
->pipe
->create_sampler_state(zscan
->pipe
, &sampler
);
338 if (!zscan
->samplers
[i
])
345 for (i
= 0; i
< 2; ++i
)
346 if (zscan
->samplers
[i
])
347 zscan
->pipe
->delete_sampler_state(zscan
->pipe
, zscan
->samplers
[i
]);
349 zscan
->pipe
->delete_rasterizer_state(zscan
->pipe
, zscan
->rs_state
);
352 zscan
->pipe
->delete_blend_state(zscan
->pipe
, zscan
->blend
);
359 cleanup_state(struct vl_zscan
*zscan
)
365 for (i
= 0; i
< 3; ++i
)
366 zscan
->pipe
->delete_sampler_state(zscan
->pipe
, zscan
->samplers
[i
]);
368 zscan
->pipe
->delete_rasterizer_state(zscan
->pipe
, zscan
->rs_state
);
369 zscan
->pipe
->delete_blend_state(zscan
->pipe
, zscan
->blend
);
372 struct pipe_sampler_view
*
373 vl_zscan_layout(struct pipe_context
*pipe
, const int layout
[64], unsigned blocks_per_line
)
375 const unsigned total_size
= blocks_per_line
* VL_BLOCK_WIDTH
* VL_BLOCK_HEIGHT
;
377 int patched_layout
[64];
379 struct pipe_resource res_tmpl
, *res
;
380 struct pipe_sampler_view sv_tmpl
, *sv
;
381 struct pipe_transfer
*buf_transfer
;
382 unsigned x
, y
, i
, pitch
;
385 struct pipe_box rect
=
388 VL_BLOCK_WIDTH
* blocks_per_line
,
393 assert(pipe
&& layout
&& blocks_per_line
);
395 for (i
= 0; i
< 64; ++i
)
396 patched_layout
[layout
[i
]] = i
;
398 memset(&res_tmpl
, 0, sizeof(res_tmpl
));
399 res_tmpl
.target
= PIPE_TEXTURE_2D
;
400 res_tmpl
.format
= PIPE_FORMAT_R32_FLOAT
;
401 res_tmpl
.width0
= VL_BLOCK_WIDTH
* blocks_per_line
;
402 res_tmpl
.height0
= VL_BLOCK_HEIGHT
;
404 res_tmpl
.array_size
= 1;
405 res_tmpl
.usage
= PIPE_USAGE_IMMUTABLE
;
406 res_tmpl
.bind
= PIPE_BIND_SAMPLER_VIEW
;
408 res
= pipe
->screen
->resource_create(pipe
->screen
, &res_tmpl
);
412 f
= pipe
->transfer_map(pipe
, res
,
413 0, PIPE_TRANSFER_WRITE
| PIPE_TRANSFER_DISCARD_RANGE
,
414 &rect
, &buf_transfer
);
418 pitch
= buf_transfer
->stride
/ sizeof(float);
420 for (i
= 0; i
< blocks_per_line
; ++i
)
421 for (y
= 0; y
< VL_BLOCK_HEIGHT
; ++y
)
422 for (x
= 0; x
< VL_BLOCK_WIDTH
; ++x
) {
423 float addr
= patched_layout
[x
+ y
* VL_BLOCK_WIDTH
] +
424 i
* VL_BLOCK_WIDTH
* VL_BLOCK_HEIGHT
;
428 f
[i
* VL_BLOCK_WIDTH
+ y
* pitch
+ x
] = addr
;
431 pipe
->transfer_unmap(pipe
, buf_transfer
);
433 memset(&sv_tmpl
, 0, sizeof(sv_tmpl
));
434 u_sampler_view_default_template(&sv_tmpl
, res
, res
->format
);
435 sv
= pipe
->create_sampler_view(pipe
, res
, &sv_tmpl
);
436 pipe_resource_reference(&res
, NULL
);
443 pipe_resource_reference(&res
, NULL
);
450 vl_zscan_init(struct vl_zscan
*zscan
, struct pipe_context
*pipe
,
451 unsigned buffer_width
, unsigned buffer_height
,
452 unsigned blocks_per_line
, unsigned blocks_total
,
453 unsigned num_channels
)
455 assert(zscan
&& pipe
);
458 zscan
->buffer_width
= buffer_width
;
459 zscan
->buffer_height
= buffer_height
;
460 zscan
->num_channels
= num_channels
;
461 zscan
->blocks_per_line
= blocks_per_line
;
462 zscan
->blocks_total
= blocks_total
;
464 if(!init_shaders(zscan
))
467 if(!init_state(zscan
)) {
468 cleanup_shaders(zscan
);
/**
 * Release everything created for \p zscan: first the shaders, then the
 * constant state objects.
 */
void
vl_zscan_cleanup(struct vl_zscan *zscan)
{
   assert(zscan);

   cleanup_shaders(zscan);
   cleanup_state(zscan);
}
485 vl_zscan_init_buffer(struct vl_zscan
*zscan
, struct vl_zscan_buffer
*buffer
,
486 struct pipe_sampler_view
*src
, struct pipe_surface
*dst
)
488 struct pipe_resource res_tmpl
, *res
;
489 struct pipe_sampler_view sv_tmpl
;
491 assert(zscan
&& buffer
);
493 memset(buffer
, 0, sizeof(struct vl_zscan_buffer
));
495 pipe_sampler_view_reference(&buffer
->src
, src
);
497 buffer
->viewport
.scale
[0] = dst
->width
;
498 buffer
->viewport
.scale
[1] = dst
->height
;
499 buffer
->viewport
.scale
[2] = 1;
500 buffer
->viewport
.translate
[0] = 0;
501 buffer
->viewport
.translate
[1] = 0;
502 buffer
->viewport
.translate
[2] = 0;
504 buffer
->fb_state
.width
= dst
->width
;
505 buffer
->fb_state
.height
= dst
->height
;
506 buffer
->fb_state
.nr_cbufs
= 1;
507 pipe_surface_reference(&buffer
->fb_state
.cbufs
[0], dst
);
509 memset(&res_tmpl
, 0, sizeof(res_tmpl
));
510 res_tmpl
.target
= PIPE_TEXTURE_3D
;
511 res_tmpl
.format
= PIPE_FORMAT_R8_UNORM
;
512 res_tmpl
.width0
= VL_BLOCK_WIDTH
* zscan
->blocks_per_line
;
513 res_tmpl
.height0
= VL_BLOCK_HEIGHT
;
515 res_tmpl
.array_size
= 1;
516 res_tmpl
.usage
= PIPE_USAGE_IMMUTABLE
;
517 res_tmpl
.bind
= PIPE_BIND_SAMPLER_VIEW
;
519 res
= zscan
->pipe
->screen
->resource_create(zscan
->pipe
->screen
, &res_tmpl
);
523 memset(&sv_tmpl
, 0, sizeof(sv_tmpl
));
524 u_sampler_view_default_template(&sv_tmpl
, res
, res
->format
);
525 sv_tmpl
.swizzle_r
= sv_tmpl
.swizzle_g
= sv_tmpl
.swizzle_b
= sv_tmpl
.swizzle_a
= TGSI_SWIZZLE_X
;
526 buffer
->quant
= zscan
->pipe
->create_sampler_view(zscan
->pipe
, res
, &sv_tmpl
);
527 pipe_resource_reference(&res
, NULL
);
535 vl_zscan_cleanup_buffer(struct vl_zscan_buffer
*buffer
)
539 pipe_sampler_view_reference(&buffer
->src
, NULL
);
540 pipe_sampler_view_reference(&buffer
->layout
, NULL
);
541 pipe_sampler_view_reference(&buffer
->quant
, NULL
);
542 pipe_surface_reference(&buffer
->fb_state
.cbufs
[0], NULL
);
546 vl_zscan_set_layout(struct vl_zscan_buffer
*buffer
, struct pipe_sampler_view
*layout
)
551 pipe_sampler_view_reference(&buffer
->layout
, layout
);
555 vl_zscan_upload_quant(struct vl_zscan
*zscan
, struct vl_zscan_buffer
*buffer
,
556 const uint8_t matrix
[64], bool intra
)
558 struct pipe_context
*pipe
;
559 struct pipe_transfer
*buf_transfer
;
560 unsigned x
, y
, i
, pitch
;
563 struct pipe_box rect
=
576 rect
.width
*= zscan
->blocks_per_line
;
578 data
= pipe
->transfer_map(pipe
, buffer
->quant
->texture
,
579 0, PIPE_TRANSFER_WRITE
|
580 PIPE_TRANSFER_DISCARD_RANGE
,
581 &rect
, &buf_transfer
);
585 pitch
= buf_transfer
->stride
;
587 for (i
= 0; i
< zscan
->blocks_per_line
; ++i
)
588 for (y
= 0; y
< VL_BLOCK_HEIGHT
; ++y
)
589 for (x
= 0; x
< VL_BLOCK_WIDTH
; ++x
)
590 data
[i
* VL_BLOCK_WIDTH
+ y
* pitch
+ x
] = matrix
[x
+ y
* VL_BLOCK_WIDTH
];
592 pipe
->transfer_unmap(pipe
, buf_transfer
);
596 vl_zscan_render(struct vl_zscan
*zscan
, struct vl_zscan_buffer
*buffer
, unsigned num_instances
)
600 zscan
->pipe
->bind_rasterizer_state(zscan
->pipe
, zscan
->rs_state
);
601 zscan
->pipe
->bind_blend_state(zscan
->pipe
, zscan
->blend
);
602 zscan
->pipe
->bind_sampler_states(zscan
->pipe
, PIPE_SHADER_FRAGMENT
,
603 0, 3, zscan
->samplers
);
604 zscan
->pipe
->set_framebuffer_state(zscan
->pipe
, &buffer
->fb_state
);
605 zscan
->pipe
->set_viewport_states(zscan
->pipe
, 0, 1, &buffer
->viewport
);
606 zscan
->pipe
->set_sampler_views(zscan
->pipe
, PIPE_SHADER_FRAGMENT
,
608 zscan
->pipe
->bind_vs_state(zscan
->pipe
, zscan
->vs
);
609 zscan
->pipe
->bind_fs_state(zscan
->pipe
, zscan
->fs
);
610 util_draw_arrays_instanced(zscan
->pipe
, PIPE_PRIM_QUADS
, 0, 4, 0, num_instances
);