1 /**************************************************************************
3 * Copyright 2011 Christian König
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 #include "pipe/p_screen.h"
31 #include "pipe/p_context.h"
33 #include "util/u_draw.h"
34 #include "util/u_sampler.h"
35 #include "util/u_inlines.h"
36 #include "util/u_memory.h"
38 #include "tgsi/tgsi_ureg.h"
40 #include "vl_defines.h"
44 #include "vl_vertex_buffers.h"
/* Zig-zag scan order for a 4x4 block (16 coefficients). */
const int vl_zscan_normal_16[] =
{
    0, 1, 4, 8, 5, 2, 3, 6,
    9,12,13,10, 7,11,14,15
};
/* Linear (identity) scan order for an 8x8 block. */
const int vl_zscan_linear[] =
{
    0, 1, 2, 3, 4, 5, 6, 7,
    8, 9,10,11,12,13,14,15,
   16,17,18,19,20,21,22,23,
   24,25,26,27,28,29,30,31,
   32,33,34,35,36,37,38,39,
   40,41,42,43,44,45,46,47,
   48,49,50,51,52,53,54,55,
   56,57,58,59,60,61,62,63
};
/* Zig-zag scan order for an 8x8 block. */
const int vl_zscan_normal[] =
{
    0, 1, 8,16, 9, 2, 3,10,
   17,24,32,25,18,11, 4, 5,
   12,19,26,33,40,48,41,34,
   27,20,13, 6, 7,14,21,28,
   35,42,49,56,57,50,43,36,
   29,22,15,23,30,37,44,51,
   58,59,52,45,38,31,39,46,
   53,60,61,54,47,55,62,63
};
/* Alternate scan order for an 8x8 block. */
const int vl_zscan_alternate[] =
{
    0, 8,16,24, 1, 9, 2,10,
   17,25,32,40,48,56,57,49,
   41,33,26,18, 3,11, 4,12,
   19,27,34,42,50,58,35,43,
   51,59,20,28, 5,13, 6,14,
   21,29,36,44,52,60,37,45,
   53,61,22,30, 7,15,23,31,
   38,46,54,62,39,47,55,63
};
/* Up-right diagonal scan order for 4x4 blocks - see H.265 section 6.5.3. */
const int vl_zscan_h265_up_right_diagonal_16[] =
{
    0,  4,  1,  8,  5,  2, 12,  9,
    6,  3, 13, 10,  7, 14, 11, 15,
};
/* Up-right diagonal scan order for 8x8 blocks - see H.265 section 6.5.3. */
const int vl_zscan_h265_up_right_diagonal[] =
{
    0,  8,  1, 16,  9,  2, 24, 17,
   10,  3, 32, 25, 18, 11,  4, 40,
   33, 26, 19, 12,  5, 48, 41, 34,
   27, 20, 13,  6, 56, 49, 42, 35,
   28, 21, 14,  7, 57, 50, 43, 36,
   29, 22, 15, 58, 51, 44, 37, 30,
   23, 59, 52, 45, 38, 31, 60, 53,
   46, 39, 61, 54, 47, 62, 55, 63,
};
120 create_vert_shader(struct vl_zscan
*zscan
)
122 struct ureg_program
*shader
;
123 struct ureg_src scale
;
124 struct ureg_src vrect
, vpos
, block_num
;
126 struct ureg_dst o_vpos
;
127 struct ureg_dst
*o_vtex
;
130 shader
= ureg_create(PIPE_SHADER_VERTEX
);
134 o_vtex
= MALLOC(zscan
->num_channels
* sizeof(struct ureg_dst
));
136 scale
= ureg_imm2f(shader
,
137 (float)VL_BLOCK_WIDTH
/ zscan
->buffer_width
,
138 (float)VL_BLOCK_HEIGHT
/ zscan
->buffer_height
);
140 vrect
= ureg_DECL_vs_input(shader
, VS_I_RECT
);
141 vpos
= ureg_DECL_vs_input(shader
, VS_I_VPOS
);
142 block_num
= ureg_DECL_vs_input(shader
, VS_I_BLOCK_NUM
);
144 tmp
= ureg_DECL_temporary(shader
);
146 o_vpos
= ureg_DECL_output(shader
, TGSI_SEMANTIC_POSITION
, VS_O_VPOS
);
148 for (i
= 0; i
< zscan
->num_channels
; ++i
)
149 o_vtex
[i
] = ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_VTEX
+ i
);
152 * o_vpos.xy = (vpos + vrect) * scale
155 * tmp.xy = InstanceID / blocks_per_line
156 * tmp.x = frac(tmp.x)
157 * tmp.y = floor(tmp.y)
159 * o_vtex.x = vrect.x / blocks_per_line + tmp.x
161 * o_vtex.z = tmp.z * blocks_per_line / blocks_total
163 ureg_ADD(shader
, ureg_writemask(tmp
, TGSI_WRITEMASK_XY
), vpos
, vrect
);
164 ureg_MUL(shader
, ureg_writemask(o_vpos
, TGSI_WRITEMASK_XY
), ureg_src(tmp
), scale
);
165 ureg_MOV(shader
, ureg_writemask(o_vpos
, TGSI_WRITEMASK_ZW
), ureg_imm1f(shader
, 1.0f
));
167 ureg_MUL(shader
, ureg_writemask(tmp
, TGSI_WRITEMASK_XW
), ureg_scalar(block_num
, TGSI_SWIZZLE_X
),
168 ureg_imm1f(shader
, 1.0f
/ zscan
->blocks_per_line
));
170 ureg_FRC(shader
, ureg_writemask(tmp
, TGSI_WRITEMASK_Y
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
));
171 ureg_FLR(shader
, ureg_writemask(tmp
, TGSI_WRITEMASK_W
), ureg_src(tmp
));
173 for (i
= 0; i
< zscan
->num_channels
; ++i
) {
174 ureg_ADD(shader
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_Y
),
175 ureg_imm1f(shader
, 1.0f
/ (zscan
->blocks_per_line
* VL_BLOCK_WIDTH
)
176 * ((signed)i
- (signed)zscan
->num_channels
/ 2)));
178 ureg_MAD(shader
, ureg_writemask(o_vtex
[i
], TGSI_WRITEMASK_X
), vrect
,
179 ureg_imm1f(shader
, 1.0f
/ zscan
->blocks_per_line
), ureg_src(tmp
));
180 ureg_MOV(shader
, ureg_writemask(o_vtex
[i
], TGSI_WRITEMASK_Y
), vrect
);
181 ureg_MOV(shader
, ureg_writemask(o_vtex
[i
], TGSI_WRITEMASK_Z
), vpos
);
182 ureg_MUL(shader
, ureg_writemask(o_vtex
[i
], TGSI_WRITEMASK_W
), ureg_src(tmp
),
183 ureg_imm1f(shader
, (float)zscan
->blocks_per_line
/ zscan
->blocks_total
));
186 ureg_release_temporary(shader
, tmp
);
191 return ureg_create_shader_and_destroy(shader
, zscan
->pipe
);
195 create_frag_shader(struct vl_zscan
*zscan
)
197 struct ureg_program
*shader
;
198 struct ureg_src
*vtex
;
200 struct ureg_src samp_src
, samp_scan
, samp_quant
;
202 struct ureg_dst
*tmp
;
203 struct ureg_dst quant
, fragment
;
207 shader
= ureg_create(PIPE_SHADER_FRAGMENT
);
211 vtex
= MALLOC(zscan
->num_channels
* sizeof(struct ureg_src
));
212 tmp
= MALLOC(zscan
->num_channels
* sizeof(struct ureg_dst
));
214 for (i
= 0; i
< zscan
->num_channels
; ++i
)
215 vtex
[i
] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_VTEX
+ i
, TGSI_INTERPOLATE_LINEAR
);
217 samp_src
= ureg_DECL_sampler(shader
, 0);
218 samp_scan
= ureg_DECL_sampler(shader
, 1);
219 samp_quant
= ureg_DECL_sampler(shader
, 2);
221 for (i
= 0; i
< zscan
->num_channels
; ++i
)
222 tmp
[i
] = ureg_DECL_temporary(shader
);
223 quant
= ureg_DECL_temporary(shader
);
225 fragment
= ureg_DECL_output(shader
, TGSI_SEMANTIC_COLOR
, 0);
228 * tmp.x = tex(vtex, 1)
230 * fragment = tex(tmp, 0) * quant
232 for (i
= 0; i
< zscan
->num_channels
; ++i
)
233 ureg_TEX(shader
, ureg_writemask(tmp
[i
], TGSI_WRITEMASK_X
), TGSI_TEXTURE_2D
, vtex
[i
], samp_scan
);
235 for (i
= 0; i
< zscan
->num_channels
; ++i
)
236 ureg_MOV(shader
, ureg_writemask(tmp
[i
], TGSI_WRITEMASK_Y
), ureg_scalar(vtex
[i
], TGSI_SWIZZLE_W
));
238 for (i
= 0; i
< zscan
->num_channels
; ++i
) {
239 ureg_TEX(shader
, ureg_writemask(tmp
[0], TGSI_WRITEMASK_X
<< i
), TGSI_TEXTURE_2D
, ureg_src(tmp
[i
]), samp_src
);
240 ureg_TEX(shader
, ureg_writemask(quant
, TGSI_WRITEMASK_X
<< i
), TGSI_TEXTURE_3D
, vtex
[i
], samp_quant
);
243 ureg_MUL(shader
, quant
, ureg_src(quant
), ureg_imm1f(shader
, 16.0f
));
244 ureg_MUL(shader
, fragment
, ureg_src(tmp
[0]), ureg_src(quant
));
246 for (i
= 0; i
< zscan
->num_channels
; ++i
)
247 ureg_release_temporary(shader
, tmp
[i
]);
253 return ureg_create_shader_and_destroy(shader
, zscan
->pipe
);
257 init_shaders(struct vl_zscan
*zscan
)
261 zscan
->vs
= create_vert_shader(zscan
);
265 zscan
->fs
= create_frag_shader(zscan
);
272 zscan
->pipe
->delete_vs_state(zscan
->pipe
, zscan
->vs
);
279 cleanup_shaders(struct vl_zscan
*zscan
)
283 zscan
->pipe
->delete_vs_state(zscan
->pipe
, zscan
->vs
);
284 zscan
->pipe
->delete_fs_state(zscan
->pipe
, zscan
->fs
);
288 init_state(struct vl_zscan
*zscan
)
290 struct pipe_blend_state blend
;
291 struct pipe_rasterizer_state rs_state
;
292 struct pipe_sampler_state sampler
;
297 memset(&rs_state
, 0, sizeof(rs_state
));
298 rs_state
.half_pixel_center
= true;
299 rs_state
.bottom_edge_rule
= true;
300 rs_state
.depth_clip
= 1;
301 zscan
->rs_state
= zscan
->pipe
->create_rasterizer_state(zscan
->pipe
, &rs_state
);
302 if (!zscan
->rs_state
)
305 memset(&blend
, 0, sizeof blend
);
307 blend
.independent_blend_enable
= 0;
308 blend
.rt
[0].blend_enable
= 0;
309 blend
.rt
[0].rgb_func
= PIPE_BLEND_ADD
;
310 blend
.rt
[0].rgb_src_factor
= PIPE_BLENDFACTOR_ONE
;
311 blend
.rt
[0].rgb_dst_factor
= PIPE_BLENDFACTOR_ONE
;
312 blend
.rt
[0].alpha_func
= PIPE_BLEND_ADD
;
313 blend
.rt
[0].alpha_src_factor
= PIPE_BLENDFACTOR_ONE
;
314 blend
.rt
[0].alpha_dst_factor
= PIPE_BLENDFACTOR_ONE
;
315 blend
.logicop_enable
= 0;
316 blend
.logicop_func
= PIPE_LOGICOP_CLEAR
;
317 /* Needed to allow color writes to FB, even if blending disabled */
318 blend
.rt
[0].colormask
= PIPE_MASK_RGBA
;
320 zscan
->blend
= zscan
->pipe
->create_blend_state(zscan
->pipe
, &blend
);
324 for (i
= 0; i
< 3; ++i
) {
325 memset(&sampler
, 0, sizeof(sampler
));
326 sampler
.wrap_s
= PIPE_TEX_WRAP_REPEAT
;
327 sampler
.wrap_t
= PIPE_TEX_WRAP_REPEAT
;
328 sampler
.wrap_r
= PIPE_TEX_WRAP_CLAMP_TO_EDGE
;
329 sampler
.min_img_filter
= PIPE_TEX_FILTER_NEAREST
;
330 sampler
.min_mip_filter
= PIPE_TEX_MIPFILTER_NONE
;
331 sampler
.mag_img_filter
= PIPE_TEX_FILTER_NEAREST
;
332 sampler
.compare_mode
= PIPE_TEX_COMPARE_NONE
;
333 sampler
.compare_func
= PIPE_FUNC_ALWAYS
;
334 sampler
.normalized_coords
= 1;
335 zscan
->samplers
[i
] = zscan
->pipe
->create_sampler_state(zscan
->pipe
, &sampler
);
336 if (!zscan
->samplers
[i
])
343 for (i
= 0; i
< 2; ++i
)
344 if (zscan
->samplers
[i
])
345 zscan
->pipe
->delete_sampler_state(zscan
->pipe
, zscan
->samplers
[i
]);
347 zscan
->pipe
->delete_rasterizer_state(zscan
->pipe
, zscan
->rs_state
);
350 zscan
->pipe
->delete_blend_state(zscan
->pipe
, zscan
->blend
);
357 cleanup_state(struct vl_zscan
*zscan
)
363 for (i
= 0; i
< 3; ++i
)
364 zscan
->pipe
->delete_sampler_state(zscan
->pipe
, zscan
->samplers
[i
]);
366 zscan
->pipe
->delete_rasterizer_state(zscan
->pipe
, zscan
->rs_state
);
367 zscan
->pipe
->delete_blend_state(zscan
->pipe
, zscan
->blend
);
370 struct pipe_sampler_view
*
371 vl_zscan_layout(struct pipe_context
*pipe
, const int layout
[64], unsigned blocks_per_line
)
373 const unsigned total_size
= blocks_per_line
* VL_BLOCK_WIDTH
* VL_BLOCK_HEIGHT
;
375 int patched_layout
[64];
377 struct pipe_resource res_tmpl
, *res
;
378 struct pipe_sampler_view sv_tmpl
, *sv
;
379 struct pipe_transfer
*buf_transfer
;
380 unsigned x
, y
, i
, pitch
;
383 struct pipe_box rect
=
386 VL_BLOCK_WIDTH
* blocks_per_line
,
391 assert(pipe
&& layout
&& blocks_per_line
);
393 for (i
= 0; i
< 64; ++i
)
394 patched_layout
[layout
[i
]] = i
;
396 memset(&res_tmpl
, 0, sizeof(res_tmpl
));
397 res_tmpl
.target
= PIPE_TEXTURE_2D
;
398 res_tmpl
.format
= PIPE_FORMAT_R32_FLOAT
;
399 res_tmpl
.width0
= VL_BLOCK_WIDTH
* blocks_per_line
;
400 res_tmpl
.height0
= VL_BLOCK_HEIGHT
;
402 res_tmpl
.array_size
= 1;
403 res_tmpl
.usage
= PIPE_USAGE_IMMUTABLE
;
404 res_tmpl
.bind
= PIPE_BIND_SAMPLER_VIEW
;
406 res
= pipe
->screen
->resource_create(pipe
->screen
, &res_tmpl
);
410 f
= pipe
->transfer_map(pipe
, res
,
411 0, PIPE_TRANSFER_WRITE
| PIPE_TRANSFER_DISCARD_RANGE
,
412 &rect
, &buf_transfer
);
416 pitch
= buf_transfer
->stride
/ sizeof(float);
418 for (i
= 0; i
< blocks_per_line
; ++i
)
419 for (y
= 0; y
< VL_BLOCK_HEIGHT
; ++y
)
420 for (x
= 0; x
< VL_BLOCK_WIDTH
; ++x
) {
421 float addr
= patched_layout
[x
+ y
* VL_BLOCK_WIDTH
] +
422 i
* VL_BLOCK_WIDTH
* VL_BLOCK_HEIGHT
;
426 f
[i
* VL_BLOCK_WIDTH
+ y
* pitch
+ x
] = addr
;
429 pipe
->transfer_unmap(pipe
, buf_transfer
);
431 memset(&sv_tmpl
, 0, sizeof(sv_tmpl
));
432 u_sampler_view_default_template(&sv_tmpl
, res
, res
->format
);
433 sv
= pipe
->create_sampler_view(pipe
, res
, &sv_tmpl
);
434 pipe_resource_reference(&res
, NULL
);
441 pipe_resource_reference(&res
, NULL
);
448 vl_zscan_init(struct vl_zscan
*zscan
, struct pipe_context
*pipe
,
449 unsigned buffer_width
, unsigned buffer_height
,
450 unsigned blocks_per_line
, unsigned blocks_total
,
451 unsigned num_channels
)
453 assert(zscan
&& pipe
);
456 zscan
->buffer_width
= buffer_width
;
457 zscan
->buffer_height
= buffer_height
;
458 zscan
->num_channels
= num_channels
;
459 zscan
->blocks_per_line
= blocks_per_line
;
460 zscan
->blocks_total
= blocks_total
;
462 if(!init_shaders(zscan
))
465 if(!init_state(zscan
)) {
466 cleanup_shaders(zscan
);
/**
 * Release the shaders and state objects owned by a zscan object.
 */
void
vl_zscan_cleanup(struct vl_zscan *zscan)
{
   assert(zscan);

   cleanup_shaders(zscan);
   cleanup_state(zscan);
}
483 vl_zscan_init_buffer(struct vl_zscan
*zscan
, struct vl_zscan_buffer
*buffer
,
484 struct pipe_sampler_view
*src
, struct pipe_surface
*dst
)
486 struct pipe_resource res_tmpl
, *res
;
487 struct pipe_sampler_view sv_tmpl
;
489 assert(zscan
&& buffer
);
491 memset(buffer
, 0, sizeof(struct vl_zscan_buffer
));
493 pipe_sampler_view_reference(&buffer
->src
, src
);
495 buffer
->viewport
.scale
[0] = dst
->width
;
496 buffer
->viewport
.scale
[1] = dst
->height
;
497 buffer
->viewport
.scale
[2] = 1;
498 buffer
->viewport
.translate
[0] = 0;
499 buffer
->viewport
.translate
[1] = 0;
500 buffer
->viewport
.translate
[2] = 0;
502 buffer
->fb_state
.width
= dst
->width
;
503 buffer
->fb_state
.height
= dst
->height
;
504 buffer
->fb_state
.nr_cbufs
= 1;
505 pipe_surface_reference(&buffer
->fb_state
.cbufs
[0], dst
);
507 memset(&res_tmpl
, 0, sizeof(res_tmpl
));
508 res_tmpl
.target
= PIPE_TEXTURE_3D
;
509 res_tmpl
.format
= PIPE_FORMAT_R8_UNORM
;
510 res_tmpl
.width0
= VL_BLOCK_WIDTH
* zscan
->blocks_per_line
;
511 res_tmpl
.height0
= VL_BLOCK_HEIGHT
;
513 res_tmpl
.array_size
= 1;
514 res_tmpl
.usage
= PIPE_USAGE_IMMUTABLE
;
515 res_tmpl
.bind
= PIPE_BIND_SAMPLER_VIEW
;
517 res
= zscan
->pipe
->screen
->resource_create(zscan
->pipe
->screen
, &res_tmpl
);
521 memset(&sv_tmpl
, 0, sizeof(sv_tmpl
));
522 u_sampler_view_default_template(&sv_tmpl
, res
, res
->format
);
523 sv_tmpl
.swizzle_r
= sv_tmpl
.swizzle_g
= sv_tmpl
.swizzle_b
= sv_tmpl
.swizzle_a
= TGSI_SWIZZLE_X
;
524 buffer
->quant
= zscan
->pipe
->create_sampler_view(zscan
->pipe
, res
, &sv_tmpl
);
525 pipe_resource_reference(&res
, NULL
);
533 vl_zscan_cleanup_buffer(struct vl_zscan_buffer
*buffer
)
537 pipe_sampler_view_reference(&buffer
->src
, NULL
);
538 pipe_sampler_view_reference(&buffer
->layout
, NULL
);
539 pipe_sampler_view_reference(&buffer
->quant
, NULL
);
540 pipe_surface_reference(&buffer
->fb_state
.cbufs
[0], NULL
);
544 vl_zscan_set_layout(struct vl_zscan_buffer
*buffer
, struct pipe_sampler_view
*layout
)
549 pipe_sampler_view_reference(&buffer
->layout
, layout
);
553 vl_zscan_upload_quant(struct vl_zscan
*zscan
, struct vl_zscan_buffer
*buffer
,
554 const uint8_t matrix
[64], bool intra
)
556 struct pipe_context
*pipe
;
557 struct pipe_transfer
*buf_transfer
;
558 unsigned x
, y
, i
, pitch
;
561 struct pipe_box rect
=
574 rect
.width
*= zscan
->blocks_per_line
;
576 data
= pipe
->transfer_map(pipe
, buffer
->quant
->texture
,
577 0, PIPE_TRANSFER_WRITE
|
578 PIPE_TRANSFER_DISCARD_RANGE
,
579 &rect
, &buf_transfer
);
583 pitch
= buf_transfer
->stride
;
585 for (i
= 0; i
< zscan
->blocks_per_line
; ++i
)
586 for (y
= 0; y
< VL_BLOCK_HEIGHT
; ++y
)
587 for (x
= 0; x
< VL_BLOCK_WIDTH
; ++x
)
588 data
[i
* VL_BLOCK_WIDTH
+ y
* pitch
+ x
] = matrix
[x
+ y
* VL_BLOCK_WIDTH
];
590 pipe
->transfer_unmap(pipe
, buf_transfer
);
594 vl_zscan_render(struct vl_zscan
*zscan
, struct vl_zscan_buffer
*buffer
, unsigned num_instances
)
598 zscan
->pipe
->bind_rasterizer_state(zscan
->pipe
, zscan
->rs_state
);
599 zscan
->pipe
->bind_blend_state(zscan
->pipe
, zscan
->blend
);
600 zscan
->pipe
->bind_sampler_states(zscan
->pipe
, PIPE_SHADER_FRAGMENT
,
601 0, 3, zscan
->samplers
);
602 zscan
->pipe
->set_framebuffer_state(zscan
->pipe
, &buffer
->fb_state
);
603 zscan
->pipe
->set_viewport_states(zscan
->pipe
, 0, 1, &buffer
->viewport
);
604 zscan
->pipe
->set_sampler_views(zscan
->pipe
, PIPE_SHADER_FRAGMENT
,
606 zscan
->pipe
->bind_vs_state(zscan
->pipe
, zscan
->vs
);
607 zscan
->pipe
->bind_fs_state(zscan
->pipe
, zscan
->fs
);
608 util_draw_arrays_instanced(zscan
->pipe
, PIPE_PRIM_QUADS
, 0, 4, 0, num_instances
);