1 /**************************************************************************
3 * Copyright 2019 Advanced Micro Devices, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 * Authors: James Zhu <james.zhu@amd.com>
28 **************************************************************************/
32 #include "tgsi/tgsi_text.h"
33 #include "vl_compositor_cs.h"
/*
 * TGSI text of a compute shader that fetches one channel from each of three
 * RECT sampler views, runs the fetched vector through three DP4s against
 * CONST[0..2] and stores the result to IMAGE[0] at the invocation's
 * coordinate.
 *
 * NOTE(review): in this view nothing consumes TEMP[1] (the drawn-area test)
 * and the text has no terminating instruction or closing semicolon; lines
 * appear to be missing from this view -- confirm against the complete file.
 */
45 const char *compute_shader_video_buffer
=
/* Fixed work-group size of 8x8x1 invocations. */
47 "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"
48 "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"
49 "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"
51 "DCL SV[0], THREAD_ID\n"
52 "DCL SV[1], BLOCK_ID\n"
55 "DCL SVIEW[0..2], RECT, FLOAT\n"
58 "DCL IMAGE[0], 2D, WR\n"
61 "IMM[0] UINT32 { 8, 8, 1, 0}\n"
62 "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n"
/* TEMP[0] = BLOCK_ID * {8, 8, 1, 0} + THREAD_ID: per-invocation coordinate. */
64 "UMAD TEMP[0], SV[1], IMM[0], SV[0]\n"
66 /* Drawn area check */
/* TEMP[1].x = (xy >= CONST[4].xy) AND (xy < CONST[4].zw), all four tests ANDed. */
67 "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"
68 "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n"
69 "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n"
70 "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"
71 "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"
/* Subtract CONST[5].xy from the coordinate and convert it to float. */
75 "UADD TEMP[2].xy, TEMP[0], -CONST[5].xyxy\n"
76 "U2F TEMP[2], TEMP[2]\n"
/* TEMP[3] is the same coordinate halved (IMM[1].y is 2.0); used for SAMP[1] and SAMP[2]. */
77 "DIV TEMP[3], TEMP[2], IMM[1].yyyy\n"
/* Divide both coordinates by CONST[3].zw. */
80 "DIV TEMP[2], TEMP[2], CONST[3].zwzw\n"
81 "DIV TEMP[3], TEMP[3], CONST[3].zwzw\n"
/* One channel from each sampler: SAMP[0] at TEMP[2], SAMP[1] and SAMP[2] at TEMP[3]. */
84 "TEX_LZ TEMP[4].x, TEMP[2], SAMP[0], RECT\n"
85 "TEX_LZ TEMP[4].y, TEMP[3], SAMP[1], RECT\n"
86 "TEX_LZ TEMP[4].z, TEMP[3], SAMP[2], RECT\n"
/* w = 1.0 so each DP4 below also adds the .w element of its constant row. */
88 "MOV TEMP[4].w, IMM[1].xxxx\n"
90 /* Color Space Conversion */
91 "DP4 TEMP[7].x, CONST[0], TEMP[4]\n"
92 "DP4 TEMP[7].y, CONST[1], TEMP[4]\n"
93 "DP4 TEMP[7].z, CONST[2], TEMP[4]\n"
/*
 * Output w = max(z <= CONST[3].x, z > CONST[3].y), where z is the SAMP[2]
 * fetch held in TEMP[4].z.
 */
95 "MOV TEMP[5].w, TEMP[4].zzzz\n"
96 "SLE TEMP[6].w, TEMP[5], CONST[3].xxxx\n"
97 "SGT TEMP[5].w, TEMP[5], CONST[3].yyyy\n"
99 "MAX TEMP[7].w, TEMP[5], TEMP[6]\n"
/* Write the converted value at the invocation's coordinate. */
101 "STORE IMAGE[0], TEMP[0], TEMP[7], 2D\n"
/*
 * TGSI text of a compute shader that samples three 2D_ARRAY sampler views
 * at array layer 0.0 and at array layer 1.0, blends the two fetches with
 * LRP, runs the blend through three DP4s against CONST[0..2] and stores the
 * result to IMAGE[0] at the invocation's coordinate.
 *
 * NOTE(review): in this view nothing consumes TEMP[1] (the drawn-area test)
 * and the text has no terminating instruction or closing semicolon; lines
 * appear to be missing from this view -- confirm against the complete file.
 */
106 const char *compute_shader_weave
=
/* Fixed work-group size of 8x8x1 invocations. */
108 "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"
109 "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"
110 "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"
112 "DCL SV[0], THREAD_ID\n"
113 "DCL SV[1], BLOCK_ID\n"
116 "DCL SVIEW[0..2], 2D_ARRAY, FLOAT\n"
119 "DCL IMAGE[0], 2D, WR\n"
122 "IMM[0] UINT32 { 8, 8, 1, 0}\n"
123 "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n"
124 "IMM[2] UINT32 { 1, 2, 4, 0}\n"
125 "IMM[3] FLT32 { 0.25, 0.5, 0.125, 0.125}\n"
/* TEMP[0] = BLOCK_ID * {8, 8, 1, 0} + THREAD_ID: per-invocation coordinate. */
127 "UMAD TEMP[0], SV[1], IMM[0], SV[0]\n"
129 /* Drawn area check */
/* TEMP[1].x = (xy >= CONST[4].xy) AND (xy < CONST[4].zw), all four tests ANDed. */
130 "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"
131 "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n"
132 "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n"
133 "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"
134 "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"
/* Subtract CONST[5].xy from the coordinate, convert to float, then halve y. */
137 "MOV TEMP[2], TEMP[0]\n"
139 "UADD TEMP[2].xy, TEMP[2], -CONST[5].xyxy\n"
142 "U2F TEMP[2], TEMP[2]\n"
143 "DIV TEMP[2].y, TEMP[2].yyyy, IMM[1].yyyy\n"
/* TEMP[12] is a copy of TEMP[2]; it is later used for the layer 1.0 fetch from SAMP[0]. */
145 "MOV TEMP[12], TEMP[2]\n"
/* TEMP[3] starts from TEMP[2] (y already halved) and halves x and y again. */
148 "MOV TEMP[3], TEMP[2]\n"
149 "DIV TEMP[3].xy, TEMP[3], IMM[1].yyyy\n"
/* TEMP[13] is a copy of TEMP[3]; later used for the layer 1.0 fetches from SAMP[1]/SAMP[2]. */
151 "MOV TEMP[13], TEMP[3]\n"
/* Add (0.5, 0.25) to TEMP[2]/TEMP[12] and (0.25, 0.125) to TEMP[3]/TEMP[13]. */
154 "ADD TEMP[2].x, TEMP[2].xxxx, IMM[3].yyyy\n"
155 "ADD TEMP[2].y, TEMP[2].yyyy, IMM[3].xxxx\n"
156 "ADD TEMP[12].x, TEMP[12].xxxx, IMM[3].yyyy\n"
157 "ADD TEMP[12].y, TEMP[12].yyyy, IMM[3].xxxx\n"
159 "ADD TEMP[3].x, TEMP[3].xxxx, IMM[3].xxxx\n"
160 "ADD TEMP[3].y, TEMP[3].yyyy, IMM[3].wwww\n"
161 "ADD TEMP[13].x, TEMP[13].xxxx, IMM[3].xxxx\n"
162 "ADD TEMP[13].y, TEMP[13].yyyy, IMM[3].wwww\n"
/* Divide all four coordinates by CONST[3].zw. */
165 "DIV TEMP[2].xy, TEMP[2], CONST[3].zwzw\n"
166 "DIV TEMP[12].xy, TEMP[12], CONST[3].zwzw\n"
167 "DIV TEMP[3].xy, TEMP[3], CONST[3].zwzw\n"
168 "DIV TEMP[13].xy, TEMP[13], CONST[3].zwzw\n"
/* Shift y by +0.25 for the layer 0.0 coordinates and by -0.25 for the layer 1.0 ones. */
171 "ADD TEMP[2].y, TEMP[2].yyyy, IMM[3].xxxx\n"
172 "ADD TEMP[12].y, TEMP[12].yyyy, -IMM[3].xxxx\n"
173 "ADD TEMP[3].y, TEMP[3].yyyy, IMM[3].xxxx\n"
174 "ADD TEMP[13].y, TEMP[13].yyyy, -IMM[3].xxxx\n"
/*
 * LRP weights: TEMP[14] = |v - ROUND(v)| * 2.0, with v.x taken from
 * TEMP[2].y and v.yz taken from TEMP[3].y.
 */
177 "MOV TEMP[14].x, TEMP[2].yyyy\n"
178 "MOV TEMP[14].yz, TEMP[3].yyyy\n"
179 "ROUND TEMP[15], TEMP[14]\n"
180 "ADD TEMP[14], TEMP[14], -TEMP[15]\n"
181 "MOV TEMP[14], |TEMP[14]|\n"
182 "MUL TEMP[14], TEMP[14], IMM[1].yyyy\n"
/*
 * Divide TEMP[2]/TEMP[12] by CONST[5].zw, and TEMP[3]/TEMP[13] by half of
 * CONST[5].zw (held in TEMP[15].xy).
 */
185 "DIV TEMP[2].xy, TEMP[2], CONST[5].zwzw\n"
186 "DIV TEMP[12].xy, TEMP[12], CONST[5].zwzw\n"
187 "DIV TEMP[15].xy, CONST[5].zwzw, IMM[1].yyyy\n"
188 "DIV TEMP[3].xy, TEMP[3], TEMP[15].xyxy\n"
189 "DIV TEMP[13].xy, TEMP[13], TEMP[15].xyxy\n"
/* Fetch into TEMP[10] with z = IMM[1].w = 0.0 (array layer 0.0). */
192 "MOV TEMP[2].z, IMM[1].wwww\n"
193 "MOV TEMP[3].z, IMM[1].wwww\n"
194 "TEX_LZ TEMP[10].x, TEMP[2], SAMP[0], 2D_ARRAY\n"
195 "TEX_LZ TEMP[10].y, TEMP[3], SAMP[1], 2D_ARRAY\n"
196 "TEX_LZ TEMP[10].z, TEMP[3], SAMP[2], 2D_ARRAY\n"
/* Fetch into TEMP[11] with z = IMM[1].x = 1.0 (array layer 1.0). */
198 "MOV TEMP[12].z, IMM[1].xxxx\n"
199 "MOV TEMP[13].z, IMM[1].xxxx\n"
200 "TEX_LZ TEMP[11].x, TEMP[12], SAMP[0], 2D_ARRAY\n"
201 "TEX_LZ TEMP[11].y, TEMP[13], SAMP[1], 2D_ARRAY\n"
202 "TEX_LZ TEMP[11].z, TEMP[13], SAMP[2], 2D_ARRAY\n"
/* Blend the two fetches using TEMP[14] as weight, then force w = 1.0 for the DP4s. */
204 "LRP TEMP[6], TEMP[14], TEMP[10], TEMP[11]\n"
205 "MOV TEMP[6].w, IMM[1].xxxx\n"
207 /* Color Space Conversion */
208 "DP4 TEMP[9].x, CONST[0], TEMP[6]\n"
209 "DP4 TEMP[9].y, CONST[1], TEMP[6]\n"
210 "DP4 TEMP[9].z, CONST[2], TEMP[6]\n"
/* Output w = max(z <= CONST[3].x, z > CONST[3].y), where z is TEMP[6].z. */
212 "MOV TEMP[7].w, TEMP[6].zzzz\n"
213 "SLE TEMP[8].w, TEMP[7], CONST[3].xxxx\n"
214 "SGT TEMP[7].w, TEMP[7], CONST[3].yyyy\n"
216 "MAX TEMP[9].w, TEMP[7], TEMP[8]\n"
/* Write the converted value at the invocation's coordinate. */
218 "STORE IMAGE[0], TEMP[0], TEMP[9], 2D\n"
/*
 * TGSI text of a compute shader that fetches a full texel from a single RECT
 * sampler view and stores it unmodified to IMAGE[0] at the invocation's
 * coordinate.
 *
 * NOTE(review): in this view nothing consumes TEMP[1] (the drawn-area test)
 * and the text has no terminating instruction or closing semicolon; lines
 * appear to be missing from this view -- confirm against the complete file.
 */
223 const char *compute_shader_rgba
=
/* Fixed work-group size of 8x8x1 invocations. */
225 "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"
226 "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"
227 "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"
229 "DCL SV[0], THREAD_ID\n"
230 "DCL SV[1], BLOCK_ID\n"
233 "DCL SVIEW[0], RECT, FLOAT\n"
236 "DCL IMAGE[0], 2D, WR\n"
239 "IMM[0] UINT32 { 8, 8, 1, 0}\n"
240 "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n"
/* TEMP[0] = BLOCK_ID * {8, 8, 1, 0} + THREAD_ID: per-invocation coordinate. */
242 "UMAD TEMP[0], SV[1], IMM[0], SV[0]\n"
244 /* Drawn area check */
/* TEMP[1].x = (xy >= CONST[4].xy) AND (xy < CONST[4].zw), all four tests ANDed. */
245 "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"
246 "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n"
247 "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n"
248 "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"
249 "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"
/* Subtract CONST[5].xy from the coordinate and convert it to float. */
253 "UADD TEMP[2].xy, TEMP[0], -CONST[5].xyxy\n"
254 "U2F TEMP[2], TEMP[2]\n"
/* Divide the coordinate by CONST[3].zw. */
257 "DIV TEMP[2], TEMP[2], CONST[3].zwzw\n"
/* Fetch the texel and write it out as-is. */
260 "TEX_LZ TEMP[3], TEMP[2], SAMP[0], RECT\n"
262 "STORE IMAGE[0], TEMP[0], TEMP[3], 2D\n"
/*
 * Bind c->fb_state.cbufs[0]->texture as compute-stage image 0, bind the
 * compute state `cs`, and launch a grid whose x and y sizes are
 * draw_area->x1 and draw_area->y1 divided (rounding up) by info.block[0]
 * and info.block[1].
 *
 * NOTE(review): in this view the return type, the braces, the declaration
 * of `cs` and any assignment to info.block[] are not visible; as shown,
 * info.block[] would still hold its zero initialiser when used as a
 * divisor. Confirm against the complete file.
 */
268 cs_launch(struct vl_compositor
*c
,
270 const struct u_rect
*draw_area
)
272 struct pipe_context
*ctx
= c
->pipe
;
/* Describe the destination texture as a read/write image in its own format. */
275 struct pipe_image_view image
= {};
276 image
.resource
= c
->fb_state
.cbufs
[0]->texture
;
277 image
.shader_access
= image
.access
= PIPE_IMAGE_ACCESS_READ_WRITE
;
278 image
.format
= c
->fb_state
.cbufs
[0]->texture
->format
;
/* One image, starting at slot 0 of the compute stage. */
280 ctx
->set_shader_images(c
->pipe
, PIPE_SHADER_COMPUTE
, 0, 1, &image
);
282 /* Bind compute shader */
283 ctx
->bind_compute_state(ctx
, cs
);
285 /* Dispatch compute */
286 struct pipe_grid_info info
= {};
/* The grid spans from the origin to (x1, y1); x0/y0 are not used here. */
290 info
.grid
[0] = DIV_ROUND_UP(draw_area
->x1
, info
.block
[0]);
291 info
.grid
[1] = DIV_ROUND_UP(draw_area
->y1
, info
.block
[1]);
294 ctx
->launch_grid(ctx
, &info
);
/*
 * Compute a rectangle for `layer`: the corner points tl/br are transformed
 * by the layer viewport (scale, then translate) and the result is clamped
 * to the scissor rectangle of `s`.
 *
 * NOTE(review): in this view the braces, the statements that initialise
 * `tl` and `br`, and the return statement are not visible; confirm against
 * the complete file.
 */
297 static inline struct u_rect
298 calc_drawn_area(struct vl_compositor_state
*s
,
299 struct vl_compositor_layer
*layer
)
301 struct vertex2f tl
, br
;
302 struct u_rect result
;
/* Apply the viewport transform: corner * scale + translate, per axis. */
310 result
.x0
= tl
.x
* layer
->viewport
.scale
[0] + layer
->viewport
.translate
[0];
311 result
.y0
= tl
.y
* layer
->viewport
.scale
[1] + layer
->viewport
.translate
[1];
312 result
.x1
= br
.x
* layer
->viewport
.scale
[0] + layer
->viewport
.translate
[0];
313 result
.y1
= br
.y
* layer
->viewport
.scale
[1] + layer
->viewport
.translate
[1];
/* Clamp to the scissor: raise the minimum corner, lower the maximum corner. */
316 result
.x0
= MAX2(result
.x0
, s
->scissor
.minx
);
317 result
.y0
= MAX2(result
.y0
, s
->scissor
.miny
);
318 result
.x1
= MIN2(result
.x1
, s
->scissor
.maxx
);
319 result
.y1
= MIN2(result
.y1
, s
->scissor
.maxy
);
/*
 * Write the fields of `drawn` into the mapped s->shader_params buffer.
 * The write position starts sizeof(vl_csc_matrix)/sizeof(float) + 2 floats
 * into the mapping; from there the layout is:
 *   2 floats: scale_x, scale_y
 *   6 ints:   area.x0, area.y0, area.x1, area.y1, translate_x, translate_y
 *   2 floats: sampler0_w, sampler0_h
 *
 * NOTE(review): in this view the return type, the braces, the remaining
 * arguments of pipe_buffer_map() and any check of its result are not
 * visible; confirm against the complete file.
 * NOTE(review): presumably these slots are what the compute shaders read as
 * CONST[3].zw, CONST[4] and CONST[5] -- this depends on the size of
 * vl_csc_matrix, which is not shown here; verify.
 */
324 set_viewport(struct vl_compositor_state
*s
,
325 struct cs_viewport
*drawn
)
327 struct pipe_transfer
*buf_transfer
;
331 void *ptr
= pipe_buffer_map(s
->pipe
, s
->shader_params
,
332 PIPE_TRANSFER_READ
| PIPE_TRANSFER_WRITE
,
/* Skip the CSC matrix plus two further floats that this function leaves untouched. */
338 float *ptr_float
= (float *)ptr
;
339 ptr_float
+= sizeof(vl_csc_matrix
)/sizeof(float) + 2;
340 *ptr_float
++ = drawn
->scale_x
;
341 *ptr_float
++ = drawn
->scale_y
;
/* The next six slots are written as ints through a re-typed pointer. */
343 int *ptr_int
= (int *)ptr_float
;
344 *ptr_int
++ = drawn
->area
.x0
;
345 *ptr_int
++ = drawn
->area
.y0
;
346 *ptr_int
++ = drawn
->area
.x1
;
347 *ptr_int
++ = drawn
->area
.y1
;
348 *ptr_int
++ = drawn
->translate_x
;
349 *ptr_int
++ = drawn
->translate_y
;
/* Back to floats for the last two slots. */
351 ptr_float
= (float *)ptr_int
;
352 *ptr_float
++ = drawn
->sampler0_w
;
353 *ptr_float
= drawn
->sampler0_h
;
354 pipe_buffer_unmap(s
->pipe
, buf_transfer
);
/*
 * For every layer whose bit is set in s->used_layers: fill a cs_viewport
 * from the layer, upload it with set_viewport(), bind the layer's sampler
 * states and sampler views to the compute stage, and launch layer->cs over
 * the drawn area. The layer's drawn area is also merged into `dirty`.
 *
 * NOTE(review): in this view the return type, the braces, the declaration
 * of `i` and whatever scoping or condition surrounds the `dirty` update are
 * not visible (`drawn` is declared twice in the visible text); confirm
 * against the complete file.
 */
360 draw_layers(struct vl_compositor
*c
,
361 struct vl_compositor_state
*s
,
362 struct u_rect
*dirty
)
368 for (i
= 0; i
< VL_COMPOSITOR_MAX_LAYERS
; ++i
) {
369 if (s
->used_layers
& (1 << i
)) {
370 struct vl_compositor_layer
*layer
= &s
->layers
[i
];
371 struct pipe_sampler_view
**samplers
= &layer
->sampler_views
[0];
/* 1 if samplers[1] is NULL, else 2 if samplers[2] is NULL, else 3. */
372 unsigned num_sampler_views
= !samplers
[1] ? 1 : !samplers
[2] ? 2 : 3;
373 struct cs_viewport drawn
;
375 drawn
.area
= calc_drawn_area(s
, layer
);
/* Horizontal viewport scale divided by the width of the first sampler view's texture. */
376 drawn
.scale_x
= layer
->viewport
.scale
[0] /
377 (float)layer
->sampler_views
[0]->texture
->width0
;
/*
 * NOTE(review): scale_y reuses scale_x rather than being derived from
 * viewport.scale[1] and height0 -- confirm this is intended.
 */
378 drawn
.scale_y
= drawn
.scale_x
;
379 drawn
.translate_x
= (int)layer
->viewport
.translate
[0];
380 drawn
.translate_y
= (int)layer
->viewport
.translate
[1];
381 drawn
.sampler0_w
= (float)layer
->sampler_views
[0]->texture
->width0
;
382 drawn
.sampler0_h
= (float)layer
->sampler_views
[0]->texture
->height0
;
383 set_viewport(s
, &drawn
);
/* Bind num_sampler_views sampler states and views, starting at slot 0. */
385 c
->pipe
->bind_sampler_states(c
->pipe
, PIPE_SHADER_COMPUTE
, 0,
386 num_sampler_views
, layer
->samplers
);
387 c
->pipe
->set_sampler_views(c
->pipe
, PIPE_SHADER_COMPUTE
, 0,
388 num_sampler_views
, samplers
);
390 cs_launch(c
, layer
->cs
, &(drawn
.area
));
/* Grow `dirty` so that it also covers this layer's drawn area. */
393 struct u_rect drawn
= calc_drawn_area(s
, layer
);
394 dirty
->x0
= MIN2(drawn
.x0
, dirty
->x0
);
395 dirty
->y0
= MIN2(drawn
.y0
, dirty
->y0
);
396 dirty
->x1
= MAX2(drawn
.x1
, dirty
->x1
);
397 dirty
->y1
= MAX2(drawn
.y1
, dirty
->y1
);
/*
 * Translate the TGSI text in `compute_shader_text` into a local array of up
 * to 1024 tokens and return the result of c->pipe->create_compute_state()
 * for a pipe_compute_state whose ir_type is PIPE_SHADER_IR_TGSI.
 *
 * Both `c` and `compute_shader_text` are asserted to be non-NULL.
 *
 * NOTE(review): in this view the return type, the braces, the body of the
 * translation-failure branch and any assignment that hands `tokens` to
 * `state` are not visible; confirm against the complete file.
 */
404 vl_compositor_cs_create_shader(struct vl_compositor
*c
,
405 const char *compute_shader_text
)
407 assert(c
&& compute_shader_text
);
/* Token storage is a local array; translation is bounded by its size. */
409 struct tgsi_token tokens
[1024];
410 if (!tgsi_text_translate(compute_shader_text
, tokens
, ARRAY_SIZE(tokens
))) {
415 struct pipe_compute_state state
= {};
416 state
.ir_type
= PIPE_SHADER_IR_TGSI
;
419 /* create compute shader */
420 return c
->pipe
->create_compute_state(c
->pipe
, &state
);
/*
 * Render the layers of `s` into `dst_surface` using the compute path:
 * record the surface in c->fb_state, set the scissor maximum to the surface
 * size when s->scissor_valid is false, optionally clear the whole surface,
 * bind s->shader_params as compute constant buffer 0 and call
 * draw_layers().
 *
 * NOTE(review): in this view the return type, the braces, the declaration
 * of `clear_dirty` and any assignment to scissor.minx/miny are not visible;
 * confirm against the complete file.
 */
424 vl_compositor_cs_render(struct vl_compositor_state
*s
,
425 struct vl_compositor
*c
,
426 struct pipe_surface
*dst_surface
,
427 struct u_rect
*dirty_area
,
/* cs_launch() reads the destination texture from c->fb_state.cbufs[0]. */
433 c
->fb_state
.width
= dst_surface
->width
;
434 c
->fb_state
.height
= dst_surface
->height
;
435 c
->fb_state
.cbufs
[0] = dst_surface
;
/* Without a valid scissor, the scissor maximum is the surface size. */
437 if (!s
->scissor_valid
) {
440 s
->scissor
.maxx
= dst_surface
->width
;
441 s
->scissor
.maxy
= dst_surface
->height
;
/*
 * Clear only when requested and when dirty_area has x0 < x1 or y0 < y1.
 * The clear covers the whole surface, after which dirty_area is reset to
 * the MAX_DIRTY/MIN_DIRTY values.
 */
444 if (clear_dirty
&& dirty_area
&&
445 (dirty_area
->x0
< dirty_area
->x1
|| dirty_area
->y0
< dirty_area
->y1
)) {
447 c
->pipe
->clear_render_target(c
->pipe
, dst_surface
, &s
->clear_color
,
448 0, 0, dst_surface
->width
, dst_surface
->height
, false);
449 dirty_area
->x0
= dirty_area
->y0
= VL_COMPOSITOR_MAX_DIRTY
;
450 dirty_area
->x1
= dirty_area
->y1
= VL_COMPOSITOR_MIN_DIRTY
;
/* Bind the shader parameters as constant buffer 0 of the compute stage. */
453 pipe_set_constant_buffer(c
->pipe
, PIPE_SHADER_COMPUTE
, 0, s
->shader_params
);
455 draw_layers(c
, s
, dirty_area
);