1 /**************************************************************************
3 * Copyright 2019 Advanced Micro Devices, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 * Authors: James Zhu <james.zhu@amd.com>
28 **************************************************************************/
32 #include "tgsi/tgsi_text.h"
33 #include "vl_compositor_cs.h"
/*
 * compute_shader_video_buffer: TGSI compute shader text, 8x8x1 threads per
 * block, reading three RECT sampler views and writing IMAGE[0].
 *
 * Per invocation, as far as the visible instructions show:
 *  - TEMP[0].xy = BLOCK_ID.xy * 8 + THREAD_ID.xy.
 *  - TEMP[1].x is true only when CONST[4].xy <= TEMP[0].xy < CONST[4].zw.
 *  - TEMP[2].xy = float(TEMP[0].xy - CONST[5].xy) / CONST[3].zw is the
 *    coordinate for SAMP[0]; TEMP[3].xy is the same value pre-multiplied by
 *    CONST[6].xy and is the coordinate for SAMP[1] and SAMP[2].
 *  - TEMP[4] = (SAMP[0].x, SAMP[1].y, SAMP[2].z, 1.0) is multiplied by the
 *    rows CONST[0..2] to give TEMP[7].xyz.
 *  - TEMP[7].w = max(TEMP[4].z <= CONST[3].x, TEMP[4].z > CONST[3].y).
 *  - TEMP[7] is stored to IMAGE[0] at TEMP[0].xy.
 *
 * NOTE(review): nothing visible here declares CONST/SAMP/TEMP, consumes
 * TEMP[1].x, or terminates the program, and the numbers embedded in each
 * line skip values. Lines look elided from this listing; confirm against
 * the complete file.
 */
45 const char *compute_shader_video_buffer
=
47 "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"
48 "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"
49 "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"
51 "DCL SV[0], THREAD_ID\n"
52 "DCL SV[1], BLOCK_ID\n"
55 "DCL SVIEW[0..2], RECT, FLOAT\n"
58 "DCL IMAGE[0], 2D, WR\n"
/* IMM[0].xy matches the fixed block width/height declared above. */
61 "IMM[0] UINT32 { 8, 8, 1, 0}\n"
62 "IMM[1] FLT32 { 1.0, 0.0, 0.0, 0.0}\n"
/* Invocation coordinate: block id * block size + thread id. */
64 "UMAD TEMP[0].xy, SV[1].xyyy, IMM[0].xyyy, SV[0].xyyy\n"
66 /* Drawn area check */
67 "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"
68 "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n"
69 "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n"
70 "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"
71 "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"
/* Subtract CONST[5].xy, convert to float, derive the SAMP[1]/SAMP[2]
 * coordinate by multiplying with CONST[6].xy. */
75 "UADD TEMP[2].xy, TEMP[0].xyyy, -CONST[5].xyxy\n"
76 "U2F TEMP[2].xy, TEMP[2].xyyy\n"
77 "MUL TEMP[3].xy, TEMP[2].xyyy, CONST[6].xyyy\n"
/* Divide both coordinates by CONST[3].zw. */
80 "DIV TEMP[2].xy, TEMP[2].xyyy, CONST[3].zwww\n"
81 "DIV TEMP[3].xy, TEMP[3].xyyy, CONST[3].zwww\n"
/* One channel per sampler; w is forced to 1.0 for the DP4s below. */
84 "TEX_LZ TEMP[4].x, TEMP[2].xyyy, SAMP[0], RECT\n"
85 "TEX_LZ TEMP[4].y, TEMP[3].xyyy, SAMP[1], RECT\n"
86 "TEX_LZ TEMP[4].z, TEMP[3].xyyy, SAMP[2], RECT\n"
88 "MOV TEMP[4].w, IMM[1].xxxx\n"
90 /* Color Space Conversion */
91 "DP4 TEMP[7].x, CONST[0], TEMP[4]\n"
92 "DP4 TEMP[7].y, CONST[1], TEMP[4]\n"
93 "DP4 TEMP[7].z, CONST[2], TEMP[4]\n"
/* Output w is 1.0 when the fetched z channel is <= CONST[3].x or
 * > CONST[3].y, otherwise 0.0. */
95 "MOV TEMP[5].w, TEMP[4].zzzz\n"
96 "SLE TEMP[6].w, TEMP[5].wwww, CONST[3].xxxx\n"
97 "SGT TEMP[5].w, TEMP[5].wwww, CONST[3].yyyy\n"
99 "MAX TEMP[7].w, TEMP[5].wwww, TEMP[6].wwww\n"
101 "STORE IMAGE[0], TEMP[0].xyyy, TEMP[7], 2D\n"
/*
 * compute_shader_weave: TGSI compute shader text, 8x8x1 threads per block,
 * reading three 2D_ARRAY sampler views and writing IMAGE[0].
 *
 * Per invocation, as far as the visible instructions show:
 *  - TEMP[0].xy = BLOCK_ID.xy * 8 + THREAD_ID.xy; TEMP[1].x is the
 *    CONST[4] rectangle test.
 *  - TEMP[2]/TEMP[12] hold the SAMP[0] coordinate (y halved) and
 *    TEMP[3]/TEMP[13] hold the SAMP[1]/SAMP[2] coordinate (x halved, y
 *    quartered); TEMP[2]/TEMP[3] address array layer 0.0, TEMP[12]/TEMP[13]
 *    address array layer 1.0, with opposite +/-0.25 y offsets.
 *  - TEMP[14] = 2 * |v - round(v)| of the y coordinates is the LRP weight
 *    blending the layer-0 fetch (TEMP[10]) with the layer-1 fetch (TEMP[11]).
 *  - The blended value (w = 1.0) is multiplied by rows CONST[0..2];
 *    TEMP[9].w = max(z <= CONST[3].x, z > CONST[3].y) on the blended z.
 *  - TEMP[9] is stored to IMAGE[0] at TEMP[0].xy.
 *
 * NOTE(review): nothing visible here declares CONST/SAMP/TEMP, consumes
 * TEMP[1].x, or terminates the program, and the numbers embedded in each
 * line skip values. Lines look elided from this listing; confirm against
 * the complete file.
 */
106 const char *compute_shader_weave
=
108 "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"
109 "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"
110 "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"
112 "DCL SV[0], THREAD_ID\n"
113 "DCL SV[1], BLOCK_ID\n"
116 "DCL SVIEW[0..2], 2D_ARRAY, FLOAT\n"
119 "DCL IMAGE[0], 2D, WR\n"
122 "IMM[0] UINT32 { 8, 8, 1, 0}\n"
123 "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n"
124 "IMM[2] UINT32 { 1, 2, 4, 0}\n"
125 "IMM[3] FLT32 { 0.25, 0.5, 0.125, 0.125}\n"
/* Invocation coordinate: block id * block size + thread id. */
127 "UMAD TEMP[0].xy, SV[1].xyyy, IMM[0].xyyy, SV[0].xyyy\n"
129 /* Drawn area check */
130 "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"
131 "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n"
132 "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n"
133 "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"
134 "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"
137 "MOV TEMP[2].xy, TEMP[0].xyyy\n"
/* Subtract CONST[5].xy. */
139 "UADD TEMP[2].xy, TEMP[2].xyyy, -CONST[5].xyxy\n"
/* To float; y is halved (IMM[1].y = 2.0). */
142 "U2F TEMP[2].xy, TEMP[2].xyyy\n"
143 "DIV TEMP[2].y, TEMP[2].yyyy, IMM[1].yyyy\n"
145 "MOV TEMP[12].xy, TEMP[2].xyyy\n"
/* SAMP[1]/SAMP[2] coordinate: both components halved again. */
148 "MOV TEMP[3].xy, TEMP[2].xyyy\n"
149 "DIV TEMP[3].xy, TEMP[3], IMM[1].yyyy\n"
151 "MOV TEMP[13].xy, TEMP[3].xyyy\n"
/* Add (0.5, 0.25) to TEMP[2]/TEMP[12] and (0.25, 0.125) to
 * TEMP[3]/TEMP[13]. */
154 "ADD TEMP[2].x, TEMP[2].xxxx, IMM[3].yyyy\n"
155 "ADD TEMP[2].y, TEMP[2].yyyy, IMM[3].xxxx\n"
156 "ADD TEMP[12].x, TEMP[12].xxxx, IMM[3].yyyy\n"
157 "ADD TEMP[12].y, TEMP[12].yyyy, IMM[3].xxxx\n"
159 "ADD TEMP[3].x, TEMP[3].xxxx, IMM[3].xxxx\n"
160 "ADD TEMP[3].y, TEMP[3].yyyy, IMM[3].wwww\n"
161 "ADD TEMP[13].x, TEMP[13].xxxx, IMM[3].xxxx\n"
162 "ADD TEMP[13].y, TEMP[13].yyyy, IMM[3].wwww\n"
/* Divide all four coordinates by CONST[3].zw. */
165 "DIV TEMP[2].xy, TEMP[2].xyyy, CONST[3].zwzw\n"
166 "DIV TEMP[12].xy, TEMP[12].xyyy, CONST[3].zwzw\n"
167 "DIV TEMP[3].xy, TEMP[3].xyyy, CONST[3].zwzw\n"
168 "DIV TEMP[13].xy, TEMP[13].xyyy, CONST[3].zwzw\n"
/* Layer-0 coordinates get +0.25 in y, layer-1 coordinates get -0.25. */
171 "ADD TEMP[2].y, TEMP[2].yyyy, IMM[3].xxxx\n"
172 "ADD TEMP[12].y, TEMP[12].yyyy, -IMM[3].xxxx\n"
173 "ADD TEMP[3].y, TEMP[3].yyyy, IMM[3].xxxx\n"
174 "ADD TEMP[13].y, TEMP[13].yyyy, -IMM[3].xxxx\n"
/* LRP weights: 2 * |y - round(y)| for the SAMP[0] row (x) and the
 * SAMP[1]/SAMP[2] row (y, z). */
177 "MOV TEMP[14].x, TEMP[2].yyyy\n"
178 "MOV TEMP[14].yz, TEMP[3].yyyy\n"
179 "ROUND TEMP[15].xyz, TEMP[14].xyzz\n"
180 "ADD TEMP[14].xyz, TEMP[14].xyzz, -TEMP[15].xyzz\n"
181 "MOV TEMP[14].xyz, |TEMP[14].xyzz|\n"
182 "MUL TEMP[14].xyz, TEMP[14].xyzz, IMM[1].yyyy\n"
/* Divide by CONST[5].zw (TEMP[2]/TEMP[12]) or by half of it
 * (TEMP[3]/TEMP[13]). */
185 "DIV TEMP[2].xy, TEMP[2].xyyy, CONST[5].zwzw\n"
186 "DIV TEMP[12].xy, TEMP[12].xyyy, CONST[5].zwzw\n"
187 "DIV TEMP[15].xy, CONST[5].zwzw, IMM[1].yyyy\n"
188 "DIV TEMP[3].xy, TEMP[3].xyyy, TEMP[15].xyxy\n"
189 "DIV TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xyxy\n"
/* Fetch from array layer 0.0 (IMM[1].w). */
192 "MOV TEMP[2].z, IMM[1].wwww\n"
193 "MOV TEMP[3].z, IMM[1].wwww\n"
194 "TEX_LZ TEMP[10].x, TEMP[2].xyzz, SAMP[0], 2D_ARRAY\n"
195 "TEX_LZ TEMP[10].y, TEMP[3].xyzz, SAMP[1], 2D_ARRAY\n"
196 "TEX_LZ TEMP[10].z, TEMP[3].xyzz, SAMP[2], 2D_ARRAY\n"
/* Fetch from array layer 1.0 (IMM[1].x). */
198 "MOV TEMP[12].z, IMM[1].xxxx\n"
199 "MOV TEMP[13].z, IMM[1].xxxx\n"
200 "TEX_LZ TEMP[11].x, TEMP[12].xyzz, SAMP[0], 2D_ARRAY\n"
201 "TEX_LZ TEMP[11].y, TEMP[13].xyzz, SAMP[1], 2D_ARRAY\n"
202 "TEX_LZ TEMP[11].z, TEMP[13].xyzz, SAMP[2], 2D_ARRAY\n"
/* Blend the two fetches with the weights in TEMP[14]; w = 1.0. */
204 "LRP TEMP[6].xyz, TEMP[14].xyzz, TEMP[10].xyzz, TEMP[11].xyzz\n"
205 "MOV TEMP[6].w, IMM[1].xxxx\n"
207 /* Color Space Conversion */
208 "DP4 TEMP[9].x, CONST[0], TEMP[6]\n"
209 "DP4 TEMP[9].y, CONST[1], TEMP[6]\n"
210 "DP4 TEMP[9].z, CONST[2], TEMP[6]\n"
/* Output w is 1.0 when the blended z is <= CONST[3].x or > CONST[3].y. */
212 "MOV TEMP[7].w, TEMP[6].zzzz\n"
213 "SLE TEMP[8].w, TEMP[7].wwww, CONST[3].xxxx\n"
214 "SGT TEMP[7].w, TEMP[7].wwww, CONST[3].yyyy\n"
216 "MAX TEMP[9].w, TEMP[7].wwww, TEMP[8].wwww\n"
218 "STORE IMAGE[0], TEMP[0].xyyy, TEMP[9], 2D\n"
/*
 * compute_shader_rgba: TGSI compute shader text, 8x8x1 threads per block,
 * reading one RECT sampler view and writing IMAGE[0].
 *
 * Per invocation, as far as the visible instructions show:
 *  - TEMP[0].xy = BLOCK_ID.xy * 8 + THREAD_ID.xy; TEMP[1].x is the
 *    CONST[4] rectangle test.
 *  - TEMP[2].xy = float(TEMP[0].xy - CONST[5].xy) / CONST[3].zw.
 *  - The full texel fetched from SAMP[0] at TEMP[2] is stored unchanged to
 *    IMAGE[0] at TEMP[0].xy.
 *
 * NOTE(review): nothing visible here declares CONST/SAMP/TEMP, consumes
 * TEMP[1].x, or terminates the program, and the numbers embedded in each
 * line skip values. Lines look elided from this listing; confirm against
 * the complete file.
 */
223 const char *compute_shader_rgba
=
225 "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"
226 "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"
227 "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"
229 "DCL SV[0], THREAD_ID\n"
230 "DCL SV[1], BLOCK_ID\n"
233 "DCL SVIEW[0], RECT, FLOAT\n"
236 "DCL IMAGE[0], 2D, WR\n"
239 "IMM[0] UINT32 { 8, 8, 1, 0}\n"
240 "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n"
/* Invocation coordinate: block id * block size + thread id. */
242 "UMAD TEMP[0].xy, SV[1].xyyy, IMM[0].xyyy, SV[0].xyyy\n"
244 /* Drawn area check */
245 "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"
246 "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n"
247 "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n"
248 "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"
249 "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"
/* Subtract CONST[5].xy and convert to float. */
253 "UADD TEMP[2].xy, TEMP[0].xyyy, -CONST[5].xyxy\n"
254 "U2F TEMP[2].xy, TEMP[2].xyyy\n"
/* Divide by CONST[3].zw. */
257 "DIV TEMP[2].xy, TEMP[2].xyyy, CONST[3].zwzw\n"
260 "TEX_LZ TEMP[3], TEMP[2].xyyy, SAMP[0], RECT\n"
262 "STORE IMAGE[0], TEMP[0].xyyy, TEMP[3], 2D\n"
/*
 * compute_shader_yuv_weave_y: file-local TGSI compute shader text, 8x8x1
 * threads per block, reading three 2D_ARRAY sampler views and writing
 * IMAGE[0].
 *
 * The visible instruction sequence is the same coordinate set-up, two-layer
 * fetch and LRP blend as compute_shader_weave, written with unswizzled
 * full-register operands, but without the CONST[0..2] matrix step: the
 * blended value TEMP[6] (w forced to 1.0) is stored directly to IMAGE[0] at
 * TEMP[0].
 *
 * NOTE(review): nothing visible here declares CONST/SAMP/TEMP, consumes
 * TEMP[1].x, or terminates the program, and the numbers embedded in each
 * line skip values. Lines look elided from this listing; confirm against
 * the complete file.
 */
267 static const char *compute_shader_yuv_weave_y
=
269 "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"
270 "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"
271 "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"
273 "DCL SV[0], THREAD_ID\n"
274 "DCL SV[1], BLOCK_ID\n"
277 "DCL SVIEW[0..2], 2D_ARRAY, FLOAT\n"
280 "DCL IMAGE[0], 2D, WR\n"
283 "IMM[0] UINT32 { 8, 8, 1, 0}\n"
284 "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n"
285 "IMM[2] UINT32 { 1, 2, 4, 0}\n"
286 "IMM[3] FLT32 { 0.25, 0.5, 0.125, 0.125}\n"
/* Invocation coordinate: block id * block size + thread id. */
288 "UMAD TEMP[0], SV[1], IMM[0], SV[0]\n"
290 /* Drawn area check */
291 "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"
292 "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n"
293 "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n"
294 "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"
295 "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"
298 "MOV TEMP[2], TEMP[0]\n"
/* Subtract CONST[5].xy. */
300 "UADD TEMP[2].xy, TEMP[2], -CONST[5].xyxy\n"
/* To float; y is halved (IMM[1].y = 2.0). */
303 "U2F TEMP[2], TEMP[2]\n"
304 "DIV TEMP[2].y, TEMP[2].yyyy, IMM[1].yyyy\n"
306 "MOV TEMP[12], TEMP[2]\n"
/* SAMP[1]/SAMP[2] coordinate: both components halved again. */
309 "MOV TEMP[3], TEMP[2]\n"
310 "DIV TEMP[3].xy, TEMP[3], IMM[1].yyyy\n"
312 "MOV TEMP[13], TEMP[3]\n"
/* Add (0.5, 0.25) to TEMP[2]/TEMP[12] and (0.25, 0.125) to
 * TEMP[3]/TEMP[13]. */
315 "ADD TEMP[2].x, TEMP[2].xxxx, IMM[3].yyyy\n"
316 "ADD TEMP[2].y, TEMP[2].yyyy, IMM[3].xxxx\n"
317 "ADD TEMP[12].x, TEMP[12].xxxx, IMM[3].yyyy\n"
318 "ADD TEMP[12].y, TEMP[12].yyyy, IMM[3].xxxx\n"
320 "ADD TEMP[3].x, TEMP[3].xxxx, IMM[3].xxxx\n"
321 "ADD TEMP[3].y, TEMP[3].yyyy, IMM[3].wwww\n"
322 "ADD TEMP[13].x, TEMP[13].xxxx, IMM[3].xxxx\n"
323 "ADD TEMP[13].y, TEMP[13].yyyy, IMM[3].wwww\n"
/* Divide all four coordinates by CONST[3].zw. */
326 "DIV TEMP[2].xy, TEMP[2], CONST[3].zwzw\n"
327 "DIV TEMP[12].xy, TEMP[12], CONST[3].zwzw\n"
328 "DIV TEMP[3].xy, TEMP[3], CONST[3].zwzw\n"
329 "DIV TEMP[13].xy, TEMP[13], CONST[3].zwzw\n"
/* Layer-0 coordinates get +0.25 in y, layer-1 coordinates get -0.25. */
332 "ADD TEMP[2].y, TEMP[2].yyyy, IMM[3].xxxx\n"
333 "ADD TEMP[12].y, TEMP[12].yyyy, -IMM[3].xxxx\n"
334 "ADD TEMP[3].y, TEMP[3].yyyy, IMM[3].xxxx\n"
335 "ADD TEMP[13].y, TEMP[13].yyyy, -IMM[3].xxxx\n"
/* LRP weights: 2 * |y - round(y)|. */
338 "MOV TEMP[14].x, TEMP[2].yyyy\n"
339 "MOV TEMP[14].yz, TEMP[3].yyyy\n"
340 "ROUND TEMP[15], TEMP[14]\n"
341 "ADD TEMP[14], TEMP[14], -TEMP[15]\n"
342 "MOV TEMP[14], |TEMP[14]|\n"
343 "MUL TEMP[14], TEMP[14], IMM[1].yyyy\n"
/* Divide by CONST[5].zw (TEMP[2]/TEMP[12]) or by half of it
 * (TEMP[3]/TEMP[13]). */
346 "DIV TEMP[2].xy, TEMP[2], CONST[5].zwzw\n"
347 "DIV TEMP[12].xy, TEMP[12], CONST[5].zwzw\n"
348 "DIV TEMP[15].xy, CONST[5].zwzw, IMM[1].yyyy\n"
349 "DIV TEMP[3].xy, TEMP[3], TEMP[15].xyxy\n"
350 "DIV TEMP[13].xy, TEMP[13], TEMP[15].xyxy\n"
/* Fetch from array layer 0.0 (IMM[1].w). */
353 "MOV TEMP[2].z, IMM[1].wwww\n"
354 "MOV TEMP[3].z, IMM[1].wwww\n"
355 "TEX_LZ TEMP[10].x, TEMP[2], SAMP[0], 2D_ARRAY\n"
356 "TEX_LZ TEMP[10].y, TEMP[3], SAMP[1], 2D_ARRAY\n"
357 "TEX_LZ TEMP[10].z, TEMP[3], SAMP[2], 2D_ARRAY\n"
/* Fetch from array layer 1.0 (IMM[1].x). */
359 "MOV TEMP[12].z, IMM[1].xxxx\n"
360 "MOV TEMP[13].z, IMM[1].xxxx\n"
361 "TEX_LZ TEMP[11].x, TEMP[12], SAMP[0], 2D_ARRAY\n"
362 "TEX_LZ TEMP[11].y, TEMP[13], SAMP[1], 2D_ARRAY\n"
363 "TEX_LZ TEMP[11].z, TEMP[13], SAMP[2], 2D_ARRAY\n"
/* Blend the two fetches; w = 1.0; store without further conversion. */
365 "LRP TEMP[6], TEMP[14], TEMP[10], TEMP[11]\n"
366 "MOV TEMP[6].w, IMM[1].xxxx\n"
368 "STORE IMAGE[0], TEMP[0], TEMP[6], 2D\n"
/*
 * compute_shader_yuv_weave_uv: file-local TGSI compute shader text, 8x8x1
 * threads per block, reading three 2D_ARRAY sampler views and writing
 * IMAGE[0].
 *
 * The visible instruction sequence matches compute_shader_yuv_weave_y up to
 * the LRP blend into TEMP[6]. The difference is the final step: the y and z
 * channels of TEMP[6] are moved into TEMP[7].xy and TEMP[7] is what gets
 * stored to IMAGE[0] at TEMP[0].
 *
 * NOTE(review): nothing visible here declares CONST/SAMP/TEMP, consumes
 * TEMP[1].x, or terminates the program, and the numbers embedded in each
 * line skip values. Lines look elided from this listing; confirm against
 * the complete file.
 */
373 static const char *compute_shader_yuv_weave_uv
=
375 "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"
376 "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"
377 "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"
379 "DCL SV[0], THREAD_ID\n"
380 "DCL SV[1], BLOCK_ID\n"
383 "DCL SVIEW[0..2], 2D_ARRAY, FLOAT\n"
386 "DCL IMAGE[0], 2D, WR\n"
389 "IMM[0] UINT32 { 8, 8, 1, 0}\n"
390 "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n"
391 "IMM[2] UINT32 { 1, 2, 4, 0}\n"
392 "IMM[3] FLT32 { 0.25, 0.5, 0.125, 0.125}\n"
/* Invocation coordinate: block id * block size + thread id. */
394 "UMAD TEMP[0], SV[1], IMM[0], SV[0]\n"
396 /* Drawn area check */
397 "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"
398 "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n"
399 "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n"
400 "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"
401 "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"
404 "MOV TEMP[2], TEMP[0]\n"
/* Subtract CONST[5].xy. */
406 "UADD TEMP[2].xy, TEMP[2], -CONST[5].xyxy\n"
/* To float; y is halved (IMM[1].y = 2.0). */
409 "U2F TEMP[2], TEMP[2]\n"
410 "DIV TEMP[2].y, TEMP[2].yyyy, IMM[1].yyyy\n"
412 "MOV TEMP[12], TEMP[2]\n"
/* SAMP[1]/SAMP[2] coordinate: both components halved again. */
415 "MOV TEMP[3], TEMP[2]\n"
416 "DIV TEMP[3].xy, TEMP[3], IMM[1].yyyy\n"
418 "MOV TEMP[13], TEMP[3]\n"
/* Add (0.5, 0.25) to TEMP[2]/TEMP[12] and (0.25, 0.125) to
 * TEMP[3]/TEMP[13]. */
421 "ADD TEMP[2].x, TEMP[2].xxxx, IMM[3].yyyy\n"
422 "ADD TEMP[2].y, TEMP[2].yyyy, IMM[3].xxxx\n"
423 "ADD TEMP[12].x, TEMP[12].xxxx, IMM[3].yyyy\n"
424 "ADD TEMP[12].y, TEMP[12].yyyy, IMM[3].xxxx\n"
426 "ADD TEMP[3].x, TEMP[3].xxxx, IMM[3].xxxx\n"
427 "ADD TEMP[3].y, TEMP[3].yyyy, IMM[3].wwww\n"
428 "ADD TEMP[13].x, TEMP[13].xxxx, IMM[3].xxxx\n"
429 "ADD TEMP[13].y, TEMP[13].yyyy, IMM[3].wwww\n"
/* Divide all four coordinates by CONST[3].zw. */
432 "DIV TEMP[2].xy, TEMP[2], CONST[3].zwzw\n"
433 "DIV TEMP[12].xy, TEMP[12], CONST[3].zwzw\n"
434 "DIV TEMP[3].xy, TEMP[3], CONST[3].zwzw\n"
435 "DIV TEMP[13].xy, TEMP[13], CONST[3].zwzw\n"
/* Layer-0 coordinates get +0.25 in y, layer-1 coordinates get -0.25. */
438 "ADD TEMP[2].y, TEMP[2].yyyy, IMM[3].xxxx\n"
439 "ADD TEMP[12].y, TEMP[12].yyyy, -IMM[3].xxxx\n"
440 "ADD TEMP[3].y, TEMP[3].yyyy, IMM[3].xxxx\n"
441 "ADD TEMP[13].y, TEMP[13].yyyy, -IMM[3].xxxx\n"
/* LRP weights: 2 * |y - round(y)|. */
444 "MOV TEMP[14].x, TEMP[2].yyyy\n"
445 "MOV TEMP[14].yz, TEMP[3].yyyy\n"
446 "ROUND TEMP[15], TEMP[14]\n"
447 "ADD TEMP[14], TEMP[14], -TEMP[15]\n"
448 "MOV TEMP[14], |TEMP[14]|\n"
449 "MUL TEMP[14], TEMP[14], IMM[1].yyyy\n"
/* Divide by CONST[5].zw (TEMP[2]/TEMP[12]) or by half of it
 * (TEMP[3]/TEMP[13]). */
452 "DIV TEMP[2].xy, TEMP[2], CONST[5].zwzw\n"
453 "DIV TEMP[12].xy, TEMP[12], CONST[5].zwzw\n"
454 "DIV TEMP[15].xy, CONST[5].zwzw, IMM[1].yyyy\n"
455 "DIV TEMP[3].xy, TEMP[3], TEMP[15].xyxy\n"
456 "DIV TEMP[13].xy, TEMP[13], TEMP[15].xyxy\n"
/* Fetch from array layer 0.0 (IMM[1].w). */
459 "MOV TEMP[2].z, IMM[1].wwww\n"
460 "MOV TEMP[3].z, IMM[1].wwww\n"
461 "TEX_LZ TEMP[10].x, TEMP[2], SAMP[0], 2D_ARRAY\n"
462 "TEX_LZ TEMP[10].y, TEMP[3], SAMP[1], 2D_ARRAY\n"
463 "TEX_LZ TEMP[10].z, TEMP[3], SAMP[2], 2D_ARRAY\n"
/* Fetch from array layer 1.0 (IMM[1].x). */
465 "MOV TEMP[12].z, IMM[1].xxxx\n"
466 "MOV TEMP[13].z, IMM[1].xxxx\n"
467 "TEX_LZ TEMP[11].x, TEMP[12], SAMP[0], 2D_ARRAY\n"
468 "TEX_LZ TEMP[11].y, TEMP[13], SAMP[1], 2D_ARRAY\n"
469 "TEX_LZ TEMP[11].z, TEMP[13], SAMP[2], 2D_ARRAY\n"
/* Blend the two fetches; w = 1.0. */
471 "LRP TEMP[6], TEMP[14], TEMP[10], TEMP[11]\n"
472 "MOV TEMP[6].w, IMM[1].xxxx\n"
/* Output carries the blended y and z channels in its x and y. */
474 "MOV TEMP[7].xy, TEMP[6].yzww\n"
476 "STORE IMAGE[0], TEMP[0], TEMP[7], 2D\n"
/*
 * compute_shader_yuv_bob_y: file-local TGSI compute shader text, 8x8x1
 * threads per block, reading three RECT sampler views and writing IMAGE[0].
 *
 * Per invocation, as far as the visible instructions show:
 *  - TEMP[0] = BLOCK_ID * (8, 8, 1, 0) + THREAD_ID; TEMP[1].x is the
 *    CONST[4] rectangle test.
 *  - TEMP[2] = float(TEMP[0].xy - CONST[5].xy); TEMP[3] = TEMP[2] / 2.
 *  - Both are divided by CONST[3].zw and then by (1.0, 2.0), i.e. y is
 *    halved once more.
 *  - TEMP[4] = (SAMP[0].x at TEMP[2], SAMP[1].y at TEMP[3],
 *    SAMP[2].z at TEMP[3], 1.0) is stored to IMAGE[0] at TEMP[0].
 *
 * NOTE(review): nothing visible here declares CONST/SAMP/TEMP, consumes
 * TEMP[1].x, or terminates the program, and the numbers embedded in each
 * line skip values. Lines look elided from this listing; confirm against
 * the complete file.
 */
481 static const char *compute_shader_yuv_bob_y
=
483 "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"
484 "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"
485 "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"
487 "DCL SV[0], THREAD_ID\n"
488 "DCL SV[1], BLOCK_ID\n"
491 "DCL SVIEW[0..2], RECT, FLOAT\n"
494 "DCL IMAGE[0], 2D, WR\n"
497 "IMM[0] UINT32 { 8, 8, 1, 0}\n"
498 "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n"
/* Invocation coordinate: block id * block size + thread id. */
500 "UMAD TEMP[0], SV[1], IMM[0], SV[0]\n"
502 /* Drawn area check */
503 "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"
504 "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n"
505 "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n"
506 "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"
507 "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"
/* Subtract CONST[5].xy, convert to float; TEMP[3] is half of TEMP[2]. */
511 "UADD TEMP[2].xy, TEMP[0], -CONST[5].xyxy\n"
512 "U2F TEMP[2], TEMP[2]\n"
513 "DIV TEMP[3], TEMP[2], IMM[1].yyyy\n"
/* Divide by CONST[3].zw, then by (1.0, 2.0) so only y is halved. */
516 "DIV TEMP[2], TEMP[2], CONST[3].zwzw\n"
517 "DIV TEMP[2], TEMP[2], IMM[1].xyxy\n"
518 "DIV TEMP[3], TEMP[3], CONST[3].zwzw\n"
519 "DIV TEMP[3], TEMP[3], IMM[1].xyxy\n"
/* One channel per sampler; w is forced to 1.0. */
522 "TEX_LZ TEMP[4].x, TEMP[2], SAMP[0], RECT\n"
523 "TEX_LZ TEMP[4].y, TEMP[3], SAMP[1], RECT\n"
524 "TEX_LZ TEMP[4].z, TEMP[3], SAMP[2], RECT\n"
526 "MOV TEMP[4].w, IMM[1].xxxx\n"
528 "STORE IMAGE[0], TEMP[0], TEMP[4], 2D\n"
/*
 * compute_shader_yuv_bob_uv: file-local TGSI compute shader text, 8x8x1
 * threads per block, reading three RECT sampler views and writing IMAGE[0].
 *
 * The visible instruction sequence matches compute_shader_yuv_bob_y up to
 * the assembly of TEMP[4]. The difference is the final step: the y and z
 * channels of TEMP[4] are moved into TEMP[5].xy and TEMP[5] is what gets
 * stored to IMAGE[0] at TEMP[0].
 *
 * NOTE(review): nothing visible here declares CONST/SAMP/TEMP, consumes
 * TEMP[1].x, or terminates the program, and the numbers embedded in each
 * line skip values. Lines look elided from this listing; confirm against
 * the complete file.
 */
533 static const char *compute_shader_yuv_bob_uv
=
535 "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"
536 "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"
537 "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"
539 "DCL SV[0], THREAD_ID\n"
540 "DCL SV[1], BLOCK_ID\n"
543 "DCL SVIEW[0..2], RECT, FLOAT\n"
546 "DCL IMAGE[0], 2D, WR\n"
549 "IMM[0] UINT32 { 8, 8, 1, 0}\n"
550 "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n"
/* Invocation coordinate: block id * block size + thread id. */
552 "UMAD TEMP[0], SV[1], IMM[0], SV[0]\n"
554 /* Drawn area check */
555 "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"
556 "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n"
557 "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n"
558 "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"
559 "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"
/* Subtract CONST[5].xy, convert to float; TEMP[3] is half of TEMP[2]. */
563 "UADD TEMP[2].xy, TEMP[0], -CONST[5].xyxy\n"
564 "U2F TEMP[2], TEMP[2]\n"
565 "DIV TEMP[3], TEMP[2], IMM[1].yyyy\n"
/* Divide by CONST[3].zw, then by (1.0, 2.0) so only y is halved. */
568 "DIV TEMP[2], TEMP[2], CONST[3].zwzw\n"
569 "DIV TEMP[2], TEMP[2], IMM[1].xyxy\n"
570 "DIV TEMP[3], TEMP[3], CONST[3].zwzw\n"
571 "DIV TEMP[3], TEMP[3], IMM[1].xyxy\n"
/* One channel per sampler; w is forced to 1.0. */
574 "TEX_LZ TEMP[4].x, TEMP[2], SAMP[0], RECT\n"
575 "TEX_LZ TEMP[4].y, TEMP[3], SAMP[1], RECT\n"
576 "TEX_LZ TEMP[4].z, TEMP[3], SAMP[2], RECT\n"
578 "MOV TEMP[4].w, IMM[1].xxxx\n"
/* Output carries the fetched y and z channels in its x and y. */
580 "MOV TEMP[5].xy, TEMP[4].yzww\n"
582 "STORE IMAGE[0], TEMP[0], TEMP[5], 2D\n"
/*
 * cs_launch: bind the destination image and a compute state, dispatch the
 * grid, then issue a full memory barrier.
 *
 * c         - compositor; c->pipe is the context used for every call and
 *             c->fb_state.cbufs[0]->texture is bound as compute image 0
 *             with read/write access and the texture's own format.
 * draw_area - only x1 and y1 are read; they size the grid in X and Y.
 *
 * NOTE(review): `cs` is bound below but no declaration of it is visible, and
 * info.block[] is used as the DIV_ROUND_UP divisor without a visible
 * assignment after the {0} initialiser. The numbers embedded in the lines
 * skip values, so those lines are probably elided from this listing --
 * confirm against the complete file.
 */
588 cs_launch(struct vl_compositor
*c
,
590 const struct u_rect
*draw_area
)
592 struct pipe_context
*ctx
= c
->pipe
;
/* Describe the first colour buffer as a read/write shader image. */
595 struct pipe_image_view image
= {0};
596 image
.resource
= c
->fb_state
.cbufs
[0]->texture
;
597 image
.shader_access
= image
.access
= PIPE_IMAGE_ACCESS_READ_WRITE
;
598 image
.format
= c
->fb_state
.cbufs
[0]->texture
->format
;
/* Bind it as image slot 0 of the compute stage. */
600 ctx
->set_shader_images(c
->pipe
, PIPE_SHADER_COMPUTE
, 0, 1, &image
);
602 /* Bind compute shader */
603 ctx
->bind_compute_state(ctx
, cs
);
605 /* Dispatch compute */
606 struct pipe_grid_info info
= {0};
/* Number of blocks needed to reach x1 / y1, rounded up. */
610 info
.grid
[0] = DIV_ROUND_UP(draw_area
->x1
, info
.block
[0]);
611 info
.grid
[1] = DIV_ROUND_UP(draw_area
->y1
, info
.block
[1]);
614 ctx
->launch_grid(ctx
, &info
);
616 /* Make the result visible to all clients. */
617 ctx
->memory_barrier(ctx
, PIPE_BARRIER_ALL
);
/*
 * calc_drawn_area: compute the rectangle a layer covers.
 *
 * The corners tl/br are mapped per axis through layer->viewport (multiply
 * by scale, add translate) and the result is clamped to s->scissor: the
 * minimum corner is raised to minx/miny and the maximum corner is lowered
 * to maxx/maxy.
 *
 * s     - compositor state; only s->scissor is read here.
 * layer - layer whose viewport scale/translate is applied.
 *
 * NOTE(review): the assignments that give tl and br their values and the
 * statement returning `result` are not visible; the numbers embedded in the
 * lines skip values, so they are probably elided from this listing --
 * confirm against the complete file.
 */
621 static inline struct u_rect
622 calc_drawn_area(struct vl_compositor_state
*s
,
623 struct vl_compositor_layer
*layer
)
625 struct vertex2f tl
, br
;
626 struct u_rect result
;
/* Viewport transform of both corners: corner * scale + translate. */
634 result
.x0
= tl
.x
* layer
->viewport
.scale
[0] + layer
->viewport
.translate
[0];
635 result
.y0
= tl
.y
* layer
->viewport
.scale
[1] + layer
->viewport
.translate
[1];
636 result
.x1
= br
.x
* layer
->viewport
.scale
[0] + layer
->viewport
.translate
[0];
637 result
.y1
= br
.y
* layer
->viewport
.scale
[1] + layer
->viewport
.translate
[1];
/* Clamp the rectangle to the scissor. */
640 result
.x0
= MAX2(result
.x0
, s
->scissor
.minx
);
641 result
.y0
= MAX2(result
.y0
, s
->scissor
.miny
);
642 result
.x1
= MIN2(result
.x1
, s
->scissor
.maxx
);
643 result
.y1
= MIN2(result
.y1
, s
->scissor
.maxy
);
/*
 * set_viewport: write the per-layer parameters into the shader parameter
 * buffer s->shader_params.
 *
 * The buffer is mapped for read and write, the write position is advanced
 * past sizeof(vl_csc_matrix) bytes plus two more floats, and the following
 * values are written back to back:
 *   float scale_x, scale_y
 *   int   area.x0, area.y0, area.x1, area.y1
 *   int   translate_x, translate_y
 *   float sampler0_w, sampler0_h
 *   float h_ratio, v_ratio   (plane 1 size / plane 0 size)
 * The buffer is then unmapped.
 *
 * s        - compositor state owning the pipe context and shader_params.
 * drawn    - values to upload.
 * samplers - sampler views; the textures of [0] and [1] supply the ratios.
 *
 * Presumably these values are what the shaders in this file read as
 * CONST[3].zw, CONST[4], CONST[5] and CONST[6].xy; that holds if
 * vl_csc_matrix is twelve floats -- TODO confirm.
 *
 * NOTE(review): samplers[1] is dereferenced below with no visible NULL
 * check, while draw_layers() treats samplers[1] == NULL as a valid layer
 * with one sampler view. The tail of the pipe_buffer_map() call, any check
 * of `ptr` and the return statement are also not visible; the numbers
 * embedded in the lines skip values, so guards may simply be elided from
 * this listing -- confirm against the complete file.
 */
648 set_viewport(struct vl_compositor_state
*s
,
649 struct cs_viewport
*drawn
,
650 struct pipe_sampler_view
**samplers
)
652 struct pipe_transfer
*buf_transfer
;
656 void *ptr
= pipe_buffer_map(s
->pipe
, s
->shader_params
,
657 PIPE_TRANSFER_READ
| PIPE_TRANSFER_WRITE
,
/* Skip the colour-conversion matrix and two further floats. */
663 float *ptr_float
= (float *)ptr
;
664 ptr_float
+= sizeof(vl_csc_matrix
)/sizeof(float) + 2;
665 *ptr_float
++ = drawn
->scale_x
;
666 *ptr_float
++ = drawn
->scale_y
;
/* The next six slots are written as integers. */
668 int *ptr_int
= (int *)ptr_float
;
669 *ptr_int
++ = drawn
->area
.x0
;
670 *ptr_int
++ = drawn
->area
.y0
;
671 *ptr_int
++ = drawn
->area
.x1
;
672 *ptr_int
++ = drawn
->area
.y1
;
673 *ptr_int
++ = drawn
->translate_x
;
674 *ptr_int
++ = drawn
->translate_y
;
/* Back to floats for the sampler size and plane ratios. */
676 ptr_float
= (float *)ptr_int
;
677 *ptr_float
++ = drawn
->sampler0_w
;
678 *ptr_float
++ = drawn
->sampler0_h
;
680 /* compute_shader_video_buffer uses pixel coordinates based on the
681 * Y sampler dimensions. If U/V are using separate planes and are
682 * subsampled, we need to scale the coordinates */
684 float h_ratio
= samplers
[1]->texture
->width0
/
685 (float) samplers
[0]->texture
->width0
;
686 *ptr_float
++ = h_ratio
;
687 float v_ratio
= samplers
[1]->texture
->height0
/
688 (float) samplers
[0]->texture
->height0
;
689 *ptr_float
++ = v_ratio
;
691 pipe_buffer_unmap(s
->pipe
, buf_transfer
);
697 draw_layers(struct vl_compositor
*c
,
698 struct vl_compositor_state
*s
,
699 struct u_rect
*dirty
)
705 for (i
= 0; i
< VL_COMPOSITOR_MAX_LAYERS
; ++i
) {
706 if (s
->used_layers
& (1 << i
)) {
707 struct vl_compositor_layer
*layer
= &s
->layers
[i
];
708 struct pipe_sampler_view
**samplers
= &layer
->sampler_views
[0];
709 unsigned num_sampler_views
= !samplers
[1] ? 1 : !samplers
[2] ? 2 : 3;
710 struct cs_viewport drawn
;
712 drawn
.area
= calc_drawn_area(s
, layer
);
713 drawn
.scale_x
= layer
->viewport
.scale
[0] /
714 (float)layer
->sampler_views
[0]->texture
->width0
;
715 drawn
.scale_y
= layer
->viewport
.scale
[1] /
716 (float)layer
->sampler_views
[0]->texture
->height0
;
717 drawn
.translate_x
= (int)layer
->viewport
.translate
[0];
718 drawn
.translate_y
= (int)layer
->viewport
.translate
[1];
719 drawn
.sampler0_w
= (float)layer
->sampler_views
[0]->texture
->width0
;
720 drawn
.sampler0_h
= (float)layer
->sampler_views
[0]->texture
->height0
;
721 set_viewport(s
, &drawn
, samplers
);
723 c
->pipe
->bind_sampler_states(c
->pipe
, PIPE_SHADER_COMPUTE
, 0,
724 num_sampler_views
, layer
->samplers
);
725 c
->pipe
->set_sampler_views(c
->pipe
, PIPE_SHADER_COMPUTE
, 0,
726 num_sampler_views
, samplers
);
728 cs_launch(c
, layer
->cs
, &(drawn
.area
));
731 c
->pipe
->set_shader_images(c
->pipe
, PIPE_SHADER_COMPUTE
, 0, 1, NULL
);
732 c
->pipe
->set_constant_buffer(c
->pipe
, PIPE_SHADER_COMPUTE
, 0, NULL
);
733 c
->pipe
->set_sampler_views(c
->pipe
, PIPE_SHADER_FRAGMENT
, 0,
734 num_sampler_views
, NULL
);
735 c
->pipe
->bind_compute_state(c
->pipe
, NULL
);
736 c
->pipe
->bind_sampler_states(c
->pipe
, PIPE_SHADER_COMPUTE
, 0,
737 num_sampler_views
, NULL
);
740 struct u_rect drawn
= calc_drawn_area(s
, layer
);
741 dirty
->x0
= MIN2(drawn
.x0
, dirty
->x0
);
742 dirty
->y0
= MIN2(drawn
.y0
, dirty
->y0
);
743 dirty
->x1
= MAX2(drawn
.x1
, dirty
->x1
);
744 dirty
->y1
= MAX2(drawn
.y1
, dirty
->y1
);
/*
 * vl_compositor_cs_create_shader: turn TGSI text into a compute state.
 *
 * The text is translated with tgsi_text_translate() into a 1024-entry token
 * array on the stack, a pipe_compute_state with ir_type PIPE_SHADER_IR_TGSI
 * is filled in, and the value returned by c->pipe->create_compute_state()
 * is returned to the caller.
 *
 * c                   - compositor providing the pipe context; must not be
 *                       NULL (asserted).
 * compute_shader_text - TGSI source; must not be NULL (asserted).
 *
 * NOTE(review): the body of the translation-failure branch and the statement
 * that hands `tokens` to `state` are not visible; the numbers embedded in
 * the lines skip values, so they are probably elided from this listing --
 * confirm against the complete file.
 */
751 vl_compositor_cs_create_shader(struct vl_compositor
*c
,
752 const char *compute_shader_text
)
754 assert(c
&& compute_shader_text
);
/* Fixed-size token buffer; translation fails if the shader needs more. */
756 struct tgsi_token tokens
[1024];
757 if (!tgsi_text_translate(compute_shader_text
, tokens
, ARRAY_SIZE(tokens
))) {
762 struct pipe_compute_state state
= {0};
763 state
.ir_type
= PIPE_SHADER_IR_TGSI
;
766 /* create compute shader */
767 return c
->pipe
->create_compute_state(c
->pipe
, &state
);
/*
 * vl_compositor_cs_render: compose the layers of `s` onto dst_surface with
 * the compute-shader path.
 *
 * Visible steps:
 *  1. Record dst_surface and its size in c->fb_state.
 *  2. When s->scissor_valid is false, set the scissor maximum to the
 *     surface size.
 *  3. When clear_dirty is set and dirty_area is non-NULL and non-empty in at
 *     least one axis, clear the whole surface to s->clear_color and reset
 *     dirty_area to VL_COMPOSITOR_MAX_DIRTY / VL_COMPOSITOR_MIN_DIRTY.
 *  4. Bind s->shader_params as compute constant buffer 0.
 *  5. Call draw_layers().
 *
 * s           - compositor state (layers, scissor, clear colour, params).
 * c           - compositor providing the pipe context and fb_state.
 * dst_surface - render target.
 * dirty_area  - may be NULL; reset in step 3 and handed to draw_layers().
 *
 * NOTE(review): the declaration of `clear_dirty` and any assignment of the
 * scissor minimum are not visible; the numbers embedded in the lines skip
 * values, so they are probably elided from this listing -- confirm against
 * the complete file.
 */
771 vl_compositor_cs_render(struct vl_compositor_state
*s
,
772 struct vl_compositor
*c
,
773 struct pipe_surface
*dst_surface
,
774 struct u_rect
*dirty_area
,
780 c
->fb_state
.width
= dst_surface
->width
;
781 c
->fb_state
.height
= dst_surface
->height
;
782 c
->fb_state
.cbufs
[0] = dst_surface
;
/* No caller-supplied scissor: extend it to the surface size. */
784 if (!s
->scissor_valid
) {
787 s
->scissor
.maxx
= dst_surface
->width
;
788 s
->scissor
.maxy
= dst_surface
->height
;
791 if (clear_dirty
&& dirty_area
&&
792 (dirty_area
->x0
< dirty_area
->x1
|| dirty_area
->y0
< dirty_area
->y1
)) {
/* The clear covers the full surface, not just the dirty rectangle. */
794 c
->pipe
->clear_render_target(c
->pipe
, dst_surface
, &s
->clear_color
,
795 0, 0, dst_surface
->width
, dst_surface
->height
, false);
796 dirty_area
->x0
= dirty_area
->y0
= VL_COMPOSITOR_MAX_DIRTY
;
797 dirty_area
->x1
= dirty_area
->y1
= VL_COMPOSITOR_MIN_DIRTY
;
800 pipe_set_constant_buffer(c
->pipe
, PIPE_SHADER_COMPUTE
, 0, s
->shader_params
);
802 draw_layers(c
, s
, dirty_area
);
/*
 * vl_compositor_cs_init_shaders: create the compositor's compute shaders.
 *
 * Six shaders are built with vl_compositor_cs_create_shader() and stored in
 * c: cs_video_buffer, cs_weave_rgb, cs_yuv.weave.{y,uv} and
 * cs_yuv.bob.{y,uv}. A debug message is printed when a creation fails; the
 * four cs_yuv shaders are all created before they are checked together.
 * compute_shader_rgba is not compiled in this function.
 *
 * NOTE(review): the return statements (this function returns bool) are not
 * visible; the numbers embedded in the lines skip values, so they are
 * probably elided from this listing -- confirm against the complete file.
 */
805 bool vl_compositor_cs_init_shaders(struct vl_compositor
*c
)
809 c
->cs_video_buffer
= vl_compositor_cs_create_shader(c
, compute_shader_video_buffer
);
810 if (!c
->cs_video_buffer
) {
811 debug_printf("Unable to create video_buffer compute shader.\n");
815 c
->cs_weave_rgb
= vl_compositor_cs_create_shader(c
, compute_shader_weave
);
816 if (!c
->cs_weave_rgb
) {
817 debug_printf("Unable to create weave_rgb compute shader.\n");
821 c
->cs_yuv
.weave
.y
= vl_compositor_cs_create_shader(c
, compute_shader_yuv_weave_y
);
822 c
->cs_yuv
.weave
.uv
= vl_compositor_cs_create_shader(c
, compute_shader_yuv_weave_uv
);
823 c
->cs_yuv
.bob
.y
= vl_compositor_cs_create_shader(c
, compute_shader_yuv_bob_y
);
824 c
->cs_yuv
.bob
.uv
= vl_compositor_cs_create_shader(c
, compute_shader_yuv_bob_uv
);
/* One combined check for the four shaders created just above. */
825 if (!c
->cs_yuv
.weave
.y
|| !c
->cs_yuv
.weave
.uv
||
826 !c
->cs_yuv
.bob
.y
|| !c
->cs_yuv
.bob
.uv
) {
827 debug_printf("Unable to create YCbCr i-to-YCbCr p deint compute shader.\n");
834 void vl_compositor_cs_cleanup_shaders(struct vl_compositor
*c
)
838 if (c
->cs_video_buffer
)
839 c
->pipe
->delete_compute_state(c
->pipe
, c
->cs_video_buffer
);
841 c
->pipe
->delete_compute_state(c
->pipe
, c
->cs_weave_rgb
);
842 if (c
->cs_yuv
.weave
.y
)
843 c
->pipe
->delete_compute_state(c
->pipe
, c
->cs_yuv
.weave
.y
);
844 if (c
->cs_yuv
.weave
.uv
)
845 c
->pipe
->delete_compute_state(c
->pipe
, c
->cs_yuv
.weave
.uv
);
847 c
->pipe
->delete_compute_state(c
->pipe
, c
->cs_yuv
.bob
.y
);
848 if (c
->cs_yuv
.bob
.uv
)
849 c
->pipe
->delete_compute_state(c
->pipe
, c
->cs_yuv
.bob
.uv
);