eb19dd7b15992861a3a020d65d51914b2dd08777
[mesa.git] / src / gallium / auxiliary / vl / vl_compositor_cs.c
1 /**************************************************************************
2 *
3 * Copyright 2019 Advanced Micro Devices, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 * Authors: James Zhu <james.zhu@amd.com>
27 *
28 **************************************************************************/
29
30 #include <assert.h>
31
32 #include "tgsi/tgsi_text.h"
33 #include "vl_compositor_cs.h"
34
35 struct cs_viewport {
36 float scale_x;
37 float scale_y;
38 struct u_rect area;
39 int translate_x;
40 int translate_y;
41 float sampler0_w;
42 float sampler0_h;
43 };
44
/*
 * TGSI compute shader: sample three RECT planes (one component each),
 * apply the colour space conversion matrix held in CONST[0..2] and store
 * the result into IMAGE[0].  One thread handles one destination pixel.
 */
const char *compute_shader_video_buffer =
   "COMP\n"
   "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"
   "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"
   "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"

   "DCL SV[0], THREAD_ID\n"
   "DCL SV[1], BLOCK_ID\n"

   "DCL CONST[0..6]\n"
   "DCL SVIEW[0..2], RECT, FLOAT\n"
   "DCL SAMP[0..2]\n"

   "DCL IMAGE[0], 2D, WR\n"
   "DCL TEMP[0..7]\n"

   "IMM[0] UINT32 { 8, 8, 1, 0}\n"
   "IMM[1] FLT32 { 1.0, 0.0, 0.0, 0.0}\n"

   /* Destination pixel = block id * block size + thread id. */
   "UMAD TEMP[0].xy, SV[1].xyyy, IMM[0].xyyy, SV[0].xyyy\n"

   /* Only threads inside the drawn area (CONST[4] = x0, y0, x1, y1)
    * continue. */
   "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"
   "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n"
   "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n"
   "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"
   "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"

   "UIF TEMP[1].xxxx\n"
      /* Remove the layer translation; TEMP[3] additionally carries the
       * CONST[6].xy ratio used for the second and third planes. */
      "UADD TEMP[2].xy, TEMP[0].xyyy, -CONST[5].xyxy\n"
      "U2F TEMP[2].xy, TEMP[2].xyyy\n"
      "MUL TEMP[3].xy, TEMP[2].xyyy, CONST[6].xyyy\n"

      /* Undo the destination scaling. */
      "DIV TEMP[2].xy, TEMP[2].xyyy, CONST[3].zwww\n"
      "DIV TEMP[3].xy, TEMP[3].xyyy, CONST[3].zwww\n"

      /* One component per plane. */
      "TEX_LZ TEMP[4].x, TEMP[2].xyyy, SAMP[0], RECT\n"
      "TEX_LZ TEMP[4].y, TEMP[3].xyyy, SAMP[1], RECT\n"
      "TEX_LZ TEMP[4].z, TEMP[3].xyyy, SAMP[2], RECT\n"

      "MOV TEMP[4].w, IMM[1].xxxx\n"

      /* Matrix rows in CONST[0..2] produce the output x, y, z. */
      "DP4 TEMP[7].x, CONST[0], TEMP[4]\n"
      "DP4 TEMP[7].y, CONST[1], TEMP[4]\n"
      "DP4 TEMP[7].z, CONST[2], TEMP[4]\n"

      /* Output w: compare the z sample against CONST[3].x / CONST[3].y
       * and keep the larger of the two comparison results. */
      "MOV TEMP[5].w, TEMP[4].zzzz\n"
      "SLE TEMP[6].w, TEMP[5].wwww, CONST[3].xxxx\n"
      "SGT TEMP[5].w, TEMP[5].wwww, CONST[3].yyyy\n"

      "MAX TEMP[7].w, TEMP[5].wwww, TEMP[6].wwww\n"

      "STORE IMAGE[0], TEMP[0].xyyy, TEMP[7], 2D\n"
   "ENDIF\n"

   "END\n";
105
/*
 * TGSI compute shader: sample both layers of three 2D_ARRAY planes,
 * blend the two layers per component, apply the colour space conversion
 * matrix in CONST[0..2] and store the result into IMAGE[0].
 */
const char *compute_shader_weave =
   "COMP\n"
   "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"
   "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"
   "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"

   "DCL SV[0], THREAD_ID\n"
   "DCL SV[1], BLOCK_ID\n"

   "DCL CONST[0..5]\n"
   "DCL SVIEW[0..2], 2D_ARRAY, FLOAT\n"
   "DCL SAMP[0..2]\n"

   "DCL IMAGE[0], 2D, WR\n"
   "DCL TEMP[0..15]\n"

   "IMM[0] UINT32 { 8, 8, 1, 0}\n"
   "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n"
   "IMM[2] UINT32 { 1, 2, 4, 0}\n"
   "IMM[3] FLT32 { 0.25, 0.5, 0.125, 0.125}\n"

   /* Destination pixel = block id * block size + thread id. */
   "UMAD TEMP[0].xy, SV[1].xyyy, IMM[0].xyyy, SV[0].xyyy\n"

   /* Only threads inside the drawn area (CONST[4]) continue. */
   "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"
   "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n"
   "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n"
   "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"
   "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"

   "UIF TEMP[1].xxxx\n"
      "MOV TEMP[2].xy, TEMP[0].xyyy\n"
      /* Remove the layer translation. */
      "UADD TEMP[2].xy, TEMP[2].xyyy, -CONST[5].xyxy\n"

      /* Y coordinate, top (layer 0): half the vertical position. */
      "U2F TEMP[2].xy, TEMP[2].xyyy\n"
      "DIV TEMP[2].y, TEMP[2].yyyy, IMM[1].yyyy\n"
      /* Y coordinate, down (layer 1). */
      "MOV TEMP[12].xy, TEMP[2].xyyy\n"

      /* UV coordinate, top: halved again in both directions. */
      "MOV TEMP[3].xy, TEMP[2].xyyy\n"
      "DIV TEMP[3].xy, TEMP[3], IMM[1].yyyy\n"
      /* UV coordinate, down. */
      "MOV TEMP[13].xy, TEMP[3].xyyy\n"

      /* Texture offsets. */
      "ADD TEMP[2].x, TEMP[2].xxxx, IMM[3].yyyy\n"
      "ADD TEMP[2].y, TEMP[2].yyyy, IMM[3].xxxx\n"
      "ADD TEMP[12].x, TEMP[12].xxxx, IMM[3].yyyy\n"
      "ADD TEMP[12].y, TEMP[12].yyyy, IMM[3].xxxx\n"

      "ADD TEMP[3].x, TEMP[3].xxxx, IMM[3].xxxx\n"
      "ADD TEMP[3].y, TEMP[3].yyyy, IMM[3].wwww\n"
      "ADD TEMP[13].x, TEMP[13].xxxx, IMM[3].xxxx\n"
      "ADD TEMP[13].y, TEMP[13].yyyy, IMM[3].wwww\n"

      /* Undo the destination scaling. */
      "DIV TEMP[2].xy, TEMP[2].xyyy, CONST[3].zwzw\n"
      "DIV TEMP[12].xy, TEMP[12].xyyy, CONST[3].zwzw\n"
      "DIV TEMP[3].xy, TEMP[3].xyyy, CONST[3].zwzw\n"
      "DIV TEMP[13].xy, TEMP[13].xyyy, CONST[3].zwzw\n"

      /* Weave offsets: +0.25 for the top, -0.25 for the down lines. */
      "ADD TEMP[2].y, TEMP[2].yyyy, IMM[3].xxxx\n"
      "ADD TEMP[12].y, TEMP[12].yyyy, -IMM[3].xxxx\n"
      "ADD TEMP[3].y, TEMP[3].yyyy, IMM[3].xxxx\n"
      "ADD TEMP[13].y, TEMP[13].yyyy, -IMM[3].xxxx\n"

      /* Blend weights: twice the distance of y to its rounded value. */
      "MOV TEMP[14].x, TEMP[2].yyyy\n"
      "MOV TEMP[14].yz, TEMP[3].yyyy\n"
      "ROUND TEMP[15].xyz, TEMP[14].xyzz\n"
      "ADD TEMP[14].xyz, TEMP[14].xyzz, -TEMP[15].xyzz\n"
      "MOV TEMP[14].xyz, |TEMP[14].xyzz|\n"
      "MUL TEMP[14].xyz, TEMP[14].xyzz, IMM[1].yyyy\n"

      /* Normalize by the sampler size in CONST[5].zw (halved for UV). */
      "DIV TEMP[2].xy, TEMP[2].xyyy, CONST[5].zwzw\n"
      "DIV TEMP[12].xy, TEMP[12].xyyy, CONST[5].zwzw\n"
      "DIV TEMP[15].xy, CONST[5].zwzw, IMM[1].yyyy\n"
      "DIV TEMP[3].xy, TEMP[3].xyyy, TEMP[15].xyxy\n"
      "DIV TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xyxy\n"

      /* Fetch from array layer 0.0 ... */
      "MOV TEMP[2].z, IMM[1].wwww\n"
      "MOV TEMP[3].z, IMM[1].wwww\n"
      "TEX_LZ TEMP[10].x, TEMP[2].xyzz, SAMP[0], 2D_ARRAY\n"
      "TEX_LZ TEMP[10].y, TEMP[3].xyzz, SAMP[1], 2D_ARRAY\n"
      "TEX_LZ TEMP[10].z, TEMP[3].xyzz, SAMP[2], 2D_ARRAY\n"

      /* ... and from array layer 1.0. */
      "MOV TEMP[12].z, IMM[1].xxxx\n"
      "MOV TEMP[13].z, IMM[1].xxxx\n"
      "TEX_LZ TEMP[11].x, TEMP[12].xyzz, SAMP[0], 2D_ARRAY\n"
      "TEX_LZ TEMP[11].y, TEMP[13].xyzz, SAMP[1], 2D_ARRAY\n"
      "TEX_LZ TEMP[11].z, TEMP[13].xyzz, SAMP[2], 2D_ARRAY\n"

      "LRP TEMP[6].xyz, TEMP[14].xyzz, TEMP[10].xyzz, TEMP[11].xyzz\n"
      "MOV TEMP[6].w, IMM[1].xxxx\n"

      /* Matrix rows in CONST[0..2] produce the output x, y, z. */
      "DP4 TEMP[9].x, CONST[0], TEMP[6]\n"
      "DP4 TEMP[9].y, CONST[1], TEMP[6]\n"
      "DP4 TEMP[9].z, CONST[2], TEMP[6]\n"

      /* Output w: compare the z sample against CONST[3].x / CONST[3].y
       * and keep the larger of the two comparison results. */
      "MOV TEMP[7].w, TEMP[6].zzzz\n"
      "SLE TEMP[8].w, TEMP[7].wwww, CONST[3].xxxx\n"
      "SGT TEMP[7].w, TEMP[7].wwww, CONST[3].yyyy\n"

      "MAX TEMP[9].w, TEMP[7].wwww, TEMP[8].wwww\n"

      "STORE IMAGE[0], TEMP[0].xyyy, TEMP[9], 2D\n"
   "ENDIF\n"

   "END\n";
222
/*
 * TGSI compute shader: copy texels from a single RECT sampler into
 * IMAGE[0], honouring the drawn area, translation and scale constants.
 */
const char *compute_shader_rgba =
   "COMP\n"
   "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"
   "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"
   "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"

   "DCL SV[0], THREAD_ID\n"
   "DCL SV[1], BLOCK_ID\n"

   "DCL CONST[0..5]\n"
   "DCL SVIEW[0], RECT, FLOAT\n"
   "DCL SAMP[0]\n"

   "DCL IMAGE[0], 2D, WR\n"
   "DCL TEMP[0..3]\n"

   "IMM[0] UINT32 { 8, 8, 1, 0}\n"
   "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n"

   /* Destination pixel = block id * block size + thread id. */
   "UMAD TEMP[0].xy, SV[1].xyyy, IMM[0].xyyy, SV[0].xyyy\n"

   /* Only threads inside the drawn area (CONST[4]) continue. */
   "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"
   "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n"
   "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n"
   "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"
   "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"

   "UIF TEMP[1].xxxx\n"
      /* Remove the layer translation. */
      "UADD TEMP[2].xy, TEMP[0].xyyy, -CONST[5].xyxy\n"
      "U2F TEMP[2].xy, TEMP[2].xyyy\n"

      /* Undo the destination scaling. */
      "DIV TEMP[2].xy, TEMP[2].xyyy, CONST[3].zwzw\n"

      /* All four components come from the one sampler. */
      "TEX_LZ TEMP[3], TEMP[2].xyyy, SAMP[0], RECT\n"

      "STORE IMAGE[0], TEMP[0].xyyy, TEMP[3], 2D\n"
   "ENDIF\n"

   "END\n";
266
/*
 * TGSI compute shader: sample both layers of three 2D_ARRAY planes,
 * blend the two layers per component and store the blended texel
 * (w forced to 1.0) into IMAGE[0] without colour conversion.
 */
static const char *compute_shader_yuv_weave_y =
   "COMP\n"
   "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"
   "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"
   "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"

   "DCL SV[0], THREAD_ID\n"
   "DCL SV[1], BLOCK_ID\n"

   "DCL CONST[0..5]\n"
   "DCL SVIEW[0..2], 2D_ARRAY, FLOAT\n"
   "DCL SAMP[0..2]\n"

   "DCL IMAGE[0], 2D, WR\n"
   "DCL TEMP[0..15]\n"

   "IMM[0] UINT32 { 8, 8, 1, 0}\n"
   "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n"
   "IMM[2] UINT32 { 1, 2, 4, 0}\n"
   "IMM[3] FLT32 { 0.25, 0.5, 0.125, 0.125}\n"

   /* Destination pixel = block id * block size + thread id. */
   "UMAD TEMP[0], SV[1], IMM[0], SV[0]\n"

   /* Only threads inside the drawn area (CONST[4]) continue. */
   "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"
   "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n"
   "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n"
   "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"
   "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"

   "UIF TEMP[1]\n"
      "MOV TEMP[2], TEMP[0]\n"
      /* Remove the layer translation. */
      "UADD TEMP[2].xy, TEMP[2], -CONST[5].xyxy\n"

      /* Y coordinate, top (layer 0): half the vertical position. */
      "U2F TEMP[2], TEMP[2]\n"
      "DIV TEMP[2].y, TEMP[2].yyyy, IMM[1].yyyy\n"
      /* Y coordinate, down (layer 1). */
      "MOV TEMP[12], TEMP[2]\n"

      /* UV coordinate, top: halved again in both directions. */
      "MOV TEMP[3], TEMP[2]\n"
      "DIV TEMP[3].xy, TEMP[3], IMM[1].yyyy\n"
      /* UV coordinate, down. */
      "MOV TEMP[13], TEMP[3]\n"

      /* Texture offsets. */
      "ADD TEMP[2].x, TEMP[2].xxxx, IMM[3].yyyy\n"
      "ADD TEMP[2].y, TEMP[2].yyyy, IMM[3].xxxx\n"
      "ADD TEMP[12].x, TEMP[12].xxxx, IMM[3].yyyy\n"
      "ADD TEMP[12].y, TEMP[12].yyyy, IMM[3].xxxx\n"

      "ADD TEMP[3].x, TEMP[3].xxxx, IMM[3].xxxx\n"
      "ADD TEMP[3].y, TEMP[3].yyyy, IMM[3].wwww\n"
      "ADD TEMP[13].x, TEMP[13].xxxx, IMM[3].xxxx\n"
      "ADD TEMP[13].y, TEMP[13].yyyy, IMM[3].wwww\n"

      /* Undo the destination scaling. */
      "DIV TEMP[2].xy, TEMP[2], CONST[3].zwzw\n"
      "DIV TEMP[12].xy, TEMP[12], CONST[3].zwzw\n"
      "DIV TEMP[3].xy, TEMP[3], CONST[3].zwzw\n"
      "DIV TEMP[13].xy, TEMP[13], CONST[3].zwzw\n"

      /* Weave offsets: +0.25 for the top, -0.25 for the down lines. */
      "ADD TEMP[2].y, TEMP[2].yyyy, IMM[3].xxxx\n"
      "ADD TEMP[12].y, TEMP[12].yyyy, -IMM[3].xxxx\n"
      "ADD TEMP[3].y, TEMP[3].yyyy, IMM[3].xxxx\n"
      "ADD TEMP[13].y, TEMP[13].yyyy, -IMM[3].xxxx\n"

      /* Blend weights: twice the distance of y to its rounded value. */
      "MOV TEMP[14].x, TEMP[2].yyyy\n"
      "MOV TEMP[14].yz, TEMP[3].yyyy\n"
      "ROUND TEMP[15], TEMP[14]\n"
      "ADD TEMP[14], TEMP[14], -TEMP[15]\n"
      "MOV TEMP[14], |TEMP[14]|\n"
      "MUL TEMP[14], TEMP[14], IMM[1].yyyy\n"

      /* Normalize by the sampler size in CONST[5].zw (halved for UV). */
      "DIV TEMP[2].xy, TEMP[2], CONST[5].zwzw\n"
      "DIV TEMP[12].xy, TEMP[12], CONST[5].zwzw\n"
      "DIV TEMP[15].xy, CONST[5].zwzw, IMM[1].yyyy\n"
      "DIV TEMP[3].xy, TEMP[3], TEMP[15].xyxy\n"
      "DIV TEMP[13].xy, TEMP[13], TEMP[15].xyxy\n"

      /* Fetch from array layer 0.0 ... */
      "MOV TEMP[2].z, IMM[1].wwww\n"
      "MOV TEMP[3].z, IMM[1].wwww\n"
      "TEX_LZ TEMP[10].x, TEMP[2], SAMP[0], 2D_ARRAY\n"
      "TEX_LZ TEMP[10].y, TEMP[3], SAMP[1], 2D_ARRAY\n"
      "TEX_LZ TEMP[10].z, TEMP[3], SAMP[2], 2D_ARRAY\n"

      /* ... and from array layer 1.0. */
      "MOV TEMP[12].z, IMM[1].xxxx\n"
      "MOV TEMP[13].z, IMM[1].xxxx\n"
      "TEX_LZ TEMP[11].x, TEMP[12], SAMP[0], 2D_ARRAY\n"
      "TEX_LZ TEMP[11].y, TEMP[13], SAMP[1], 2D_ARRAY\n"
      "TEX_LZ TEMP[11].z, TEMP[13], SAMP[2], 2D_ARRAY\n"

      "LRP TEMP[6], TEMP[14], TEMP[10], TEMP[11]\n"
      "MOV TEMP[6].w, IMM[1].xxxx\n"

      "STORE IMAGE[0], TEMP[0], TEMP[6], 2D\n"
   "ENDIF\n"

   "END\n";
372
/*
 * TGSI compute shader: same sampling and layer blending as the Y weave
 * variant, but the stored value carries the blended y/z components in
 * its x/y channels.
 */
static const char *compute_shader_yuv_weave_uv =
   "COMP\n"
   "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"
   "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"
   "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"

   "DCL SV[0], THREAD_ID\n"
   "DCL SV[1], BLOCK_ID\n"

   "DCL CONST[0..5]\n"
   "DCL SVIEW[0..2], 2D_ARRAY, FLOAT\n"
   "DCL SAMP[0..2]\n"

   "DCL IMAGE[0], 2D, WR\n"
   "DCL TEMP[0..15]\n"

   "IMM[0] UINT32 { 8, 8, 1, 0}\n"
   "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n"
   "IMM[2] UINT32 { 1, 2, 4, 0}\n"
   "IMM[3] FLT32 { 0.25, 0.5, 0.125, 0.125}\n"

   /* Destination pixel = block id * block size + thread id. */
   "UMAD TEMP[0], SV[1], IMM[0], SV[0]\n"

   /* Only threads inside the drawn area (CONST[4]) continue. */
   "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"
   "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n"
   "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n"
   "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"
   "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"

   "UIF TEMP[1]\n"
      "MOV TEMP[2], TEMP[0]\n"
      /* Remove the layer translation. */
      "UADD TEMP[2].xy, TEMP[2], -CONST[5].xyxy\n"

      /* Y coordinate, top (layer 0): half the vertical position. */
      "U2F TEMP[2], TEMP[2]\n"
      "DIV TEMP[2].y, TEMP[2].yyyy, IMM[1].yyyy\n"
      /* Y coordinate, down (layer 1). */
      "MOV TEMP[12], TEMP[2]\n"

      /* UV coordinate, top: halved again in both directions. */
      "MOV TEMP[3], TEMP[2]\n"
      "DIV TEMP[3].xy, TEMP[3], IMM[1].yyyy\n"
      /* UV coordinate, down. */
      "MOV TEMP[13], TEMP[3]\n"

      /* Texture offsets. */
      "ADD TEMP[2].x, TEMP[2].xxxx, IMM[3].yyyy\n"
      "ADD TEMP[2].y, TEMP[2].yyyy, IMM[3].xxxx\n"
      "ADD TEMP[12].x, TEMP[12].xxxx, IMM[3].yyyy\n"
      "ADD TEMP[12].y, TEMP[12].yyyy, IMM[3].xxxx\n"

      "ADD TEMP[3].x, TEMP[3].xxxx, IMM[3].xxxx\n"
      "ADD TEMP[3].y, TEMP[3].yyyy, IMM[3].wwww\n"
      "ADD TEMP[13].x, TEMP[13].xxxx, IMM[3].xxxx\n"
      "ADD TEMP[13].y, TEMP[13].yyyy, IMM[3].wwww\n"

      /* Undo the destination scaling. */
      "DIV TEMP[2].xy, TEMP[2], CONST[3].zwzw\n"
      "DIV TEMP[12].xy, TEMP[12], CONST[3].zwzw\n"
      "DIV TEMP[3].xy, TEMP[3], CONST[3].zwzw\n"
      "DIV TEMP[13].xy, TEMP[13], CONST[3].zwzw\n"

      /* Weave offsets: +0.25 for the top, -0.25 for the down lines. */
      "ADD TEMP[2].y, TEMP[2].yyyy, IMM[3].xxxx\n"
      "ADD TEMP[12].y, TEMP[12].yyyy, -IMM[3].xxxx\n"
      "ADD TEMP[3].y, TEMP[3].yyyy, IMM[3].xxxx\n"
      "ADD TEMP[13].y, TEMP[13].yyyy, -IMM[3].xxxx\n"

      /* Blend weights: twice the distance of y to its rounded value. */
      "MOV TEMP[14].x, TEMP[2].yyyy\n"
      "MOV TEMP[14].yz, TEMP[3].yyyy\n"
      "ROUND TEMP[15], TEMP[14]\n"
      "ADD TEMP[14], TEMP[14], -TEMP[15]\n"
      "MOV TEMP[14], |TEMP[14]|\n"
      "MUL TEMP[14], TEMP[14], IMM[1].yyyy\n"

      /* Normalize by the sampler size in CONST[5].zw (halved for UV). */
      "DIV TEMP[2].xy, TEMP[2], CONST[5].zwzw\n"
      "DIV TEMP[12].xy, TEMP[12], CONST[5].zwzw\n"
      "DIV TEMP[15].xy, CONST[5].zwzw, IMM[1].yyyy\n"
      "DIV TEMP[3].xy, TEMP[3], TEMP[15].xyxy\n"
      "DIV TEMP[13].xy, TEMP[13], TEMP[15].xyxy\n"

      /* Fetch from array layer 0.0 ... */
      "MOV TEMP[2].z, IMM[1].wwww\n"
      "MOV TEMP[3].z, IMM[1].wwww\n"
      "TEX_LZ TEMP[10].x, TEMP[2], SAMP[0], 2D_ARRAY\n"
      "TEX_LZ TEMP[10].y, TEMP[3], SAMP[1], 2D_ARRAY\n"
      "TEX_LZ TEMP[10].z, TEMP[3], SAMP[2], 2D_ARRAY\n"

      /* ... and from array layer 1.0. */
      "MOV TEMP[12].z, IMM[1].xxxx\n"
      "MOV TEMP[13].z, IMM[1].xxxx\n"
      "TEX_LZ TEMP[11].x, TEMP[12], SAMP[0], 2D_ARRAY\n"
      "TEX_LZ TEMP[11].y, TEMP[13], SAMP[1], 2D_ARRAY\n"
      "TEX_LZ TEMP[11].z, TEMP[13], SAMP[2], 2D_ARRAY\n"

      "LRP TEMP[6], TEMP[14], TEMP[10], TEMP[11]\n"
      "MOV TEMP[6].w, IMM[1].xxxx\n"

      /* Move the blended y/z components into the stored x/y channels. */
      "MOV TEMP[7].xy, TEMP[6].yzww\n"

      "STORE IMAGE[0], TEMP[0], TEMP[7], 2D\n"
   "ENDIF\n"

   "END\n";
480
/*
 * TGSI compute shader: sample three RECT planes (one component each)
 * with the vertical coordinate halved and store the texel (w forced to
 * 1.0) into IMAGE[0] without colour conversion.
 */
static const char *compute_shader_yuv_bob_y =
   "COMP\n"
   "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"
   "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"
   "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"

   "DCL SV[0], THREAD_ID\n"
   "DCL SV[1], BLOCK_ID\n"

   "DCL CONST[0..5]\n"
   "DCL SVIEW[0..2], RECT, FLOAT\n"
   "DCL SAMP[0..2]\n"

   "DCL IMAGE[0], 2D, WR\n"
   "DCL TEMP[0..4]\n"

   "IMM[0] UINT32 { 8, 8, 1, 0}\n"
   "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n"

   /* Destination pixel = block id * block size + thread id. */
   "UMAD TEMP[0], SV[1], IMM[0], SV[0]\n"

   /* Only threads inside the drawn area (CONST[4]) continue. */
   "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"
   "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n"
   "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n"
   "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"
   "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"

   "UIF TEMP[1]\n"
      /* Remove the layer translation; TEMP[3] is the half-resolution
       * coordinate used for the second and third planes. */
      "UADD TEMP[2].xy, TEMP[0], -CONST[5].xyxy\n"
      "U2F TEMP[2], TEMP[2]\n"
      "DIV TEMP[3], TEMP[2], IMM[1].yyyy\n"

      /* Undo the destination scaling, then halve y (IMM[1].xy = 1, 2). */
      "DIV TEMP[2], TEMP[2], CONST[3].zwzw\n"
      "DIV TEMP[2], TEMP[2], IMM[1].xyxy\n"
      "DIV TEMP[3], TEMP[3], CONST[3].zwzw\n"
      "DIV TEMP[3], TEMP[3], IMM[1].xyxy\n"

      /* One component per plane. */
      "TEX_LZ TEMP[4].x, TEMP[2], SAMP[0], RECT\n"
      "TEX_LZ TEMP[4].y, TEMP[3], SAMP[1], RECT\n"
      "TEX_LZ TEMP[4].z, TEMP[3], SAMP[2], RECT\n"

      "MOV TEMP[4].w, IMM[1].xxxx\n"

      "STORE IMAGE[0], TEMP[0], TEMP[4], 2D\n"
   "ENDIF\n"

   "END\n";
532
/*
 * TGSI compute shader: same sampling as the Y bob variant, but the
 * stored value carries the sampled y/z components in its x/y channels.
 */
static const char *compute_shader_yuv_bob_uv =
   "COMP\n"
   "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"
   "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"
   "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"

   "DCL SV[0], THREAD_ID\n"
   "DCL SV[1], BLOCK_ID\n"

   "DCL CONST[0..5]\n"
   "DCL SVIEW[0..2], RECT, FLOAT\n"
   "DCL SAMP[0..2]\n"

   "DCL IMAGE[0], 2D, WR\n"
   "DCL TEMP[0..5]\n"

   "IMM[0] UINT32 { 8, 8, 1, 0}\n"
   "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n"

   /* Destination pixel = block id * block size + thread id. */
   "UMAD TEMP[0], SV[1], IMM[0], SV[0]\n"

   /* Only threads inside the drawn area (CONST[4]) continue. */
   "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"
   "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n"
   "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n"
   "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"
   "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"

   "UIF TEMP[1]\n"
      /* Remove the layer translation; TEMP[3] is the half-resolution
       * coordinate used for the second and third planes. */
      "UADD TEMP[2].xy, TEMP[0], -CONST[5].xyxy\n"
      "U2F TEMP[2], TEMP[2]\n"
      "DIV TEMP[3], TEMP[2], IMM[1].yyyy\n"

      /* Undo the destination scaling, then halve y (IMM[1].xy = 1, 2). */
      "DIV TEMP[2], TEMP[2], CONST[3].zwzw\n"
      "DIV TEMP[2], TEMP[2], IMM[1].xyxy\n"
      "DIV TEMP[3], TEMP[3], CONST[3].zwzw\n"
      "DIV TEMP[3], TEMP[3], IMM[1].xyxy\n"

      /* One component per plane. */
      "TEX_LZ TEMP[4].x, TEMP[2], SAMP[0], RECT\n"
      "TEX_LZ TEMP[4].y, TEMP[3], SAMP[1], RECT\n"
      "TEX_LZ TEMP[4].z, TEMP[3], SAMP[2], RECT\n"

      "MOV TEMP[4].w, IMM[1].xxxx\n"

      /* Move the sampled y/z components into the stored x/y channels. */
      "MOV TEMP[5].xy, TEMP[4].yzww\n"

      "STORE IMAGE[0], TEMP[0], TEMP[5], 2D\n"
   "ENDIF\n"

   "END\n";
586
587 static void
588 cs_launch(struct vl_compositor *c,
589 void *cs,
590 const struct u_rect *draw_area)
591 {
592 struct pipe_context *ctx = c->pipe;
593
594 /* Bind the image */
595 struct pipe_image_view image = {0};
596 image.resource = c->fb_state.cbufs[0]->texture;
597 image.shader_access = image.access = PIPE_IMAGE_ACCESS_READ_WRITE;
598 image.format = c->fb_state.cbufs[0]->texture->format;
599
600 ctx->set_shader_images(c->pipe, PIPE_SHADER_COMPUTE, 0, 1, &image);
601
602 /* Bind compute shader */
603 ctx->bind_compute_state(ctx, cs);
604
605 /* Dispatch compute */
606 struct pipe_grid_info info = {0};
607 info.block[0] = 8;
608 info.block[1] = 8;
609 info.block[2] = 1;
610 info.grid[0] = DIV_ROUND_UP(draw_area->x1, info.block[0]);
611 info.grid[1] = DIV_ROUND_UP(draw_area->y1, info.block[1]);
612 info.grid[2] = 1;
613
614 ctx->launch_grid(ctx, &info);
615
616 /* Make the result visible to all clients. */
617 ctx->memory_barrier(ctx, PIPE_BARRIER_ALL);
618
619 }
620
621 static inline struct u_rect
622 calc_drawn_area(struct vl_compositor_state *s,
623 struct vl_compositor_layer *layer)
624 {
625 struct vertex2f tl, br;
626 struct u_rect result;
627
628 assert(s && layer);
629
630 tl = layer->dst.tl;
631 br = layer->dst.br;
632
633 /* Scale */
634 result.x0 = tl.x * layer->viewport.scale[0] + layer->viewport.translate[0];
635 result.y0 = tl.y * layer->viewport.scale[1] + layer->viewport.translate[1];
636 result.x1 = br.x * layer->viewport.scale[0] + layer->viewport.translate[0];
637 result.y1 = br.y * layer->viewport.scale[1] + layer->viewport.translate[1];
638
639 /* Clip */
640 result.x0 = MAX2(result.x0, s->scissor.minx);
641 result.y0 = MAX2(result.y0, s->scissor.miny);
642 result.x1 = MIN2(result.x1, s->scissor.maxx);
643 result.y1 = MIN2(result.y1, s->scissor.maxy);
644 return result;
645 }
646
647 static bool
648 set_viewport(struct vl_compositor_state *s,
649 struct cs_viewport *drawn,
650 struct pipe_sampler_view **samplers)
651 {
652 struct pipe_transfer *buf_transfer;
653
654 assert(s && drawn);
655
656 void *ptr = pipe_buffer_map(s->pipe, s->shader_params,
657 PIPE_TRANSFER_READ | PIPE_TRANSFER_WRITE,
658 &buf_transfer);
659
660 if (!ptr)
661 return false;
662
663 float *ptr_float = (float *)ptr;
664 ptr_float += sizeof(vl_csc_matrix)/sizeof(float) + 2;
665 *ptr_float++ = drawn->scale_x;
666 *ptr_float++ = drawn->scale_y;
667
668 int *ptr_int = (int *)ptr_float;
669 *ptr_int++ = drawn->area.x0;
670 *ptr_int++ = drawn->area.y0;
671 *ptr_int++ = drawn->area.x1;
672 *ptr_int++ = drawn->area.y1;
673 *ptr_int++ = drawn->translate_x;
674 *ptr_int++ = drawn->translate_y;
675
676 ptr_float = (float *)ptr_int;
677 *ptr_float++ = drawn->sampler0_w;
678 *ptr_float++ = drawn->sampler0_h;
679
680 /* compute_shader_video_buffer uses pixel coordinates based on the
681 * Y sampler dimensions. If U/V are using separate planes and are
682 * subsampled, we need to scale the coordinates */
683 if (samplers[1]) {
684 float h_ratio = samplers[1]->texture->width0 /
685 (float) samplers[0]->texture->width0;
686 *ptr_float++ = h_ratio;
687 float v_ratio = samplers[1]->texture->height0 /
688 (float) samplers[0]->texture->height0;
689 *ptr_float++ = v_ratio;
690 }
691 pipe_buffer_unmap(s->pipe, buf_transfer);
692
693 return true;
694 }
695
696 static void
697 draw_layers(struct vl_compositor *c,
698 struct vl_compositor_state *s,
699 struct u_rect *dirty)
700 {
701 unsigned i;
702
703 assert(c);
704
705 for (i = 0; i < VL_COMPOSITOR_MAX_LAYERS; ++i) {
706 if (s->used_layers & (1 << i)) {
707 struct vl_compositor_layer *layer = &s->layers[i];
708 struct pipe_sampler_view **samplers = &layer->sampler_views[0];
709 unsigned num_sampler_views = !samplers[1] ? 1 : !samplers[2] ? 2 : 3;
710 struct cs_viewport drawn;
711
712 drawn.area = calc_drawn_area(s, layer);
713 drawn.scale_x = layer->viewport.scale[0] /
714 (float)layer->sampler_views[0]->texture->width0;
715 drawn.scale_y = drawn.scale_x;
716 drawn.translate_x = (int)layer->viewport.translate[0];
717 drawn.translate_y = (int)layer->viewport.translate[1];
718 drawn.sampler0_w = (float)layer->sampler_views[0]->texture->width0;
719 drawn.sampler0_h = (float)layer->sampler_views[0]->texture->height0;
720 set_viewport(s, &drawn, samplers);
721
722 c->pipe->bind_sampler_states(c->pipe, PIPE_SHADER_COMPUTE, 0,
723 num_sampler_views, layer->samplers);
724 c->pipe->set_sampler_views(c->pipe, PIPE_SHADER_COMPUTE, 0,
725 num_sampler_views, samplers);
726
727 cs_launch(c, layer->cs, &(drawn.area));
728
729 /* Unbind. */
730 c->pipe->set_shader_images(c->pipe, PIPE_SHADER_COMPUTE, 0, 1, NULL);
731 c->pipe->set_constant_buffer(c->pipe, PIPE_SHADER_COMPUTE, 0, NULL);
732 c->pipe->set_sampler_views(c->pipe, PIPE_SHADER_FRAGMENT, 0,
733 num_sampler_views, NULL);
734 c->pipe->bind_compute_state(c->pipe, NULL);
735 c->pipe->bind_sampler_states(c->pipe, PIPE_SHADER_COMPUTE, 0,
736 num_sampler_views, NULL);
737
738 if (dirty) {
739 struct u_rect drawn = calc_drawn_area(s, layer);
740 dirty->x0 = MIN2(drawn.x0, dirty->x0);
741 dirty->y0 = MIN2(drawn.y0, dirty->y0);
742 dirty->x1 = MAX2(drawn.x1, dirty->x1);
743 dirty->y1 = MAX2(drawn.y1, dirty->y1);
744 }
745 }
746 }
747 }
748
749 void *
750 vl_compositor_cs_create_shader(struct vl_compositor *c,
751 const char *compute_shader_text)
752 {
753 assert(c && compute_shader_text);
754
755 struct tgsi_token tokens[1024];
756 if (!tgsi_text_translate(compute_shader_text, tokens, ARRAY_SIZE(tokens))) {
757 assert(0);
758 return NULL;
759 }
760
761 struct pipe_compute_state state = {0};
762 state.ir_type = PIPE_SHADER_IR_TGSI;
763 state.prog = tokens;
764
765 /* create compute shader */
766 return c->pipe->create_compute_state(c->pipe, &state);
767 }
768
769 void
770 vl_compositor_cs_render(struct vl_compositor_state *s,
771 struct vl_compositor *c,
772 struct pipe_surface *dst_surface,
773 struct u_rect *dirty_area,
774 bool clear_dirty)
775 {
776 assert(c && s);
777 assert(dst_surface);
778
779 c->fb_state.width = dst_surface->width;
780 c->fb_state.height = dst_surface->height;
781 c->fb_state.cbufs[0] = dst_surface;
782
783 if (!s->scissor_valid) {
784 s->scissor.minx = 0;
785 s->scissor.miny = 0;
786 s->scissor.maxx = dst_surface->width;
787 s->scissor.maxy = dst_surface->height;
788 }
789
790 if (clear_dirty && dirty_area &&
791 (dirty_area->x0 < dirty_area->x1 || dirty_area->y0 < dirty_area->y1)) {
792
793 c->pipe->clear_render_target(c->pipe, dst_surface, &s->clear_color,
794 0, 0, dst_surface->width, dst_surface->height, false);
795 dirty_area->x0 = dirty_area->y0 = VL_COMPOSITOR_MAX_DIRTY;
796 dirty_area->x1 = dirty_area->y1 = VL_COMPOSITOR_MIN_DIRTY;
797 }
798
799 pipe_set_constant_buffer(c->pipe, PIPE_SHADER_COMPUTE, 0, s->shader_params);
800
801 draw_layers(c, s, dirty_area);
802 }
803
804 bool vl_compositor_cs_init_shaders(struct vl_compositor *c)
805 {
806 assert(c);
807
808 c->cs_video_buffer = vl_compositor_cs_create_shader(c, compute_shader_video_buffer);
809 if (!c->cs_video_buffer) {
810 debug_printf("Unable to create video_buffer compute shader.\n");
811 return false;
812 }
813
814 c->cs_weave_rgb = vl_compositor_cs_create_shader(c, compute_shader_weave);
815 if (!c->cs_weave_rgb) {
816 debug_printf("Unable to create weave_rgb compute shader.\n");
817 return false;
818 }
819
820 c->cs_yuv.weave.y = vl_compositor_cs_create_shader(c, compute_shader_yuv_weave_y);
821 c->cs_yuv.weave.uv = vl_compositor_cs_create_shader(c, compute_shader_yuv_weave_uv);
822 c->cs_yuv.bob.y = vl_compositor_cs_create_shader(c, compute_shader_yuv_bob_y);
823 c->cs_yuv.bob.uv = vl_compositor_cs_create_shader(c, compute_shader_yuv_bob_uv);
824 if (!c->cs_yuv.weave.y || !c->cs_yuv.weave.uv ||
825 !c->cs_yuv.bob.y || !c->cs_yuv.bob.uv) {
826 debug_printf("Unable to create YCbCr i-to-YCbCr p deint compute shader.\n");
827 return false;
828 }
829
830 return true;
831 }
832
833 void vl_compositor_cs_cleanup_shaders(struct vl_compositor *c)
834 {
835 assert(c);
836
837 if (c->cs_video_buffer)
838 c->pipe->delete_compute_state(c->pipe, c->cs_video_buffer);
839 if (c->cs_weave_rgb)
840 c->pipe->delete_compute_state(c->pipe, c->cs_weave_rgb);
841 if (c->cs_yuv.weave.y)
842 c->pipe->delete_compute_state(c->pipe, c->cs_yuv.weave.y);
843 if (c->cs_yuv.weave.uv)
844 c->pipe->delete_compute_state(c->pipe, c->cs_yuv.weave.uv);
845 if (c->cs_yuv.bob.y)
846 c->pipe->delete_compute_state(c->pipe, c->cs_yuv.bob.y);
847 if (c->cs_yuv.bob.uv)
848 c->pipe->delete_compute_state(c->pipe, c->cs_yuv.bob.uv);
849 }