gallium/auxiliary/vl: Add compute shader to support video compositor render
[mesa.git] / src / gallium / auxiliary / vl / vl_compositor_cs.c
1 /**************************************************************************
2 *
3 * Copyright 2019 Advanced Micro Devices, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 * Authors: James Zhu <james.zhu@amd.com>
27 *
28 **************************************************************************/
29
30 #include <assert.h>
31
32 #include "tgsi/tgsi_text.h"
33 #include "vl_compositor_cs.h"
34
35 struct cs_viewport {
36 float scale_x;
37 float scale_y;
38 struct u_rect area;
39 int translate_x;
40 int translate_y;
41 };
42
/*
 * TGSI compute shader: composite a layer sampled from three sampler views
 * (one channel taken from each) into IMAGE[0], applying the matrix held in
 * CONST[0..2].
 *
 * Constant layout as read by the shader text below:
 *   CONST[0..2]   rows of the color conversion matrix (DP4 operands)
 *   CONST[3].xy   lower / upper thresholds used to build the alpha value
 *   CONST[3].zw   x / y divisors applied to the sampling coordinate
 *   CONST[4]      drawn area: .xy inclusive lower bound, .zw exclusive upper
 *   CONST[5].xy   translation subtracted from the destination coordinate
 */
char *compute_shader_video_buffer =
      "COMP\n"
      "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"
      "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"
      "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"

      "DCL SV[0], THREAD_ID\n"
      "DCL SV[1], BLOCK_ID\n"

      "DCL CONST[0..5]\n"
      "DCL SVIEW[0..2], RECT, FLOAT\n"
      "DCL SAMP[0..2]\n"

      "DCL IMAGE[0], 2D, WR\n"
      "DCL TEMP[0..7]\n"

      "IMM[0] UINT32 { 8, 8, 1, 0}\n"
      "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n"

      /* TEMP[0] = BLOCK_ID * block size + THREAD_ID: destination coordinate */
      "UMAD TEMP[0], SV[1], IMM[0], SV[0]\n"

      /* Inside the drawn area?  CONST[4].xy <= TEMP[0].xy < CONST[4].zw,
       * all four comparisons folded into TEMP[1].x. */
      "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"
      "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n"
      "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n"
      "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"
      "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"

      "UIF TEMP[1]\n"
         /* Remove the translation and go to float.  TEMP[3] is the same
          * coordinate halved; it addresses samplers 1 and 2 (presumably
          * planes at half resolution -- confirm against the caller). */
         "UADD TEMP[2].xy, TEMP[0], -CONST[5].xyxy\n"
         "U2F TEMP[2], TEMP[2]\n"
         "DIV TEMP[3], TEMP[2], IMM[1].yyyy\n"

         /* Apply the scale divisors to both coordinates */
         "DIV TEMP[2], TEMP[2], CONST[3].zwzw\n"
         "DIV TEMP[3], TEMP[3], CONST[3].zwzw\n"

         /* One channel from each sampler: x <- 0, y <- 1, z <- 2 */
         "TEX_LZ TEMP[4].x, TEMP[2], SAMP[0], RECT\n"
         "TEX_LZ TEMP[4].y, TEMP[3], SAMP[1], RECT\n"
         "TEX_LZ TEMP[4].z, TEMP[3], SAMP[2], RECT\n"

         /* w = 1.0 so the DP4s below pick up the matrix' fourth column */
         "MOV TEMP[4].w, IMM[1].xxxx\n"

         /* Matrix multiply: output xyz = CONST[0..2] . TEMP[4] */
         "DP4 TEMP[7].x, CONST[0], TEMP[4]\n"
         "DP4 TEMP[7].y, CONST[1], TEMP[4]\n"
         "DP4 TEMP[7].z, CONST[2], TEMP[4]\n"

         /* Output w = max(v <= CONST[3].x, v > CONST[3].y), where v is
          * the value fetched into TEMP[4].z */
         "MOV TEMP[5].w, TEMP[4].zzzz\n"
         "SLE TEMP[6].w, TEMP[5], CONST[3].xxxx\n"
         "SGT TEMP[5].w, TEMP[5], CONST[3].yyyy\n"

         "MAX TEMP[7].w, TEMP[5], TEMP[6]\n"

         /* Write the result at the destination coordinate */
         "STORE IMAGE[0], TEMP[0], TEMP[7], 2D\n"
      "ENDIF\n"

      "END\n";
103
/*
 * TGSI compute shader: like the video buffer shader, but the sampling
 * coordinate additionally carries a layer index in .z derived from the
 * destination row, and the row itself is halved before sampling.
 *
 * Constant layout as read by the shader text below:
 *   CONST[0..2]   rows of the color conversion matrix (DP4 operands)
 *   CONST[3].xy   lower / upper thresholds used to build the alpha value
 *   CONST[3].zw   x / y divisors applied to the sampling coordinate
 *   CONST[4]      drawn area: .xy inclusive lower bound, .zw exclusive upper
 *   CONST[5].xy   translation subtracted from the destination coordinate
 */
char *compute_shader_weave =
      "COMP\n"
      "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"
      "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"
      "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"

      "DCL SV[0], THREAD_ID\n"
      "DCL SV[1], BLOCK_ID\n"

      "DCL CONST[0..5]\n"
      "DCL SVIEW[0..2], RECT, FLOAT\n"
      "DCL SAMP[0..2]\n"

      "DCL IMAGE[0], 2D, WR\n"
      "DCL TEMP[0..9]\n"

      "IMM[0] UINT32 { 8, 8, 1, 0}\n"
      "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n"
      "IMM[2] UINT32 { 1, 2, 4, 0}\n"

      /* TEMP[0] = BLOCK_ID * block size + THREAD_ID: destination coordinate */
      "UMAD TEMP[0], SV[1], IMM[0], SV[0]\n"

      /* Inside the drawn area?  CONST[4].xy <= TEMP[0].xy < CONST[4].zw,
       * all four comparisons folded into TEMP[1].x. */
      "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"
      "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n"
      "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n"
      "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"
      "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"

      "UIF TEMP[1]\n"
         "MOV TEMP[2], TEMP[0]\n"
         /* Remove the translation from the working copy */
         "UADD TEMP[2].xy, TEMP[2], -CONST[5].xyxy\n"

         /* Layer selection from the translated row y:
          *   TEMP[2].z = y % 2
          *   TEMP[3].z = (y % 4) >> 1 */
         "UMOD TEMP[2].z, TEMP[2].yyyy, IMM[2].yyyy\n"
         "UMOD TEMP[3].z, TEMP[2].yyyy, IMM[2].zzzz\n"
         "USHR TEMP[3].z, TEMP[3].zzzz, IMM[2].xxxx\n"

         /* TEMP[2].y = y >> 1, then TEMP[3].xy = TEMP[2].xy >> 1 (the
          * second shift sees the already-halved row) */
         "USHR TEMP[2].y, TEMP[2], IMM[2].xxxx\n"
         "USHR TEMP[3].xy, TEMP[2], IMM[2].xxxx\n"

         /* Integer coordinates to float */
         "U2F TEMP[4], TEMP[2]\n"
         "U2F TEMP[5], TEMP[3]\n"

         /* Apply the scale divisors to both coordinates */
         "DIV TEMP[4], TEMP[4], CONST[3].zwzw\n"
         "DIV TEMP[5], TEMP[5], CONST[3].zwzw\n"

         /* One channel from each sampler: x <- 0, y <- 1, z <- 2 */
         "TEX_LZ TEMP[6].x, TEMP[4], SAMP[0], RECT\n"
         "TEX_LZ TEMP[6].y, TEMP[5], SAMP[1], RECT\n"
         "TEX_LZ TEMP[6].z, TEMP[5], SAMP[2], RECT\n"

         /* w = 1.0 so the DP4s below pick up the matrix' fourth column */
         "MOV TEMP[6].w, IMM[1].xxxx\n"

         /* Matrix multiply: output xyz = CONST[0..2] . TEMP[6] */
         "DP4 TEMP[9].x, CONST[0], TEMP[6]\n"
         "DP4 TEMP[9].y, CONST[1], TEMP[6]\n"
         "DP4 TEMP[9].z, CONST[2], TEMP[6]\n"

         /* Output w = max(v <= CONST[3].x, v > CONST[3].y), where v is
          * the value fetched into TEMP[6].z */
         "MOV TEMP[7].w, TEMP[6].zzzz\n"
         "SLE TEMP[8].w, TEMP[7], CONST[3].xxxx\n"
         "SGT TEMP[7].w, TEMP[7], CONST[3].yyyy\n"

         "MAX TEMP[9].w, TEMP[7], TEMP[8]\n"

         /* Write the result at the destination coordinate */
         "STORE IMAGE[0], TEMP[0], TEMP[9], 2D\n"
      "ENDIF\n"

      "END\n";
175
/*
 * TGSI compute shader: copy a layer from a single sampler view into
 * IMAGE[0] with translation and scaling only; the fetched texel is stored
 * unmodified.
 *
 * Constants read by the shader text below:
 *   CONST[3].zw   x / y divisors applied to the sampling coordinate
 *   CONST[4]      drawn area: .xy inclusive lower bound, .zw exclusive upper
 *   CONST[5].xy   translation subtracted from the destination coordinate
 */
char *compute_shader_rgba =
      "COMP\n"
      "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"
      "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"
      "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"

      "DCL SV[0], THREAD_ID\n"
      "DCL SV[1], BLOCK_ID\n"

      "DCL CONST[0..5]\n"
      "DCL SVIEW[0], RECT, FLOAT\n"
      "DCL SAMP[0]\n"

      "DCL IMAGE[0], 2D, WR\n"
      "DCL TEMP[0..3]\n"

      "IMM[0] UINT32 { 8, 8, 1, 0}\n"
      "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n"

      /* TEMP[0] = BLOCK_ID * block size + THREAD_ID: destination coordinate */
      "UMAD TEMP[0], SV[1], IMM[0], SV[0]\n"

      /* Inside the drawn area?  CONST[4].xy <= TEMP[0].xy < CONST[4].zw,
       * all four comparisons folded into TEMP[1].x. */
      "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"
      "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n"
      "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n"
      "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"
      "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"

      "UIF TEMP[1]\n"
         /* Remove the translation and go to float */
         "UADD TEMP[2].xy, TEMP[0], -CONST[5].xyxy\n"
         "U2F TEMP[2], TEMP[2]\n"

         /* Apply the scale divisors */
         "DIV TEMP[2], TEMP[2], CONST[3].zwzw\n"

         /* Fetch all four channels from the single sampler */
         "TEX_LZ TEMP[3], TEMP[2], SAMP[0], RECT\n"

         /* Write the texel at the destination coordinate */
         "STORE IMAGE[0], TEMP[0], TEMP[3], 2D\n"
      "ENDIF\n"

      "END\n";
219
220 static void
221 cs_launch(struct vl_compositor *c,
222 void *cs)
223 {
224 struct pipe_context *ctx = c->pipe;
225
226 /* Bind the image */
227 struct pipe_image_view image = {};
228 image.resource = c->fb_state.cbufs[0]->texture;
229 image.shader_access = image.access = PIPE_IMAGE_ACCESS_READ_WRITE;
230 image.format = c->fb_state.cbufs[0]->texture->format;
231
232 ctx->set_shader_images(c->pipe, PIPE_SHADER_COMPUTE, 0, 1, &image);
233
234 /* Bind compute shader */
235 ctx->bind_compute_state(ctx, cs);
236
237 /* Dispatch compute */
238 struct pipe_grid_info info = {};
239 info.block[0] = 8;
240 info.block[1] = 8;
241 info.block[2] = 1;
242 info.grid[0] = DIV_ROUND_UP(c->fb_state.width, info.block[0]);
243 info.grid[1] = DIV_ROUND_UP(c->fb_state.height, info.block[1]);
244 info.grid[2] = 1;
245
246 ctx->launch_grid(ctx, &info);
247 }
248
249 static inline struct u_rect
250 calc_drawn_area(struct vl_compositor_state *s,
251 struct vl_compositor_layer *layer)
252 {
253 struct vertex2f tl, br;
254 struct u_rect result;
255
256 assert(s && layer);
257
258 tl = layer->dst.tl;
259 br = layer->dst.br;
260
261 /* Scale */
262 result.x0 = tl.x * layer->viewport.scale[0] + layer->viewport.translate[0];
263 result.y0 = tl.y * layer->viewport.scale[1] + layer->viewport.translate[1];
264 result.x1 = br.x * layer->viewport.scale[0] + layer->viewport.translate[0];
265 result.y1 = br.y * layer->viewport.scale[1] + layer->viewport.translate[1];
266
267 /* Clip */
268 result.x0 = MAX2(result.x0, s->scissor.minx);
269 result.y0 = MAX2(result.y0, s->scissor.miny);
270 result.x1 = MIN2(result.x1, s->scissor.maxx);
271 result.y1 = MIN2(result.y1, s->scissor.maxy);
272 return result;
273 }
274
275 static bool
276 set_viewport(struct vl_compositor_state *s,
277 struct cs_viewport *drawn)
278 {
279 struct pipe_transfer *buf_transfer;
280
281 assert(s && drawn);
282
283 void *ptr = pipe_buffer_map(s->pipe, s->shader_params,
284 PIPE_TRANSFER_READ | PIPE_TRANSFER_WRITE,
285 &buf_transfer);
286
287 if (!ptr)
288 return false;
289
290 float *ptr_float = (float *)ptr;
291 ptr_float += sizeof(vl_csc_matrix)/sizeof(float) + 2;
292 *ptr_float++ = drawn->scale_x;
293 *ptr_float++ = drawn->scale_y;
294
295 int *ptr_int = (int *)ptr_float;
296 *ptr_int++ = drawn->area.x0;
297 *ptr_int++ = drawn->area.y0;
298 *ptr_int++ = drawn->area.x1;
299 *ptr_int++ = drawn->area.y1;
300 *ptr_int++ = drawn->translate_x;
301 *ptr_int = drawn->translate_y;
302
303 pipe_buffer_unmap(s->pipe, buf_transfer);
304
305 return true;
306 }
307
308 static void
309 draw_layers(struct vl_compositor *c,
310 struct vl_compositor_state *s,
311 struct u_rect *dirty)
312 {
313 unsigned i;
314 static struct cs_viewport old_drawn;
315
316 assert(c);
317
318 for (i = 0; i < VL_COMPOSITOR_MAX_LAYERS; ++i) {
319 if (s->used_layers & (1 << i)) {
320 struct vl_compositor_layer *layer = &s->layers[i];
321 struct pipe_sampler_view **samplers = &layer->sampler_views[0];
322 unsigned num_sampler_views = !samplers[1] ? 1 : !samplers[2] ? 2 : 3;
323 struct cs_viewport drawn;
324
325 drawn.area = calc_drawn_area(s, layer);
326 drawn.scale_x = layer->viewport.scale[0] /
327 (float)layer->sampler_views[0]->texture->width0;
328 drawn.scale_y = drawn.scale_x;
329 drawn.translate_x = (int)layer->viewport.translate[0];
330 drawn.translate_y = (int)layer->viewport.translate[1];
331
332 if (memcmp(&drawn, &old_drawn, sizeof(struct cs_viewport))) {
333 set_viewport(s, &drawn);
334 old_drawn = drawn;
335 }
336
337 c->pipe->bind_sampler_states(c->pipe, PIPE_SHADER_COMPUTE, 0,
338 num_sampler_views, layer->samplers);
339 c->pipe->set_sampler_views(c->pipe, PIPE_SHADER_COMPUTE, 0,
340 num_sampler_views, samplers);
341
342 cs_launch(c, layer->cs);
343
344 if (dirty) {
345 struct u_rect drawn = calc_drawn_area(s, layer);
346 dirty->x0 = MIN2(drawn.x0, dirty->x0);
347 dirty->y0 = MIN2(drawn.y0, dirty->y0);
348 dirty->x1 = MAX2(drawn.x1, dirty->x1);
349 dirty->y1 = MAX2(drawn.y1, dirty->y1);
350 }
351 }
352 }
353 }
354
355 void *
356 vl_compositor_cs_create_shader(struct vl_compositor *c,
357 const char *compute_shader_text)
358 {
359 assert(c && compute_shader_text);
360
361 struct tgsi_token tokens[1024];
362 if (!tgsi_text_translate(compute_shader_text, tokens, ARRAY_SIZE(tokens))) {
363 assert(0);
364 return NULL;
365 }
366
367 struct pipe_compute_state state = {};
368 state.ir_type = PIPE_SHADER_IR_TGSI;
369 state.prog = tokens;
370
371 /* create compute shader */
372 return c->pipe->create_compute_state(c->pipe, &state);
373 }
374
375 void
376 vl_compositor_cs_render(struct vl_compositor_state *s,
377 struct vl_compositor *c,
378 struct pipe_surface *dst_surface,
379 struct u_rect *dirty_area,
380 bool clear_dirty)
381 {
382 assert(c && s);
383 assert(dst_surface);
384
385 c->fb_state.width = dst_surface->width;
386 c->fb_state.height = dst_surface->height;
387 c->fb_state.cbufs[0] = dst_surface;
388
389 if (!s->scissor_valid) {
390 s->scissor.minx = 0;
391 s->scissor.miny = 0;
392 s->scissor.maxx = dst_surface->width;
393 s->scissor.maxy = dst_surface->height;
394 }
395
396 if (clear_dirty && dirty_area &&
397 (dirty_area->x0 < dirty_area->x1 || dirty_area->y0 < dirty_area->y1)) {
398
399 c->pipe->clear_render_target(c->pipe, dst_surface, &s->clear_color,
400 0, 0, dst_surface->width, dst_surface->height, false);
401 dirty_area->x0 = dirty_area->y0 = VL_COMPOSITOR_MAX_DIRTY;
402 dirty_area->x1 = dirty_area->y1 = VL_COMPOSITOR_MIN_DIRTY;
403 }
404
405 pipe_set_constant_buffer(c->pipe, PIPE_SHADER_COMPUTE, 0, s->shader_params);
406
407 draw_layers(c, s, dirty_area);
408 }