st/xorg: add yuv shaders
[mesa.git] / src / gallium / state_trackers / xorg / xorg_exa_tgsi.c
1 #include "xorg_exa_tgsi.h"
2
3 /*### stupidity defined in X11/extensions/XI.h */
4 #undef Absolute
5
6 #include "pipe/p_format.h"
7 #include "pipe/p_context.h"
8 #include "pipe/p_state.h"
9 #include "pipe/p_inlines.h"
10 #include "pipe/p_shader_tokens.h"
11
12 #include "util/u_memory.h"
13 #include "util/u_simple_shaders.h"
14
15 #include "tgsi/tgsi_ureg.h"
16
17 #include "cso_cache/cso_context.h"
18 #include "cso_cache/cso_hash.h"
19
20 /* Vertex shader:
21 * IN[0] = vertex pos
22 * IN[1] = src tex coord | solid fill color
23 * IN[2] = mask tex coord
24 * IN[3] = dst tex coord
25 * CONST[0] = (2/dst_width, 2/dst_height, 1, 1)
26 * CONST[1] = (-1, -1, 0, 0)
27 *
28 * OUT[0] = vertex pos
29 * OUT[1] = src tex coord | solid fill color
30 * OUT[2] = mask tex coord
31 * OUT[3] = dst tex coord
32 */
33
34 /* Fragment shader:
35 * SAMP[0] = src
36 * SAMP[1] = mask
37 * SAMP[2] = dst
38 * IN[0] = pos src | solid fill color
39 * IN[1] = pos mask
40 * IN[2] = pos dst
41 * CONST[0] = (0, 0, 0, 1)
42 *
43 * OUT[0] = color
44 */
45
/* Shader cache attached to one renderer.  Each hash maps a traits bitmask
 * to the shader object that was built for it. */
struct xorg_shaders {
   struct xorg_renderer *r;   /* renderer whose pipe/cso contexts are used */

   struct cso_hash *vs_hash;  /* vertex shaders, keyed by vs_traits */
   struct cso_hash *fs_hash;  /* fragment shaders, keyed by fs_traits */
};
52
53 static INLINE void
54 src_in_mask(struct ureg_program *ureg,
55 struct ureg_dst dst,
56 struct ureg_src src,
57 struct ureg_src mask,
58 int component_alpha)
59 {
60 if (component_alpha == FS_CA_FULL) {
61 ureg_MUL(ureg, dst, src, mask);
62 } else if (component_alpha == FS_CA_SRCALPHA) {
63 ureg_MUL(ureg, dst,
64 ureg_scalar(src, TGSI_SWIZZLE_W), mask);
65 }
66 else {
67 ureg_MUL(ureg, dst, src,
68 ureg_scalar(mask, TGSI_SWIZZLE_X));
69 }
70 }
71
72 static struct ureg_src
73 vs_normalize_coords(struct ureg_program *ureg, struct ureg_src coords,
74 struct ureg_src const0, struct ureg_src const1)
75 {
76 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
77 struct ureg_src ret;
78 ureg_MAD(ureg, tmp, coords, const0, const1);
79 ret = ureg_src(tmp);
80 ureg_release_temporary(ureg, tmp);
81 return ret;
82 }
83
84 static void
85 linear_gradient(struct ureg_program *ureg,
86 struct ureg_dst out,
87 struct ureg_src pos,
88 struct ureg_src sampler,
89 struct ureg_src coords,
90 struct ureg_src const0124,
91 struct ureg_src matrow0,
92 struct ureg_src matrow1,
93 struct ureg_src matrow2)
94 {
95 struct ureg_dst temp0 = ureg_DECL_temporary(ureg);
96 struct ureg_dst temp1 = ureg_DECL_temporary(ureg);
97 struct ureg_dst temp2 = ureg_DECL_temporary(ureg);
98 struct ureg_dst temp3 = ureg_DECL_temporary(ureg);
99 struct ureg_dst temp4 = ureg_DECL_temporary(ureg);
100 struct ureg_dst temp5 = ureg_DECL_temporary(ureg);
101
102 ureg_MOV(ureg,
103 ureg_writemask(temp0, TGSI_WRITEMASK_XY), pos);
104 ureg_MOV(ureg,
105 ureg_writemask(temp0, TGSI_WRITEMASK_Z),
106 ureg_scalar(const0124, TGSI_SWIZZLE_Y));
107
108 ureg_DP3(ureg, temp1, matrow0, ureg_src(temp0));
109 ureg_DP3(ureg, temp2, matrow1, ureg_src(temp0));
110 ureg_DP3(ureg, temp3, matrow2, ureg_src(temp0));
111 ureg_RCP(ureg, temp3, ureg_src(temp3));
112 ureg_MUL(ureg, temp1, ureg_src(temp1), ureg_src(temp3));
113 ureg_MUL(ureg, temp2, ureg_src(temp2), ureg_src(temp3));
114
115 ureg_MOV(ureg, ureg_writemask(temp4, TGSI_WRITEMASK_X),
116 ureg_src(temp1));
117 ureg_MOV(ureg, ureg_writemask(temp4, TGSI_WRITEMASK_Y),
118 ureg_src(temp2));
119
120 ureg_MUL(ureg, temp0,
121 ureg_scalar(coords, TGSI_SWIZZLE_Y),
122 ureg_scalar(ureg_src(temp4), TGSI_SWIZZLE_Y));
123 ureg_MAD(ureg, temp1,
124 ureg_scalar(coords, TGSI_SWIZZLE_X),
125 ureg_scalar(ureg_src(temp4), TGSI_SWIZZLE_X),
126 ureg_src(temp0));
127
128 ureg_MUL(ureg, temp2,
129 ureg_src(temp1),
130 ureg_scalar(coords, TGSI_SWIZZLE_Z));
131
132 ureg_TEX(ureg, out,
133 TGSI_TEXTURE_1D, ureg_src(temp2), sampler);
134
135 ureg_release_temporary(ureg, temp0);
136 ureg_release_temporary(ureg, temp1);
137 ureg_release_temporary(ureg, temp2);
138 ureg_release_temporary(ureg, temp3);
139 ureg_release_temporary(ureg, temp4);
140 ureg_release_temporary(ureg, temp5);
141 }
142
143
/* Emit the instructions for a radial-gradient lookup.
 *
 * The fragment position is run through the 3x3 matrix given by
 * matrow0..matrow2 (with a divide by the third row), and the lookup
 * coordinate is then computed with the quadratic formula
 *
 *    t = (sqrt(|B*B - K*A*C|) - B) / (2*A)
 *
 * where, with p the transformed position,
 *    A = coords.z
 *    B = 2 * (coords.x * p.x + coords.y * p.y)
 *    C = -(p.x * p.x + p.y * p.y)
 *    K = const0124.w   (presumably 4, going by the name - confirm)
 *
 * `out` receives the 1D texture sample taken at t through `sampler`.
 */
static void
radial_gradient(struct ureg_program *ureg,
                struct ureg_dst out,
                struct ureg_src pos,
                struct ureg_src sampler,
                struct ureg_src coords,
                struct ureg_src const0124,
                struct ureg_src matrow0,
                struct ureg_src matrow1,
                struct ureg_src matrow2)
{
   struct ureg_dst temp0 = ureg_DECL_temporary(ureg);
   struct ureg_dst temp1 = ureg_DECL_temporary(ureg);
   struct ureg_dst temp2 = ureg_DECL_temporary(ureg);
   struct ureg_dst temp3 = ureg_DECL_temporary(ureg);
   struct ureg_dst temp4 = ureg_DECL_temporary(ureg);
   struct ureg_dst temp5 = ureg_DECL_temporary(ureg);

   /* temp0 = (pos.x, pos.y, const0124.y) */
   ureg_MOV(ureg,
            ureg_writemask(temp0, TGSI_WRITEMASK_XY),
            pos);
   ureg_MOV(ureg,
            ureg_writemask(temp0, TGSI_WRITEMASK_Z),
            ureg_scalar(const0124, TGSI_SWIZZLE_Y));

   /* apply the matrix, then divide the first two rows by the third */
   ureg_DP3(ureg, temp1, matrow0, ureg_src(temp0));
   ureg_DP3(ureg, temp2, matrow1, ureg_src(temp0));
   ureg_DP3(ureg, temp3, matrow2, ureg_src(temp0));
   ureg_RCP(ureg, temp3, ureg_src(temp3));
   ureg_MUL(ureg, temp1, ureg_src(temp1), ureg_src(temp3));
   ureg_MUL(ureg, temp2, ureg_src(temp2), ureg_src(temp3));

   /* temp5.xy = transformed position p */
   ureg_MOV(ureg, ureg_writemask(temp5, TGSI_WRITEMASK_X),
            ureg_src(temp1));
   ureg_MOV(ureg, ureg_writemask(temp5, TGSI_WRITEMASK_Y),
            ureg_src(temp2));

   /* temp1 = B = 2 * (coords.x * p.x + coords.y * p.y) */
   ureg_MUL(ureg, temp0, ureg_scalar(coords, TGSI_SWIZZLE_Y),
            ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_Y));
   ureg_MAD(ureg, temp1,
            ureg_scalar(coords, TGSI_SWIZZLE_X),
            ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_X),
            ureg_src(temp0));
   ureg_ADD(ureg, temp1,
            ureg_src(temp1), ureg_src(temp1));
   /* temp4 = C = -(p.x * p.x + p.y * p.y) */
   ureg_MUL(ureg, temp3,
            ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_Y),
            ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_Y));
   ureg_MAD(ureg, temp4,
            ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_X),
            ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_X),
            ureg_src(temp3));
   ureg_MOV(ureg, temp4, ureg_negate(ureg_src(temp4)));
   /* temp0 = const0124.w * coords.z * C */
   ureg_MUL(ureg, temp2,
            ureg_scalar(coords, TGSI_SWIZZLE_Z),
            ureg_src(temp4));
   ureg_MUL(ureg, temp0,
            ureg_scalar(const0124, TGSI_SWIZZLE_W),
            ureg_src(temp2));
   /* temp2 = B * B - temp0 (the discriminant) */
   ureg_MUL(ureg, temp3,
            ureg_src(temp1), ureg_src(temp1));
   ureg_SUB(ureg, temp2,
            ureg_src(temp3), ureg_src(temp0));
   /* square root of the absolute discriminant, built as 1 / rsq(|x|) */
   ureg_RSQ(ureg, temp2, ureg_abs(ureg_src(temp2)));
   ureg_RCP(ureg, temp2, ureg_src(temp2));
   /* temp1 = sqrt(|discriminant|) - B */
   ureg_SUB(ureg, temp1,
            ureg_src(temp2), ureg_src(temp1));
   /* temp0 = 1 / (2 * coords.z) */
   ureg_ADD(ureg, temp0,
            ureg_scalar(coords, TGSI_SWIZZLE_Z),
            ureg_scalar(coords, TGSI_SWIZZLE_Z));
   ureg_RCP(ureg, temp0, ureg_src(temp0));
   /* temp2 = lookup coordinate t */
   ureg_MUL(ureg, temp2,
            ureg_src(temp1), ureg_src(temp0));
   ureg_TEX(ureg, out, TGSI_TEXTURE_1D,
            ureg_src(temp2), sampler);

   ureg_release_temporary(ureg, temp0);
   ureg_release_temporary(ureg, temp1);
   ureg_release_temporary(ureg, temp2);
   ureg_release_temporary(ureg, temp3);
   ureg_release_temporary(ureg, temp4);
   ureg_release_temporary(ureg, temp5);
}
227
228 static void *
229 create_vs(struct pipe_context *pipe,
230 unsigned vs_traits)
231 {
232 struct ureg_program *ureg;
233 struct ureg_src src;
234 struct ureg_dst dst;
235 struct ureg_src const0, const1;
236 boolean is_fill = vs_traits & VS_FILL;
237 boolean is_composite = vs_traits & VS_COMPOSITE;
238 boolean has_mask = vs_traits & VS_MASK;
239 unsigned input_slot = 0;
240
241 ureg = ureg_create(TGSI_PROCESSOR_VERTEX);
242 if (ureg == NULL)
243 return 0;
244
245 const0 = ureg_DECL_constant(ureg, 0);
246 const1 = ureg_DECL_constant(ureg, 1);
247
248 /* it has to be either a fill or a composite op */
249 debug_assert(is_fill ^ is_composite);
250
251 src = ureg_DECL_vs_input(ureg, input_slot++);
252 dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0);
253 src = vs_normalize_coords(ureg, src,
254 const0, const1);
255 ureg_MOV(ureg, dst, src);
256
257 if (is_composite) {
258 src = ureg_DECL_vs_input(ureg, input_slot++);
259 dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 0);
260 ureg_MOV(ureg, dst, src);
261 }
262
263 if (is_fill) {
264 src = ureg_DECL_vs_input(ureg, input_slot++);
265 dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
266 ureg_MOV(ureg, dst, src);
267 }
268
269 if (has_mask) {
270 src = ureg_DECL_vs_input(ureg, input_slot++);
271 dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 1);
272 ureg_MOV(ureg, dst, src);
273 }
274
275 ureg_END(ureg);
276
277 return ureg_create_shader_and_destroy(ureg, pipe);
278 }
279
280 static void *
281 create_yuv_shader(struct pipe_context *pipe, struct ureg_program *ureg)
282 {
283 struct ureg_src y_sampler, u_sampler, v_sampler;
284 struct ureg_src pos;
285 struct ureg_src matrow0, matrow1, matrow2;
286 struct ureg_dst y, u, v, rgb;
287 struct ureg_dst out = ureg_DECL_output(ureg,
288 TGSI_SEMANTIC_COLOR,
289 0);
290
291 pos = ureg_DECL_fs_input(ureg,
292 TGSI_SEMANTIC_GENERIC,
293 0,
294 TGSI_INTERPOLATE_PERSPECTIVE);
295
296 rgb = ureg_DECL_temporary(ureg);
297 y = ureg_DECL_temporary(ureg);
298 u = ureg_DECL_temporary(ureg);
299 v = ureg_DECL_temporary(ureg);
300
301 y_sampler = ureg_DECL_sampler(ureg, 0);
302 u_sampler = ureg_DECL_sampler(ureg, 1);
303 v_sampler = ureg_DECL_sampler(ureg, 2);
304
305 matrow0 = ureg_DECL_constant(ureg, 0);
306 matrow1 = ureg_DECL_constant(ureg, 1);
307 matrow2 = ureg_DECL_constant(ureg, 2);
308
309 ureg_TEX(ureg, y,
310 TGSI_TEXTURE_2D, pos, y_sampler);
311 ureg_TEX(ureg, u,
312 TGSI_TEXTURE_2D, pos, u_sampler);
313 ureg_TEX(ureg, v,
314 TGSI_TEXTURE_2D, pos, v_sampler);
315
316 ureg_MUL(ureg, rgb,
317 ureg_scalar(ureg_src(y), TGSI_SWIZZLE_X),
318 matrow0);
319 ureg_MAD(ureg, rgb,
320 ureg_scalar(ureg_src(u), TGSI_SWIZZLE_X),
321 matrow1,
322 ureg_src(rgb));
323 ureg_MAD(ureg, rgb,
324 ureg_scalar(ureg_src(v), TGSI_SWIZZLE_X),
325 matrow2,
326 ureg_src(rgb));
327
328 /* rgb.a = 1; */
329 ureg_MOV(ureg, ureg_writemask(rgb, TGSI_WRITEMASK_W),
330 ureg_scalar(matrow0, TGSI_SWIZZLE_X));
331
332 ureg_MOV(ureg, out, ureg_src(rgb));
333
334 ureg_release_temporary(ureg, rgb);
335 ureg_release_temporary(ureg, y);
336 ureg_release_temporary(ureg, u);
337 ureg_release_temporary(ureg, v);
338
339 ureg_END(ureg);
340
341 return ureg_create_shader_and_destroy(ureg, pipe);
342 }
343
344 static void *
345 create_fs(struct pipe_context *pipe,
346 unsigned fs_traits)
347 {
348 struct ureg_program *ureg;
349 struct ureg_src /*dst_sampler,*/ src_sampler, mask_sampler;
350 struct ureg_src /*dst_pos,*/ src_input, mask_pos;
351 struct ureg_dst src, mask;
352 struct ureg_dst out;
353 boolean has_mask = fs_traits & FS_MASK;
354 boolean is_fill = fs_traits & FS_FILL;
355 boolean is_composite = fs_traits & FS_COMPOSITE;
356 boolean is_solid = fs_traits & FS_SOLID_FILL;
357 boolean is_lingrad = fs_traits & FS_LINGRAD_FILL;
358 boolean is_radgrad = fs_traits & FS_RADGRAD_FILL;
359 unsigned comp_alpha = fs_traits & FS_COMPONENT_ALPHA;
360 boolean is_yuv = fs_traits & FS_YUV;
361
362 ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
363 if (ureg == NULL)
364 return 0;
365
366 /* it has to be either a fill, a composite op or a yuv conversion */
367 debug_assert((is_fill ^ is_composite) ^ is_yuv);
368
369 out = ureg_DECL_output(ureg,
370 TGSI_SEMANTIC_COLOR,
371 0);
372
373 if (is_composite) {
374 src_sampler = ureg_DECL_sampler(ureg, 0);
375 src_input = ureg_DECL_fs_input(ureg,
376 TGSI_SEMANTIC_GENERIC,
377 0,
378 TGSI_INTERPOLATE_PERSPECTIVE);
379 } else if (is_fill) {
380 if (is_solid)
381 src_input = ureg_DECL_fs_input(ureg,
382 TGSI_SEMANTIC_COLOR,
383 0,
384 TGSI_INTERPOLATE_PERSPECTIVE);
385 else
386 src_input = ureg_DECL_fs_input(ureg,
387 TGSI_SEMANTIC_POSITION,
388 0,
389 TGSI_INTERPOLATE_PERSPECTIVE);
390 } else {
391 debug_assert(is_yuv);
392 return create_yuv_shader(pipe, ureg);
393 }
394
395 if (has_mask) {
396 mask_sampler = ureg_DECL_sampler(ureg, 1);
397 mask_pos = ureg_DECL_fs_input(ureg,
398 TGSI_SEMANTIC_GENERIC,
399 1,
400 TGSI_INTERPOLATE_PERSPECTIVE);
401 }
402
403 #if 0 /* unused right now */
404 dst_sampler = ureg_DECL_sampler(ureg, 2);
405 dst_pos = ureg_DECL_fs_input(ureg,
406 TGSI_SEMANTIC_POSITION,
407 2,
408 TGSI_INTERPOLATE_PERSPECTIVE);
409 #endif
410
411 if (is_composite) {
412 if (has_mask)
413 src = ureg_DECL_temporary(ureg);
414 else
415 src = out;
416 ureg_TEX(ureg, src,
417 TGSI_TEXTURE_2D, src_input, src_sampler);
418 } else if (is_fill) {
419 if (is_solid) {
420 if (has_mask)
421 src = ureg_dst(src_input);
422 else
423 ureg_MOV(ureg, out, src_input);
424 } else if (is_lingrad || is_radgrad) {
425 struct ureg_src coords, const0124,
426 matrow0, matrow1, matrow2;
427
428 if (has_mask)
429 src = ureg_DECL_temporary(ureg);
430 else
431 src = out;
432
433 coords = ureg_DECL_constant(ureg, 0);
434 const0124 = ureg_DECL_constant(ureg, 1);
435 matrow0 = ureg_DECL_constant(ureg, 2);
436 matrow1 = ureg_DECL_constant(ureg, 3);
437 matrow2 = ureg_DECL_constant(ureg, 4);
438
439 if (is_lingrad) {
440 linear_gradient(ureg, src,
441 src_input, src_sampler,
442 coords, const0124,
443 matrow0, matrow1, matrow2);
444 } else if (is_radgrad) {
445 radial_gradient(ureg, src,
446 src_input, src_sampler,
447 coords, const0124,
448 matrow0, matrow1, matrow2);
449 }
450 } else
451 debug_assert(!"Unknown fill type!");
452 }
453
454 if (has_mask) {
455 mask = ureg_DECL_temporary(ureg);
456 ureg_TEX(ureg, mask,
457 TGSI_TEXTURE_2D, mask_pos, mask_sampler);
458 /* src IN mask */
459 src_in_mask(ureg, out, ureg_src(src), ureg_src(mask), comp_alpha);
460 ureg_release_temporary(ureg, mask);
461 }
462
463 ureg_END(ureg);
464
465 return ureg_create_shader_and_destroy(ureg, pipe);
466 }
467
468 struct xorg_shaders * xorg_shaders_create(struct xorg_renderer *r)
469 {
470 struct xorg_shaders *sc = CALLOC_STRUCT(xorg_shaders);
471
472 sc->r = r;
473 sc->vs_hash = cso_hash_create();
474 sc->fs_hash = cso_hash_create();
475
476 return sc;
477 }
478
479 static void
480 cache_destroy(struct cso_context *cso,
481 struct cso_hash *hash,
482 unsigned processor)
483 {
484 struct cso_hash_iter iter = cso_hash_first_node(hash);
485 while (!cso_hash_iter_is_null(iter)) {
486 void *shader = (void *)cso_hash_iter_data(iter);
487 if (processor == PIPE_SHADER_FRAGMENT) {
488 cso_delete_fragment_shader(cso, shader);
489 } else if (processor == PIPE_SHADER_VERTEX) {
490 cso_delete_vertex_shader(cso, shader);
491 }
492 iter = cso_hash_erase(hash, iter);
493 }
494 cso_hash_delete(hash);
495 }
496
497 void xorg_shaders_destroy(struct xorg_shaders *sc)
498 {
499 cache_destroy(sc->r->cso, sc->vs_hash,
500 PIPE_SHADER_VERTEX);
501 cache_destroy(sc->r->cso, sc->fs_hash,
502 PIPE_SHADER_FRAGMENT);
503
504 free(sc);
505 }
506
507 static INLINE void *
508 shader_from_cache(struct pipe_context *pipe,
509 unsigned type,
510 struct cso_hash *hash,
511 unsigned key)
512 {
513 void *shader = 0;
514
515 struct cso_hash_iter iter = cso_hash_find(hash, key);
516
517 if (cso_hash_iter_is_null(iter)) {
518 if (type == PIPE_SHADER_VERTEX)
519 shader = create_vs(pipe, key);
520 else
521 shader = create_fs(pipe, key);
522 cso_hash_insert(hash, key, shader);
523 } else
524 shader = (void *)cso_hash_iter_data(iter);
525
526 return shader;
527 }
528
529 struct xorg_shader xorg_shaders_get(struct xorg_shaders *sc,
530 unsigned vs_traits,
531 unsigned fs_traits)
532 {
533 struct xorg_shader shader = { NULL, NULL };
534 void *vs, *fs;
535
536 vs = shader_from_cache(sc->r->pipe, PIPE_SHADER_VERTEX,
537 sc->vs_hash, vs_traits);
538 fs = shader_from_cache(sc->r->pipe, PIPE_SHADER_FRAGMENT,
539 sc->fs_hash, fs_traits);
540
541 debug_assert(vs && fs);
542 if (!vs || !fs)
543 return shader;
544
545 shader.vs = vs;
546 shader.fs = fs;
547
548 return shader;
549 }