st/xorg: add yuv vertex shader
[mesa.git] / src / gallium / state_trackers / xorg / xorg_exa_tgsi.c
1 #include "xorg_exa_tgsi.h"
2
3 /*### stupidity defined in X11/extensions/XI.h */
4 #undef Absolute
5
6 #include "pipe/p_format.h"
7 #include "pipe/p_context.h"
8 #include "pipe/p_state.h"
9 #include "pipe/p_inlines.h"
10 #include "pipe/p_shader_tokens.h"
11
12 #include "util/u_memory.h"
13 #include "util/u_simple_shaders.h"
14
15 #include "tgsi/tgsi_ureg.h"
16
17 #include "cso_cache/cso_context.h"
18 #include "cso_cache/cso_hash.h"
19
20 /* Vertex shader:
21 * IN[0] = vertex pos
22 * IN[1] = src tex coord | solid fill color
23 * IN[2] = mask tex coord
24 * IN[3] = dst tex coord
25 * CONST[0] = (2/dst_width, 2/dst_height, 1, 1)
26 * CONST[1] = (-1, -1, 0, 0)
27 *
28 * OUT[0] = vertex pos
29 * OUT[1] = src tex coord | solid fill color
30 * OUT[2] = mask tex coord
31 * OUT[3] = dst tex coord
32 */
33
34 /* Fragment shader:
35 * SAMP[0] = src
36 * SAMP[1] = mask
37 * SAMP[2] = dst
38 * IN[0] = pos src | solid fill color
39 * IN[1] = pos mask
40 * IN[2] = pos dst
41 * CONST[0] = (0, 0, 0, 1)
42 *
43 * OUT[0] = color
44 */
45
/*
 * Shader cache owned by one renderer.
 *
 * Vertex and fragment shaders are generated on demand and stored in two
 * separate hashes, each keyed by the traits bitmask the shader was built
 * from.
 */
struct xorg_shaders {
   /* Renderer whose pipe/cso contexts are used to build and delete shaders. */
   struct xorg_renderer *r;

   /* Vertex shaders, keyed by the vs_traits bitmask. */
   struct cso_hash *vs_hash;
   /* Fragment shaders, keyed by the fs_traits bitmask. */
   struct cso_hash *fs_hash;
};
52
53 static INLINE void
54 src_in_mask(struct ureg_program *ureg,
55 struct ureg_dst dst,
56 struct ureg_src src,
57 struct ureg_src mask,
58 int component_alpha)
59 {
60 if (component_alpha == FS_CA_FULL) {
61 ureg_MUL(ureg, dst, src, mask);
62 } else if (component_alpha == FS_CA_SRCALPHA) {
63 ureg_MUL(ureg, dst,
64 ureg_scalar(src, TGSI_SWIZZLE_W), mask);
65 }
66 else {
67 ureg_MUL(ureg, dst, src,
68 ureg_scalar(mask, TGSI_SWIZZLE_X));
69 }
70 }
71
72 static struct ureg_src
73 vs_normalize_coords(struct ureg_program *ureg, struct ureg_src coords,
74 struct ureg_src const0, struct ureg_src const1)
75 {
76 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
77 struct ureg_src ret;
78 ureg_MAD(ureg, tmp, coords, const0, const1);
79 ret = ureg_src(tmp);
80 ureg_release_temporary(ureg, tmp);
81 return ret;
82 }
83
84 static void
85 linear_gradient(struct ureg_program *ureg,
86 struct ureg_dst out,
87 struct ureg_src pos,
88 struct ureg_src sampler,
89 struct ureg_src coords,
90 struct ureg_src const0124,
91 struct ureg_src matrow0,
92 struct ureg_src matrow1,
93 struct ureg_src matrow2)
94 {
95 struct ureg_dst temp0 = ureg_DECL_temporary(ureg);
96 struct ureg_dst temp1 = ureg_DECL_temporary(ureg);
97 struct ureg_dst temp2 = ureg_DECL_temporary(ureg);
98 struct ureg_dst temp3 = ureg_DECL_temporary(ureg);
99 struct ureg_dst temp4 = ureg_DECL_temporary(ureg);
100 struct ureg_dst temp5 = ureg_DECL_temporary(ureg);
101
102 ureg_MOV(ureg,
103 ureg_writemask(temp0, TGSI_WRITEMASK_XY), pos);
104 ureg_MOV(ureg,
105 ureg_writemask(temp0, TGSI_WRITEMASK_Z),
106 ureg_scalar(const0124, TGSI_SWIZZLE_Y));
107
108 ureg_DP3(ureg, temp1, matrow0, ureg_src(temp0));
109 ureg_DP3(ureg, temp2, matrow1, ureg_src(temp0));
110 ureg_DP3(ureg, temp3, matrow2, ureg_src(temp0));
111 ureg_RCP(ureg, temp3, ureg_src(temp3));
112 ureg_MUL(ureg, temp1, ureg_src(temp1), ureg_src(temp3));
113 ureg_MUL(ureg, temp2, ureg_src(temp2), ureg_src(temp3));
114
115 ureg_MOV(ureg, ureg_writemask(temp4, TGSI_WRITEMASK_X),
116 ureg_src(temp1));
117 ureg_MOV(ureg, ureg_writemask(temp4, TGSI_WRITEMASK_Y),
118 ureg_src(temp2));
119
120 ureg_MUL(ureg, temp0,
121 ureg_scalar(coords, TGSI_SWIZZLE_Y),
122 ureg_scalar(ureg_src(temp4), TGSI_SWIZZLE_Y));
123 ureg_MAD(ureg, temp1,
124 ureg_scalar(coords, TGSI_SWIZZLE_X),
125 ureg_scalar(ureg_src(temp4), TGSI_SWIZZLE_X),
126 ureg_src(temp0));
127
128 ureg_MUL(ureg, temp2,
129 ureg_src(temp1),
130 ureg_scalar(coords, TGSI_SWIZZLE_Z));
131
132 ureg_TEX(ureg, out,
133 TGSI_TEXTURE_1D, ureg_src(temp2), sampler);
134
135 ureg_release_temporary(ureg, temp0);
136 ureg_release_temporary(ureg, temp1);
137 ureg_release_temporary(ureg, temp2);
138 ureg_release_temporary(ureg, temp3);
139 ureg_release_temporary(ureg, temp4);
140 ureg_release_temporary(ureg, temp5);
141 }
142
143
144 static void
145 radial_gradient(struct ureg_program *ureg,
146 struct ureg_dst out,
147 struct ureg_src pos,
148 struct ureg_src sampler,
149 struct ureg_src coords,
150 struct ureg_src const0124,
151 struct ureg_src matrow0,
152 struct ureg_src matrow1,
153 struct ureg_src matrow2)
154 {
155 struct ureg_dst temp0 = ureg_DECL_temporary(ureg);
156 struct ureg_dst temp1 = ureg_DECL_temporary(ureg);
157 struct ureg_dst temp2 = ureg_DECL_temporary(ureg);
158 struct ureg_dst temp3 = ureg_DECL_temporary(ureg);
159 struct ureg_dst temp4 = ureg_DECL_temporary(ureg);
160 struct ureg_dst temp5 = ureg_DECL_temporary(ureg);
161
162 ureg_MOV(ureg,
163 ureg_writemask(temp0, TGSI_WRITEMASK_XY),
164 pos);
165 ureg_MOV(ureg,
166 ureg_writemask(temp0, TGSI_WRITEMASK_Z),
167 ureg_scalar(const0124, TGSI_SWIZZLE_Y));
168
169 ureg_DP3(ureg, temp1, matrow0, ureg_src(temp0));
170 ureg_DP3(ureg, temp2, matrow1, ureg_src(temp0));
171 ureg_DP3(ureg, temp3, matrow2, ureg_src(temp0));
172 ureg_RCP(ureg, temp3, ureg_src(temp3));
173 ureg_MUL(ureg, temp1, ureg_src(temp1), ureg_src(temp3));
174 ureg_MUL(ureg, temp2, ureg_src(temp2), ureg_src(temp3));
175
176 ureg_MOV(ureg, ureg_writemask(temp5, TGSI_WRITEMASK_X),
177 ureg_src(temp1));
178 ureg_MOV(ureg, ureg_writemask(temp5, TGSI_WRITEMASK_Y),
179 ureg_src(temp2));
180
181 ureg_MUL(ureg, temp0, ureg_scalar(coords, TGSI_SWIZZLE_Y),
182 ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_Y));
183 ureg_MAD(ureg, temp1,
184 ureg_scalar(coords, TGSI_SWIZZLE_X),
185 ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_X),
186 ureg_src(temp0));
187 ureg_ADD(ureg, temp1,
188 ureg_src(temp1), ureg_src(temp1));
189 ureg_MUL(ureg, temp3,
190 ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_Y),
191 ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_Y));
192 ureg_MAD(ureg, temp4,
193 ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_X),
194 ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_X),
195 ureg_src(temp3));
196 ureg_MOV(ureg, temp4, ureg_negate(ureg_src(temp4)));
197 ureg_MUL(ureg, temp2,
198 ureg_scalar(coords, TGSI_SWIZZLE_Z),
199 ureg_src(temp4));
200 ureg_MUL(ureg, temp0,
201 ureg_scalar(const0124, TGSI_SWIZZLE_W),
202 ureg_src(temp2));
203 ureg_MUL(ureg, temp3,
204 ureg_src(temp1), ureg_src(temp1));
205 ureg_SUB(ureg, temp2,
206 ureg_src(temp3), ureg_src(temp0));
207 ureg_RSQ(ureg, temp2, ureg_abs(ureg_src(temp2)));
208 ureg_RCP(ureg, temp2, ureg_src(temp2));
209 ureg_SUB(ureg, temp1,
210 ureg_src(temp2), ureg_src(temp1));
211 ureg_ADD(ureg, temp0,
212 ureg_scalar(coords, TGSI_SWIZZLE_Z),
213 ureg_scalar(coords, TGSI_SWIZZLE_Z));
214 ureg_RCP(ureg, temp0, ureg_src(temp0));
215 ureg_MUL(ureg, temp2,
216 ureg_src(temp1), ureg_src(temp0));
217 ureg_TEX(ureg, out, TGSI_TEXTURE_1D,
218 ureg_src(temp2), sampler);
219
220 ureg_release_temporary(ureg, temp0);
221 ureg_release_temporary(ureg, temp1);
222 ureg_release_temporary(ureg, temp2);
223 ureg_release_temporary(ureg, temp3);
224 ureg_release_temporary(ureg, temp4);
225 ureg_release_temporary(ureg, temp5);
226 }
227
228 static void *
229 create_vs(struct pipe_context *pipe,
230 unsigned vs_traits)
231 {
232 struct ureg_program *ureg;
233 struct ureg_src src;
234 struct ureg_dst dst;
235 struct ureg_src const0, const1;
236 boolean is_fill = vs_traits & VS_FILL;
237 boolean is_composite = vs_traits & VS_COMPOSITE;
238 boolean has_mask = vs_traits & VS_MASK;
239 boolean is_yuv = vs_traits & VS_YUV;
240 unsigned input_slot = 0;
241
242 ureg = ureg_create(TGSI_PROCESSOR_VERTEX);
243 if (ureg == NULL)
244 return 0;
245
246 const0 = ureg_DECL_constant(ureg, 0);
247 const1 = ureg_DECL_constant(ureg, 1);
248
249 /* it has to be either a fill or a composite op */
250 debug_assert(is_fill ^ is_composite);
251
252 src = ureg_DECL_vs_input(ureg, input_slot++);
253 dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0);
254 src = vs_normalize_coords(ureg, src,
255 const0, const1);
256 ureg_MOV(ureg, dst, src);
257
258 if (is_yuv) {
259 src = ureg_DECL_vs_input(ureg, input_slot++);
260 dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 0);
261 ureg_MOV(ureg, dst, src);
262
263 src = ureg_DECL_vs_input(ureg, input_slot++);
264 dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 1);
265 ureg_MOV(ureg, dst, src);
266
267 src = ureg_DECL_vs_input(ureg, input_slot++);
268 dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 2);
269 ureg_MOV(ureg, dst, src);
270 }
271
272 if (is_composite) {
273 src = ureg_DECL_vs_input(ureg, input_slot++);
274 dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 0);
275 ureg_MOV(ureg, dst, src);
276 }
277
278 if (is_fill) {
279 src = ureg_DECL_vs_input(ureg, input_slot++);
280 dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
281 ureg_MOV(ureg, dst, src);
282 }
283
284 if (has_mask) {
285 src = ureg_DECL_vs_input(ureg, input_slot++);
286 dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 1);
287 ureg_MOV(ureg, dst, src);
288 }
289
290 ureg_END(ureg);
291
292 return ureg_create_shader_and_destroy(ureg, pipe);
293 }
294
295 static void *
296 create_yuv_shader(struct pipe_context *pipe, struct ureg_program *ureg)
297 {
298 struct ureg_src y_sampler, u_sampler, v_sampler;
299 struct ureg_src pos;
300 struct ureg_src matrow0, matrow1, matrow2;
301 struct ureg_dst y, u, v, rgb;
302 struct ureg_dst out = ureg_DECL_output(ureg,
303 TGSI_SEMANTIC_COLOR,
304 0);
305
306 pos = ureg_DECL_fs_input(ureg,
307 TGSI_SEMANTIC_GENERIC,
308 0,
309 TGSI_INTERPOLATE_PERSPECTIVE);
310
311 rgb = ureg_DECL_temporary(ureg);
312 y = ureg_DECL_temporary(ureg);
313 u = ureg_DECL_temporary(ureg);
314 v = ureg_DECL_temporary(ureg);
315
316 y_sampler = ureg_DECL_sampler(ureg, 0);
317 u_sampler = ureg_DECL_sampler(ureg, 1);
318 v_sampler = ureg_DECL_sampler(ureg, 2);
319
320 matrow0 = ureg_DECL_constant(ureg, 0);
321 matrow1 = ureg_DECL_constant(ureg, 1);
322 matrow2 = ureg_DECL_constant(ureg, 2);
323
324 ureg_TEX(ureg, y,
325 TGSI_TEXTURE_2D, pos, y_sampler);
326 ureg_TEX(ureg, u,
327 TGSI_TEXTURE_2D, pos, u_sampler);
328 ureg_TEX(ureg, v,
329 TGSI_TEXTURE_2D, pos, v_sampler);
330
331 ureg_MUL(ureg, rgb,
332 ureg_scalar(ureg_src(y), TGSI_SWIZZLE_X),
333 matrow0);
334 ureg_MAD(ureg, rgb,
335 ureg_scalar(ureg_src(u), TGSI_SWIZZLE_X),
336 matrow1,
337 ureg_src(rgb));
338 ureg_MAD(ureg, rgb,
339 ureg_scalar(ureg_src(v), TGSI_SWIZZLE_X),
340 matrow2,
341 ureg_src(rgb));
342
343 /* rgb.a = 1; */
344 ureg_MOV(ureg, ureg_writemask(rgb, TGSI_WRITEMASK_W),
345 ureg_scalar(matrow0, TGSI_SWIZZLE_X));
346
347 ureg_MOV(ureg, out, ureg_src(rgb));
348
349 ureg_release_temporary(ureg, rgb);
350 ureg_release_temporary(ureg, y);
351 ureg_release_temporary(ureg, u);
352 ureg_release_temporary(ureg, v);
353
354 ureg_END(ureg);
355
356 return ureg_create_shader_and_destroy(ureg, pipe);
357 }
358
359 static void *
360 create_fs(struct pipe_context *pipe,
361 unsigned fs_traits)
362 {
363 struct ureg_program *ureg;
364 struct ureg_src /*dst_sampler,*/ src_sampler, mask_sampler;
365 struct ureg_src /*dst_pos,*/ src_input, mask_pos;
366 struct ureg_dst src, mask;
367 struct ureg_dst out;
368 boolean has_mask = fs_traits & FS_MASK;
369 boolean is_fill = fs_traits & FS_FILL;
370 boolean is_composite = fs_traits & FS_COMPOSITE;
371 boolean is_solid = fs_traits & FS_SOLID_FILL;
372 boolean is_lingrad = fs_traits & FS_LINGRAD_FILL;
373 boolean is_radgrad = fs_traits & FS_RADGRAD_FILL;
374 unsigned comp_alpha = fs_traits & FS_COMPONENT_ALPHA;
375 boolean is_yuv = fs_traits & FS_YUV;
376
377 ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
378 if (ureg == NULL)
379 return 0;
380
381 /* it has to be either a fill, a composite op or a yuv conversion */
382 debug_assert((is_fill ^ is_composite) ^ is_yuv);
383
384 out = ureg_DECL_output(ureg,
385 TGSI_SEMANTIC_COLOR,
386 0);
387
388 if (is_composite) {
389 src_sampler = ureg_DECL_sampler(ureg, 0);
390 src_input = ureg_DECL_fs_input(ureg,
391 TGSI_SEMANTIC_GENERIC,
392 0,
393 TGSI_INTERPOLATE_PERSPECTIVE);
394 } else if (is_fill) {
395 if (is_solid)
396 src_input = ureg_DECL_fs_input(ureg,
397 TGSI_SEMANTIC_COLOR,
398 0,
399 TGSI_INTERPOLATE_PERSPECTIVE);
400 else
401 src_input = ureg_DECL_fs_input(ureg,
402 TGSI_SEMANTIC_POSITION,
403 0,
404 TGSI_INTERPOLATE_PERSPECTIVE);
405 } else {
406 debug_assert(is_yuv);
407 return create_yuv_shader(pipe, ureg);
408 }
409
410 if (has_mask) {
411 mask_sampler = ureg_DECL_sampler(ureg, 1);
412 mask_pos = ureg_DECL_fs_input(ureg,
413 TGSI_SEMANTIC_GENERIC,
414 1,
415 TGSI_INTERPOLATE_PERSPECTIVE);
416 }
417
418 #if 0 /* unused right now */
419 dst_sampler = ureg_DECL_sampler(ureg, 2);
420 dst_pos = ureg_DECL_fs_input(ureg,
421 TGSI_SEMANTIC_POSITION,
422 2,
423 TGSI_INTERPOLATE_PERSPECTIVE);
424 #endif
425
426 if (is_composite) {
427 if (has_mask)
428 src = ureg_DECL_temporary(ureg);
429 else
430 src = out;
431 ureg_TEX(ureg, src,
432 TGSI_TEXTURE_2D, src_input, src_sampler);
433 } else if (is_fill) {
434 if (is_solid) {
435 if (has_mask)
436 src = ureg_dst(src_input);
437 else
438 ureg_MOV(ureg, out, src_input);
439 } else if (is_lingrad || is_radgrad) {
440 struct ureg_src coords, const0124,
441 matrow0, matrow1, matrow2;
442
443 if (has_mask)
444 src = ureg_DECL_temporary(ureg);
445 else
446 src = out;
447
448 coords = ureg_DECL_constant(ureg, 0);
449 const0124 = ureg_DECL_constant(ureg, 1);
450 matrow0 = ureg_DECL_constant(ureg, 2);
451 matrow1 = ureg_DECL_constant(ureg, 3);
452 matrow2 = ureg_DECL_constant(ureg, 4);
453
454 if (is_lingrad) {
455 linear_gradient(ureg, src,
456 src_input, src_sampler,
457 coords, const0124,
458 matrow0, matrow1, matrow2);
459 } else if (is_radgrad) {
460 radial_gradient(ureg, src,
461 src_input, src_sampler,
462 coords, const0124,
463 matrow0, matrow1, matrow2);
464 }
465 } else
466 debug_assert(!"Unknown fill type!");
467 }
468
469 if (has_mask) {
470 mask = ureg_DECL_temporary(ureg);
471 ureg_TEX(ureg, mask,
472 TGSI_TEXTURE_2D, mask_pos, mask_sampler);
473 /* src IN mask */
474 src_in_mask(ureg, out, ureg_src(src), ureg_src(mask), comp_alpha);
475 ureg_release_temporary(ureg, mask);
476 }
477
478 ureg_END(ureg);
479
480 return ureg_create_shader_and_destroy(ureg, pipe);
481 }
482
483 struct xorg_shaders * xorg_shaders_create(struct xorg_renderer *r)
484 {
485 struct xorg_shaders *sc = CALLOC_STRUCT(xorg_shaders);
486
487 sc->r = r;
488 sc->vs_hash = cso_hash_create();
489 sc->fs_hash = cso_hash_create();
490
491 return sc;
492 }
493
494 static void
495 cache_destroy(struct cso_context *cso,
496 struct cso_hash *hash,
497 unsigned processor)
498 {
499 struct cso_hash_iter iter = cso_hash_first_node(hash);
500 while (!cso_hash_iter_is_null(iter)) {
501 void *shader = (void *)cso_hash_iter_data(iter);
502 if (processor == PIPE_SHADER_FRAGMENT) {
503 cso_delete_fragment_shader(cso, shader);
504 } else if (processor == PIPE_SHADER_VERTEX) {
505 cso_delete_vertex_shader(cso, shader);
506 }
507 iter = cso_hash_erase(hash, iter);
508 }
509 cso_hash_delete(hash);
510 }
511
512 void xorg_shaders_destroy(struct xorg_shaders *sc)
513 {
514 cache_destroy(sc->r->cso, sc->vs_hash,
515 PIPE_SHADER_VERTEX);
516 cache_destroy(sc->r->cso, sc->fs_hash,
517 PIPE_SHADER_FRAGMENT);
518
519 free(sc);
520 }
521
522 static INLINE void *
523 shader_from_cache(struct pipe_context *pipe,
524 unsigned type,
525 struct cso_hash *hash,
526 unsigned key)
527 {
528 void *shader = 0;
529
530 struct cso_hash_iter iter = cso_hash_find(hash, key);
531
532 if (cso_hash_iter_is_null(iter)) {
533 if (type == PIPE_SHADER_VERTEX)
534 shader = create_vs(pipe, key);
535 else
536 shader = create_fs(pipe, key);
537 cso_hash_insert(hash, key, shader);
538 } else
539 shader = (void *)cso_hash_iter_data(iter);
540
541 return shader;
542 }
543
544 struct xorg_shader xorg_shaders_get(struct xorg_shaders *sc,
545 unsigned vs_traits,
546 unsigned fs_traits)
547 {
548 struct xorg_shader shader = { NULL, NULL };
549 void *vs, *fs;
550
551 vs = shader_from_cache(sc->r->pipe, PIPE_SHADER_VERTEX,
552 sc->vs_hash, vs_traits);
553 fs = shader_from_cache(sc->r->pipe, PIPE_SHADER_FRAGMENT,
554 sc->fs_hash, fs_traits);
555
556 debug_assert(vs && fs);
557 if (!vs || !fs)
558 return shader;
559
560 shader.vs = vs;
561 shader.fs = fs;
562
563 return shader;
564 }