8c9b674b4b6bebf972fa838dbdb5b0189666dc94
[mesa.git] / src / gallium / state_trackers / xorg / xorg_exa_tgsi.c
1 #include "xorg_exa_tgsi.h"
2
3 /*### stupidity defined in X11/extensions/XI.h */
4 #undef Absolute
5
6 #include "pipe/p_format.h"
7 #include "pipe/p_context.h"
8 #include "pipe/p_state.h"
9 #include "pipe/p_inlines.h"
10 #include "pipe/p_shader_tokens.h"
11
12 #include "util/u_memory.h"
13 #include "util/u_simple_shaders.h"
14
15 #include "tgsi/tgsi_ureg.h"
16
17 #include "cso_cache/cso_context.h"
18 #include "cso_cache/cso_hash.h"
19
20 /* Vertex shader:
21 * IN[0] = vertex pos
22 * IN[1] = src tex coord | solid fill color
23 * IN[2] = mask tex coord
24 * IN[3] = dst tex coord
25 * CONST[0] = (2/dst_width, 2/dst_height, 1, 1)
26 * CONST[1] = (-1, -1, 0, 0)
27 *
28 * OUT[0] = vertex pos
29 * OUT[1] = src tex coord | solid fill color
30 * OUT[2] = mask tex coord
31 * OUT[3] = dst tex coord
32 */
33
34 /* Fragment shader:
35 * SAMP[0] = src
36 * SAMP[1] = mask
37 * SAMP[2] = dst
38 * IN[0] = pos src | solid fill color
39 * IN[1] = pos mask
40 * IN[2] = pos dst
41 * CONST[0] = (0, 0, 0, 1)
42 *
43 * OUT[0] = color
44 */
45
/* Per-renderer cache of compiled shaders, keyed by the traits bitmasks
 * passed to xorg_shaders_get(). */
struct xorg_shaders {
   struct xorg_renderer *r;   /* owning renderer; provides pipe and cso ctx */

   struct cso_hash *vs_hash;  /* vs_traits -> vertex shader CSO */
   struct cso_hash *fs_hash;  /* fs_traits -> fragment shader CSO */
};
52
/* TGSI assembly text for the OVER compositing step:
 *   TEMP[3] = TEMP[0] + (CONST[0].w - TEMP[1].w) * TEMP[0]
 * NOTE(review): appears unused in this file; presumably appended to shader
 * text elsewhere with TEMP[0] = src, TEMP[1] = dst, CONST[0].w = 1.0 --
 * confirm against callers before relying on the register roles. */
static const char over_op[] =
   "SUB TEMP[3], CONST[0].wwww, TEMP[1].wwww\n"
   "MAD TEMP[3], TEMP[0], TEMP[3], TEMP[0]\n";
56
57
/* Placeholder for a common shader prologue; intentionally emits nothing
 * at the moment. */
static INLINE void
create_preamble(struct ureg_program *ureg)
{
}
62
63
64 static INLINE void
65 src_in_mask(struct ureg_program *ureg,
66 struct ureg_dst dst,
67 struct ureg_src src,
68 struct ureg_src mask)
69 {
70 #if 0
71 /* MUL dst, src, mask.a */
72 ureg_MUL(ureg, dst, src,
73 ureg_scalar(mask, TGSI_SWIZZLE_W));
74 #else
75 /* MOV dst, src */
76 /* MUL dst.a, src.a, mask.a */
77 ureg_MOV(ureg, dst, src);
78 ureg_MUL(ureg,
79 ureg_writemask(dst, TGSI_WRITEMASK_W),
80 ureg_scalar(src, TGSI_SWIZZLE_W),
81 ureg_scalar(mask, TGSI_SWIZZLE_W));
82 #endif
83 }
84
/*
 * Map incoming vertex coordinates to clip space:
 *    ret = coords * const0 + const1
 * where const0/const1 are the CONST[0]/CONST[1] vectors documented in the
 * vertex-shader layout comment at the top of this file
 * ((2/dst_width, 2/dst_height, 1, 1) and (-1, -1, 0, 0)).
 *
 * NOTE(review): the temporary register is released *before* the returned
 * ureg_src referencing it is consumed by the caller.  This only works
 * because callers use the result immediately, without declaring another
 * temporary in between -- confirm this invariant when adding call sites.
 */
static struct ureg_src
vs_normalize_coords(struct ureg_program *ureg, struct ureg_src coords,
                    struct ureg_src const0, struct ureg_src const1)
{
   struct ureg_dst tmp = ureg_DECL_temporary(ureg);
   struct ureg_src ret;
   ureg_MAD(ureg, tmp, coords, const0, const1);
   ret = ureg_src(tmp);
   ureg_release_temporary(ureg, tmp);
   return ret;
}
96
97 static void
98 linear_gradient(struct ureg_program *ureg,
99 struct ureg_dst out,
100 struct ureg_src pos,
101 struct ureg_src sampler,
102 struct ureg_src coords,
103 struct ureg_src const0124,
104 struct ureg_src matrow0,
105 struct ureg_src matrow1,
106 struct ureg_src matrow2)
107 {
108 struct ureg_dst temp0 = ureg_DECL_temporary(ureg);
109 struct ureg_dst temp1 = ureg_DECL_temporary(ureg);
110 struct ureg_dst temp2 = ureg_DECL_temporary(ureg);
111 struct ureg_dst temp3 = ureg_DECL_temporary(ureg);
112 struct ureg_dst temp4 = ureg_DECL_temporary(ureg);
113 struct ureg_dst temp5 = ureg_DECL_temporary(ureg);
114
115 ureg_MOV(ureg,
116 ureg_writemask(temp0, TGSI_WRITEMASK_XY), pos);
117 ureg_MOV(ureg,
118 ureg_writemask(temp0, TGSI_WRITEMASK_Z),
119 ureg_scalar(const0124, TGSI_SWIZZLE_Y));
120
121 ureg_DP3(ureg, temp1, matrow0, ureg_src(temp0));
122 ureg_DP3(ureg, temp2, matrow1, ureg_src(temp0));
123 ureg_DP3(ureg, temp3, matrow2, ureg_src(temp0));
124 ureg_RCP(ureg, temp3, ureg_src(temp3));
125 ureg_MUL(ureg, temp1, ureg_src(temp1), ureg_src(temp3));
126 ureg_MUL(ureg, temp2, ureg_src(temp2), ureg_src(temp3));
127
128 ureg_MOV(ureg, ureg_writemask(temp4, TGSI_WRITEMASK_X),
129 ureg_src(temp1));
130 ureg_MOV(ureg, ureg_writemask(temp4, TGSI_WRITEMASK_Y),
131 ureg_src(temp2));
132
133 ureg_MUL(ureg, temp0,
134 ureg_scalar(coords, TGSI_SWIZZLE_Y),
135 ureg_scalar(ureg_src(temp4), TGSI_SWIZZLE_Y));
136 ureg_MAD(ureg, temp1,
137 ureg_scalar(coords, TGSI_SWIZZLE_X),
138 ureg_scalar(ureg_src(temp4), TGSI_SWIZZLE_X),
139 ureg_src(temp0));
140
141 ureg_MUL(ureg, temp2,
142 ureg_src(temp1),
143 ureg_scalar(coords, TGSI_SWIZZLE_Z));
144
145 ureg_TEX(ureg, out,
146 TGSI_TEXTURE_1D, ureg_src(temp2), sampler);
147
148 ureg_release_temporary(ureg, temp0);
149 ureg_release_temporary(ureg, temp1);
150 ureg_release_temporary(ureg, temp2);
151 ureg_release_temporary(ureg, temp3);
152 ureg_release_temporary(ureg, temp4);
153 ureg_release_temporary(ureg, temp5);
154 }
155
156
/*
 * Emit TGSI instructions computing a radial gradient fill into "out".
 *
 * The fragment position is extended to (x, y, 1), transformed by the 3x3
 * matrix matrow0..matrow2 (with perspective divide), and then a quadratic
 * is solved to find the gradient parameter, which samples the 1D ramp
 * texture through "sampler".
 *
 * NOTE(review): coords and const0124 come straight from constant slots
 * set by the CPU side; const0124.w is used as the "4" coefficient of the
 * quadratic's 4ac term, consistent with the (0,1,2,4) naming -- confirm
 * the constant upload against the caller that fills these slots.
 */
static void
radial_gradient(struct ureg_program *ureg,
                struct ureg_dst out,
                struct ureg_src pos,
                struct ureg_src sampler,
                struct ureg_src coords,
                struct ureg_src const0124,
                struct ureg_src matrow0,
                struct ureg_src matrow1,
                struct ureg_src matrow2)
{
   struct ureg_dst temp0 = ureg_DECL_temporary(ureg);
   struct ureg_dst temp1 = ureg_DECL_temporary(ureg);
   struct ureg_dst temp2 = ureg_DECL_temporary(ureg);
   struct ureg_dst temp3 = ureg_DECL_temporary(ureg);
   struct ureg_dst temp4 = ureg_DECL_temporary(ureg);
   struct ureg_dst temp5 = ureg_DECL_temporary(ureg);

   /* temp0 = (pos.x, pos.y, 1) */
   ureg_MOV(ureg,
            ureg_writemask(temp0, TGSI_WRITEMASK_XY),
            pos);
   ureg_MOV(ureg,
            ureg_writemask(temp0, TGSI_WRITEMASK_Z),
            ureg_scalar(const0124, TGSI_SWIZZLE_Y));

   /* (temp1, temp2) = M * temp0, divided through by the third row */
   ureg_DP3(ureg, temp1, matrow0, ureg_src(temp0));
   ureg_DP3(ureg, temp2, matrow1, ureg_src(temp0));
   ureg_DP3(ureg, temp3, matrow2, ureg_src(temp0));
   ureg_RCP(ureg, temp3, ureg_src(temp3));
   ureg_MUL(ureg, temp1, ureg_src(temp1), ureg_src(temp3));
   ureg_MUL(ureg, temp2, ureg_src(temp2), ureg_src(temp3));

   /* temp5.xy = transformed position */
   ureg_MOV(ureg, ureg_writemask(temp5, TGSI_WRITEMASK_X),
            ureg_src(temp1));
   ureg_MOV(ureg, ureg_writemask(temp5, TGSI_WRITEMASK_Y),
            ureg_src(temp2));

   /* temp1 = b = 2 * dot(temp5.xy, coords.xy) */
   ureg_MUL(ureg, temp0, ureg_scalar(coords, TGSI_SWIZZLE_Y),
            ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_Y));
   ureg_MAD(ureg, temp1,
            ureg_scalar(coords, TGSI_SWIZZLE_X),
            ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_X),
            ureg_src(temp0));
   ureg_ADD(ureg, temp1,
            ureg_src(temp1), ureg_src(temp1));
   /* temp4 = -(x^2 + y^2) of the transformed position */
   ureg_MUL(ureg, temp3,
            ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_Y),
            ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_Y));
   ureg_MAD(ureg, temp4,
            ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_X),
            ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_X),
            ureg_src(temp3));
   ureg_MOV(ureg, temp4, ureg_negate(ureg_src(temp4)));
   /* temp0 = 4*a*c term: const0124.w * coords.z * temp4 */
   ureg_MUL(ureg, temp2,
            ureg_scalar(coords, TGSI_SWIZZLE_Z),
            ureg_src(temp4));
   ureg_MUL(ureg, temp0,
            ureg_scalar(const0124, TGSI_SWIZZLE_W),
            ureg_src(temp2));
   /* temp2 = sqrt(|b^2 - 4ac|)  (RSQ of |disc|, then RCP to get sqrt) */
   ureg_MUL(ureg, temp3,
            ureg_src(temp1), ureg_src(temp1));
   ureg_SUB(ureg, temp2,
            ureg_src(temp3), ureg_src(temp0));
   ureg_RSQ(ureg, temp2, ureg_abs(ureg_src(temp2)));
   ureg_RCP(ureg, temp2, ureg_src(temp2));
   /* temp2 = (sqrt(disc) - b) / (2 * coords.z) */
   ureg_SUB(ureg, temp1,
            ureg_src(temp2), ureg_src(temp1));
   ureg_ADD(ureg, temp0,
            ureg_scalar(coords, TGSI_SWIZZLE_Z),
            ureg_scalar(coords, TGSI_SWIZZLE_Z));
   ureg_RCP(ureg, temp0, ureg_src(temp0));
   ureg_MUL(ureg, temp2,
            ureg_src(temp1), ureg_src(temp0));
   /* sample the 1D gradient ramp at the solved parameter */
   ureg_TEX(ureg, out, TGSI_TEXTURE_1D,
            ureg_src(temp2), sampler);

   ureg_release_temporary(ureg, temp0);
   ureg_release_temporary(ureg, temp1);
   ureg_release_temporary(ureg, temp2);
   ureg_release_temporary(ureg, temp3);
   ureg_release_temporary(ureg, temp4);
   ureg_release_temporary(ureg, temp5);
}
240
241 static void *
242 create_vs(struct pipe_context *pipe,
243 unsigned vs_traits)
244 {
245 struct ureg_program *ureg;
246 struct ureg_src src;
247 struct ureg_dst dst;
248 struct ureg_src const0, const1;
249 boolean is_fill = vs_traits & VS_FILL;
250 boolean is_composite = vs_traits & VS_COMPOSITE;
251 boolean has_mask = vs_traits & VS_MASK;
252 unsigned input_slot = 0;
253
254 ureg = ureg_create(TGSI_PROCESSOR_VERTEX);
255 if (ureg == NULL)
256 return 0;
257
258 const0 = ureg_DECL_constant(ureg, 0);
259 const1 = ureg_DECL_constant(ureg, 1);
260
261 /* it has to be either a fill or a composite op */
262 debug_assert(is_fill ^ is_composite);
263
264 src = ureg_DECL_vs_input(ureg, input_slot++);
265 dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0);
266 src = vs_normalize_coords(ureg, src,
267 const0, const1);
268 ureg_MOV(ureg, dst, src);
269
270 if (is_composite) {
271 src = ureg_DECL_vs_input(ureg, input_slot++);
272 dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 0);
273 ureg_MOV(ureg, dst, src);
274 }
275
276 if (is_fill) {
277 src = ureg_DECL_vs_input(ureg, input_slot++);
278 dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
279 ureg_MOV(ureg, dst, src);
280 }
281
282 if (has_mask) {
283 src = ureg_DECL_vs_input(ureg, input_slot++);
284 dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 1);
285 ureg_MOV(ureg, dst, src);
286 }
287
288 ureg_END(ureg);
289
290 return ureg_create_shader_and_destroy(ureg, pipe);
291 }
292
293 static void *
294 create_fs(struct pipe_context *pipe,
295 unsigned fs_traits)
296 {
297 struct ureg_program *ureg;
298 struct ureg_src /*dst_sampler,*/ src_sampler, mask_sampler;
299 struct ureg_src /*dst_pos,*/ src_input, mask_pos;
300 struct ureg_dst src, mask;
301 struct ureg_dst out;
302 boolean has_mask = fs_traits & FS_MASK;
303 boolean is_fill = fs_traits & FS_FILL;
304 boolean is_composite = fs_traits & FS_COMPOSITE;
305 boolean is_solid = fs_traits & FS_SOLID_FILL;
306 boolean is_lingrad = fs_traits & FS_LINGRAD_FILL;
307 boolean is_radgrad = fs_traits & FS_RADGRAD_FILL;
308
309 ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
310 if (ureg == NULL)
311 return 0;
312
313 /* it has to be either a fill or a composite op */
314 debug_assert(is_fill ^ is_composite);
315
316 out = ureg_DECL_output(ureg,
317 TGSI_SEMANTIC_COLOR,
318 0);
319
320 if (is_composite) {
321 src_sampler = ureg_DECL_sampler(ureg, 0);
322 src_input = ureg_DECL_fs_input(ureg,
323 TGSI_SEMANTIC_GENERIC,
324 0,
325 TGSI_INTERPOLATE_PERSPECTIVE);
326 } else {
327 debug_assert(is_fill);
328 if (is_solid)
329 src_input = ureg_DECL_fs_input(ureg,
330 TGSI_SEMANTIC_COLOR,
331 0,
332 TGSI_INTERPOLATE_PERSPECTIVE);
333 else
334 src_input = ureg_DECL_fs_input(ureg,
335 TGSI_SEMANTIC_POSITION,
336 0,
337 TGSI_INTERPOLATE_PERSPECTIVE);
338 }
339
340 if (has_mask) {
341 mask_sampler = ureg_DECL_sampler(ureg, 1);
342 mask_pos = ureg_DECL_fs_input(ureg,
343 TGSI_SEMANTIC_GENERIC,
344 1,
345 TGSI_INTERPOLATE_PERSPECTIVE);
346 }
347
348 #if 0 /* unused right now */
349 dst_sampler = ureg_DECL_sampler(ureg, 2);
350 dst_pos = ureg_DECL_fs_input(ureg,
351 TGSI_SEMANTIC_POSITION,
352 2,
353 TGSI_INTERPOLATE_PERSPECTIVE);
354 #endif
355
356 if (is_composite) {
357 if (has_mask)
358 src = ureg_DECL_temporary(ureg);
359 else
360 src = out;
361 ureg_TEX(ureg, src,
362 TGSI_TEXTURE_2D, src_input, src_sampler);
363 } else if (is_fill) {
364 if (is_solid) {
365 if (has_mask)
366 src = ureg_dst(src_input);
367 else
368 ureg_MOV(ureg, out, src_input);
369 } else if (is_lingrad || is_radgrad) {
370 struct ureg_src coords, const0124,
371 matrow0, matrow1, matrow2;
372
373 if (has_mask)
374 src = ureg_DECL_temporary(ureg);
375 else
376 src = out;
377
378 coords = ureg_DECL_constant(ureg, 0);
379 const0124 = ureg_DECL_constant(ureg, 1);
380 matrow0 = ureg_DECL_constant(ureg, 2);
381 matrow1 = ureg_DECL_constant(ureg, 3);
382 matrow2 = ureg_DECL_constant(ureg, 4);
383
384 if (is_lingrad) {
385 linear_gradient(ureg, src,
386 src_input, src_sampler,
387 coords, const0124,
388 matrow0, matrow1, matrow2);
389 } else if (is_radgrad) {
390 radial_gradient(ureg, src,
391 src_input, src_sampler,
392 coords, const0124,
393 matrow0, matrow1, matrow2);
394 }
395 } else
396 debug_assert(!"Unknown fill type!");
397 }
398
399 if (has_mask) {
400 mask = ureg_DECL_temporary(ureg);
401 ureg_TEX(ureg, mask,
402 TGSI_TEXTURE_2D, mask_pos, mask_sampler);
403 /* src IN mask */
404 src_in_mask(ureg, out, ureg_src(src), ureg_src(mask));
405 ureg_release_temporary(ureg, mask);
406 }
407
408 ureg_END(ureg);
409
410 return ureg_create_shader_and_destroy(ureg, pipe);
411 }
412
413 struct xorg_shaders * xorg_shaders_create(struct xorg_renderer *r)
414 {
415 struct xorg_shaders *sc = CALLOC_STRUCT(xorg_shaders);
416
417 sc->r = r;
418 sc->vs_hash = cso_hash_create();
419 sc->fs_hash = cso_hash_create();
420
421 return sc;
422 }
423
424 static void
425 cache_destroy(struct cso_context *cso,
426 struct cso_hash *hash,
427 unsigned processor)
428 {
429 struct cso_hash_iter iter = cso_hash_first_node(hash);
430 while (!cso_hash_iter_is_null(iter)) {
431 void *shader = (void *)cso_hash_iter_data(iter);
432 if (processor == PIPE_SHADER_FRAGMENT) {
433 cso_delete_fragment_shader(cso, shader);
434 } else if (processor == PIPE_SHADER_VERTEX) {
435 cso_delete_vertex_shader(cso, shader);
436 }
437 iter = cso_hash_erase(hash, iter);
438 }
439 cso_hash_delete(hash);
440 }
441
442 void xorg_shaders_destroy(struct xorg_shaders *sc)
443 {
444 cache_destroy(sc->r->cso, sc->vs_hash,
445 PIPE_SHADER_VERTEX);
446 cache_destroy(sc->r->cso, sc->fs_hash,
447 PIPE_SHADER_FRAGMENT);
448
449 free(sc);
450 }
451
452 static INLINE void *
453 shader_from_cache(struct pipe_context *pipe,
454 unsigned type,
455 struct cso_hash *hash,
456 unsigned key)
457 {
458 void *shader = 0;
459
460 struct cso_hash_iter iter = cso_hash_find(hash, key);
461
462 if (cso_hash_iter_is_null(iter)) {
463 if (type == PIPE_SHADER_VERTEX)
464 shader = create_vs(pipe, key);
465 else
466 shader = create_fs(pipe, key);
467 cso_hash_insert(hash, key, shader);
468 } else
469 shader = (void *)cso_hash_iter_data(iter);
470
471 return shader;
472 }
473
474 struct xorg_shader xorg_shaders_get(struct xorg_shaders *sc,
475 unsigned vs_traits,
476 unsigned fs_traits)
477 {
478 struct xorg_shader shader = { NULL, NULL };
479 void *vs, *fs;
480
481 vs = shader_from_cache(sc->r->pipe, PIPE_SHADER_VERTEX,
482 sc->vs_hash, vs_traits);
483 fs = shader_from_cache(sc->r->pipe, PIPE_SHADER_FRAGMENT,
484 sc->fs_hash, fs_traits);
485
486 debug_assert(vs && fs);
487 if (!vs || !fs)
488 return shader;
489
490 shader.vs = vs;
491 shader.fs = fs;
492
493 return shader;
494 }