r300: Zero-initialize register for NV_vertex_program
[mesa.git] / src / gallium / state_trackers / xorg / xorg_exa_tgsi.c
1 #include "xorg_exa_tgsi.h"
2
3 /*### stupidity defined in X11/extensions/XI.h */
4 #undef Absolute
5
6 #include "pipe/p_format.h"
7 #include "pipe/p_context.h"
8 #include "pipe/p_state.h"
9 #include "pipe/p_inlines.h"
10 #include "pipe/p_shader_tokens.h"
11
12 #include "util/u_memory.h"
13 #include "util/u_simple_shaders.h"
14
15 #include "tgsi/tgsi_ureg.h"
16
17 #include "cso_cache/cso_context.h"
18 #include "cso_cache/cso_hash.h"
19
20 /* Vertex shader:
21 * IN[0] = vertex pos
22 * IN[1] = src tex coord | solid fill color
23 * IN[2] = mask tex coord
24 * IN[3] = dst tex coord
25 * CONST[0] = (2/dst_width, 2/dst_height, 1, 1)
26 * CONST[1] = (-1, -1, 0, 0)
27 *
28 * OUT[0] = vertex pos
29 * OUT[1] = src tex coord | solid fill color
30 * OUT[2] = mask tex coord
31 * OUT[3] = dst tex coord
32 */
33
34 /* Fragment shader:
35 * SAMP[0] = src
36 * SAMP[1] = mask
37 * SAMP[2] = dst
38 * IN[0] = pos src | solid fill color
39 * IN[1] = pos mask
40 * IN[2] = pos dst
41 * CONST[0] = (0, 0, 0, 1)
42 *
43 * OUT[0] = color
44 */
45
/* Per-context cache of EXA composite shaders, keyed by the
 * VS/FS trait bit masks (see shader_from_cache / xorg_shaders_get).
 */
struct xorg_shaders {
   struct exa_context *exa;   /* owning EXA context; not owned by this struct */

   struct cso_hash *vs_hash;  /* vs_traits -> vertex shader CSO handle */
   struct cso_hash *fs_hash;  /* fs_traits -> fragment shader CSO handle */
};
52
/* TGSI text fragment for the Render OVER step:
 *   TEMP[3] = CONST[0].w - TEMP[1].w   (i.e. 1 - alpha, given CONST[0] = (0,0,0,1))
 *   TEMP[0] = TEMP[0] * TEMP[3] + TEMP[0]
 * NOTE(review): not referenced anywhere in this file as shown --
 * presumably consumed by a text-based shader path elsewhere; verify
 * the formula against the intended OVER semantics before reuse.
 */
static const char over_op[] =
   "SUB TEMP[3], CONST[0].wwww, TEMP[1].wwww\n"
   "MUL TEMP[3], TEMP[0], TEMP[3]\n"
   "ADD TEMP[0], TEMP[3], TEMP[0]\n";
57
58
/* Placeholder for emitting a common shader preamble; currently emits
 * nothing. Kept so callers have a single hook point. */
static INLINE void
create_preamble(struct ureg_program *ureg)
{
}
63
64
65 static INLINE void
66 src_in_mask(struct ureg_program *ureg,
67 struct ureg_dst dst,
68 struct ureg_src src,
69 struct ureg_src mask)
70 {
71 /* MUL dst, src, mask.wwww */
72 ureg_MUL(ureg, dst, src,
73 ureg_scalar(mask, TGSI_SWIZZLE_W));
74 }
75
/* Emit code mapping destination-pixel vertex coords to clip space:
 *   ret = coords * const0 + const1
 * With CONST[0] = (2/dst_w, 2/dst_h, 1, 1) and CONST[1] = (-1, -1, 0, 0)
 * (see the vertex-shader comment at the top of this file) this yields
 * the usual [-1, 1] NDC range.
 */
static struct ureg_src
vs_normalize_coords(struct ureg_program *ureg, struct ureg_src coords,
                    struct ureg_src const0, struct ureg_src const1)
{
   struct ureg_dst tmp = ureg_DECL_temporary(ureg);
   struct ureg_src ret;
   ureg_MUL(ureg, tmp, coords, const0);        /* scale */
   ureg_ADD(ureg, tmp, ureg_src(tmp), const1); /* bias */
   ret = ureg_src(tmp);
   /* NOTE(review): the temporary is released before the ureg_src that
    * references it is returned; this is only safe if the caller consumes
    * 'ret' before declaring another temporary -- TODO confirm against
    * ureg temporary-reuse rules. */
   ureg_release_temporary(ureg, tmp);
   return ret;
}
88
89 static void
90 linear_gradient(struct ureg_program *ureg,
91 struct ureg_dst out,
92 struct ureg_src pos,
93 struct ureg_src sampler,
94 struct ureg_src coords,
95 struct ureg_src const0124,
96 struct ureg_src matrow0,
97 struct ureg_src matrow1,
98 struct ureg_src matrow2)
99 {
100 struct ureg_dst temp0 = ureg_DECL_temporary(ureg);
101 struct ureg_dst temp1 = ureg_DECL_temporary(ureg);
102 struct ureg_dst temp2 = ureg_DECL_temporary(ureg);
103 struct ureg_dst temp3 = ureg_DECL_temporary(ureg);
104 struct ureg_dst temp4 = ureg_DECL_temporary(ureg);
105 struct ureg_dst temp5 = ureg_DECL_temporary(ureg);
106
107 ureg_MOV(ureg,
108 ureg_writemask(temp0, TGSI_WRITEMASK_XY), pos);
109 ureg_MOV(ureg,
110 ureg_writemask(temp0, TGSI_WRITEMASK_Z),
111 ureg_scalar(const0124, TGSI_SWIZZLE_Y));
112
113 ureg_DP3(ureg, temp1, matrow0, ureg_src(temp0));
114 ureg_DP3(ureg, temp2, matrow1, ureg_src(temp0));
115 ureg_DP3(ureg, temp3, matrow2, ureg_src(temp0));
116 ureg_RCP(ureg, temp3, ureg_src(temp3));
117 ureg_MUL(ureg, temp1, ureg_src(temp1), ureg_src(temp3));
118 ureg_MUL(ureg, temp2, ureg_src(temp2), ureg_src(temp3));
119
120 ureg_MOV(ureg, ureg_writemask(temp4, TGSI_WRITEMASK_X),
121 ureg_src(temp1));
122 ureg_MOV(ureg, ureg_writemask(temp4, TGSI_WRITEMASK_Y),
123 ureg_src(temp2));
124
125 ureg_MUL(ureg, temp0,
126 ureg_scalar(coords, TGSI_SWIZZLE_Y),
127 ureg_scalar(ureg_src(temp4), TGSI_SWIZZLE_Y));
128 ureg_MAD(ureg, temp1,
129 ureg_scalar(coords, TGSI_SWIZZLE_X),
130 ureg_scalar(ureg_src(temp4), TGSI_SWIZZLE_X),
131 ureg_src(temp0));
132
133 ureg_MUL(ureg, temp2,
134 ureg_src(temp1),
135 ureg_scalar(coords, TGSI_SWIZZLE_Z));
136
137 ureg_TEX(ureg, out,
138 TGSI_TEXTURE_1D, ureg_src(temp2), sampler);
139
140 ureg_release_temporary(ureg, temp0);
141 ureg_release_temporary(ureg, temp1);
142 ureg_release_temporary(ureg, temp2);
143 ureg_release_temporary(ureg, temp3);
144 ureg_release_temporary(ureg, temp4);
145 ureg_release_temporary(ureg, temp5);
146 }
147
148
/* Emit TGSI evaluating a radial gradient at fragment position 'pos' and
 * sampling the gradient ramp (a 1D texture bound to 'sampler') into 'out'.
 *
 * matrow0..matrow2 hold a 3x3 (projective) transform applied to the
 * homogeneous position. 'coords' and 'const0124' pack the gradient
 * parameters and numeric constants; the exact packing is defined by the
 * callers, which are not visible in this file -- the code below reads
 * coords.x/.y/.z and const0124.y/.w. The body solves a quadratic for the
 * gradient parameter t and uses it for a 1D texture lookup.
 */
static void
radial_gradient(struct ureg_program *ureg,
                struct ureg_dst out,
                struct ureg_src pos,
                struct ureg_src sampler,
                struct ureg_src coords,
                struct ureg_src const0124,
                struct ureg_src matrow0,
                struct ureg_src matrow1,
                struct ureg_src matrow2)
{
   struct ureg_dst temp0 = ureg_DECL_temporary(ureg);
   struct ureg_dst temp1 = ureg_DECL_temporary(ureg);
   struct ureg_dst temp2 = ureg_DECL_temporary(ureg);
   struct ureg_dst temp3 = ureg_DECL_temporary(ureg);
   struct ureg_dst temp4 = ureg_DECL_temporary(ureg);
   struct ureg_dst temp5 = ureg_DECL_temporary(ureg);

   /* temp0 = (pos.x, pos.y, 1): homogeneous fragment position */
   ureg_MOV(ureg,
            ureg_writemask(temp0, TGSI_WRITEMASK_XY),
            pos);
   ureg_MOV(ureg,
            ureg_writemask(temp0, TGSI_WRITEMASK_Z),
            ureg_scalar(const0124, TGSI_SWIZZLE_Y));

   /* apply the 3x3 transform, then divide by the projective w */
   ureg_DP3(ureg, temp1, matrow0, ureg_src(temp0));
   ureg_DP3(ureg, temp2, matrow1, ureg_src(temp0));
   ureg_DP3(ureg, temp3, matrow2, ureg_src(temp0));
   ureg_RCP(ureg, temp3, ureg_src(temp3));
   ureg_MUL(ureg, temp1, ureg_src(temp1), ureg_src(temp3));
   ureg_MUL(ureg, temp2, ureg_src(temp2), ureg_src(temp3));

   /* temp5.xy = transformed (x, y) */
   ureg_MOV(ureg, ureg_writemask(temp5, TGSI_WRITEMASK_X),
            ureg_src(temp1));
   ureg_MOV(ureg, ureg_writemask(temp5, TGSI_WRITEMASK_Y),
            ureg_src(temp2));

   /* temp1 = 2 * (coords.x * x + coords.y * y) -- the linear ("b") term */
   ureg_MUL(ureg, temp0, ureg_scalar(coords, TGSI_SWIZZLE_Y),
            ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_Y));
   ureg_MAD(ureg, temp1,
            ureg_scalar(coords, TGSI_SWIZZLE_X),
            ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_X),
            ureg_src(temp0));
   ureg_ADD(ureg, temp1,
            ureg_src(temp1), ureg_src(temp1));
   /* temp4 = -(x*x + y*y) */
   ureg_MUL(ureg, temp3,
            ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_Y),
            ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_Y));
   ureg_MAD(ureg, temp4,
            ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_X),
            ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_X),
            ureg_src(temp3));
   ureg_MOV(ureg, temp4, ureg_negate(ureg_src(temp4)));
   /* temp0 = const0124.w * coords.z * temp4 -- the "4ac" term */
   ureg_MUL(ureg, temp2,
            ureg_scalar(coords, TGSI_SWIZZLE_Z),
            ureg_src(temp4));
   ureg_MUL(ureg, temp0,
            ureg_scalar(const0124, TGSI_SWIZZLE_W),
            ureg_src(temp2));
   /* temp2 = sqrt(b^2 - 4ac): RSQ of |discriminant|, then RCP to get sqrt */
   ureg_MUL(ureg, temp3,
            ureg_src(temp1), ureg_src(temp1));
   ureg_SUB(ureg, temp2,
            ureg_src(temp3), ureg_src(temp0));
   ureg_RSQ(ureg, temp2, ureg_abs(ureg_src(temp2)));
   ureg_RCP(ureg, temp2, ureg_src(temp2));
   /* t = (sqrt(disc) - b) / (2 * coords.z) */
   ureg_SUB(ureg, temp1,
            ureg_src(temp2), ureg_src(temp1));
   ureg_ADD(ureg, temp0,
            ureg_scalar(coords, TGSI_SWIZZLE_Z),
            ureg_scalar(coords, TGSI_SWIZZLE_Z));
   ureg_RCP(ureg, temp0, ureg_src(temp0));
   ureg_MUL(ureg, temp2,
            ureg_src(temp1), ureg_src(temp0));
   /* fetch the gradient color from the 1D ramp texture */
   ureg_TEX(ureg, out, TGSI_TEXTURE_1D,
            ureg_src(temp2), sampler);

   ureg_release_temporary(ureg, temp0);
   ureg_release_temporary(ureg, temp1);
   ureg_release_temporary(ureg, temp2);
   ureg_release_temporary(ureg, temp3);
   ureg_release_temporary(ureg, temp4);
   ureg_release_temporary(ureg, temp5);
}
232
/* Build the composite/fill vertex shader described in the comment block
 * at the top of this file: normalize the incoming position with
 * CONST[0]/CONST[1], then pass through either a solid fill color or one
 * or two sets of texture coordinates, depending on vs_traits.
 *
 * Returns an opaque pipe VS handle, or 0 if ureg creation failed.
 */
static void *
create_vs(struct pipe_context *pipe,
          unsigned vs_traits)
{
   struct ureg_program *ureg;
   struct ureg_src src;
   struct ureg_dst dst;
   struct ureg_src const0, const1;
   boolean is_fill = vs_traits & VS_FILL;
   boolean is_composite = vs_traits & VS_COMPOSITE;
   boolean has_mask = vs_traits & VS_MASK;

   ureg = ureg_create(TGSI_PROCESSOR_VERTEX);
   if (ureg == NULL)
      return 0;

   /* CONST[0] = scale, CONST[1] = bias (see file-top comment) */
   const0 = ureg_DECL_constant(ureg);
   const1 = ureg_DECL_constant(ureg);

   /* it has to be either a fill or a composite op */
   debug_assert(is_fill ^ is_composite);

   /* position: map destination pixels to clip space */
   src = ureg_DECL_vs_input(ureg,
                            TGSI_SEMANTIC_POSITION, 0);
   dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0);
   src = vs_normalize_coords(ureg, src,
                             const0, const1);
   ureg_MOV(ureg, dst, src);


   if (is_composite) {
      /* pass the source texture coordinates through */
      src = ureg_DECL_vs_input(ureg,
                               TGSI_SEMANTIC_GENERIC, 1);
      dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 1);
      ureg_MOV(ureg, dst, src);
   }
   if (is_fill) {
      /* pass the solid fill color through */
      src = ureg_DECL_vs_input(ureg,
                               TGSI_SEMANTIC_COLOR, 1);
      dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 1);
      ureg_MOV(ureg, dst, src);
   }

   if (has_mask) {
      /* pass the mask texture coordinates through.
       * NOTE(review): the output is declared POSITION index 2 while
       * create_fs reads the mask coords as POSITION index 1 -- looks
       * inconsistent; verify against the VS/FS linkage rules in use. */
      src = ureg_DECL_vs_input(ureg,
                               TGSI_SEMANTIC_GENERIC, 2);
      dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 2);
      ureg_MOV(ureg, dst, src);
   }

   ureg_END(ureg);

   return ureg_create_shader_and_destroy(ureg, pipe);
}
287
/* Build the composite/fill fragment shader described in the comment block
 * at the top of this file. Depending on fs_traits the source color comes
 * from a texture (composite), an interpolated solid color, or a
 * linear/radial gradient; if a mask is present the result is multiplied
 * by the mask alpha (Render "IN").
 *
 * Returns an opaque pipe FS handle, or 0 if ureg creation failed.
 */
static void *
create_fs(struct pipe_context *pipe,
          unsigned fs_traits)
{
   struct ureg_program *ureg;
   struct ureg_src /*dst_sampler,*/ src_sampler, mask_sampler;
   struct ureg_src /*dst_pos,*/ src_input, mask_pos;
   struct ureg_dst src, mask;
   struct ureg_dst out;
   boolean has_mask = fs_traits & FS_MASK;
   boolean is_fill = fs_traits & FS_FILL;
   boolean is_composite = fs_traits & FS_COMPOSITE;
   boolean is_solid = fs_traits & FS_SOLID_FILL;
   boolean is_lingrad = fs_traits & FS_LINGRAD_FILL;
   boolean is_radgrad = fs_traits & FS_RADGRAD_FILL;

   ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
   if (ureg == NULL)
      return 0;

   /* it has to be either a fill or a composite op */
   debug_assert(is_fill ^ is_composite);

   out = ureg_DECL_output(ureg,
                          TGSI_SEMANTIC_COLOR,
                          0);

   /* declare the inputs/samplers needed by the selected source */
   if (is_composite) {
      src_sampler = ureg_DECL_sampler(ureg, 0);
      src_input = ureg_DECL_fs_input(ureg,
                                     TGSI_SEMANTIC_POSITION,
                                     0,
                                     TGSI_INTERPOLATE_PERSPECTIVE);
   }
   if (is_fill) {
      if (is_solid)
         /* solid fill: the interpolated color IS the source */
         src_input = ureg_DECL_fs_input(ureg,
                                        TGSI_SEMANTIC_COLOR,
                                        0,
                                        TGSI_INTERPOLATE_PERSPECTIVE);
      else
         /* gradient fill: the position feeds the gradient math */
         src_input = ureg_DECL_fs_input(ureg,
                                        TGSI_SEMANTIC_POSITION,
                                        0,
                                        TGSI_INTERPOLATE_PERSPECTIVE);
   }

   if (has_mask) {
      mask_sampler = ureg_DECL_sampler(ureg, 1);
      mask_pos = ureg_DECL_fs_input(ureg,
                                    TGSI_SEMANTIC_POSITION,
                                    1,
                                    TGSI_INTERPOLATE_PERSPECTIVE);
   }

#if 0  /* unused right now */
   dst_sampler = ureg_DECL_sampler(ureg, 2);
   dst_pos = ureg_DECL_fs_input(ureg,
                                TGSI_SEMANTIC_POSITION,
                                2,
                                TGSI_INTERPOLATE_PERSPECTIVE);
#endif

   /* compute the source color into 'src' (a temp when a mask follows,
    * otherwise directly into the output register) */
   if (is_composite) {
      if (has_mask)
         src = ureg_DECL_temporary(ureg);
      else
         src = out;
      ureg_TEX(ureg, src,
               TGSI_TEXTURE_2D, src_input, src_sampler);
   } else if (is_fill) {
      if (is_solid) {
         if (has_mask)
            /* reuse the color input register directly as the IN source */
            src = ureg_dst(src_input);
         else
            ureg_MOV(ureg, out, src_input);
      } else if (is_lingrad || is_radgrad) {
         struct ureg_src coords, const0124,
            matrow0, matrow1, matrow2;

         if (has_mask)
            src = ureg_DECL_temporary(ureg);
         else
            src = out;

         /* gradient parameters; packing is defined by the state-setting
          * code that fills these constants (not visible in this file) */
         coords = ureg_DECL_constant(ureg);
         const0124 = ureg_DECL_constant(ureg);
         matrow0 = ureg_DECL_constant(ureg);
         matrow1 = ureg_DECL_constant(ureg);
         matrow2 = ureg_DECL_constant(ureg);

         if (is_lingrad) {
            linear_gradient(ureg, src,
                            src_input, src_sampler,
                            coords, const0124,
                            matrow0, matrow1, matrow2);
         } else if (is_radgrad) {
            radial_gradient(ureg, src,
                            src_input, src_sampler,
                            coords, const0124,
                            matrow0, matrow1, matrow2);
         }
      } else
         debug_assert(!"Unknown fill type!");
   }

   if (has_mask) {
      mask = ureg_DECL_temporary(ureg);
      ureg_TEX(ureg, mask,
               TGSI_TEXTURE_2D, mask_pos, mask_sampler);
      /* src IN mask */
      src_in_mask(ureg, out, ureg_src(src), ureg_src(mask));
      ureg_release_temporary(ureg, mask);
   }

   ureg_END(ureg);

   return ureg_create_shader_and_destroy(ureg, pipe);
}
407
408 struct xorg_shaders * xorg_shaders_create(struct exa_context *exa)
409 {
410 struct xorg_shaders *sc = CALLOC_STRUCT(xorg_shaders);
411
412 sc->exa = exa;
413 sc->vs_hash = cso_hash_create();
414 sc->fs_hash = cso_hash_create();
415
416 return sc;
417 }
418
419 static void
420 cache_destroy(struct cso_context *cso,
421 struct cso_hash *hash,
422 unsigned processor)
423 {
424 struct cso_hash_iter iter = cso_hash_first_node(hash);
425 while (!cso_hash_iter_is_null(iter)) {
426 void *shader = (void *)cso_hash_iter_data(iter);
427 if (processor == PIPE_SHADER_FRAGMENT) {
428 cso_delete_fragment_shader(cso, shader);
429 } else if (processor == PIPE_SHADER_VERTEX) {
430 cso_delete_vertex_shader(cso, shader);
431 }
432 iter = cso_hash_erase(hash, iter);
433 }
434 cso_hash_delete(hash);
435 }
436
437 void xorg_shaders_destroy(struct xorg_shaders *sc)
438 {
439 cache_destroy(sc->exa->cso, sc->vs_hash,
440 PIPE_SHADER_VERTEX);
441 cache_destroy(sc->exa->cso, sc->fs_hash,
442 PIPE_SHADER_FRAGMENT);
443
444 free(sc);
445 }
446
447 static INLINE void *
448 shader_from_cache(struct pipe_context *pipe,
449 unsigned type,
450 struct cso_hash *hash,
451 unsigned key)
452 {
453 void *shader = 0;
454
455 struct cso_hash_iter iter = cso_hash_find(hash, key);
456
457 if (cso_hash_iter_is_null(iter)) {
458 if (type == PIPE_SHADER_VERTEX)
459 shader = create_vs(pipe, key);
460 else
461 shader = create_fs(pipe, key);
462 cso_hash_insert(hash, key, shader);
463 } else
464 shader = (void *)cso_hash_iter_data(iter);
465
466 return shader;
467 }
468
469 struct xorg_shader xorg_shaders_get(struct xorg_shaders *sc,
470 unsigned vs_traits,
471 unsigned fs_traits)
472 {
473 struct xorg_shader shader = {0};
474 void *vs, *fs;
475
476 vs = shader_from_cache(sc->exa->ctx, PIPE_SHADER_VERTEX,
477 sc->vs_hash, vs_traits);
478 fs = shader_from_cache(sc->exa->ctx, PIPE_SHADER_FRAGMENT,
479 sc->fs_hash, fs_traits);
480
481 debug_assert(vs && fs);
482 if (!vs || !fs)
483 return shader;
484
485 shader.vs = vs;
486 shader.fs = fs;
487
488 return shader;
489 }