st/vega: Move masking after blending.
[mesa.git] / src / gallium / state_trackers / vega / asm_fill.h
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc. All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the
14 * next paragraph) shall be included in all copies or substantial portions
15 * of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
20 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
21 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 *
25 **************************************************************************/
26
27 #ifndef ASM_FILL_H
28 #define ASM_FILL_H
29
30 #include "tgsi/tgsi_ureg.h"
31
/**
 * Common signature of the shader-fragment emitters defined in this file.
 *
 * Each emitter appends instructions to \p ureg.  The remaining arguments
 * are register arrays that the emitter indexes directly: \p in (inputs),
 * \p sampler (samplers), \p temp (temporaries) and \p constant (constants);
 * \p out points at the destination register.
 */
typedef void (*ureg_func)(struct ureg_program *ureg,
                          struct ureg_dst *out,
                          struct ureg_src *in,
                          struct ureg_src *sampler,
                          struct ureg_dst *temp,
                          struct ureg_src *constant);
38
39 static INLINE void
40 solid_fill( struct ureg_program *ureg,
41 struct ureg_dst *out,
42 struct ureg_src *in,
43 struct ureg_src *sampler,
44 struct ureg_dst *temp,
45 struct ureg_src *constant)
46 {
47 ureg_MOV(ureg, *out, constant[2]);
48 }
49
/**
 * Perform the frag-coord-to-paint-coord transform.  The transformation is
 * read from CONST[4..6].
 *
 * The vector (in[0].x, in[0].y, CONST[3].y) is built in temp[0] and dotted
 * with CONST[4], CONST[5] and CONST[6]; the first two results are then
 * multiplied by the reciprocal of the third.  The transformed coordinate is
 * left in temp[4].xy, and temp[0] through temp[3] are overwritten.
 *
 * The expansion uses the enclosing function's ureg, in, temp and constant
 * and already ends with a semicolon.
 */
#define PAINT_TRANSFORM \
   ureg_MOV(ureg, ureg_writemask(temp[0], TGSI_WRITEMASK_XY), in[0]); \
   ureg_MOV(ureg, ureg_writemask(temp[0], TGSI_WRITEMASK_Z), \
            ureg_scalar(constant[3], TGSI_SWIZZLE_Y)); \
   ureg_DP3(ureg, temp[1], constant[4], ureg_src(temp[0])); \
   ureg_DP3(ureg, temp[2], constant[5], ureg_src(temp[0])); \
   ureg_DP3(ureg, temp[3], constant[6], ureg_src(temp[0])); \
   ureg_RCP(ureg, temp[3], ureg_src(temp[3])); \
   ureg_MUL(ureg, temp[1], ureg_src(temp[1]), ureg_src(temp[3])); \
   ureg_MUL(ureg, temp[2], ureg_src(temp[2]), ureg_src(temp[3])); \
   ureg_MOV(ureg, ureg_writemask(temp[4], TGSI_WRITEMASK_X), \
            ureg_src(temp[1])); \
   ureg_MOV(ureg, ureg_writemask(temp[4], TGSI_WRITEMASK_Y), \
            ureg_src(temp[2]));
71
72 static INLINE void
73 linear_grad( struct ureg_program *ureg,
74 struct ureg_dst *out,
75 struct ureg_src *in,
76 struct ureg_src *sampler,
77 struct ureg_dst *temp,
78 struct ureg_src *constant)
79 {
80 PAINT_TRANSFORM
81
82 /* grad = DP2((x, y), CONST[2].xy) * CONST[2].z */
83 ureg_MUL(ureg, temp[0],
84 ureg_scalar(constant[2], TGSI_SWIZZLE_Y),
85 ureg_scalar(ureg_src(temp[4]), TGSI_SWIZZLE_Y));
86 ureg_MAD(ureg, temp[1],
87 ureg_scalar(constant[2], TGSI_SWIZZLE_X),
88 ureg_scalar(ureg_src(temp[4]), TGSI_SWIZZLE_X),
89 ureg_src(temp[0]));
90 ureg_MUL(ureg, temp[2], ureg_src(temp[1]),
91 ureg_scalar(constant[2], TGSI_SWIZZLE_Z));
92
93 ureg_TEX(ureg, *out, TGSI_TEXTURE_1D, ureg_src(temp[2]), sampler[0]);
94 }
95
96 static INLINE void
97 radial_grad( struct ureg_program *ureg,
98 struct ureg_dst *out,
99 struct ureg_src *in,
100 struct ureg_src *sampler,
101 struct ureg_dst *temp,
102 struct ureg_src *constant)
103 {
104 PAINT_TRANSFORM
105
106 /*
107 * Calculate (sqrt(B^2 + AC) - B) / A, where
108 *
109 * A is CONST[2].z,
110 * B is DP2((x, y), CONST[2].xy), and
111 * C is DP2((x, y), (x, y)).
112 */
113
114 /* B and C */
115 ureg_DP2(ureg, temp[0], ureg_src(temp[4]), constant[2]);
116 ureg_DP2(ureg, temp[1], ureg_src(temp[4]), ureg_src(temp[4]));
117
118 /* the square root */
119 ureg_MUL(ureg, temp[2], ureg_src(temp[0]), ureg_src(temp[0]));
120 ureg_MAD(ureg, temp[3], ureg_src(temp[1]),
121 ureg_scalar(constant[2], TGSI_SWIZZLE_Z), ureg_src(temp[2]));
122 ureg_RSQ(ureg, temp[3], ureg_src(temp[3]));
123 ureg_RCP(ureg, temp[3], ureg_src(temp[3]));
124
125 ureg_SUB(ureg, temp[3], ureg_src(temp[3]), ureg_src(temp[0]));
126 ureg_RCP(ureg, temp[0], ureg_scalar(constant[2], TGSI_SWIZZLE_Z));
127 ureg_MUL(ureg, temp[0], ureg_src(temp[0]), ureg_src(temp[3]));
128
129 ureg_TEX(ureg, *out, TGSI_TEXTURE_1D, ureg_src(temp[0]), sampler[0]);
130 }
131
132
133 static INLINE void
134 pattern( struct ureg_program *ureg,
135 struct ureg_dst *out,
136 struct ureg_src *in,
137 struct ureg_src *sampler,
138 struct ureg_dst *temp,
139 struct ureg_src *constant)
140 {
141 PAINT_TRANSFORM
142
143 /* (s, t) = (x / tex_width, y / tex_height) */
144 ureg_RCP(ureg, temp[0],
145 ureg_swizzle(constant[3],
146 TGSI_SWIZZLE_Z,
147 TGSI_SWIZZLE_W,
148 TGSI_SWIZZLE_Z,
149 TGSI_SWIZZLE_W));
150 ureg_MOV(ureg, temp[1], ureg_src(temp[4]));
151 ureg_MUL(ureg,
152 ureg_writemask(temp[1], TGSI_WRITEMASK_X),
153 ureg_src(temp[1]),
154 ureg_src(temp[0]));
155 ureg_MUL(ureg,
156 ureg_writemask(temp[1], TGSI_WRITEMASK_Y),
157 ureg_src(temp[1]),
158 ureg_src(temp[0]));
159
160 ureg_TEX(ureg, *out, TGSI_TEXTURE_2D, ureg_src(temp[1]), sampler[0]);
161 }
162
163 static INLINE void
164 paint_degenerate( struct ureg_program *ureg,
165 struct ureg_dst *out,
166 struct ureg_src *in,
167 struct ureg_src *sampler,
168 struct ureg_dst *temp,
169 struct ureg_src *constant)
170 {
171 /* CONST[3].y is 1.0f */
172 ureg_MOV(ureg, temp[1], ureg_scalar(constant[3], TGSI_SWIZZLE_Y));
173 ureg_TEX(ureg, *out, TGSI_TEXTURE_1D, ureg_src(temp[1]), sampler[0]);
174 }
175
176 static INLINE void
177 image_normal( struct ureg_program *ureg,
178 struct ureg_dst *out,
179 struct ureg_src *in,
180 struct ureg_src *sampler,
181 struct ureg_dst *temp,
182 struct ureg_src *constant)
183 {
184 ureg_TEX(ureg, *out, TGSI_TEXTURE_2D, in[1], sampler[3]);
185 }
186
187
188 static INLINE void
189 image_multiply( struct ureg_program *ureg,
190 struct ureg_dst *out,
191 struct ureg_src *in,
192 struct ureg_src *sampler,
193 struct ureg_dst *temp,
194 struct ureg_src *constant)
195 {
196 ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[1], sampler[3]);
197 ureg_MUL(ureg, *out, ureg_src(temp[0]), ureg_src(temp[1]));
198 }
199
200
201 static INLINE void
202 image_stencil( struct ureg_program *ureg,
203 struct ureg_dst *out,
204 struct ureg_src *in,
205 struct ureg_src *sampler,
206 struct ureg_dst *temp,
207 struct ureg_src *constant)
208 {
209 ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[1], sampler[3]);
210 ureg_MUL(ureg, *out, ureg_src(temp[0]), ureg_src(temp[1]));
211 }
212
213 static INLINE void
214 color_transform( struct ureg_program *ureg,
215 struct ureg_dst *out,
216 struct ureg_src *in,
217 struct ureg_src *sampler,
218 struct ureg_dst *temp,
219 struct ureg_src *constant)
220 {
221 ureg_MAD(ureg, temp[1], ureg_src(temp[0]), constant[0], constant[1]);
222 /* clamp to [0.0f, 1.0f] */
223 ureg_CLAMP(ureg, temp[1],
224 ureg_src(temp[1]),
225 ureg_scalar(constant[3], TGSI_SWIZZLE_X),
226 ureg_scalar(constant[3], TGSI_SWIZZLE_Y));
227 ureg_MOV(ureg, *out, ureg_src(temp[1]));
228 }
229
230 /**
231 * Emit instructions for the specified blend mode. Colors should be
232 * premultiplied. Two temporary registers are required.
233 *
234 * XXX callers do not pass premultiplied colors!
235 */
236 static INLINE void
237 blend_generic(struct ureg_program *ureg,
238 VGBlendMode mode,
239 struct ureg_dst out,
240 struct ureg_src src,
241 struct ureg_src dst,
242 struct ureg_src src_channel_alpha,
243 struct ureg_src one,
244 struct ureg_dst temp[2])
245 {
246 switch (mode) {
247 case VG_BLEND_SRC:
248 ureg_MOV(ureg, out, src);
249 break;
250 case VG_BLEND_SRC_OVER:
251 /* RGBA_out = RGBA_src + (1 - A_src) * RGBA_dst */
252 ureg_SUB(ureg, temp[0], one, src_channel_alpha);
253 ureg_MAD(ureg, out, ureg_src(temp[0]), dst, src);
254 break;
255 case VG_BLEND_DST_OVER:
256 /* RGBA_out = RGBA_dst + (1 - A_dst) * RGBA_src */
257 ureg_SUB(ureg, temp[0], one, ureg_scalar(dst, TGSI_SWIZZLE_W));
258 ureg_MAD(ureg, out, ureg_src(temp[0]), src, dst);
259 break;
260 case VG_BLEND_SRC_IN:
261 ureg_MUL(ureg, out, src, ureg_scalar(dst, TGSI_SWIZZLE_W));
262 break;
263 case VG_BLEND_DST_IN:
264 ureg_MUL(ureg, out, dst, src_channel_alpha);
265 break;
266 case VG_BLEND_MULTIPLY:
267 /*
268 * RGB_out = (1 - A_dst) * RGB_src + (1 - A_src) * RGB_dst +
269 * RGB_src * RGB_dst
270 */
271 ureg_MAD(ureg, temp[0],
272 ureg_scalar(dst, TGSI_SWIZZLE_W), ureg_negate(src), src);
273 ureg_MAD(ureg, temp[1],
274 src_channel_alpha, ureg_negate(dst), dst);
275 ureg_MAD(ureg, temp[1], src, dst, ureg_src(temp[1]));
276 ureg_ADD(ureg, out, ureg_src(temp[0]), ureg_src(temp[1]));
277 /* alpha is src over */
278 ureg_ADD(ureg, ureg_writemask(out, TGSI_WRITEMASK_W),
279 src, ureg_src(temp[1]));
280 break;
281 case VG_BLEND_SCREEN:
282 /* RGBA_out = RGBA_src + (1 - RGBA_src) * RGBA_dst */
283 ureg_SUB(ureg, temp[0], one, src);
284 ureg_MAD(ureg, out, ureg_src(temp[0]), dst, src);
285 break;
286 case VG_BLEND_DARKEN:
287 case VG_BLEND_LIGHTEN:
288 /* src over */
289 ureg_SUB(ureg, temp[0], one, src_channel_alpha);
290 ureg_MAD(ureg, temp[0], ureg_src(temp[0]), dst, src);
291 /* dst over */
292 ureg_SUB(ureg, temp[1], one, ureg_scalar(dst, TGSI_SWIZZLE_W));
293 ureg_MAD(ureg, temp[1], ureg_src(temp[1]), src, dst);
294 /* take min/max for colors */
295 if (mode == VG_BLEND_DARKEN) {
296 ureg_MIN(ureg, ureg_writemask(out, TGSI_WRITEMASK_XYZ),
297 ureg_src(temp[0]), ureg_src(temp[1]));
298 }
299 else {
300 ureg_MAX(ureg, ureg_writemask(out, TGSI_WRITEMASK_XYZ),
301 ureg_src(temp[0]), ureg_src(temp[1]));
302 }
303 break;
304 case VG_BLEND_ADDITIVE:
305 /* RGBA_out = RGBA_src + RGBA_dst */
306 ureg_ADD(ureg, temp[0], src, dst);
307 ureg_MIN(ureg, out, ureg_src(temp[0]), one);
308 break;
309 default:
310 assert(0);
311 break;
312 }
313 }
314
315 static INLINE void
316 blend_multiply( struct ureg_program *ureg,
317 struct ureg_dst *out,
318 struct ureg_src *in,
319 struct ureg_src *sampler,
320 struct ureg_dst *temp,
321 struct ureg_src *constant)
322 {
323 ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[2]);
324 blend_generic(ureg, VG_BLEND_MULTIPLY, *out,
325 ureg_src(temp[0]),
326 ureg_src(temp[1]),
327 ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W),
328 ureg_scalar(constant[3], TGSI_SWIZZLE_Y),
329 temp + 2);
330 }
331
332 static INLINE void
333 blend_screen( struct ureg_program *ureg,
334 struct ureg_dst *out,
335 struct ureg_src *in,
336 struct ureg_src *sampler,
337 struct ureg_dst *temp,
338 struct ureg_src *constant)
339 {
340 ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[2]);
341 blend_generic(ureg, VG_BLEND_SCREEN, *out,
342 ureg_src(temp[0]),
343 ureg_src(temp[1]),
344 ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W),
345 ureg_scalar(constant[3], TGSI_SWIZZLE_Y),
346 temp + 2);
347 }
348
349 static INLINE void
350 blend_darken( struct ureg_program *ureg,
351 struct ureg_dst *out,
352 struct ureg_src *in,
353 struct ureg_src *sampler,
354 struct ureg_dst *temp,
355 struct ureg_src *constant)
356 {
357 ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[2]);
358 blend_generic(ureg, VG_BLEND_DARKEN, *out,
359 ureg_src(temp[0]),
360 ureg_src(temp[1]),
361 ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W),
362 ureg_scalar(constant[3], TGSI_SWIZZLE_Y),
363 temp + 2);
364 }
365
366 static INLINE void
367 blend_lighten( struct ureg_program *ureg,
368 struct ureg_dst *out,
369 struct ureg_src *in,
370 struct ureg_src *sampler,
371 struct ureg_dst *temp,
372 struct ureg_src *constant)
373 {
374 ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[2]);
375 blend_generic(ureg, VG_BLEND_LIGHTEN, *out,
376 ureg_src(temp[0]),
377 ureg_src(temp[1]),
378 ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W),
379 ureg_scalar(constant[3], TGSI_SWIZZLE_Y),
380 temp + 2);
381 }
382
383 static INLINE void
384 mask( struct ureg_program *ureg,
385 struct ureg_dst *out,
386 struct ureg_src *in,
387 struct ureg_src *sampler,
388 struct ureg_dst *temp,
389 struct ureg_src *constant)
390 {
391 ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[1]);
392 ureg_MUL(ureg, ureg_writemask(temp[0], TGSI_WRITEMASK_W),
393 ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W),
394 ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W));
395 ureg_MOV(ureg, *out, ureg_src(temp[0]));
396 }
397
398 static INLINE void
399 premultiply( struct ureg_program *ureg,
400 struct ureg_dst *out,
401 struct ureg_src *in,
402 struct ureg_src *sampler,
403 struct ureg_dst *temp,
404 struct ureg_src *constant)
405 {
406 ureg_MUL(ureg,
407 ureg_writemask(temp[0], TGSI_WRITEMASK_XYZ),
408 ureg_src(temp[0]),
409 ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W));
410 }
411
412 static INLINE void
413 unpremultiply( struct ureg_program *ureg,
414 struct ureg_dst *out,
415 struct ureg_src *in,
416 struct ureg_src *sampler,
417 struct ureg_dst *temp,
418 struct ureg_src *constant)
419 {
420 ureg_TEX(ureg, temp[0], TGSI_TEXTURE_2D, in[0], sampler[1]);
421 }
422
423
424 static INLINE void
425 color_bw( struct ureg_program *ureg,
426 struct ureg_dst *out,
427 struct ureg_src *in,
428 struct ureg_src *sampler,
429 struct ureg_dst *temp,
430 struct ureg_src *constant)
431 {
432 ureg_ADD(ureg, temp[1],
433 ureg_scalar(constant[3], TGSI_SWIZZLE_Y),
434 ureg_scalar(constant[3], TGSI_SWIZZLE_Y));
435 ureg_RCP(ureg, temp[2], ureg_src(temp[1]));
436 ureg_ADD(ureg, temp[1],
437 ureg_scalar(constant[3], TGSI_SWIZZLE_Y),
438 ureg_src(temp[2]));
439 ureg_ADD(ureg, ureg_writemask(temp[2], TGSI_WRITEMASK_X),
440 ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_X),
441 ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_Y));
442 ureg_ADD(ureg, ureg_writemask(temp[2], TGSI_WRITEMASK_X),
443 ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_Z),
444 ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_X));
445 ureg_SGE(ureg,
446 ureg_writemask(temp[0], TGSI_WRITEMASK_XYZ),
447 ureg_scalar(ureg_src(temp[2]), TGSI_SWIZZLE_X),
448 ureg_src(temp[1]));
449 ureg_SGE(ureg,
450 ureg_writemask(temp[0], TGSI_WRITEMASK_W),
451 ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W),
452 ureg_scalar(ureg_src(temp[2]), TGSI_SWIZZLE_Y));
453 ureg_MOV(ureg, *out, ureg_src(temp[0]));
454 }
455
456
457 struct shader_asm_info {
458 VGint id;
459 ureg_func func;
460
461 VGboolean needs_position;
462
463 VGint start_const;
464 VGint num_consts;
465
466 VGint start_sampler;
467 VGint num_samplers;
468
469 VGint start_temp;
470 VGint num_temps;
471 };
472
473
474 /* paint types */
475 static const struct shader_asm_info shaders_paint_asm[] = {
476 {VEGA_SOLID_FILL_SHADER, solid_fill,
477 VG_FALSE, 2, 1, 0, 0, 0, 0},
478 {VEGA_LINEAR_GRADIENT_SHADER, linear_grad,
479 VG_TRUE, 2, 5, 0, 1, 0, 5},
480 {VEGA_RADIAL_GRADIENT_SHADER, radial_grad,
481 VG_TRUE, 2, 5, 0, 1, 0, 5},
482 {VEGA_PATTERN_SHADER, pattern,
483 VG_TRUE, 3, 4, 0, 1, 0, 5},
484 {VEGA_PAINT_DEGENERATE_SHADER, paint_degenerate,
485 VG_FALSE, 3, 1, 0, 1, 0, 2}
486 };
487
488 /* image draw modes */
489 static const struct shader_asm_info shaders_image_asm[] = {
490 {VEGA_IMAGE_NORMAL_SHADER, image_normal,
491 VG_TRUE, 0, 0, 3, 1, 0, 0},
492 {VEGA_IMAGE_MULTIPLY_SHADER, image_multiply,
493 VG_TRUE, 0, 0, 3, 1, 0, 2},
494 {VEGA_IMAGE_STENCIL_SHADER, image_stencil,
495 VG_TRUE, 0, 0, 3, 1, 0, 2}
496 };
497
498 static const struct shader_asm_info shaders_color_transform_asm[] = {
499 {VEGA_COLOR_TRANSFORM_SHADER, color_transform,
500 VG_FALSE, 0, 4, 0, 0, 0, 2}
501 };
502
503 /* extra blend modes */
504 static const struct shader_asm_info shaders_blend_asm[] = {
505 {VEGA_BLEND_MULTIPLY_SHADER, blend_multiply,
506 VG_TRUE, 3, 1, 2, 1, 0, 4},
507 {VEGA_BLEND_SCREEN_SHADER, blend_screen,
508 VG_TRUE, 3, 1, 2, 1, 0, 4},
509 {VEGA_BLEND_DARKEN_SHADER, blend_darken,
510 VG_TRUE, 3, 1, 2, 1, 0, 4},
511 {VEGA_BLEND_LIGHTEN_SHADER, blend_lighten,
512 VG_TRUE, 3, 1, 2, 1, 0, 4},
513 };
514
515 static const struct shader_asm_info shaders_mask_asm[] = {
516 {VEGA_MASK_SHADER, mask,
517 VG_TRUE, 0, 0, 1, 1, 0, 2}
518 };
519
520 /* premultiply */
521 static const struct shader_asm_info shaders_premultiply_asm[] = {
522 {VEGA_PREMULTIPLY_SHADER, premultiply,
523 VG_FALSE, 0, 0, 0, 0, 0, 1},
524 {VEGA_UNPREMULTIPLY_SHADER, unpremultiply,
525 VG_FALSE, 0, 0, 0, 0, 0, 1},
526 };
527
528 /* color transform to black and white */
529 static const struct shader_asm_info shaders_bw_asm[] = {
530 {VEGA_BW_SHADER, color_bw,
531 VG_FALSE, 3, 1, 0, 0, 0, 3},
532 };
533
534 #endif