glsl: lower mediump temporaries to 16 bits except structures (v2)
[mesa.git] / src / mesa / state_tracker / st_tgsi_lower_yuv.c
1 /*
2 * Copyright © 2016 Red Hat
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include <stdbool.h>
25
26 #include "st_tgsi_lower_yuv.h"
27 #include "tgsi/tgsi_transform.h"
28 #include "tgsi/tgsi_scan.h"
29 #include "util/u_debug.h"
30
31 #include "util/bitscan.h"
32
33 struct tgsi_yuv_transform {
34 struct tgsi_transform_context base;
35 struct tgsi_shader_info info;
36 struct tgsi_full_src_register imm[4];
37 struct {
38 struct tgsi_full_src_register src;
39 struct tgsi_full_dst_register dst;
40 } tmp[2];
41 #define A 0
42 #define B 1
43
44 /* Maps a primary sampler (used for Y) to the U or UV sampler. In
45 * case of 3-plane YUV format, the V plane is next sampler after U.
46 */
47 unsigned char sampler_map[PIPE_MAX_SAMPLERS][2];
48
49 bool first_instruction_emitted;
50 unsigned free_slots;
51 unsigned lower_nv12;
52 unsigned lower_iyuv;
53 };
54
/* Downcast the generic transform context to the YUV lowering state. */
static inline struct tgsi_yuv_transform *
tgsi_yuv_transform(struct tgsi_transform_context *tctx)
{
   struct tgsi_yuv_transform *yuv = (struct tgsi_yuv_transform *)tctx;

   return yuv;
}
60
61 static void
62 reg_dst(struct tgsi_full_dst_register *dst,
63 const struct tgsi_full_dst_register *orig_dst, unsigned wrmask)
64 {
65 *dst = *orig_dst;
66 dst->Register.WriteMask &= wrmask;
67 assert(dst->Register.WriteMask);
68 }
69
70 static inline void
71 get_swiz(unsigned *swiz, const struct tgsi_src_register *src)
72 {
73 swiz[0] = src->SwizzleX;
74 swiz[1] = src->SwizzleY;
75 swiz[2] = src->SwizzleZ;
76 swiz[3] = src->SwizzleW;
77 }
78
79 static void
80 reg_src(struct tgsi_full_src_register *src,
81 const struct tgsi_full_src_register *orig_src,
82 unsigned sx, unsigned sy, unsigned sz, unsigned sw)
83 {
84 unsigned swiz[4];
85 get_swiz(swiz, &orig_src->Register);
86 *src = *orig_src;
87 src->Register.SwizzleX = swiz[sx];
88 src->Register.SwizzleY = swiz[sy];
89 src->Register.SwizzleZ = swiz[sz];
90 src->Register.SwizzleW = swiz[sw];
91 }
92
/* Placeholder selector for channels whose value is a don't-care; it simply
 * aliases X so that SWIZ() can be written with '_' in those positions.
 */
#define TGSI_SWIZZLE__ TGSI_SWIZZLE_X

/* Expand four channel letters (X, Y, Z, W or _) into the four
 * comma-separated TGSI_SWIZZLE_* arguments expected by reg_src().
 */
#define SWIZ(x,y,z,w) \
   TGSI_SWIZZLE_ ## x, TGSI_SWIZZLE_ ## y, \
   TGSI_SWIZZLE_ ## z, TGSI_SWIZZLE_ ## w
96
97 static inline struct tgsi_full_instruction
98 tex_instruction(unsigned samp)
99 {
100 struct tgsi_full_instruction inst;
101
102 inst = tgsi_default_full_instruction();
103 inst.Instruction.Opcode = TGSI_OPCODE_TEX;
104 inst.Instruction.Texture = 1;
105 inst.Texture.Texture = TGSI_TEXTURE_2D;
106 inst.Instruction.NumDstRegs = 1;
107 inst.Instruction.NumSrcRegs = 2;
108 inst.Src[1].Register.File = TGSI_FILE_SAMPLER;
109 inst.Src[1].Register.Index = samp;
110
111 return inst;
112 }
113
114 static inline struct tgsi_full_instruction
115 mov_instruction(void)
116 {
117 struct tgsi_full_instruction inst;
118
119 inst = tgsi_default_full_instruction();
120 inst.Instruction.Opcode = TGSI_OPCODE_MOV;
121 inst.Instruction.Saturate = 0;
122 inst.Instruction.NumDstRegs = 1;
123 inst.Instruction.NumSrcRegs = 1;
124
125 return inst;
126 }
127
128 static inline struct tgsi_full_instruction
129 dp3_instruction(void)
130 {
131 struct tgsi_full_instruction inst;
132
133 inst = tgsi_default_full_instruction();
134 inst.Instruction.Opcode = TGSI_OPCODE_DP3;
135 inst.Instruction.NumDstRegs = 1;
136 inst.Instruction.NumSrcRegs = 2;
137
138 return inst;
139 }
140
141
142
143 static void
144 emit_immed(struct tgsi_transform_context *tctx, int idx,
145 float x, float y, float z, float w)
146 {
147 struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
148 struct tgsi_shader_info *info = &ctx->info;
149 struct tgsi_full_immediate immed;
150
151 immed = tgsi_default_full_immediate();
152 immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */
153 immed.u[0].Float = x;
154 immed.u[1].Float = y;
155 immed.u[2].Float = z;
156 immed.u[3].Float = w;
157 tctx->emit_immediate(tctx, &immed);
158
159 ctx->imm[idx].Register.File = TGSI_FILE_IMMEDIATE;
160 ctx->imm[idx].Register.Index = info->immediate_count + idx;
161 ctx->imm[idx].Register.SwizzleX = TGSI_SWIZZLE_X;
162 ctx->imm[idx].Register.SwizzleY = TGSI_SWIZZLE_Y;
163 ctx->imm[idx].Register.SwizzleZ = TGSI_SWIZZLE_Z;
164 ctx->imm[idx].Register.SwizzleW = TGSI_SWIZZLE_W;
165 }
166
167 static void
168 emit_samp(struct tgsi_transform_context *tctx, unsigned samp)
169 {
170 tgsi_transform_sampler_decl(tctx, samp);
171 tgsi_transform_sampler_view_decl(tctx, samp, PIPE_TEXTURE_2D,
172 TGSI_RETURN_TYPE_FLOAT);
173 }
174
175 /* Emit extra declarations we need:
176 * + 2 TEMP to hold intermediate results
177 * + 1 (for 2-plane YUV) or 2 (for 3-plane YUV) extra samplers per
178 * lowered YUV sampler
179 * + extra immediates for doing CSC
180 */
181 static void
182 emit_decls(struct tgsi_transform_context *tctx)
183 {
184 struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
185 struct tgsi_shader_info *info = &ctx->info;
186 unsigned mask, tempbase, i;
187 struct tgsi_full_declaration decl;
188
189 /*
190 * Declare immediates for CSC conversion:
191 */
192
193 /* ITU-R BT.601 conversion */
194 emit_immed(tctx, 0, 1.164f, 0.000f, 1.596f, 0.0f);
195 emit_immed(tctx, 1, 1.164f, -0.392f, -0.813f, 0.0f);
196 emit_immed(tctx, 2, 1.164f, 2.017f, 0.000f, 0.0f);
197 emit_immed(tctx, 3, 0.0625f, 0.500f, 0.500f, 1.0f);
198
199 /*
200 * Declare extra samplers / sampler-views:
201 */
202
203 mask = ctx->lower_nv12 | ctx->lower_iyuv;
204 while (mask) {
205 unsigned extra, y_samp = u_bit_scan(&mask);
206
207 extra = u_bit_scan(&ctx->free_slots);
208 ctx->sampler_map[y_samp][0] = extra;
209 emit_samp(tctx, extra);
210
211 if (ctx->lower_iyuv & (1 << y_samp)) {
212 extra = u_bit_scan(&ctx->free_slots);
213 ctx->sampler_map[y_samp][1] = extra;
214 emit_samp(tctx, extra);
215 }
216 }
217
218 /*
219 * Declare extra temp:
220 */
221
222 tempbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
223
224 for (i = 0; i < 2; i++) {
225 decl = tgsi_default_full_declaration();
226 decl.Declaration.File = TGSI_FILE_TEMPORARY;
227 decl.Range.First = decl.Range.Last = tempbase + i;
228 tctx->emit_declaration(tctx, &decl);
229
230 ctx->tmp[i].src.Register.File = TGSI_FILE_TEMPORARY;
231 ctx->tmp[i].src.Register.Index = tempbase + i;
232 ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X;
233 ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y;
234 ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z;
235 ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W;
236
237 ctx->tmp[i].dst.Register.File = TGSI_FILE_TEMPORARY;
238 ctx->tmp[i].dst.Register.Index = tempbase + i;
239 ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW;
240 }
241 }
242
243 /* call with YUV in tmpA.xyz */
244 static void
245 yuv_to_rgb(struct tgsi_transform_context *tctx,
246 struct tgsi_full_dst_register *dst)
247 {
248 struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
249 struct tgsi_full_instruction inst;
250
251 /*
252 * IMM[0] FLT32 { 1.164, 0.000, 1.596, 0.0 }
253 * IMM[1] FLT32 { 1.164, -0.392, -0.813, 0.0 }
254 * IMM[2] FLT32 { 1.164, 2.017, 0.000, 0.0 }
255 * IMM[3] FLT32 { 0.0625, 0.500, 0.500, 1.0 }
256 */
257
258 /* SUB tmpA.xyz, tmpA, imm[3] */
259 inst = tgsi_default_full_instruction();
260 inst.Instruction.Opcode = TGSI_OPCODE_ADD;
261 inst.Instruction.Saturate = 0;
262 inst.Instruction.NumDstRegs = 1;
263 inst.Instruction.NumSrcRegs = 2;
264 reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZ);
265 reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, _));
266 reg_src(&inst.Src[1], &ctx->imm[3], SWIZ(X, Y, Z, _));
267 inst.Src[1].Register.Negate = 1;
268 tctx->emit_instruction(tctx, &inst);
269
270 /* DP3 dst.x, tmpA, imm[0] */
271 if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
272 inst = dp3_instruction();
273 reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_X);
274 reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
275 reg_src(&inst.Src[1], &ctx->imm[0], SWIZ(X, Y, Z, W));
276 tctx->emit_instruction(tctx, &inst);
277 }
278
279 /* DP3 dst.y, tmpA, imm[1] */
280 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
281 inst = dp3_instruction();
282 reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_Y);
283 reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
284 reg_src(&inst.Src[1], &ctx->imm[1], SWIZ(X, Y, Z, W));
285 tctx->emit_instruction(tctx, &inst);
286 }
287
288 /* DP3 dst.z, tmpA, imm[2] */
289 if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
290 inst = dp3_instruction();
291 reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_Z);
292 reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
293 reg_src(&inst.Src[1], &ctx->imm[2], SWIZ(X, Y, Z, W));
294 tctx->emit_instruction(tctx, &inst);
295 }
296
297 /* MOV dst.w, imm[0].x */
298 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
299 inst = mov_instruction();
300 reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_W);
301 reg_src(&inst.Src[0], &ctx->imm[3], SWIZ(_, _, _, W));
302 tctx->emit_instruction(tctx, &inst);
303 }
304 }
305
306 static void
307 lower_nv12(struct tgsi_transform_context *tctx,
308 struct tgsi_full_instruction *originst)
309 {
310 struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
311 struct tgsi_full_instruction inst;
312 struct tgsi_full_src_register *coord = &originst->Src[0];
313 unsigned samp = originst->Src[1].Register.Index;
314
315 /* sample Y:
316 * TEX tempA.x, coord, texture[samp], 2D;
317 */
318 inst = tex_instruction(samp);
319 reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
320 reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
321 tctx->emit_instruction(tctx, &inst);
322
323 /* sample UV:
324 * TEX tempB.xy, coord, texture[sampler_map[samp][0]], 2D;
325 * MOV tempA.yz, tempB._xy_
326 */
327 inst = tex_instruction(ctx->sampler_map[samp][0]);
328 reg_dst(&inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XY);
329 reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
330 tctx->emit_instruction(tctx, &inst);
331
332 inst = mov_instruction();
333 reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_YZ);
334 reg_src(&inst.Src[0], &ctx->tmp[B].src, SWIZ(_, X, Y, _));
335 tctx->emit_instruction(tctx, &inst);
336
337 /* At this point, we have YUV in tempA.xyz, rest is common: */
338 yuv_to_rgb(tctx, &originst->Dst[0]);
339 }
340
341 static void
342 lower_iyuv(struct tgsi_transform_context *tctx,
343 struct tgsi_full_instruction *originst)
344 {
345 struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
346 struct tgsi_full_instruction inst;
347 struct tgsi_full_src_register *coord = &originst->Src[0];
348 unsigned samp = originst->Src[1].Register.Index;
349
350 /* sample Y:
351 * TEX tempA.x, coord, texture[samp], 2D;
352 */
353 inst = tex_instruction(samp);
354 reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
355 reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
356 tctx->emit_instruction(tctx, &inst);
357
358 /* sample U:
359 * TEX tempB.x, coord, texture[sampler_map[samp][0]], 2D;
360 * MOV tempA.y, tempB._x__
361 */
362 inst = tex_instruction(ctx->sampler_map[samp][0]);
363 reg_dst(&inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
364 reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
365 tctx->emit_instruction(tctx, &inst);
366
367 inst = mov_instruction();
368 reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
369 reg_src(&inst.Src[0], &ctx->tmp[B].src, SWIZ(_, X, _, _));
370 tctx->emit_instruction(tctx, &inst);
371
372 /* sample V:
373 * TEX tempB.x, coord, texture[sampler_map[samp][1]], 2D;
374 * MOV tempA.z, tempB.__x_
375 */
376 inst = tex_instruction(ctx->sampler_map[samp][1]);
377 reg_dst(&inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
378 reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
379 tctx->emit_instruction(tctx, &inst);
380
381 inst = mov_instruction();
382 reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
383 reg_src(&inst.Src[0], &ctx->tmp[B].src, SWIZ(_, _, X, _));
384 tctx->emit_instruction(tctx, &inst);
385
386 /* At this point, we have YUV in tempA.xyz, rest is common: */
387 yuv_to_rgb(tctx, &originst->Dst[0]);
388 }
389
390 static void
391 transform_instr(struct tgsi_transform_context *tctx,
392 struct tgsi_full_instruction *inst)
393 {
394 struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
395
396 if (!ctx->first_instruction_emitted) {
397 emit_decls(tctx);
398 ctx->first_instruction_emitted = true;
399 }
400
401 switch (inst->Instruction.Opcode) {
402 /* TODO what other tex opcode's can be used w/ external eglimgs? */
403 case TGSI_OPCODE_TEX: {
404 unsigned samp = inst->Src[1].Register.Index;
405 if (ctx->lower_nv12 & (1 << samp)) {
406 lower_nv12(tctx, inst);
407 } else if (ctx->lower_iyuv & (1 << samp)) {
408 lower_iyuv(tctx, inst);
409 } else {
410 goto skip;
411 }
412 break;
413 }
414 default:
415 skip:
416 tctx->emit_instruction(tctx, inst);
417 return;
418 }
419 }
420
421 extern const struct tgsi_token *
422 st_tgsi_lower_yuv(const struct tgsi_token *tokens, unsigned free_slots,
423 unsigned lower_nv12, unsigned lower_iyuv)
424 {
425 struct tgsi_yuv_transform ctx;
426 struct tgsi_token *newtoks;
427 int newlen;
428
429 assert(!(lower_nv12 & lower_iyuv)); /* bitmasks should be mutually exclusive */
430
431 // tgsi_dump(tokens, 0);
432 // debug_printf("\n");
433
434 memset(&ctx, 0, sizeof(ctx));
435 ctx.base.transform_instruction = transform_instr;
436 ctx.free_slots = free_slots;
437 ctx.lower_nv12 = lower_nv12;
438 ctx.lower_iyuv = lower_iyuv;
439 tgsi_scan_shader(tokens, &ctx.info);
440
441 /* TODO better job of figuring out how many extra tokens we need..
442 * this is a pain about tgsi_transform :-/
443 */
444 newlen = tgsi_num_tokens(tokens) + 300;
445 newtoks = tgsi_alloc_tokens(newlen);
446 if (!newtoks)
447 return NULL;
448
449 tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);
450
451 // tgsi_dump(newtoks, 0);
452 // debug_printf("\n");
453
454 return newtoks;
455 }