nir: Move nir_lower_uniforms_to_ubo to compiler/nir.
[mesa.git] / src / mesa / state_tracker / st_tgsi_lower_yuv.c
1 /*
2 * Copyright © 2016 Red Hat
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include <stdbool.h>
25
26 #include "st_tgsi_lower_yuv.h"
27 #include "tgsi/tgsi_transform.h"
28 #include "tgsi/tgsi_scan.h"
29 #include "tgsi/tgsi_dump.h"
30 #include "util/u_debug.h"
31
32 #include "util/bitscan.h"
33
34 struct tgsi_yuv_transform {
35 struct tgsi_transform_context base;
36 struct tgsi_shader_info info;
37 struct tgsi_full_src_register imm[4];
38 struct {
39 struct tgsi_full_src_register src;
40 struct tgsi_full_dst_register dst;
41 } tmp[2];
42 #define A 0
43 #define B 1
44
45 /* Maps a primary sampler (used for Y) to the U or UV sampler. In
46 * case of 3-plane YUV format, the V plane is next sampler after U.
47 */
48 unsigned char sampler_map[PIPE_MAX_SAMPLERS][2];
49
50 bool first_instruction_emitted;
51 unsigned free_slots;
52 unsigned lower_nv12;
53 unsigned lower_iyuv;
54 };
55
/**
 * Recover the YUV lowering state from the generic transform context that is
 * handed to the transform callbacks.  This is a plain pointer cast.
 */
static inline struct tgsi_yuv_transform *
tgsi_yuv_transform(struct tgsi_transform_context *tctx)
{
   struct tgsi_yuv_transform *yuv = (struct tgsi_yuv_transform *)tctx;

   return yuv;
}
61
62 static void
63 reg_dst(struct tgsi_full_dst_register *dst,
64 const struct tgsi_full_dst_register *orig_dst, unsigned wrmask)
65 {
66 *dst = *orig_dst;
67 dst->Register.WriteMask &= wrmask;
68 assert(dst->Register.WriteMask);
69 }
70
71 static inline void
72 get_swiz(unsigned *swiz, const struct tgsi_src_register *src)
73 {
74 swiz[0] = src->SwizzleX;
75 swiz[1] = src->SwizzleY;
76 swiz[2] = src->SwizzleZ;
77 swiz[3] = src->SwizzleW;
78 }
79
80 static void
81 reg_src(struct tgsi_full_src_register *src,
82 const struct tgsi_full_src_register *orig_src,
83 unsigned sx, unsigned sy, unsigned sz, unsigned sw)
84 {
85 unsigned swiz[4];
86 get_swiz(swiz, &orig_src->Register);
87 *src = *orig_src;
88 src->Register.SwizzleX = swiz[sx];
89 src->Register.SwizzleY = swiz[sy];
90 src->Register.SwizzleZ = swiz[sz];
91 src->Register.SwizzleW = swiz[sw];
92 }
93
/* Lets '_' be written in SWIZ() for a channel whose value is a don't-care;
 * it simply selects X.
 */
#define TGSI_SWIZZLE__ TGSI_SWIZZLE_X

/* Expands to four comma-separated TGSI_SWIZZLE_* selectors, suitable as the
 * trailing sx, sy, sz, sw arguments of reg_src().
 */
#define SWIZ(x,y,z,w) \
   TGSI_SWIZZLE_ ## x, \
   TGSI_SWIZZLE_ ## y, \
   TGSI_SWIZZLE_ ## z, \
   TGSI_SWIZZLE_ ## w
97
98 static inline struct tgsi_full_instruction
99 tex_instruction(unsigned samp)
100 {
101 struct tgsi_full_instruction inst;
102
103 inst = tgsi_default_full_instruction();
104 inst.Instruction.Opcode = TGSI_OPCODE_TEX;
105 inst.Instruction.Texture = 1;
106 inst.Texture.Texture = TGSI_TEXTURE_2D;
107 inst.Instruction.NumDstRegs = 1;
108 inst.Instruction.NumSrcRegs = 2;
109 inst.Src[1].Register.File = TGSI_FILE_SAMPLER;
110 inst.Src[1].Register.Index = samp;
111
112 return inst;
113 }
114
115 static inline struct tgsi_full_instruction
116 mov_instruction(void)
117 {
118 struct tgsi_full_instruction inst;
119
120 inst = tgsi_default_full_instruction();
121 inst.Instruction.Opcode = TGSI_OPCODE_MOV;
122 inst.Instruction.Saturate = 0;
123 inst.Instruction.NumDstRegs = 1;
124 inst.Instruction.NumSrcRegs = 1;
125
126 return inst;
127 }
128
129 static inline struct tgsi_full_instruction
130 dp3_instruction(void)
131 {
132 struct tgsi_full_instruction inst;
133
134 inst = tgsi_default_full_instruction();
135 inst.Instruction.Opcode = TGSI_OPCODE_DP3;
136 inst.Instruction.NumDstRegs = 1;
137 inst.Instruction.NumSrcRegs = 2;
138
139 return inst;
140 }
141
142
143
144 static void
145 emit_immed(struct tgsi_transform_context *tctx, int idx,
146 float x, float y, float z, float w)
147 {
148 struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
149 struct tgsi_shader_info *info = &ctx->info;
150 struct tgsi_full_immediate immed;
151
152 immed = tgsi_default_full_immediate();
153 immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */
154 immed.u[0].Float = x;
155 immed.u[1].Float = y;
156 immed.u[2].Float = z;
157 immed.u[3].Float = w;
158 tctx->emit_immediate(tctx, &immed);
159
160 ctx->imm[idx].Register.File = TGSI_FILE_IMMEDIATE;
161 ctx->imm[idx].Register.Index = info->immediate_count + idx;
162 ctx->imm[idx].Register.SwizzleX = TGSI_SWIZZLE_X;
163 ctx->imm[idx].Register.SwizzleY = TGSI_SWIZZLE_Y;
164 ctx->imm[idx].Register.SwizzleZ = TGSI_SWIZZLE_Z;
165 ctx->imm[idx].Register.SwizzleW = TGSI_SWIZZLE_W;
166 }
167
168 static void
169 emit_samp(struct tgsi_transform_context *tctx, unsigned samp)
170 {
171 tgsi_transform_sampler_decl(tctx, samp);
172 tgsi_transform_sampler_view_decl(tctx, samp, PIPE_TEXTURE_2D,
173 TGSI_RETURN_TYPE_FLOAT);
174 }
175
176 /* Emit extra declarations we need:
177 * + 2 TEMP to hold intermediate results
178 * + 1 (for 2-plane YUV) or 2 (for 3-plane YUV) extra samplers per
179 * lowered YUV sampler
180 * + extra immediates for doing CSC
181 */
182 static void
183 emit_decls(struct tgsi_transform_context *tctx)
184 {
185 struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
186 struct tgsi_shader_info *info = &ctx->info;
187 unsigned mask, tempbase, i;
188 struct tgsi_full_declaration decl;
189
190 /*
191 * Declare immediates for CSC conversion:
192 */
193
194 /* ITU-R BT.601 conversion */
195 emit_immed(tctx, 0, 1.164, 0.000, 1.596, 0.0);
196 emit_immed(tctx, 1, 1.164, -0.392, -0.813, 0.0);
197 emit_immed(tctx, 2, 1.164, 2.017, 0.000, 0.0);
198 emit_immed(tctx, 3, 0.0625, 0.500, 0.500, 1.0);
199
200 /*
201 * Declare extra samplers / sampler-views:
202 */
203
204 mask = ctx->lower_nv12 | ctx->lower_iyuv;
205 while (mask) {
206 unsigned extra, y_samp = u_bit_scan(&mask);
207
208 extra = u_bit_scan(&ctx->free_slots);
209 ctx->sampler_map[y_samp][0] = extra;
210 emit_samp(tctx, extra);
211
212 if (ctx->lower_iyuv & (1 << y_samp)) {
213 extra = u_bit_scan(&ctx->free_slots);
214 ctx->sampler_map[y_samp][1] = extra;
215 emit_samp(tctx, extra);
216 }
217 }
218
219 /*
220 * Declare extra temp:
221 */
222
223 tempbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
224
225 for (i = 0; i < 2; i++) {
226 decl = tgsi_default_full_declaration();
227 decl.Declaration.File = TGSI_FILE_TEMPORARY;
228 decl.Range.First = decl.Range.Last = tempbase + i;
229 tctx->emit_declaration(tctx, &decl);
230
231 ctx->tmp[i].src.Register.File = TGSI_FILE_TEMPORARY;
232 ctx->tmp[i].src.Register.Index = tempbase + i;
233 ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X;
234 ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y;
235 ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z;
236 ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W;
237
238 ctx->tmp[i].dst.Register.File = TGSI_FILE_TEMPORARY;
239 ctx->tmp[i].dst.Register.Index = tempbase + i;
240 ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW;
241 }
242 }
243
244 /* call with YUV in tmpA.xyz */
245 static void
246 yuv_to_rgb(struct tgsi_transform_context *tctx,
247 struct tgsi_full_dst_register *dst)
248 {
249 struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
250 struct tgsi_full_instruction inst;
251
252 /*
253 * IMM[0] FLT32 { 1.164, 0.000, 1.596, 0.0 }
254 * IMM[1] FLT32 { 1.164, -0.392, -0.813, 0.0 }
255 * IMM[2] FLT32 { 1.164, 2.017, 0.000, 0.0 }
256 * IMM[3] FLT32 { 0.0625, 0.500, 0.500, 1.0 }
257 */
258
259 /* SUB tmpA.xyz, tmpA, imm[3] */
260 inst = tgsi_default_full_instruction();
261 inst.Instruction.Opcode = TGSI_OPCODE_ADD;
262 inst.Instruction.Saturate = 0;
263 inst.Instruction.NumDstRegs = 1;
264 inst.Instruction.NumSrcRegs = 2;
265 reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZ);
266 reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, _));
267 reg_src(&inst.Src[1], &ctx->imm[3], SWIZ(X, Y, Z, _));
268 inst.Src[1].Register.Negate = 1;
269 tctx->emit_instruction(tctx, &inst);
270
271 /* DP3 dst.x, tmpA, imm[0] */
272 inst = dp3_instruction();
273 reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_X);
274 reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
275 reg_src(&inst.Src[1], &ctx->imm[0], SWIZ(X, Y, Z, W));
276 tctx->emit_instruction(tctx, &inst);
277
278 /* DP3 dst.y, tmpA, imm[1] */
279 inst = dp3_instruction();
280 reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_Y);
281 reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
282 reg_src(&inst.Src[1], &ctx->imm[1], SWIZ(X, Y, Z, W));
283 tctx->emit_instruction(tctx, &inst);
284
285 /* DP3 dst.z, tmpA, imm[2] */
286 inst = dp3_instruction();
287 reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_Z);
288 reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
289 reg_src(&inst.Src[1], &ctx->imm[2], SWIZ(X, Y, Z, W));
290 tctx->emit_instruction(tctx, &inst);
291
292 /* MOV dst.w, imm[0].x */
293 inst = mov_instruction();
294 reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_W);
295 reg_src(&inst.Src[0], &ctx->imm[3], SWIZ(_, _, _, W));
296 tctx->emit_instruction(tctx, &inst);
297 }
298
299 static void
300 lower_nv12(struct tgsi_transform_context *tctx,
301 struct tgsi_full_instruction *originst)
302 {
303 struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
304 struct tgsi_full_instruction inst;
305 struct tgsi_full_src_register *coord = &originst->Src[0];
306 unsigned samp = originst->Src[1].Register.Index;
307
308 /* sample Y:
309 * TEX tempA.x, coord, texture[samp], 2D;
310 */
311 inst = tex_instruction(samp);
312 reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
313 reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
314 tctx->emit_instruction(tctx, &inst);
315
316 /* sample UV:
317 * TEX tempB.xy, coord, texture[sampler_map[samp][0]], 2D;
318 * MOV tempA.yz, tempB._xy_
319 */
320 inst = tex_instruction(ctx->sampler_map[samp][0]);
321 reg_dst(&inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XY);
322 reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
323 tctx->emit_instruction(tctx, &inst);
324
325 inst = mov_instruction();
326 reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_YZ);
327 reg_src(&inst.Src[0], &ctx->tmp[B].src, SWIZ(_, X, Y, _));
328 tctx->emit_instruction(tctx, &inst);
329
330 /* At this point, we have YUV in tempA.xyz, rest is common: */
331 yuv_to_rgb(tctx, &originst->Dst[0]);
332 }
333
334 static void
335 lower_iyuv(struct tgsi_transform_context *tctx,
336 struct tgsi_full_instruction *originst)
337 {
338 struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
339 struct tgsi_full_instruction inst;
340 struct tgsi_full_src_register *coord = &originst->Src[0];
341 unsigned samp = originst->Src[1].Register.Index;
342
343 /* sample Y:
344 * TEX tempA.x, coord, texture[samp], 2D;
345 */
346 inst = tex_instruction(samp);
347 reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
348 reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
349 tctx->emit_instruction(tctx, &inst);
350
351 /* sample U:
352 * TEX tempB.x, coord, texture[sampler_map[samp][0]], 2D;
353 * MOV tempA.y, tempB._x__
354 */
355 inst = tex_instruction(ctx->sampler_map[samp][0]);
356 reg_dst(&inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
357 reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
358 tctx->emit_instruction(tctx, &inst);
359
360 inst = mov_instruction();
361 reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
362 reg_src(&inst.Src[0], &ctx->tmp[B].src, SWIZ(_, X, _, _));
363 tctx->emit_instruction(tctx, &inst);
364
365 /* sample V:
366 * TEX tempB.x, coord, texture[sampler_map[samp][1]], 2D;
367 * MOV tempA.z, tempB.__x_
368 */
369 inst = tex_instruction(ctx->sampler_map[samp][1]);
370 reg_dst(&inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
371 reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
372 tctx->emit_instruction(tctx, &inst);
373
374 inst = mov_instruction();
375 reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
376 reg_src(&inst.Src[0], &ctx->tmp[B].src, SWIZ(_, _, X, _));
377 tctx->emit_instruction(tctx, &inst);
378
379 /* At this point, we have YUV in tempA.xyz, rest is common: */
380 yuv_to_rgb(tctx, &originst->Dst[0]);
381 }
382
383 static void
384 transform_instr(struct tgsi_transform_context *tctx,
385 struct tgsi_full_instruction *inst)
386 {
387 struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
388
389 if (!ctx->first_instruction_emitted) {
390 emit_decls(tctx);
391 ctx->first_instruction_emitted = true;
392 }
393
394 switch (inst->Instruction.Opcode) {
395 /* TODO what other tex opcode's can be used w/ external eglimgs? */
396 case TGSI_OPCODE_TEX: {
397 unsigned samp = inst->Src[1].Register.Index;
398 if (ctx->lower_nv12 & (1 << samp)) {
399 lower_nv12(tctx, inst);
400 } else if (ctx->lower_iyuv & (1 << samp)) {
401 lower_iyuv(tctx, inst);
402 } else {
403 goto skip;
404 }
405 break;
406 }
407 default:
408 skip:
409 tctx->emit_instruction(tctx, inst);
410 return;
411 }
412 }
413
414 extern const struct tgsi_token *
415 st_tgsi_lower_yuv(const struct tgsi_token *tokens, unsigned free_slots,
416 unsigned lower_nv12, unsigned lower_iyuv)
417 {
418 struct tgsi_yuv_transform ctx;
419 struct tgsi_token *newtoks;
420 int newlen;
421
422 assert(!(lower_nv12 & lower_iyuv)); /* bitmasks should be mutually exclusive */
423
424 // tgsi_dump(tokens, 0);
425 // debug_printf("\n");
426
427 memset(&ctx, 0, sizeof(ctx));
428 ctx.base.transform_instruction = transform_instr;
429 ctx.free_slots = free_slots;
430 ctx.lower_nv12 = lower_nv12;
431 ctx.lower_iyuv = lower_iyuv;
432 tgsi_scan_shader(tokens, &ctx.info);
433
434 /* TODO better job of figuring out how many extra tokens we need..
435 * this is a pain about tgsi_transform :-/
436 */
437 newlen = tgsi_num_tokens(tokens) + 120;
438 newtoks = tgsi_alloc_tokens(newlen);
439 if (!newtoks)
440 return NULL;
441
442 tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);
443
444 // tgsi_dump(newtoks, 0);
445 // debug_printf("\n");
446
447 return newtoks;
448 }