/*
 * Copyright © 2016 Red Hat
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
26 #include "st_tgsi_lower_yuv.h"
27 #include "tgsi/tgsi_transform.h"
28 #include "tgsi/tgsi_scan.h"
29 #include "util/u_debug.h"
31 #include "util/bitscan.h"
33 struct tgsi_yuv_transform
{
34 struct tgsi_transform_context base
;
35 struct tgsi_shader_info info
;
36 struct tgsi_full_src_register imm
[4];
38 struct tgsi_full_src_register src
;
39 struct tgsi_full_dst_register dst
;
44 /* Maps a primary sampler (used for Y) to the U or UV sampler. In
45 * case of 3-plane YUV format, the V plane is next sampler after U.
47 unsigned char sampler_map
[PIPE_MAX_SAMPLERS
][2];
49 bool first_instruction_emitted
;
/* Downcast a generic transform context to the YUV lowering context.
 * Plain pointer cast; performs no checking of its own.
 */
static inline struct tgsi_yuv_transform *
tgsi_yuv_transform(struct tgsi_transform_context *tctx)
{
   return (struct tgsi_yuv_transform *)tctx;
}
62 reg_dst(struct tgsi_full_dst_register
*dst
,
63 const struct tgsi_full_dst_register
*orig_dst
, unsigned wrmask
)
66 dst
->Register
.WriteMask
&= wrmask
;
67 assert(dst
->Register
.WriteMask
);
71 get_swiz(unsigned *swiz
, const struct tgsi_src_register
*src
)
73 swiz
[0] = src
->SwizzleX
;
74 swiz
[1] = src
->SwizzleY
;
75 swiz
[2] = src
->SwizzleZ
;
76 swiz
[3] = src
->SwizzleW
;
80 reg_src(struct tgsi_full_src_register
*src
,
81 const struct tgsi_full_src_register
*orig_src
,
82 unsigned sx
, unsigned sy
, unsigned sz
, unsigned sw
)
85 get_swiz(swiz
, &orig_src
->Register
);
87 src
->Register
.SwizzleX
= swiz
[sx
];
88 src
->Register
.SwizzleY
= swiz
[sy
];
89 src
->Register
.SwizzleZ
= swiz
[sz
];
90 src
->Register
.SwizzleW
= swiz
[sw
];
/* '_' marks a component whose value nobody reads; it aliases X so that the
 * expansion is still a valid swizzle.
 */
#define TGSI_SWIZZLE__ TGSI_SWIZZLE_X  /* don't-care value! */

/* Expand four component letters (X, Y, Z, W or _) into the matching
 * comma-separated TGSI_SWIZZLE_* values, ready to pass to reg_src().
 */
#define SWIZ(x,y,z,w) TGSI_SWIZZLE_ ## x, TGSI_SWIZZLE_ ## y, \
                      TGSI_SWIZZLE_ ## z, TGSI_SWIZZLE_ ## w
97 static inline struct tgsi_full_instruction
98 tex_instruction(unsigned samp
)
100 struct tgsi_full_instruction inst
;
102 inst
= tgsi_default_full_instruction();
103 inst
.Instruction
.Opcode
= TGSI_OPCODE_TEX
;
104 inst
.Instruction
.Texture
= 1;
105 inst
.Texture
.Texture
= TGSI_TEXTURE_2D
;
106 inst
.Instruction
.NumDstRegs
= 1;
107 inst
.Instruction
.NumSrcRegs
= 2;
108 inst
.Src
[1].Register
.File
= TGSI_FILE_SAMPLER
;
109 inst
.Src
[1].Register
.Index
= samp
;
114 static inline struct tgsi_full_instruction
115 mov_instruction(void)
117 struct tgsi_full_instruction inst
;
119 inst
= tgsi_default_full_instruction();
120 inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
121 inst
.Instruction
.Saturate
= 0;
122 inst
.Instruction
.NumDstRegs
= 1;
123 inst
.Instruction
.NumSrcRegs
= 1;
128 static inline struct tgsi_full_instruction
129 dp3_instruction(void)
131 struct tgsi_full_instruction inst
;
133 inst
= tgsi_default_full_instruction();
134 inst
.Instruction
.Opcode
= TGSI_OPCODE_DP3
;
135 inst
.Instruction
.NumDstRegs
= 1;
136 inst
.Instruction
.NumSrcRegs
= 2;
144 emit_immed(struct tgsi_transform_context
*tctx
, int idx
,
145 float x
, float y
, float z
, float w
)
147 struct tgsi_yuv_transform
*ctx
= tgsi_yuv_transform(tctx
);
148 struct tgsi_shader_info
*info
= &ctx
->info
;
149 struct tgsi_full_immediate immed
;
151 immed
= tgsi_default_full_immediate();
152 immed
.Immediate
.NrTokens
= 1 + 4; /* one for the token itself */
153 immed
.u
[0].Float
= x
;
154 immed
.u
[1].Float
= y
;
155 immed
.u
[2].Float
= z
;
156 immed
.u
[3].Float
= w
;
157 tctx
->emit_immediate(tctx
, &immed
);
159 ctx
->imm
[idx
].Register
.File
= TGSI_FILE_IMMEDIATE
;
160 ctx
->imm
[idx
].Register
.Index
= info
->immediate_count
+ idx
;
161 ctx
->imm
[idx
].Register
.SwizzleX
= TGSI_SWIZZLE_X
;
162 ctx
->imm
[idx
].Register
.SwizzleY
= TGSI_SWIZZLE_Y
;
163 ctx
->imm
[idx
].Register
.SwizzleZ
= TGSI_SWIZZLE_Z
;
164 ctx
->imm
[idx
].Register
.SwizzleW
= TGSI_SWIZZLE_W
;
168 emit_samp(struct tgsi_transform_context
*tctx
, unsigned samp
)
170 tgsi_transform_sampler_decl(tctx
, samp
);
171 tgsi_transform_sampler_view_decl(tctx
, samp
, PIPE_TEXTURE_2D
,
172 TGSI_RETURN_TYPE_FLOAT
);
175 /* Emit extra declarations we need:
176 * + 2 TEMP to hold intermediate results
177 * + 1 (for 2-plane YUV) or 2 (for 3-plane YUV) extra samplers per
178 * lowered YUV sampler
179 * + extra immediates for doing CSC
182 emit_decls(struct tgsi_transform_context
*tctx
)
184 struct tgsi_yuv_transform
*ctx
= tgsi_yuv_transform(tctx
);
185 struct tgsi_shader_info
*info
= &ctx
->info
;
186 unsigned mask
, tempbase
, i
;
187 struct tgsi_full_declaration decl
;
190 * Declare immediates for CSC conversion:
193 /* ITU-R BT.601 conversion */
194 emit_immed(tctx
, 0, 1.164f
, 0.000f
, 1.596f
, 0.0f
);
195 emit_immed(tctx
, 1, 1.164f
, -0.392f
, -0.813f
, 0.0f
);
196 emit_immed(tctx
, 2, 1.164f
, 2.017f
, 0.000f
, 0.0f
);
197 emit_immed(tctx
, 3, 0.0625f
, 0.500f
, 0.500f
, 1.0f
);
200 * Declare extra samplers / sampler-views:
203 mask
= ctx
->lower_nv12
| ctx
->lower_iyuv
;
205 unsigned extra
, y_samp
= u_bit_scan(&mask
);
207 extra
= u_bit_scan(&ctx
->free_slots
);
208 ctx
->sampler_map
[y_samp
][0] = extra
;
209 emit_samp(tctx
, extra
);
211 if (ctx
->lower_iyuv
& (1 << y_samp
)) {
212 extra
= u_bit_scan(&ctx
->free_slots
);
213 ctx
->sampler_map
[y_samp
][1] = extra
;
214 emit_samp(tctx
, extra
);
219 * Declare extra temp:
222 tempbase
= info
->file_max
[TGSI_FILE_TEMPORARY
] + 1;
224 for (i
= 0; i
< 2; i
++) {
225 decl
= tgsi_default_full_declaration();
226 decl
.Declaration
.File
= TGSI_FILE_TEMPORARY
;
227 decl
.Range
.First
= decl
.Range
.Last
= tempbase
+ i
;
228 tctx
->emit_declaration(tctx
, &decl
);
230 ctx
->tmp
[i
].src
.Register
.File
= TGSI_FILE_TEMPORARY
;
231 ctx
->tmp
[i
].src
.Register
.Index
= tempbase
+ i
;
232 ctx
->tmp
[i
].src
.Register
.SwizzleX
= TGSI_SWIZZLE_X
;
233 ctx
->tmp
[i
].src
.Register
.SwizzleY
= TGSI_SWIZZLE_Y
;
234 ctx
->tmp
[i
].src
.Register
.SwizzleZ
= TGSI_SWIZZLE_Z
;
235 ctx
->tmp
[i
].src
.Register
.SwizzleW
= TGSI_SWIZZLE_W
;
237 ctx
->tmp
[i
].dst
.Register
.File
= TGSI_FILE_TEMPORARY
;
238 ctx
->tmp
[i
].dst
.Register
.Index
= tempbase
+ i
;
239 ctx
->tmp
[i
].dst
.Register
.WriteMask
= TGSI_WRITEMASK_XYZW
;
243 /* call with YUV in tmpA.xyz */
245 yuv_to_rgb(struct tgsi_transform_context
*tctx
,
246 struct tgsi_full_dst_register
*dst
)
248 struct tgsi_yuv_transform
*ctx
= tgsi_yuv_transform(tctx
);
249 struct tgsi_full_instruction inst
;
252 * IMM[0] FLT32 { 1.164, 0.000, 1.596, 0.0 }
253 * IMM[1] FLT32 { 1.164, -0.392, -0.813, 0.0 }
254 * IMM[2] FLT32 { 1.164, 2.017, 0.000, 0.0 }
255 * IMM[3] FLT32 { 0.0625, 0.500, 0.500, 1.0 }
258 /* SUB tmpA.xyz, tmpA, imm[3] */
259 inst
= tgsi_default_full_instruction();
260 inst
.Instruction
.Opcode
= TGSI_OPCODE_ADD
;
261 inst
.Instruction
.Saturate
= 0;
262 inst
.Instruction
.NumDstRegs
= 1;
263 inst
.Instruction
.NumSrcRegs
= 2;
264 reg_dst(&inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_XYZ
);
265 reg_src(&inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(X
, Y
, Z
, _
));
266 reg_src(&inst
.Src
[1], &ctx
->imm
[3], SWIZ(X
, Y
, Z
, _
));
267 inst
.Src
[1].Register
.Negate
= 1;
268 tctx
->emit_instruction(tctx
, &inst
);
270 /* DP3 dst.x, tmpA, imm[0] */
271 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_X
) {
272 inst
= dp3_instruction();
273 reg_dst(&inst
.Dst
[0], dst
, TGSI_WRITEMASK_X
);
274 reg_src(&inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(X
, Y
, Z
, W
));
275 reg_src(&inst
.Src
[1], &ctx
->imm
[0], SWIZ(X
, Y
, Z
, W
));
276 tctx
->emit_instruction(tctx
, &inst
);
279 /* DP3 dst.y, tmpA, imm[1] */
280 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
281 inst
= dp3_instruction();
282 reg_dst(&inst
.Dst
[0], dst
, TGSI_WRITEMASK_Y
);
283 reg_src(&inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(X
, Y
, Z
, W
));
284 reg_src(&inst
.Src
[1], &ctx
->imm
[1], SWIZ(X
, Y
, Z
, W
));
285 tctx
->emit_instruction(tctx
, &inst
);
288 /* DP3 dst.z, tmpA, imm[2] */
289 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Z
) {
290 inst
= dp3_instruction();
291 reg_dst(&inst
.Dst
[0], dst
, TGSI_WRITEMASK_Z
);
292 reg_src(&inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(X
, Y
, Z
, W
));
293 reg_src(&inst
.Src
[1], &ctx
->imm
[2], SWIZ(X
, Y
, Z
, W
));
294 tctx
->emit_instruction(tctx
, &inst
);
297 /* MOV dst.w, imm[0].x */
298 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_W
) {
299 inst
= mov_instruction();
300 reg_dst(&inst
.Dst
[0], dst
, TGSI_WRITEMASK_W
);
301 reg_src(&inst
.Src
[0], &ctx
->imm
[3], SWIZ(_
, _
, _
, W
));
302 tctx
->emit_instruction(tctx
, &inst
);
307 lower_nv12(struct tgsi_transform_context
*tctx
,
308 struct tgsi_full_instruction
*originst
)
310 struct tgsi_yuv_transform
*ctx
= tgsi_yuv_transform(tctx
);
311 struct tgsi_full_instruction inst
;
312 struct tgsi_full_src_register
*coord
= &originst
->Src
[0];
313 unsigned samp
= originst
->Src
[1].Register
.Index
;
316 * TEX tempA.x, coord, texture[samp], 2D;
318 inst
= tex_instruction(samp
);
319 reg_dst(&inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
320 reg_src(&inst
.Src
[0], coord
, SWIZ(X
, Y
, Z
, W
));
321 tctx
->emit_instruction(tctx
, &inst
);
324 * TEX tempB.xy, coord, texture[sampler_map[samp][0]], 2D;
325 * MOV tempA.yz, tempB._xy_
327 inst
= tex_instruction(ctx
->sampler_map
[samp
][0]);
328 reg_dst(&inst
.Dst
[0], &ctx
->tmp
[B
].dst
, TGSI_WRITEMASK_XY
);
329 reg_src(&inst
.Src
[0], coord
, SWIZ(X
, Y
, Z
, W
));
330 tctx
->emit_instruction(tctx
, &inst
);
332 inst
= mov_instruction();
333 reg_dst(&inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_YZ
);
334 reg_src(&inst
.Src
[0], &ctx
->tmp
[B
].src
, SWIZ(_
, X
, Y
, _
));
335 tctx
->emit_instruction(tctx
, &inst
);
337 /* At this point, we have YUV in tempA.xyz, rest is common: */
338 yuv_to_rgb(tctx
, &originst
->Dst
[0]);
342 lower_iyuv(struct tgsi_transform_context
*tctx
,
343 struct tgsi_full_instruction
*originst
)
345 struct tgsi_yuv_transform
*ctx
= tgsi_yuv_transform(tctx
);
346 struct tgsi_full_instruction inst
;
347 struct tgsi_full_src_register
*coord
= &originst
->Src
[0];
348 unsigned samp
= originst
->Src
[1].Register
.Index
;
351 * TEX tempA.x, coord, texture[samp], 2D;
353 inst
= tex_instruction(samp
);
354 reg_dst(&inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
355 reg_src(&inst
.Src
[0], coord
, SWIZ(X
, Y
, Z
, W
));
356 tctx
->emit_instruction(tctx
, &inst
);
359 * TEX tempB.x, coord, texture[sampler_map[samp][0]], 2D;
360 * MOV tempA.y, tempB._x__
362 inst
= tex_instruction(ctx
->sampler_map
[samp
][0]);
363 reg_dst(&inst
.Dst
[0], &ctx
->tmp
[B
].dst
, TGSI_WRITEMASK_X
);
364 reg_src(&inst
.Src
[0], coord
, SWIZ(X
, Y
, Z
, W
));
365 tctx
->emit_instruction(tctx
, &inst
);
367 inst
= mov_instruction();
368 reg_dst(&inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
369 reg_src(&inst
.Src
[0], &ctx
->tmp
[B
].src
, SWIZ(_
, X
, _
, _
));
370 tctx
->emit_instruction(tctx
, &inst
);
373 * TEX tempB.x, coord, texture[sampler_map[samp][1]], 2D;
374 * MOV tempA.z, tempB.__x_
376 inst
= tex_instruction(ctx
->sampler_map
[samp
][1]);
377 reg_dst(&inst
.Dst
[0], &ctx
->tmp
[B
].dst
, TGSI_WRITEMASK_X
);
378 reg_src(&inst
.Src
[0], coord
, SWIZ(X
, Y
, Z
, W
));
379 tctx
->emit_instruction(tctx
, &inst
);
381 inst
= mov_instruction();
382 reg_dst(&inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Z
);
383 reg_src(&inst
.Src
[0], &ctx
->tmp
[B
].src
, SWIZ(_
, _
, X
, _
));
384 tctx
->emit_instruction(tctx
, &inst
);
386 /* At this point, we have YUV in tempA.xyz, rest is common: */
387 yuv_to_rgb(tctx
, &originst
->Dst
[0]);
391 transform_instr(struct tgsi_transform_context
*tctx
,
392 struct tgsi_full_instruction
*inst
)
394 struct tgsi_yuv_transform
*ctx
= tgsi_yuv_transform(tctx
);
396 if (!ctx
->first_instruction_emitted
) {
398 ctx
->first_instruction_emitted
= true;
401 switch (inst
->Instruction
.Opcode
) {
402 /* TODO what other tex opcode's can be used w/ external eglimgs? */
403 case TGSI_OPCODE_TEX
: {
404 unsigned samp
= inst
->Src
[1].Register
.Index
;
405 if (ctx
->lower_nv12
& (1 << samp
)) {
406 lower_nv12(tctx
, inst
);
407 } else if (ctx
->lower_iyuv
& (1 << samp
)) {
408 lower_iyuv(tctx
, inst
);
416 tctx
->emit_instruction(tctx
, inst
);
421 extern const struct tgsi_token
*
422 st_tgsi_lower_yuv(const struct tgsi_token
*tokens
, unsigned free_slots
,
423 unsigned lower_nv12
, unsigned lower_iyuv
)
425 struct tgsi_yuv_transform ctx
;
426 struct tgsi_token
*newtoks
;
429 assert(!(lower_nv12
& lower_iyuv
)); /* bitmasks should be mutually exclusive */
431 // tgsi_dump(tokens, 0);
432 // debug_printf("\n");
434 memset(&ctx
, 0, sizeof(ctx
));
435 ctx
.base
.transform_instruction
= transform_instr
;
436 ctx
.free_slots
= free_slots
;
437 ctx
.lower_nv12
= lower_nv12
;
438 ctx
.lower_iyuv
= lower_iyuv
;
439 tgsi_scan_shader(tokens
, &ctx
.info
);
441 /* TODO better job of figuring out how many extra tokens we need..
442 * this is a pain about tgsi_transform :-/
444 newlen
= tgsi_num_tokens(tokens
) + 300;
445 newtoks
= tgsi_alloc_tokens(newlen
);
449 tgsi_transform_shader(tokens
, newtoks
, newlen
, &ctx
.base
);
451 // tgsi_dump(newtoks, 0);
452 // debug_printf("\n");