--- /dev/null
+/*
+ * Copyright © 2016 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdbool.h>
+
+#include "st_tgsi_lower_yuv.h"
+#include "tgsi/tgsi_transform.h"
+#include "tgsi/tgsi_scan.h"
+#include "tgsi/tgsi_dump.h"
+#include "util/u_debug.h"
+
+#include "util/bitscan.h"
+
+/* State for one run of the YUV lowering pass.
+ *
+ * The transform context must stay the first member: the tgsi_yuv_transform()
+ * helper recovers this struct by casting the context pointer that is handed
+ * to the transform callbacks.
+ */
+struct tgsi_yuv_transform {
+ struct tgsi_transform_context base;
+ /* Scan results for the input shader, filled in by tgsi_scan_shader(). */
+ struct tgsi_shader_info info;
+ /* Source operands referring to the four CSC immediates (see emit_immed()). */
+ struct tgsi_full_src_register imm[4];
+ /* Two scratch temporaries, each available as a source and as a destination
+  * operand (set up in emit_decls()).
+  */
+ struct {
+ struct tgsi_full_src_register src;
+ struct tgsi_full_dst_register dst;
+ } tmp[2];
+/* Indices into tmp[]: A collects the YUV triple, B receives chroma fetches. */
+#define A 0
+#define B 1
+
+ /* Maps a primary sampler (used for Y) to the U or UV sampler. In
+ * case of 3-plane YUV format, the V plane is next sampler after U.
+ */
+ unsigned char sampler_map[PIPE_MAX_SAMPLERS][2];
+
+ /* Set once emit_decls() has run, i.e. when the first instruction is seen. */
+ bool first_instruction_emitted;
+ /* Bitmask of sampler slots still available for the extra planes; bits are
+  * consumed by emit_decls().
+  */
+ unsigned free_slots;
+ /* Bitmask of sampler indices to lower as 2-plane YUV (Y + UV). */
+ unsigned lower_nv12;
+ /* Bitmask of sampler indices to lower as 3-plane YUV (Y + U + V). */
+ unsigned lower_iyuv;
+};
+
+/* Downcast a generic transform context to the YUV lowering state that
+ * embeds it as its first member.
+ */
+static inline struct tgsi_yuv_transform *
+tgsi_yuv_transform(struct tgsi_transform_context *tctx)
+{
+   struct tgsi_yuv_transform *yuv = (struct tgsi_yuv_transform *)tctx;
+
+   return yuv;
+}
+
+/* Initialize *dst as a copy of *orig_dst whose write mask is narrowed to
+ * the channels that are also set in wrmask.  The narrowed mask is asserted
+ * to be non-empty.
+ */
+static void
+reg_dst(struct tgsi_full_dst_register *dst,
+        const struct tgsi_full_dst_register *orig_dst, unsigned wrmask)
+{
+   const unsigned narrowed = orig_dst->Register.WriteMask & wrmask;
+
+   *dst = *orig_dst;
+   dst->Register.WriteMask = narrowed;
+   assert(dst->Register.WriteMask);
+}
+
+/* Copy the four swizzle selectors of src into swiz[0..3], in X, Y, Z, W
+ * order.
+ */
+static inline void
+get_swiz(unsigned *swiz, const struct tgsi_src_register *src)
+{
+   const unsigned selectors[4] = {
+      src->SwizzleX, src->SwizzleY, src->SwizzleZ, src->SwizzleW,
+   };
+   unsigned chan;
+
+   for (chan = 0; chan < 4; chan++)
+      swiz[chan] = selectors[chan];
+}
+
+/* Initialize *src as a copy of *orig_src with an additional swizzle applied
+ * on top of the one orig_src already carries: component N of the result
+ * selects whatever orig_src selected for component sx/sy/sz/sw.
+ */
+static void
+reg_src(struct tgsi_full_src_register *src,
+        const struct tgsi_full_src_register *orig_src,
+        unsigned sx, unsigned sy, unsigned sz, unsigned sw)
+{
+   unsigned base_swiz[4];
+
+   /* Capture the incoming swizzle before the copy below, so the lookup
+    * always goes through the original selectors.
+    */
+   get_swiz(base_swiz, &orig_src->Register);
+
+   *src = *orig_src;
+
+   src->Register.SwizzleX = base_swiz[sx];
+   src->Register.SwizzleY = base_swiz[sy];
+   src->Register.SwizzleZ = base_swiz[sz];
+   src->Register.SwizzleW = base_swiz[sw];
+}
+
+/* Placeholder selector for swizzle components that are not consumed by the
+ * instruction being built; it is simply an alias for TGSI_SWIZZLE_X.
+ */
+#define TGSI_SWIZZLE__ TGSI_SWIZZLE_X /* don't-care value! */
+/* Expands four channel letters (X, Y, Z, W, or _ for don't-care) into the
+ * four swizzle arguments taken by reg_src().
+ */
+#define SWIZ(x,y,z,w) TGSI_SWIZZLE_ ## x, TGSI_SWIZZLE_ ## y, \
+ TGSI_SWIZZLE_ ## z, TGSI_SWIZZLE_ ## w
+
+/* Build a 2D TEX instruction template that reads from sampler `samp`.
+ * The destination and the coordinate source are left for the caller to
+ * fill in.
+ */
+static inline struct tgsi_full_instruction
+tex_instruction(unsigned samp)
+{
+   struct tgsi_full_instruction tex = tgsi_default_full_instruction();
+
+   tex.Instruction.Opcode = TGSI_OPCODE_TEX;
+   tex.Instruction.NumDstRegs = 1;
+   tex.Instruction.NumSrcRegs = 2;
+
+   /* texture target */
+   tex.Instruction.Texture = 1;
+   tex.Texture.Texture = TGSI_TEXTURE_2D;
+
+   /* second source operand names the sampler */
+   tex.Src[1].Register.File = TGSI_FILE_SAMPLER;
+   tex.Src[1].Register.Index = samp;
+
+   return tex;
+}
+
+/* Build a non-saturating MOV instruction template (one destination, one
+ * source); the caller fills in the operands.
+ */
+static inline struct tgsi_full_instruction
+mov_instruction(void)
+{
+   struct tgsi_full_instruction mov = tgsi_default_full_instruction();
+
+   mov.Instruction.Opcode = TGSI_OPCODE_MOV;
+   mov.Instruction.NumDstRegs = 1;
+   mov.Instruction.NumSrcRegs = 1;
+   mov.Instruction.Saturate = 0;
+
+   return mov;
+}
+
+/* Build a DP3 instruction template (one destination, two sources); the
+ * caller fills in the operands.
+ */
+static inline struct tgsi_full_instruction
+dp3_instruction(void)
+{
+   struct tgsi_full_instruction dp3 = tgsi_default_full_instruction();
+
+   dp3.Instruction.Opcode = TGSI_OPCODE_DP3;
+   dp3.Instruction.NumDstRegs = 1;
+   dp3.Instruction.NumSrcRegs = 2;
+
+   return dp3;
+}
+
+
+
+/* Emit one float vec4 immediate and record, in ctx->imm[idx], a source
+ * operand that refers to it.  The recorded index is idx past the
+ * immediates counted in the scanned input shader, so immediates are
+ * expected to be emitted as idx 0, 1, 2, ... in that order.
+ */
+static void
+emit_immed(struct tgsi_transform_context *tctx, int idx,
+           float x, float y, float z, float w)
+{
+   struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
+   struct tgsi_full_src_register *reg = &ctx->imm[idx];
+   const float values[4] = { x, y, z, w };
+   struct tgsi_full_immediate immed = tgsi_default_full_immediate();
+   unsigned chan;
+
+   immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */
+   for (chan = 0; chan < 4; chan++)
+      immed.u[chan].Float = values[chan];
+   tctx->emit_immediate(tctx, &immed);
+
+   /* identity-swizzled reference to the immediate just emitted */
+   reg->Register.File = TGSI_FILE_IMMEDIATE;
+   reg->Register.Index = ctx->info.immediate_count + idx;
+   reg->Register.SwizzleX = TGSI_SWIZZLE_X;
+   reg->Register.SwizzleY = TGSI_SWIZZLE_Y;
+   reg->Register.SwizzleZ = TGSI_SWIZZLE_Z;
+   reg->Register.SwizzleW = TGSI_SWIZZLE_W;
+}
+
+/* Declare sampler slot `samp` together with a matching 2D, float-returning
+ * sampler view.
+ */
+static void
+emit_samp(struct tgsi_transform_context *tctx, unsigned samp)
+{
+   /* the sampler itself ... */
+   tgsi_transform_sampler_decl(tctx, samp);
+
+   /* ... and the view it samples from */
+   tgsi_transform_sampler_view_decl(tctx, samp,
+                                    PIPE_TEXTURE_2D,
+                                    TGSI_RETURN_TYPE_FLOAT);
+}
+
+/* Emit extra declarations we need:
+ *  + 2 TEMP to hold intermediate results
+ *  + 1 (for 2-plane YUV) or 2 (for 3-plane YUV) extra samplers per
+ *    lowered YUV sampler
+ *  + extra immediates for doing CSC
+ *
+ * Also fills in ctx->imm[], ctx->tmp[] and ctx->sampler_map[] so the
+ * lowering code can refer to what was declared here.  Sampler slots are
+ * taken from ctx->free_slots, which is consumed in the process.
+ */
+static void
+emit_decls(struct tgsi_transform_context *tctx)
+{
+   struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
+   struct tgsi_shader_info *info = &ctx->info;
+   unsigned mask, tempbase, i;
+   struct tgsi_full_declaration decl;
+
+   /*
+    * Declare immediates for CSC conversion:
+    */
+
+   /* ITU-R BT.601 conversion */
+   emit_immed(tctx, 0, 1.164, 0.000, 1.596, 0.0);
+   emit_immed(tctx, 1, 1.164, -0.392, -0.813, 0.0);
+   emit_immed(tctx, 2, 1.164, 2.017, 0.000, 0.0);
+   emit_immed(tctx, 3, 0.0625, 0.500, 0.500, 1.0);
+
+   /*
+    * Declare extra samplers / sampler-views:
+    */
+
+   mask = ctx->lower_nv12 | ctx->lower_iyuv;
+   while (mask) {
+      unsigned extra, y_samp = u_bit_scan(&mask);
+
+      /* The caller has to hand us one free slot per extra plane; scanning
+       * an empty mask would not yield a usable sampler index.
+       */
+      assert(ctx->free_slots);
+      extra = u_bit_scan(&ctx->free_slots);
+      ctx->sampler_map[y_samp][0] = extra;
+      emit_samp(tctx, extra);
+
+      /* 3-plane formats need a second extra sampler for the V plane.
+       * Shift an unsigned one so that sampler 31 is handled, too.
+       */
+      if (ctx->lower_iyuv & (1u << y_samp)) {
+         assert(ctx->free_slots);
+         extra = u_bit_scan(&ctx->free_slots);
+         ctx->sampler_map[y_samp][1] = extra;
+         emit_samp(tctx, extra);
+      }
+   }
+
+   /*
+    * Declare extra temp:
+    */
+
+   /* first index past the temporaries the input shader already uses */
+   tempbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
+
+   for (i = 0; i < 2; i++) {
+      decl = tgsi_default_full_declaration();
+      decl.Declaration.File = TGSI_FILE_TEMPORARY;
+      decl.Range.First = decl.Range.Last = tempbase + i;
+      tctx->emit_declaration(tctx, &decl);
+
+      /* identity-swizzled source operand for the new temporary */
+      ctx->tmp[i].src.Register.File = TGSI_FILE_TEMPORARY;
+      ctx->tmp[i].src.Register.Index = tempbase + i;
+      ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X;
+      ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y;
+      ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z;
+      ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W;
+
+      /* fully writable destination operand for the same temporary */
+      ctx->tmp[i].dst.Register.File = TGSI_FILE_TEMPORARY;
+      ctx->tmp[i].dst.Register.Index = tempbase + i;
+      ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW;
+   }
+}
+
+/* Convert the YUV triple held in tmpA.xyz to RGB and store it in dst.
+ *
+ * call with YUV in tmpA.xyz
+ *
+ * dst is the destination operand of the TEX instruction being lowered.
+ * Only the channels enabled in its write mask are written: a channel that
+ * the original instruction masked off must not be touched, and building an
+ * instruction for it would leave reg_dst() with an empty write mask.
+ */
+static void
+yuv_to_rgb(struct tgsi_transform_context *tctx,
+           struct tgsi_full_dst_register *dst)
+{
+   static const unsigned rgb_mask[3] = {
+      TGSI_WRITEMASK_X, TGSI_WRITEMASK_Y, TGSI_WRITEMASK_Z,
+   };
+   struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
+   const unsigned wrmask = dst->Register.WriteMask;
+   struct tgsi_full_instruction inst;
+   unsigned chan;
+
+   /*
+    * IMM[0] FLT32 { 1.164, 0.000, 1.596, 0.0 }
+    * IMM[1] FLT32 { 1.164, -0.392, -0.813, 0.0 }
+    * IMM[2] FLT32 { 1.164, 2.017, 0.000, 0.0 }
+    * IMM[3] FLT32 { 0.0625, 0.500, 0.500, 1.0 }
+    */
+
+   /* The offset subtraction only feeds the dot products below, so it is
+    * needed only when at least one colour channel is written.
+    */
+   if (wrmask & TGSI_WRITEMASK_XYZ) {
+      /* SUB tmpA.xyz, tmpA, imm[3] */
+      inst = tgsi_default_full_instruction();
+      inst.Instruction.Opcode = TGSI_OPCODE_SUB;
+      inst.Instruction.Saturate = 0;
+      inst.Instruction.NumDstRegs = 1;
+      inst.Instruction.NumSrcRegs = 2;
+      reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZ);
+      reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, _));
+      reg_src(&inst.Src[1], &ctx->imm[3], SWIZ(X, Y, Z, _));
+      tctx->emit_instruction(tctx, &inst);
+   }
+
+   /* One row of the conversion matrix per colour channel:
+    *    DP3 dst.x, tmpA, imm[0]
+    *    DP3 dst.y, tmpA, imm[1]
+    *    DP3 dst.z, tmpA, imm[2]
+    */
+   for (chan = 0; chan < 3; chan++) {
+      if (!(wrmask & rgb_mask[chan]))
+         continue;
+
+      inst = dp3_instruction();
+      reg_dst(&inst.Dst[0], dst, rgb_mask[chan]);
+      reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
+      reg_src(&inst.Src[1], &ctx->imm[chan], SWIZ(X, Y, Z, W));
+      tctx->emit_instruction(tctx, &inst);
+   }
+
+   if (wrmask & TGSI_WRITEMASK_W) {
+      /* MOV dst.w, imm[3].w   (alpha is the constant 1.0) */
+      inst = mov_instruction();
+      reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_W);
+      reg_src(&inst.Src[0], &ctx->imm[3], SWIZ(_, _, _, W));
+      tctx->emit_instruction(tctx, &inst);
+   }
+}
+
+/* Replace a TEX on a 2-plane (Y + interleaved UV) sampler: fetch luma and
+ * chroma from their respective samplers, gather them as YUV in tempA.xyz
+ * and let yuv_to_rgb() write the converted colour to the original
+ * destination.
+ */
+static void
+lower_nv12(struct tgsi_transform_context *tctx,
+           struct tgsi_full_instruction *originst)
+{
+   struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
+   const unsigned y_samp = originst->Src[1].Register.Index;
+   const unsigned uv_samp = ctx->sampler_map[y_samp][0];
+   struct tgsi_full_src_register *texcoord = &originst->Src[0];
+   struct tgsi_full_instruction op;
+
+   /* Y plane:
+    *    TEX tempA.x, coord, texture[samp], 2D;
+    */
+   op = tex_instruction(y_samp);
+   reg_dst(&op.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
+   reg_src(&op.Src[0], texcoord, SWIZ(X, Y, Z, W));
+   tctx->emit_instruction(tctx, &op);
+
+   /* UV plane:
+    *    TEX tempB.xy, coord, texture[sampler_map[samp][0]], 2D;
+    */
+   op = tex_instruction(uv_samp);
+   reg_dst(&op.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XY);
+   reg_src(&op.Src[0], texcoord, SWIZ(X, Y, Z, W));
+   tctx->emit_instruction(tctx, &op);
+
+   /* Shift chroma next to luma:
+    *    MOV tempA.yz, tempB._xy_
+    */
+   op = mov_instruction();
+   reg_dst(&op.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_YZ);
+   reg_src(&op.Src[0], &ctx->tmp[B].src, SWIZ(_, X, Y, _));
+   tctx->emit_instruction(tctx, &op);
+
+   /* tempA.xyz now holds YUV; the conversion is shared with lower_iyuv() */
+   yuv_to_rgb(tctx, &originst->Dst[0]);
+}
+
+/* Replace a TEX on a 3-plane (Y + U + V) sampler: fetch each plane from
+ * its own sampler, gather the results as YUV in tempA.xyz and let
+ * yuv_to_rgb() write the converted colour to the original destination.
+ */
+static void
+lower_iyuv(struct tgsi_transform_context *tctx,
+           struct tgsi_full_instruction *originst)
+{
+   struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
+   const unsigned y_samp = originst->Src[1].Register.Index;
+   struct tgsi_full_src_register *texcoord = &originst->Src[0];
+   struct tgsi_full_instruction op;
+   unsigned plane;
+
+   /* Y plane:
+    *    TEX tempA.x, coord, texture[samp], 2D;
+    */
+   op = tex_instruction(y_samp);
+   reg_dst(&op.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
+   reg_src(&op.Src[0], texcoord, SWIZ(X, Y, Z, W));
+   tctx->emit_instruction(tctx, &op);
+
+   /* U plane (plane 0) lands in tempA.y, V plane (plane 1) in tempA.z:
+    *    TEX tempB.x, coord, texture[sampler_map[samp][plane]], 2D;
+    *    MOV tempA.y|z, tempB.xxxx
+    */
+   for (plane = 0; plane < 2; plane++) {
+      const unsigned target = plane ? TGSI_WRITEMASK_Z : TGSI_WRITEMASK_Y;
+
+      op = tex_instruction(ctx->sampler_map[y_samp][plane]);
+      reg_dst(&op.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
+      reg_src(&op.Src[0], texcoord, SWIZ(X, Y, Z, W));
+      tctx->emit_instruction(tctx, &op);
+
+      /* every component replicates tempB.x; only the one channel enabled
+       * in the write mask is actually stored
+       */
+      op = mov_instruction();
+      reg_dst(&op.Dst[0], &ctx->tmp[A].dst, target);
+      reg_src(&op.Src[0], &ctx->tmp[B].src, SWIZ(X, X, X, X));
+      tctx->emit_instruction(tctx, &op);
+   }
+
+   /* tempA.xyz now holds YUV; the conversion is shared with lower_nv12() */
+   yuv_to_rgb(tctx, &originst->Dst[0]);
+}
+
+/* Per-instruction transform callback.
+ *
+ * Emits the extra declarations ahead of the first instruction, replaces
+ * TEX instructions whose sampler is flagged in lower_nv12 / lower_iyuv by
+ * the lowered sequence, and passes every other instruction through
+ * unchanged.
+ */
+static void
+transform_instr(struct tgsi_transform_context *tctx,
+                struct tgsi_full_instruction *inst)
+{
+   struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
+
+   if (!ctx->first_instruction_emitted) {
+      emit_decls(tctx);
+      ctx->first_instruction_emitted = true;
+   }
+
+   /* TODO what other tex opcode's can be used w/ external eglimgs? */
+   if (inst->Instruction.Opcode == TGSI_OPCODE_TEX) {
+      const unsigned samp = inst->Src[1].Register.Index;
+
+      if (ctx->lower_nv12 & (1 << samp)) {
+         lower_nv12(tctx, inst);
+         return;
+      }
+
+      if (ctx->lower_iyuv & (1 << samp)) {
+         lower_iyuv(tctx, inst);
+         return;
+      }
+   }
+
+   /* not something we lower: emit as-is */
+   tctx->emit_instruction(tctx, inst);
+}
+
+/**
+ * Rewrite a shader so that TEX instructions on multi-planar YUV samplers
+ * are replaced by one fetch per plane followed by a YUV -> RGB conversion.
+ *
+ * \param tokens      input shader
+ * \param free_slots  bitmask of sampler slots that may be claimed for the
+ *                    extra planes
+ * \param lower_nv12  bitmask of samplers to lower as 2-plane YUV (Y + UV)
+ * \param lower_iyuv  bitmask of samplers to lower as 3-plane YUV (Y + U + V);
+ *                    must not overlap lower_nv12
+ * \return  the transformed shader in a buffer obtained from
+ *          tgsi_alloc_tokens(), or NULL if that allocation fails.
+ *          NOTE(review): presumably the caller owns the result and releases
+ *          it with tgsi_free_tokens() -- confirm against the callers.
+ */
+extern const struct tgsi_token *
+st_tgsi_lower_yuv(const struct tgsi_token *tokens, unsigned free_slots,
+                  unsigned lower_nv12, unsigned lower_iyuv)
+{
+   /* Output growth estimates, in tokens.  All of them are deliberately
+    * generous upper bounds; over-allocating is harmless.
+    */
+   enum {
+      /* four vec4 immediates, 1 + 4 tokens each (see emit_immed()) */
+      IMMED_TOKENS = 4 * (1 + 4),
+      /* any single declaration emitted by emit_decls() */
+      DECL_TOKENS = 8,
+      /* Growth of one lowered TEX: up to three TEX, two MOV into the
+       * temporary, SUB, three DP3 and a final MOV.  That is about 40
+       * tokens with plain operands; the rest is headroom for coordinate /
+       * destination operands that need extra addressing tokens.
+       */
+      TEX_LOWER_TOKENS = 64
+   };
+   struct tgsi_yuv_transform ctx;
+   struct tgsi_token *newtoks;
+   unsigned mask, num_decls;
+   int newlen;
+
+   assert(!(lower_nv12 & lower_iyuv)); /* bitmasks should be mutually exclusive */
+
+   memset(&ctx, 0, sizeof(ctx));
+   ctx.base.transform_instruction = transform_instr;
+   ctx.free_slots = free_slots;
+   ctx.lower_nv12 = lower_nv12;
+   ctx.lower_iyuv = lower_iyuv;
+   tgsi_scan_shader(tokens, &ctx.info);
+
+   /* Two temporaries, plus a sampler and a sampler-view declaration for
+    * each of (at most) two extra planes per lowered sampler.
+    */
+   num_decls = 2;
+   for (mask = lower_nv12 | lower_iyuv; mask; mask &= mask - 1)
+      num_decls += 2 * 2;
+
+   /* Size the output from what the shader actually contains instead of a
+    * fixed slack: the growth is proportional to the number of TEX
+    * instructions, so a constant is exhausted after a handful of them.
+    * Every TEX is budgeted as if it were lowered.
+    * NOTE(review): confirm how tgsi_transform_shader() reacts when the
+    * output buffer is too small; its result is not checked here.
+    */
+   newlen = tgsi_num_tokens(tokens) +
+            IMMED_TOKENS +
+            DECL_TOKENS * num_decls +
+            TEX_LOWER_TOKENS * ctx.info.opcode_count[TGSI_OPCODE_TEX];
+
+   newtoks = tgsi_alloc_tokens(newlen);
+   if (!newtoks)
+      return NULL;
+
+   tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);
+
+   return newtoks;
+}