From: Rob Clark Date: Fri, 2 Sep 2016 14:42:22 +0000 (-0400) Subject: mesa/st: add lowering pass for YUV samplers X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=c2a60cacd4428c7fd8d6f6160edcde93e7b93284;p=mesa.git mesa/st: add lowering pass for YUV samplers Signed-off-by: Rob Clark --- diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources index 5d51f16db55..c04e0f96469 100644 --- a/src/mesa/Makefile.sources +++ b/src/mesa/Makefile.sources @@ -513,6 +513,8 @@ STATETRACKER_FILES = \ state_tracker/st_scissor.h \ state_tracker/st_texture.c \ state_tracker/st_texture.h \ + state_tracker/st_tgsi_lower_yuv.c \ + state_tracker/st_tgsi_lower_yuv.h \ state_tracker/st_vdpau.c \ state_tracker/st_vdpau.h diff --git a/src/mesa/state_tracker/st_tgsi_lower_yuv.c b/src/mesa/state_tracker/st_tgsi_lower_yuv.c new file mode 100644 index 00000000000..e346b970855 --- /dev/null +++ b/src/mesa/state_tracker/st_tgsi_lower_yuv.c @@ -0,0 +1,447 @@ +/* + * Copyright © 2016 Red Hat + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <string.h>   /* memset */
+
+#include "st_tgsi_lower_yuv.h"
+#include "tgsi/tgsi_transform.h"
+#include "tgsi/tgsi_scan.h"
+#include "tgsi/tgsi_dump.h"
+#include "util/u_debug.h"
+
+#include "util/bitscan.h"
+
+/* State for one run of the YUV lowering pass. */
+struct tgsi_yuv_transform {
+   /* Must stay the first member: tgsi_yuv_transform() recovers this struct
+    * by casting the tgsi_transform_context pointer.
+    */
+   struct tgsi_transform_context base;
+   /* Scan results for the incoming shader (immediate count, max temp). */
+   struct tgsi_shader_info info;
+   /* Source registers for the four CSC immediates emitted by emit_decls(). */
+   struct tgsi_full_src_register imm[4];
+   /* Two scratch temporaries, addressable both as source and destination. */
+   struct {
+      struct tgsi_full_src_register src;
+      struct tgsi_full_dst_register dst;
+   } tmp[2];
+#define A 0
+#define B 1
+
+   /* Maps a primary sampler (used for Y) to its extra plane samplers:
+    * [0] is the U sampler (3-plane) or the interleaved UV sampler
+    * (2-plane), [1] is the V sampler and is only set for 3-plane formats.
+    */
+   unsigned char sampler_map[PIPE_MAX_SAMPLERS][2];
+
+   /* Set once emit_decls() has run (before the first instruction). */
+   bool first_instruction_emitted;
+   /* Bitmask of sampler slots still available for the extra planes. */
+   unsigned free_slots;
+   /* Bitmask of samplers to lower as 2-plane YUV. */
+   unsigned lower_nv12;
+   /* Bitmask of samplers to lower as 3-plane YUV. */
+   unsigned lower_iyuv;
+};
+
+/* Downcast from the generic transform context to our pass state. */
+static inline struct tgsi_yuv_transform *
+tgsi_yuv_transform(struct tgsi_transform_context *tctx)
+{
+   return (struct tgsi_yuv_transform *)tctx;
+}
+
+/* Copy orig_dst into dst, keeping only the channels also set in wrmask.
+ * The resulting write mask must not be empty (asserted), so callers have
+ * to check the mask first when orig_dst may be a partial write.
+ */
+static void
+reg_dst(struct tgsi_full_dst_register *dst,
+        const struct tgsi_full_dst_register *orig_dst, unsigned wrmask)
+{
+   *dst = *orig_dst;
+   dst->Register.WriteMask &= wrmask;
+   assert(dst->Register.WriteMask);
+}
+
+/* Unpack the four swizzle selectors of src into swiz[0..3] (x, y, z, w). */
+static inline void
+get_swiz(unsigned *swiz, const struct tgsi_src_register *src)
+{
+   swiz[0] = src->SwizzleX;
+   swiz[1] = src->SwizzleY;
+   swiz[2] = src->SwizzleZ;
+   swiz[3] = src->SwizzleW;
+}
+
+/* Copy orig_src into src with the swizzle (sx, sy, sz, sw) applied on top
+ * of whatever swizzle orig_src already carries, i.e. the new X channel
+ * selects orig_src's channel number sx, and so on.
+ */
+static void
+reg_src(struct tgsi_full_src_register *src,
+        const struct tgsi_full_src_register *orig_src,
+        unsigned sx, unsigned sy, unsigned sz, unsigned sw)
+{
+   unsigned swiz[4];
+
+   get_swiz(swiz, &orig_src->Register);
+   *src = *orig_src;
+   src->Register.SwizzleX = swiz[sx];
+   src->Register.SwizzleY = swiz[sy];
+   src->Register.SwizzleZ = swiz[sz];
+   src->Register.SwizzleW = swiz[sw];
+}
+
+#define TGSI_SWIZZLE__ TGSI_SWIZZLE_X /* don't-care value! */
+/* Expands to the four swizzle arguments of reg_src(); use '_' for a
+ * channel whose value does not matter.
+ */
+#define SWIZ(x,y,z,w) TGSI_SWIZZLE_ ## x, TGSI_SWIZZLE_ ## y,   \
+                      TGSI_SWIZZLE_ ## z, TGSI_SWIZZLE_ ## w
+
+/* Build a 2D TEX instruction reading sampler 'samp'.  Dst[0] and Src[0]
+ * (the coordinate) are left for the caller to fill in.
+ */
+static inline struct tgsi_full_instruction
+tex_instruction(unsigned samp)
+{
+   struct tgsi_full_instruction inst;
+
+   inst = tgsi_default_full_instruction();
+   inst.Instruction.Opcode = TGSI_OPCODE_TEX;
+   inst.Instruction.Texture = 1;
+   inst.Texture.Texture = TGSI_TEXTURE_2D;
+   inst.Instruction.NumDstRegs = 1;
+   inst.Instruction.NumSrcRegs = 2;
+   inst.Src[1].Register.File  = TGSI_FILE_SAMPLER;
+   inst.Src[1].Register.Index = samp;
+
+   return inst;
+}
+
+/* Build a non-saturating MOV; the caller fills in Dst[0] and Src[0]. */
+static inline struct tgsi_full_instruction
+mov_instruction(void)
+{
+   struct tgsi_full_instruction inst;
+
+   inst = tgsi_default_full_instruction();
+   inst.Instruction.Opcode = TGSI_OPCODE_MOV;
+   inst.Instruction.Saturate = 0;
+   inst.Instruction.NumDstRegs = 1;
+   inst.Instruction.NumSrcRegs = 1;
+
+   return inst;
+}
+
+/* Build a DP3; the caller fills in Dst[0], Src[0] and Src[1]. */
+static inline struct tgsi_full_instruction
+dp3_instruction(void)
+{
+   struct tgsi_full_instruction inst;
+
+   inst = tgsi_default_full_instruction();
+   inst.Instruction.Opcode = TGSI_OPCODE_DP3;
+   inst.Instruction.NumDstRegs = 1;
+   inst.Instruction.NumSrcRegs = 2;
+
+   return inst;
+}
+
+
+
+/* Emit one vec4 float immediate and remember a source register for it in
+ * ctx->imm[idx].
+ *
+ * The register index is computed as info->immediate_count + idx, which
+ * assumes the immediates are emitted in idx order (0, 1, 2, ...) and that
+ * info->immediate_count is the number of immediates the incoming shader
+ * already declares.
+ */
+static void
+emit_immed(struct tgsi_transform_context *tctx, int idx,
+           float x, float y, float z, float w)
+{
+   struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
+   struct tgsi_shader_info *info = &ctx->info;
+   struct tgsi_full_immediate immed;
+
+   immed = tgsi_default_full_immediate();
+   immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */
+   immed.u[0].Float = x;
+   immed.u[1].Float = y;
+   immed.u[2].Float = z;
+   immed.u[3].Float = w;
+   tctx->emit_immediate(tctx, &immed);
+
+   ctx->imm[idx].Register.File = TGSI_FILE_IMMEDIATE;
+   ctx->imm[idx].Register.Index = info->immediate_count + idx;
+   ctx->imm[idx].Register.SwizzleX = TGSI_SWIZZLE_X;
+   ctx->imm[idx].Register.SwizzleY = TGSI_SWIZZLE_Y;
+   ctx->imm[idx].Register.SwizzleZ = TGSI_SWIZZLE_Z;
+   ctx->imm[idx].Register.SwizzleW = TGSI_SWIZZLE_W;
+}
+
+/* Declare sampler 'samp' together with a 2D float sampler view for it. */
+static void
+emit_samp(struct tgsi_transform_context *tctx, unsigned samp)
+{
+   tgsi_transform_sampler_decl(tctx, samp);
+   tgsi_transform_sampler_view_decl(tctx, samp, PIPE_TEXTURE_2D,
+                                    TGSI_RETURN_TYPE_FLOAT);
+}
+
+/* Emit extra declarations we need:
+ *  + 2 TEMP to hold intermediate results
+ *  + 1 (for 2-plane YUV) or 2 (for 3-plane YUV) extra samplers per
+ *    lowered YUV sampler
+ *  + extra immediates for doing CSC
+ *
+ * The extra samplers are taken, lowest bit first, from ctx->free_slots,
+ * which must contain enough free slots for every lowered sampler.
+ */
+static void
+emit_decls(struct tgsi_transform_context *tctx)
+{
+   struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
+   struct tgsi_shader_info *info = &ctx->info;
+   unsigned mask, tempbase, i;
+   struct tgsi_full_declaration decl;
+
+   /*
+    * Declare immediates for CSC conversion:
+    */
+
+   /* ITU-R BT.601 conversion */
+   emit_immed(tctx, 0, 1.164,  0.000,  1.596,  0.0);
+   emit_immed(tctx, 1, 1.164, -0.392, -0.813,  0.0);
+   emit_immed(tctx, 2, 1.164,  2.017,  0.000,  0.0);
+   emit_immed(tctx, 3, 0.0625, 0.500,  0.500,  1.0);
+
+   /*
+    * Declare extra samplers / sampler-views:
+    */
+
+   mask = ctx->lower_nv12 | ctx->lower_iyuv;
+   while (mask) {
+      unsigned extra, y_samp = u_bit_scan(&mask);
+
+      /* u_bit_scan() must not be handed an empty mask. */
+      assert(ctx->free_slots);
+      extra = u_bit_scan(&ctx->free_slots);
+      ctx->sampler_map[y_samp][0] = extra;
+      emit_samp(tctx, extra);
+
+      /* 3-plane formats need a second extra sampler for the V plane. */
+      if (ctx->lower_iyuv & (1u << y_samp)) {
+         assert(ctx->free_slots);
+         extra = u_bit_scan(&ctx->free_slots);
+         ctx->sampler_map[y_samp][1] = extra;
+         emit_samp(tctx, extra);
+      }
+   }
+
+   /*
+    * Declare extra temp:
+    */
+
+   tempbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
+
+   for (i = 0; i < 2; i++) {
+      decl = tgsi_default_full_declaration();
+      decl.Declaration.File = TGSI_FILE_TEMPORARY;
+      decl.Range.First = decl.Range.Last = tempbase + i;
+      tctx->emit_declaration(tctx, &decl);
+
+      ctx->tmp[i].src.Register.File  = TGSI_FILE_TEMPORARY;
+      ctx->tmp[i].src.Register.Index = tempbase + i;
+      ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X;
+      ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y;
+      ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z;
+      ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W;
+
+      ctx->tmp[i].dst.Register.File  = TGSI_FILE_TEMPORARY;
+      ctx->tmp[i].dst.Register.Index = tempbase + i;
+      ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW;
+   }
+}
+
+/* Emit the colour-space conversion shared by all lowered formats.
+ *
+ * Call with YUV in tmpA.xyz.  Writes RGB to dst.xyz and 1.0 to dst.w,
+ * restricted to the channels enabled in dst's write mask: reg_dst()
+ * asserts on an empty mask, so no instruction may be emitted for a
+ * channel the original TEX does not write (e.g. a TEX with a .xyz mask).
+ */
+static void
+yuv_to_rgb(struct tgsi_transform_context *tctx,
+           struct tgsi_full_dst_register *dst)
+{
+   struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
+   struct tgsi_full_instruction inst;
+
+   /*
+    * IMM[0] FLT32 { 1.164,  0.000,  1.596,  0.0 }
+    * IMM[1] FLT32 { 1.164, -0.392, -0.813,  0.0 }
+    * IMM[2] FLT32 { 1.164,  2.017,  0.000,  0.0 }
+    * IMM[3] FLT32 { 0.0625, 0.500,  0.500,  1.0 }
+    */
+
+   /* SUB tmpA.xyz, tmpA, imm[3] */
+   inst = tgsi_default_full_instruction();
+   inst.Instruction.Opcode = TGSI_OPCODE_SUB;
+   inst.Instruction.Saturate = 0;
+   inst.Instruction.NumDstRegs = 1;
+   inst.Instruction.NumSrcRegs = 2;
+   reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZ);
+   reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, _));
+   reg_src(&inst.Src[1], &ctx->imm[3], SWIZ(X, Y, Z, _));
+   tctx->emit_instruction(tctx, &inst);
+
+   /* DP3 dst.x, tmpA, imm[0] */
+   if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
+      inst = dp3_instruction();
+      reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_X);
+      reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
+      reg_src(&inst.Src[1], &ctx->imm[0], SWIZ(X, Y, Z, W));
+      tctx->emit_instruction(tctx, &inst);
+   }
+
+   /* DP3 dst.y, tmpA, imm[1] */
+   if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
+      inst = dp3_instruction();
+      reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_Y);
+      reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
+      reg_src(&inst.Src[1], &ctx->imm[1], SWIZ(X, Y, Z, W));
+      tctx->emit_instruction(tctx, &inst);
+   }
+
+   /* DP3 dst.z, tmpA, imm[2] */
+   if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
+      inst = dp3_instruction();
+      reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_Z);
+      reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
+      reg_src(&inst.Src[1], &ctx->imm[2], SWIZ(X, Y, Z, W));
+      tctx->emit_instruction(tctx, &inst);
+   }
+
+   /* MOV dst.w, imm[3].w */
+   if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
+      inst = mov_instruction();
+      reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_W);
+      reg_src(&inst.Src[0], &ctx->imm[3], SWIZ(_, _, _, W));
+      tctx->emit_instruction(tctx, &inst);
+   }
+}
+
+/* Replace a TEX on a 2-plane YUV sampler: sample Y from the original
+ * sampler and UV from sampler_map[samp][0], gather them in tempA.xyz and
+ * convert to RGB into the original destination.
+ */
+static void
+lower_nv12(struct tgsi_transform_context *tctx,
+           struct tgsi_full_instruction *originst)
+{
+   struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
+   struct tgsi_full_instruction inst;
+   struct tgsi_full_src_register *coord = &originst->Src[0];
+   unsigned samp = originst->Src[1].Register.Index;
+
+   /* sample Y:
+    *    TEX tempA.x, coord, texture[samp], 2D;
+    */
+   inst = tex_instruction(samp);
+   reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
+   reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
+   tctx->emit_instruction(tctx, &inst);
+
+   /* sample UV:
+    *    TEX tempB.xy, coord, texture[sampler_map[samp][0]], 2D;
+    *    MOV tempA.yz, tempB._xy_
+    */
+   inst = tex_instruction(ctx->sampler_map[samp][0]);
+   reg_dst(&inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XY);
+   reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
+   tctx->emit_instruction(tctx, &inst);
+
+   inst = mov_instruction();
+   reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_YZ);
+   reg_src(&inst.Src[0], &ctx->tmp[B].src, SWIZ(_, X, Y, _));
+   tctx->emit_instruction(tctx, &inst);
+
+   /* At this point, we have YUV in tempA.xyz, rest is common: */
+   yuv_to_rgb(tctx, &originst->Dst[0]);
+}
+
+/* Replace a TEX on a 3-plane YUV sampler: sample Y from the original
+ * sampler, U from sampler_map[samp][0] and V from sampler_map[samp][1],
+ * gather them in tempA.xyz and convert to RGB into the original
+ * destination.
+ */
+static void
+lower_iyuv(struct tgsi_transform_context *tctx,
+           struct tgsi_full_instruction *originst)
+{
+   struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
+   struct tgsi_full_instruction inst;
+   struct tgsi_full_src_register *coord = &originst->Src[0];
+   unsigned samp = originst->Src[1].Register.Index;
+
+   /* sample Y:
+    *    TEX tempA.x, coord, texture[samp], 2D;
+    */
+   inst = tex_instruction(samp);
+   reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
+   reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
+   tctx->emit_instruction(tctx, &inst);
+
+   /* sample U:
+    *    TEX tempB.x, coord, texture[sampler_map[samp][0]], 2D;
+    *    MOV tempA.y, tempB._x__
+    */
+   inst = tex_instruction(ctx->sampler_map[samp][0]);
+   reg_dst(&inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
+   reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
+   tctx->emit_instruction(tctx, &inst);
+
+   inst = mov_instruction();
+   reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
+   reg_src(&inst.Src[0], &ctx->tmp[B].src, SWIZ(_, X, _, _));
+   tctx->emit_instruction(tctx, &inst);
+
+   /* sample V:
+    *    TEX tempB.x, coord, texture[sampler_map[samp][1]], 2D;
+    *    MOV tempA.z, tempB.__x_
+    */
+   inst = tex_instruction(ctx->sampler_map[samp][1]);
+   reg_dst(&inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
+   reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
+   tctx->emit_instruction(tctx, &inst);
+
+   inst = mov_instruction();
+   reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
+   reg_src(&inst.Src[0], &ctx->tmp[B].src, SWIZ(_, _, X, _));
+   tctx->emit_instruction(tctx, &inst);
+
+   /* At this point, we have YUV in tempA.xyz, rest is common: */
+   yuv_to_rgb(tctx, &originst->Dst[0]);
+}
+
+/* Per-instruction callback of the transform.
+ *
+ * Emits the extra declarations ahead of the first instruction, replaces
+ * TEX instructions whose sampler is marked in lower_nv12 / lower_iyuv,
+ * and passes every other instruction through unchanged.
+ */
+static void
+transform_instr(struct tgsi_transform_context *tctx,
+                struct tgsi_full_instruction *inst)
+{
+   struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
+
+   if (!ctx->first_instruction_emitted) {
+      emit_decls(tctx);
+      ctx->first_instruction_emitted = true;
+   }
+
+   switch (inst->Instruction.Opcode) {
+   /* TODO what other tex opcode's can be used w/ external eglimgs? */
+   case TGSI_OPCODE_TEX: {
+      unsigned samp = inst->Src[1].Register.Index;
+      if (ctx->lower_nv12 & (1u << samp)) {
+         lower_nv12(tctx, inst);
+      } else if (ctx->lower_iyuv & (1u << samp)) {
+         lower_iyuv(tctx, inst);
+      } else {
+         goto skip;
+      }
+      break;
+   }
+   default:
+   skip:
+      tctx->emit_instruction(tctx, inst);
+      return;
+   }
+}
+
+/* Rewrite 'tokens' so that TEX instructions on the samplers selected by
+ * lower_nv12 (2-plane) and lower_iyuv (3-plane) sample each plane
+ * separately and convert the result from YUV to RGB.
+ *
+ * free_slots is a bitmask of sampler slots that may be used for the extra
+ * planes; lower_nv12 and lower_iyuv are bitmasks of sampler indices and
+ * must not overlap.
+ *
+ * Returns a token array obtained from tgsi_alloc_tokens(), or NULL if that
+ * allocation fails.  The input tokens are not modified.
+ */
+extern const struct tgsi_token *
+st_tgsi_lower_yuv(const struct tgsi_token *tokens, unsigned free_slots,
+                  unsigned lower_nv12, unsigned lower_iyuv)
+{
+   struct tgsi_yuv_transform ctx;
+   struct tgsi_token *newtoks;
+   int newlen;
+
+   assert(!(lower_nv12 & lower_iyuv)); /* bitmasks should be mutually exclusive */
+
+//   tgsi_dump(tokens, 0);
+//   debug_printf("\n");
+
+   memset(&ctx, 0, sizeof(ctx));
+   ctx.base.transform_instruction = transform_instr;
+   ctx.free_slots = free_slots;
+   ctx.lower_nv12 = lower_nv12;
+   ctx.lower_iyuv = lower_iyuv;
+   tgsi_scan_shader(tokens, &ctx.info);
+
+   /* TODO better job of figuring out how many extra tokens we need..
+    * this is a pain about tgsi_transform :-/
+    */
+   newlen = tgsi_num_tokens(tokens) + 120;
+   newtoks = tgsi_alloc_tokens(newlen);
+   if (!newtoks)
+      return NULL;
+
+   tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);
+
+//   tgsi_dump(newtoks, 0);
+//   debug_printf("\n");
+
+   return newtoks;
+}
diff --git a/src/mesa/state_tracker/st_tgsi_lower_yuv.h b/src/mesa/state_tracker/st_tgsi_lower_yuv.h
new file mode 100644
index 00000000000..c46423b294a
--- /dev/null
+++ b/src/mesa/state_tracker/st_tgsi_lower_yuv.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright © 2016 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the
following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ST_TGSI_LOWER_YUV_H
+#define ST_TGSI_LOWER_YUV_H
+
+struct tgsi_token;
+/* Lower TEX on the selected YUV samplers to per-plane TEX plus YUV->RGB conversion; returns newly allocated tokens, or NULL if allocation fails. */
+extern const struct tgsi_token * st_tgsi_lower_yuv(const struct tgsi_token *tokens, /* shader to rewrite; not modified */
+                                                   unsigned free_slots,             /* bitmask of sampler slots usable for the extra planes */
+                                                   unsigned lower_nv12,             /* bitmask of samplers to lower as 2-plane YUV */
+                                                   unsigned lower_iyuv);            /* bitmask of samplers to lower as 3-plane YUV; must not overlap lower_nv12 */
+
+#endif /* ST_TGSI_LOWER_YUV_H */