From 608b3c4432f7b7b0c27fc22369e09c8b7d8cfc03 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller
Date: Sun, 19 Dec 2010 21:49:32 +0100
Subject: [PATCH] nvc0: improve shader support for texturing

Fixed shadow and cube texture fetches, add array texture fetches.
---
Note (not part of the commit message): in nvc0_tgsi_to_nc.c, emit_tex() now
stores nvi->tex_array. The patch as received assigned nvi->tex_cube twice and
never set tex_array, which emit_tex() in nvc0_pc_emit.c reads.

 src/gallium/drivers/nvc0/nvc0_pc.c          |   2 +-
 src/gallium/drivers/nvc0/nvc0_pc.h          |  12 +-
 src/gallium/drivers/nvc0/nvc0_pc_emit.c     |  28 +++--
 src/gallium/drivers/nvc0/nvc0_pc_print.c    |   2 +
 src/gallium/drivers/nvc0/nvc0_pc_regalloc.c |   2 +
 src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c  | 132 +++++++++++++++-----
 6 files changed, 133 insertions(+), 45 deletions(-)

diff --git a/src/gallium/drivers/nvc0/nvc0_pc.c b/src/gallium/drivers/nvc0/nvc0_pc.c
index cf7b8e347fb..72483f120ed 100644
--- a/src/gallium/drivers/nvc0/nvc0_pc.c
+++ b/src/gallium/drivers/nvc0/nvc0_pc.c
@@ -397,7 +397,7 @@ nvc0_generate_code(struct nvc0_translation_info *ti)
    if (ret)
       goto out;
 #if NOUVEAU_DEBUG > 1
-   nv_print_program(pc);
+   nvc0_print_program(pc);
    nv_print_cfgraph(pc, "nvc0_shader_cfgraph.dot", 0);
 #endif
 
diff --git a/src/gallium/drivers/nvc0/nvc0_pc.h b/src/gallium/drivers/nvc0/nvc0_pc.h
index df0314965a3..74867f02e72 100644
--- a/src/gallium/drivers/nvc0/nvc0_pc.h
+++ b/src/gallium/drivers/nvc0/nvc0_pc.h
@@ -52,7 +52,8 @@
 #define NV_OP_NOP 5
 
 /**
- * BIND forces source operand i into the same register as destination operand i
+ * BIND forces source operand i into the same register as destination operand i,
+ * and the operands will be assigned consecutive registers (needed for TEX)
  * SELECT forces its multiple source operands and its destination operand into
  * one and the same register.
  */
@@ -152,8 +153,9 @@
 #define NV_OP_SUB_S32 81
 #define NV_OP_MAD_F32 NV_OP_MAD
 #define NV_OP_FSET_F32 82
+#define NV_OP_TXG 83
 
-#define NV_OP_COUNT 83
+#define NV_OP_COUNT 84
 
 /* nv50 files omitted */
 #define NV_FILE_GPR 0
@@ -380,9 +382,11 @@ struct nv_instruction {
    unsigned flat : 1;
    unsigned patch : 1;
    unsigned lanes : 4; /* 3rd byte */
-   unsigned tex_argc : 3;
+   unsigned tex_dim : 2;
+   unsigned tex_array : 1;
+   unsigned tex_cube : 1;
+   unsigned tex_shadow : 1; /* 4th byte */
    unsigned tex_live : 1;
-   unsigned tex_cube : 1; /* 4th byte */
    unsigned tex_mask : 4;
 
    uint8_t quadop;
diff --git a/src/gallium/drivers/nvc0/nvc0_pc_emit.c b/src/gallium/drivers/nvc0/nvc0_pc_emit.c
index cd1ad03b00b..2f99d5a339f 100644
--- a/src/gallium/drivers/nvc0/nvc0_pc_emit.c
+++ b/src/gallium/drivers/nvc0/nvc0_pc_emit.c
@@ -391,23 +391,37 @@ emit_minmax(struct nv_pc *pc, struct nv_instruction *i)
 static void
 emit_tex(struct nv_pc *pc, struct nv_instruction *i)
 {
+   int src1 = i->tex_array + i->tex_dim + i->tex_cube;
+
    pc->emit[0] = 0x00000086;
    pc->emit[1] = 0x80000000;
 
-   if (i->opcode == NV_OP_TXB) pc->emit[1] |= 0x04000000;
-   else
-   if (i->opcode == NV_OP_TXL) pc->emit[1] |= 0x06000000;
+   switch (i->opcode) {
+   case NV_OP_TEX: pc->emit[1] = 0x80000000; break;
+   case NV_OP_TXB: pc->emit[1] = 0x84000000; break;
+   case NV_OP_TXL: pc->emit[1] = 0x86000000; break;
+   case NV_OP_TXF: pc->emit[1] = 0x90000000; break;
+   case NV_OP_TXG: pc->emit[1] = 0xe0000000; break;
+   default:
+      assert(0);
+      break;
+   }
 
-   set_pred(pc, i);
+   if (i->tex_array)
+      pc->emit[1] |= 0x00080000; /* layer index is u16, first value of SRC0 */
+   if (i->tex_shadow)
+      pc->emit[1] |= 0x01000000; /* shadow is part of SRC1, after bias/lod */
 
-   if (1)
-      pc->emit[0] |= 63 << 26; /* explicit derivatives */
+   set_pred(pc, i);
 
    DID(pc, i->def[0], 14);
    SID(pc, i->src[0], 20);
+   SID(pc, i->src[src1], 26); /* may be NULL -> $r63 */
 
    pc->emit[1] |= i->tex_mask << 14;
-   pc->emit[1] |= (i->tex_argc - 1) << 20;
+   pc->emit[1] |= (i->tex_dim - 1) << 20;
+   if (i->tex_cube)
+      pc->emit[1] |= 3 << 20;
 
    assert(i->ext.tex.s < 16);
 
diff --git a/src/gallium/drivers/nvc0/nvc0_pc_print.c b/src/gallium/drivers/nvc0/nvc0_pc_print.c
index 9eac5ad900a..6249f1fd1cd 100644
--- a/src/gallium/drivers/nvc0/nvc0_pc_print.c
+++ b/src/gallium/drivers/nvc0/nvc0_pc_print.c
@@ -371,5 +371,7 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] =
 
    { NV_OP_FSET_F32, "fset", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 },
 
+   { NV_OP_TXG, "texgrad", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 },
+
    { NV_OP_UNDEF, "BAD_OP", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 }
 };
diff --git a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c
index 6f9d5de1976..d24f09a1507 100644
--- a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c
+++ b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c
@@ -492,6 +492,8 @@ pass_join_values(struct nv_pc_pass *ctx, int iter)
          case NV_OP_TXB:
          case NV_OP_TXL:
          case NV_OP_TXQ:
+            /* on nvc0, TEX src and dst can differ */
+            break;
          case NV_OP_BIND:
             if (iter)
                break;
diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c
index d0c82754894..fecfc76fb79 100644
--- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c
+++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c
@@ -1156,30 +1156,59 @@ bld_lit(struct bld_context *bld, struct nv_value *dst0[4],
 }
 
 static INLINE void
-get_tex_dim(const struct tgsi_full_instruction *insn, int *dim, int *arg)
+describe_texture_target(unsigned target, int *dim,
+                        int *array, int *cube, int *shadow)
 {
-   switch (insn->Texture.Texture) {
+   *array = *cube = *shadow = 0;
+
+   switch (target) {
    case TGSI_TEXTURE_1D:
-      *arg = *dim = 1;
+      *dim = 1;
       break;
    case TGSI_TEXTURE_SHADOW1D:
-      *dim = 1;
-      *arg = 2;
+      *dim = *shadow = 1;
       break;
    case TGSI_TEXTURE_UNKNOWN:
    case TGSI_TEXTURE_2D:
    case TGSI_TEXTURE_RECT:
-      *arg = *dim = 2;
+      *dim = 2;
       break;
    case TGSI_TEXTURE_SHADOW2D:
    case TGSI_TEXTURE_SHADOWRECT:
       *dim = 2;
-      *arg = 3;
+      *shadow = 1;
       break;
    case TGSI_TEXTURE_3D:
+      *dim = 3;
+      break;
    case TGSI_TEXTURE_CUBE:
-      *dim = *arg = 3;
+      *dim = 2;
+      *cube = 1;
+      break;
+   /*
+   case TGSI_TEXTURE_CUBE_ARRAY:
+      *dim = 2;
+      *cube = *array = 1;
       break;
+   case TGSI_TEXTURE_1D_ARRAY:
+      *dim = *array = 1;
+      break;
+   case TGSI_TEXTURE_2D_ARRAY:
+      *dim = 2;
+      *array = 1;
+      break;
+   case TGSI_TEXTURE_SHADOW1D_ARRAY:
+      *dim = *array = *shadow = 1;
+      break;
+   case TGSI_TEXTURE_SHADOW2D_ARRAY:
+      *dim = 2;
+      *array = *shadow = 1;
+      break;
+   case TGSI_TEXTURE_CUBE_ARRAY:
+      *dim = 2;
+      *array = *cube = 1;
+      break;
+   */
    default:
       assert(0);
       break;
@@ -1215,13 +1244,13 @@ bld_clone(struct bld_context *bld, struct nv_instruction *nvi)
 /* NOTE: proj(t0) = (t0 / w) / (tc3 / w) = tc0 / tc2 handled by optimizer */
 static void
 load_proj_tex_coords(struct bld_context *bld,
-                     struct nv_value *t[4], int dim, int arg,
+                     struct nv_value *t[4], int dim, int shadow,
                      const struct tgsi_full_instruction *insn)
 {
    int c;
    unsigned mask = (1 << dim) - 1;
 
-   if (arg != dim)
+   if (shadow)
       mask |= 4; /* depth comparison value */
 
    t[3] = emit_fetch(bld, insn, 0, 3);
@@ -1279,33 +1308,68 @@ bld_quadop(struct bld_context *bld, ubyte qop, struct nv_value *src0, int lane,
    return val;
 }
 
+/* order of TGSI operands: x y z layer shadow lod/bias */
+/* order of native operands: layer x y z | lod/bias shadow */
 static struct nv_instruction *
-emit_tex(struct bld_context *bld, uint opcode,
-         struct nv_value *dst[4], struct nv_value *t_in[4],
-         int argc, int tic, int tsc, int cube)
+emit_tex(struct bld_context *bld, uint opcode, int tic, int tsc,
+         struct nv_value *dst[4], struct nv_value *arg[4],
+         int dim, int array, int cube, int shadow)
 {
-   struct nv_value *t[4];
-   struct nv_instruction *nvi;
+   struct nv_value *src[4];
+   struct nv_instruction *nvi, *bnd;
    int c;
+   int s = 0;
+   boolean lodbias = opcode == NV_OP_TXB || opcode == NV_OP_TXL;
+
+   if (array)
+      arg[dim] = bld_cvt(bld, NV_TYPE_U32, NV_TYPE_F32, arg[dim]);
+
+   /* ensure that all inputs reside in a GPR */
+   for (c = 0; c < dim + array + cube + shadow; ++c)
+      (src[c] = bld_insn_1(bld, NV_OP_MOV, arg[c]))->insn->fixed = 1;
+
+   /* bind { layer x y z } and { lod/bias shadow } to adjacent regs */
+
+   bnd = new_instruction(bld->pc, NV_OP_BIND);
+   if (array) {
+      src[s] = new_value(bld->pc, NV_FILE_GPR, 4);
+      bld_def(bnd, s, src[s]);
+      nv_reference(bld->pc, bnd, s++, arg[dim + cube]);
+   }
+   for (c = 0; c < dim + cube; ++c, ++s) {
+      src[s] = bld_def(bnd, s, new_value(bld->pc, NV_FILE_GPR, 4));
+      nv_reference(bld->pc, bnd, s, arg[c]);
+   }
+
+   if (shadow || lodbias) {
+      bnd = new_instruction(bld->pc, NV_OP_BIND);
 
-   /* the inputs to a tex instruction must be separate values */
-   for (c = 0; c < argc; ++c) {
-      t[c] = bld_insn_1(bld, NV_OP_MOV, t_in[c]);
-      t[c]->insn->fixed = 1;
+      if (lodbias) {
+         src[s] = new_value(bld->pc, NV_FILE_GPR, 4);
+         bld_def(bnd, 0, src[s++]);
+         nv_reference(bld->pc, bnd, 0, arg[dim + cube + array + shadow]);
+      }
+      if (shadow) {
+         src[s] = new_value(bld->pc, NV_FILE_GPR, 4);
+         bld_def(bnd, lodbias, src[s++]);
+         nv_reference(bld->pc, bnd, lodbias, arg[dim + cube + array]);
+      }
    }
 
    nvi = new_instruction(bld->pc, opcode);
    for (c = 0; c < 4; ++c)
       dst[c] = bld_def(nvi, c, new_value(bld->pc, NV_FILE_GPR, 4));
-   for (c = 0; c < argc; ++c)
-      nv_reference(bld->pc, nvi, c, t[c]);
+   for (c = 0; c < s; ++c)
+      nv_reference(bld->pc, nvi, c, src[c]);
 
    nvi->ext.tex.t = tic;
    nvi->ext.tex.s = tsc;
    nvi->tex_mask = 0xf;
    nvi->tex_cube = cube;
+   nvi->tex_dim = dim;
+   nvi->tex_array = array;
+   nvi->tex_shadow = shadow;
    nvi->tex_live = 0;
-   nvi->tex_argc = argc;
 
    return nvi;
 }
@@ -1326,24 +1390,25 @@ bld_tex(struct bld_context *bld, struct nv_value *dst0[4],
 {
    struct nv_value *t[4], *s[3];
    uint opcode = translate_opcode(insn->Instruction.Opcode);
-   int arg, dim, c;
+   int c, dim, array, cube, shadow;
+   const int lodbias = opcode == NV_OP_TXB || opcode == NV_OP_TXL;
    const int tic = insn->Src[1].Register.Index;
    const int tsc = tic;
-   const int cube = (insn->Texture.Texture == TGSI_TEXTURE_CUBE) ? 1 : 0;
 
-   get_tex_dim(insn, &dim, &arg);
+   describe_texture_target(insn->Texture.Texture, &dim, &array, &cube, &shadow);
+
+   assert(dim + array + shadow + lodbias <= 5);
 
    if (!cube && insn->Instruction.Opcode == TGSI_OPCODE_TXP)
-      load_proj_tex_coords(bld, t, dim, arg, insn);
+      load_proj_tex_coords(bld, t, dim, shadow, insn);
    else {
-      for (c = 0; c < dim; ++c)
+      for (c = 0; c < dim + cube + array; ++c)
          t[c] = emit_fetch(bld, insn, 0, c);
-      if (arg != dim)
-         t[dim] = emit_fetch(bld, insn, 0, 2);
+      if (shadow)
+         t[c] = emit_fetch(bld, insn, 0, MAX2(c, 2));
    }
 
    if (cube) {
-      assert(dim >= 3);
       for (c = 0; c < 3; ++c)
          s[c] = bld_insn_1(bld, NV_OP_ABS_F32, t[c]);
 
@@ -1355,9 +1420,10 @@ bld_tex(struct bld_context *bld, struct nv_value *dst0[4],
          t[c] = bld_insn_2(bld, NV_OP_MUL_F32, t[c], s[0]);
    }
 
-   if (opcode == NV_OP_TXB || opcode == NV_OP_TXL)
-      t[arg++] = emit_fetch(bld, insn, 0, 3);
-   emit_tex(bld, opcode, dst0, t, arg, tic, tsc, cube);
+   if (lodbias)
+      t[dim + cube + array + shadow] = emit_fetch(bld, insn, 0, 3);
+
+   emit_tex(bld, opcode, tic, tsc, dst0, t, dim, array, cube, shadow);
 }
 
 static INLINE struct nv_value *
-- 
2.30.2