From 5d61b6f1f64ca26dd038af0679873ef0353660dd Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 31 Oct 2009 15:05:01 +0000 Subject: [PATCH] i965g: wip on fragment shaders --- src/gallium/drivers/i965/brw_wm.h | 63 +- src/gallium/drivers/i965/brw_wm_fp.c | 871 ++++++++++++++++++++------- 2 files changed, 698 insertions(+), 236 deletions(-) diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h index 2cd5bb70818..8ee99420aa5 100644 --- a/src/gallium/drivers/i965/brw_wm.h +++ b/src/gallium/drivers/i965/brw_wm.h @@ -74,6 +74,7 @@ struct brw_wm_prog_key { GLuint vp_nr_outputs:6; GLuint nr_cbufs:3; + GLuint has_flow_control:1; GLuint program_string_id; }; @@ -176,9 +177,36 @@ struct brw_wm_instruction { #define MAX_WM_OPCODE (MAX_OPCODE + 9) #define BRW_FILE_PAYLOAD (TGSI_FILE_COUNT) -#define PAYLOAD_DEPTH (FRAG_ATTRIB_MAX) /* ?? */ +#define PAYLOAD_DEPTH (PIPE_MAX_SHADER_INPUTS) /* ?? */ + + +struct brw_fp_src { + unsigned file:4; + unsigned index:16; + unsigned swizzle:8; + unsigned indirect:1; + unsigned negate:1; + unsigned abs:1; +}; + +struct brw_fp_dst { + unsigned file:4; + unsigned index:16; + unsigned writemask:4; + unsigned indirect:1; + unsigned saturate:1; +}; + +struct brw_fp_instruction { + struct brw_fp_dst dst; + struct brw_fp_src src[3]; + unsigned opcode:8; + unsigned tex_unit:4; + unsigned tex_target:4; + unsigned target:10; /* destination surface for FB_WRITE */ + unsigned eot:1; /* mark last instruction (usually FB_WRITE) */ +}; -struct brw_passfp_program; struct brw_wm_compile { struct brw_compile func; @@ -198,9 +226,26 @@ struct brw_wm_compile { * simplifying and adding instructions for interpolation and * framebuffer writes. */ - struct brw_passfp_program *pass_fp; - - + struct { + GLfloat v[4]; + unsigned nr; + } immediate[BRW_WM_MAX_CONST+3]; + GLuint nr_immediates; + + struct brw_fp_instruction fp_instructions[BRW_WM_MAX_INSN]; + GLuint nr_fp_insns; + GLuint fp_temp; + GLuint fp_interp_emitted; + GLuint fp_fragcolor_emitted; + GLuint fp_first_internal_temp; + + struct brw_fp_src fp_pixel_xy; + struct brw_fp_src fp_delta_xy; + struct brw_fp_src fp_pixel_w; + + + /* Subsequent passes using SSA representation: + */ struct brw_wm_value vreg[BRW_WM_MAX_VREG]; GLuint nr_vreg; @@ -213,7 +258,7 @@ struct brw_wm_compile { } payload; - const struct brw_wm_ref *pass0_fp_reg[PROGRAM_PAYLOAD+1][256][4]; + const struct brw_wm_ref *pass0_fp_reg[BRW_FILE_PAYLOAD+1][256][4]; struct brw_wm_ref undef_ref; struct brw_wm_value undef_value; @@ -241,7 +286,7 @@ struct brw_wm_compile { struct { GLboolean inited; struct brw_reg reg; - } wm_regs[PROGRAM_PAYLOAD+1][256][4]; + } wm_regs[BRW_FILE_PAYLOAD+1][256][4]; GLboolean used_grf[BRW_WM_MAX_GRF]; GLuint first_free_grf; @@ -258,13 +303,15 @@ struct brw_wm_compile { GLint index; struct brw_reg reg; } current_const[3]; + + GLuint error; }; GLuint brw_wm_nr_args( GLuint opcode ); GLuint brw_wm_is_scalar_result( GLuint opcode ); -void brw_wm_pass_fp( struct brw_wm_compile *c ); +int brw_wm_pass_fp( struct brw_wm_compile *c ); void brw_wm_pass0( struct brw_wm_compile *c ); void brw_wm_pass1( struct brw_wm_compile *c ); void brw_wm_pass2( struct brw_wm_compile *c ); diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c index 8ba037cdae7..57933afbbee 100644 --- a/src/gallium/drivers/i965/brw_wm_fp.c +++ b/src/gallium/drivers/i965/brw_wm_fp.c @@ -31,15 +31,26 @@ #include "pipe/p_shader_tokens.h" +#include "pipe/p_error.h" + +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_info.h" +#include "tgsi/tgsi_util.h" #include "brw_wm.h" #include "brw_util.h" +#include "brw_debug.h" #define X 0 #define Y 1 #define Z 2 #define W 3 +#define GET_SWZ(swz, comp) (((swz) >> ((comp)*2)) & 0x3) static const char *wm_opcode_strings[] = { @@ -54,7 +65,294 @@ static const char *wm_opcode_strings[] = { "FRONTFACING", }; +/*********************************************************************** + * Source regs + */ + +static struct brw_fp_src src_reg(GLuint file, GLuint idx) +{ + struct brw_fp_src reg; + reg.file = file; + reg.index = idx; + reg.swizzle = BRW_SWIZZLE_XYZW; + reg.indirect = 0; + reg.negate = 0; + reg.abs = 0; + return reg; +} + +static struct brw_fp_src src_reg_from_dst(struct brw_fp_dst dst) +{ + return src_reg(dst.file, dst.index); +} + +static struct brw_fp_src src_undef( void ) +{ + return src_reg(TGSI_FILE_NULL, 0); +} + +static GLboolean src_is_undef(struct brw_fp_src src) +{ + return src.file == TGSI_FILE_NULL; +} + +static struct brw_fp_src src_swizzle( struct brw_fp_src reg, int x, int y, int z, int w ) +{ + unsigned swz = reg.swizzle; + + reg.swizzle = ( GET_SWZ(swz, x) << 0 | + GET_SWZ(swz, y) << 2 | + GET_SWZ(swz, z) << 4 | + GET_SWZ(swz, w) << 6 ); + + return reg; +} + +static struct brw_fp_src src_scalar( struct brw_fp_src reg, int x ) +{ + return src_swizzle(reg, x, x, x, x); +} + +static struct brw_fp_src src_abs( struct brw_fp_src src ) +{ + src.negate = 0; + src.abs = 1; + return src; +} + +static struct brw_fp_src src_negate( struct brw_fp_src src ) +{ + src.negate = 1; + src.abs = 0; + return src; +} + + +static int match_or_expand_immediate( const float *v, + unsigned nr, + float *v2, + unsigned *nr2, + unsigned *swizzle ) +{ + unsigned i, j; + + *swizzle = 0; + + for (i = 0; i < nr; i++) { + boolean found = FALSE; + + for (j = 0; j < *nr2 && !found; j++) { + if (v[i] == v2[j]) { + *swizzle |= j << (i * 2); + found = TRUE; + } + } + + if (!found) { + if (*nr2 >= 4) + return FALSE; + + v2[*nr2] = v[i]; + *swizzle |= *nr2 << (i * 2); + (*nr2)++; + } + } + + return TRUE; +} + + + +/* Internally generated immediates: overkill... + */ +static struct brw_fp_src src_imm( struct brw_wm_compile *c, + const GLfloat *v, + unsigned nr) +{ + unsigned i, j; + unsigned swizzle; + + /* Could do a first pass where we examine all existing immediates + * without expanding. + */ + + for (i = 0; i < c->nr_immediates; i++) { + if (match_or_expand_immediate( v, + nr, + c->immediate[i].v, + &c->immediate[i].nr, + &swizzle )) + goto out; + } + + if (c->nr_immediates < Elements(c->immediate)) { + i = c->nr_immediates++; + if (match_or_expand_immediate( v, + nr, + c->immediate[i].v, + &c->immediate[i].nr, + &swizzle )) + goto out; + } + + c->error = 1; + return src_undef(); + +out: + /* Make sure that all referenced elements are from this immediate. + * Has the effect of making size-one immediates into scalars. + */ + for (j = nr; j < 4; j++) + swizzle |= (swizzle & 0x3) << (j * 2); + + return src_swizzle( src_reg( TGSI_FILE_IMMEDIATE, i ), + GET_SWZ(swizzle, X), + GET_SWZ(swizzle, Y), + GET_SWZ(swizzle, Z), + GET_SWZ(swizzle, W) ); +} + + + +static struct brw_fp_src src_imm1f( struct brw_wm_compile *c, + GLfloat f ) +{ + return src_imm(c, &f, 1); +} + +static struct brw_fp_src src_imm4f( struct brw_wm_compile *c, + GLfloat x, + GLfloat y, + GLfloat z, + GLfloat w) +{ + GLfloat f[4] = {x,y,z,w}; + return src_imm(c, f, 4); +} + + + +/*********************************************************************** + * Dest regs + */ + +static struct brw_fp_dst dst_reg(GLuint file, GLuint idx) +{ + struct brw_fp_dst reg; + reg.file = file; + reg.index = idx; + reg.writemask = BRW_WRITEMASK_XYZW; + reg.indirect = 0; + return reg; +} + +static struct brw_fp_dst dst_mask( struct brw_fp_dst reg, int mask ) +{ + reg.writemask &= mask; + return reg; +} + +static struct brw_fp_dst dst_undef( void ) +{ + return dst_reg(TGSI_FILE_NULL, 0); +} + +static boolean dst_is_undef( struct brw_fp_dst dst ) +{ + return dst.file == TGSI_FILE_NULL; +} + +static struct brw_fp_dst dst_saturate( struct brw_fp_dst reg, boolean flag ) +{ + reg.saturate = flag; + return reg; +} + +static struct brw_fp_dst get_temp( struct brw_wm_compile *c ) +{ + int bit = ffs( ~c->fp_temp ); + + if (!bit) { + debug_printf("%s: out of temporaries\n", __FILE__); + } + + c->fp_temp |= 1<<(bit-1); + return dst_reg(TGSI_FILE_TEMPORARY, c->fp_first_internal_temp+(bit-1)); +} + + +static void release_temp( struct brw_wm_compile *c, struct brw_fp_dst temp ) +{ + c->fp_temp &= ~(1 << (temp.index - c->fp_first_internal_temp)); +} + + +/*********************************************************************** + * Instructions + */ + +static struct brw_fp_instruction *get_fp_inst(struct brw_wm_compile *c) +{ + return &c->fp_instructions[c->nr_fp_insns++]; +} + +static struct brw_fp_instruction * emit_tex_op(struct brw_wm_compile *c, + GLuint op, + struct brw_fp_dst dest, + GLuint tex_src_unit, + GLuint tex_src_target, + struct brw_fp_src src0, + struct brw_fp_src src1, + struct brw_fp_src src2 ) +{ + struct brw_fp_instruction *inst = get_fp_inst(c); + + inst->opcode = op; + inst->dst = dest; + inst->tex_unit = tex_src_unit; + inst->tex_target = tex_src_target; + inst->src[0] = src0; + inst->src[1] = src1; + inst->src[2] = src2; + return inst; +} + + +static INLINE void emit_op3(struct brw_wm_compile *c, + GLuint op, + struct brw_fp_dst dest, + struct brw_fp_src src0, + struct brw_fp_src src1, + struct brw_fp_src src2 ) +{ + emit_tex_op(c, op, dest, 0, 0, src0, src1, src2); +} + + +static INLINE void emit_op2(struct brw_wm_compile *c, + GLuint op, + struct brw_fp_dst dest, + struct brw_fp_src src0, + struct brw_fp_src src1) +{ + emit_tex_op(c, op, dest, 0, 0, src0, src1, src_undef()); +} + +static INLINE void emit_op1(struct brw_wm_compile *c, + GLuint op, + struct brw_fp_dst dest, + struct brw_fp_src src0) +{ + emit_tex_op(c, op, dest, 0, 0, src0, src_undef(), src_undef()); +} + +static INLINE void emit_op0(struct brw_wm_compile *c, + GLuint op, + struct brw_fp_dst dest) +{ + emit_tex_op(c, op, dest, 0, 0, src_undef(), src_undef(), src_undef()); +} @@ -66,10 +364,10 @@ static const char *wm_opcode_strings[] = { */ static void emit_scalar_insn(struct brw_wm_compile *c, unsigned opcode, - struct brw_dst dst, - struct brw_src src0, - struct brw_src src1, - struct brw_src src2 ) + struct brw_fp_dst dst, + struct brw_fp_src src0, + struct brw_fp_src src1, + struct brw_fp_src src2 ) { unsigned first_chan = ffs(dst.writemask) - 1; unsigned first_mask = 1 << first_chan; @@ -77,14 +375,14 @@ static void emit_scalar_insn(struct brw_wm_compile *c, if (dst.writemask == 0) return; - emit_op( c, opcode, - brw_writemask(dst, first_mask), - src0, src1, src2 ); + emit_op3( c, opcode, + dst_mask(dst, first_mask), + src0, src1, src2 ); if (dst.writemask != first_mask) { emit_op1(c, TGSI_OPCODE_MOV, - brw_writemask(dst, ~first_mask), - src_swizzle1(brw_src(dst), first_chan)); + dst_mask(dst, ~first_mask), + src_scalar(src_reg_from_dst(dst), first_chan)); } } @@ -93,11 +391,11 @@ static void emit_scalar_insn(struct brw_wm_compile *c, * Special instructions for interpolation and other tasks */ -static struct ureg_src get_pixel_xy( struct brw_wm_compile *c ) +static struct brw_fp_src get_pixel_xy( struct brw_wm_compile *c ) { - if (src_is_undef(c->pixel_xy)) { - struct ureg_dst pixel_xy = get_temp(c); - struct ureg_src payload_r0_depth = src_reg(TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH); + if (src_is_undef(c->fp_pixel_xy)) { + struct brw_fp_dst pixel_xy = get_temp(c); + struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH); /* Emit the out calculations, and hold onto the results. Use @@ -105,79 +403,85 @@ static struct ureg_src get_pixel_xy( struct brw_wm_compile *c ) */ /* pixel_xy.xy = PIXELXY payload[0]; */ - emit_op(c, - WM_PIXELXY, - dst_mask(pixel_xy, BRW_WRITEMASK_XY), - payload_r0_depth, - src_undef(), - src_undef()); + emit_op1(c, + WM_PIXELXY, + dst_mask(pixel_xy, BRW_WRITEMASK_XY), + payload_r0_depth); - c->pixel_xy = src_reg_from_dst(pixel_xy); + c->fp_pixel_xy = src_reg_from_dst(pixel_xy); } - return c->pixel_xy; + return c->fp_pixel_xy; } -static struct ureg_src get_delta_xy( struct brw_wm_compile *c ) +static struct brw_fp_src get_delta_xy( struct brw_wm_compile *c ) { - if (src_is_undef(c->delta_xy)) { - struct ureg_dst delta_xy = get_temp(c); - struct ureg_src pixel_xy = get_pixel_xy(c); - struct ureg_src payload_r0_depth = src_reg(TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH); + if (src_is_undef(c->fp_delta_xy)) { + struct brw_fp_dst delta_xy = get_temp(c); + struct brw_fp_src pixel_xy = get_pixel_xy(c); + struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH); /* deltas.xy = DELTAXY pixel_xy, payload[0] */ - emit_op(c, + emit_op3(c, WM_DELTAXY, dst_mask(delta_xy, BRW_WRITEMASK_XY), pixel_xy, payload_r0_depth, src_undef()); - c->delta_xy = src_reg_from_dst(delta_xy); + c->fp_delta_xy = src_reg_from_dst(delta_xy); } - return c->delta_xy; + return c->fp_delta_xy; } -static struct ureg_src get_pixel_w( struct brw_wm_compile *c ) +static struct brw_fp_src get_pixel_w( struct brw_wm_compile *c ) { - if (src_is_undef(c->pixel_w)) { - struct ureg_dst pixel_w = get_temp(c); - struct ureg_src deltas = get_delta_xy(c); - struct ureg_src interp_wpos = src_reg(TGSI_FILE_PAYLOAD, FRAG_ATTRIB_WPOS); + if (src_is_undef(c->fp_pixel_w)) { + struct brw_fp_dst pixel_w = get_temp(c); + struct brw_fp_src deltas = get_delta_xy(c); + + /* XXX: assuming position is always first -- valid? + */ + struct brw_fp_src interp_wpos = src_reg(BRW_FILE_PAYLOAD, 0); /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x */ - emit_op(c, - WM_PIXELW, - dst_mask(pixel_w, BRW_WRITEMASK_W), - interp_wpos, - deltas, - src_undef()); + emit_op3(c, + WM_PIXELW, + dst_mask(pixel_w, BRW_WRITEMASK_W), + interp_wpos, + deltas, + src_undef()); - c->pixel_w = src_reg_from_dst(pixel_w); + c->fp_pixel_w = src_reg_from_dst(pixel_w); } - return c->pixel_w; + return c->fp_pixel_w; } + +/*********************************************************************** + * Emit INTERP instructions ahead of first use of each attrib. + */ + static void emit_interp( struct brw_wm_compile *c, + GLuint idx, GLuint semantic, - GLuint semantic_index, GLuint interp_mode ) { - struct ureg_dst dst = dst_reg(TGSI_FILE_INPUT, idx); - struct ureg_src interp = src_reg(TGSI_FILE_PAYLOAD, idx); - struct ureg_src deltas = get_delta_xy(c); + struct brw_fp_dst dst = dst_reg(TGSI_FILE_INPUT, idx); + struct brw_fp_src interp = src_reg(BRW_FILE_PAYLOAD, idx); + struct brw_fp_src deltas = get_delta_xy(c); /* Need to use PINTERP on attributes which have been * multiplied by 1/W in the SF program, and LINTERP on those * which have not: */ switch (semantic) { - case FRAG_ATTRIB_WPOS: + case TGSI_SEMANTIC_POSITION: /* Have to treat wpos.xy specially: */ emit_op1(c, @@ -218,7 +522,8 @@ static void emit_interp( struct brw_wm_compile *c, } break; - case FRAG_ATTRIB_FOGC: + + case TGSI_SEMANTIC_FOG: /* Interpolate the fog coordinate */ emit_op3(c, WM_PINTERP, @@ -228,17 +533,17 @@ static void emit_interp( struct brw_wm_compile *c, get_pixel_w(c)); emit_op1(c, - TGSI_OPCODE_MOV, - dst_mask(dst, BRW_WRITEMASK_YZ), - brw_imm1f(0.0)); + TGSI_OPCODE_MOV, + dst_mask(dst, BRW_WRITEMASK_YZ), + src_imm1f(c, 0.0)); emit_op1(c, - TGSI_OPCODE_MOV, - dst_mask(dst, BRW_WRITEMASK_W), - brw_imm1f(1.0)); + TGSI_OPCODE_MOV, + dst_mask(dst, BRW_WRITEMASK_W), + src_imm1f(c, 1.0)); break; - case FRAG_ATTRIB_FACE: + case TGSI_SEMANTIC_FACE: /* XXX review/test this case */ emit_op0(c, WM_FRONTFACING, @@ -247,15 +552,15 @@ static void emit_interp( struct brw_wm_compile *c, emit_op1(c, TGSI_OPCODE_MOV, dst_mask(dst, BRW_WRITEMASK_YZ), - brw_imm1f(0.0)); + src_imm1f(c, 0.0)); emit_op1(c, TGSI_OPCODE_MOV, dst_mask(dst, BRW_WRITEMASK_W), - brw_imm1f(1.0)); + src_imm1f(c, 1.0)); break; - case FRAG_ATTRIB_PNTC: + case TGSI_SEMANTIC_PSIZE: /* XXX review/test this case */ emit_op3(c, WM_PINTERP, @@ -267,12 +572,12 @@ static void emit_interp( struct brw_wm_compile *c, emit_op1(c, TGSI_OPCODE_MOV, dst_mask(dst, BRW_WRITEMASK_Z), - brw_imm1f(c->pass_fp, 0.0f)); + src_imm1f(c, 0.0f)); emit_op1(c, TGSI_OPCODE_MOV, dst_mask(dst, BRW_WRITEMASK_W), - brw_imm1f(c->pass_fp, 1.0f)); + src_imm1f(c, 1.0f)); break; default: @@ -310,11 +615,11 @@ static void emit_interp( struct brw_wm_compile *c, * Expand various instructions here to simpler forms. */ static void precalc_dst( struct brw_wm_compile *c, - struct brw_dst dst, - struct brw_src src0, - struct brw_src src1 ) + struct brw_fp_dst dst, + struct brw_fp_src src0, + struct brw_fp_src src1 ) { - if (dst.WriteMask & BRW_WRITEMASK_Y) { + if (dst.writemask & BRW_WRITEMASK_Y) { /* dst.y = mul src0.y, src1.y */ emit_op2(c, @@ -324,25 +629,22 @@ static void precalc_dst( struct brw_wm_compile *c, src1); } - if (dst.WriteMask & BRW_WRITEMASK_XZ) { - struct prog_instruction *swz; - GLuint z = GET_SWZ(src0.Swizzle, Z); - + if (dst.writemask & BRW_WRITEMASK_XZ) { /* dst.z = mov src0.zzzz */ emit_op1(c, TGSI_OPCODE_MOV, dst_mask(dst, BRW_WRITEMASK_Z), - src_swizzle1(src0, Z)); + src_scalar(src0, Z)); - /* dst.x = immf(1.0) + /* dst.x = imm1f(1.0) */ emit_op1(c, TGSI_OPCODE_MOV, - brw_saturate(dst_mask(dst, BRW_WRITEMASK_X), 0), - src_immf(c, 1.0)); + dst_saturate(dst_mask(dst, BRW_WRITEMASK_X), 0), + src_imm1f(c, 1.0)); } - if (dst.WriteMask & BRW_WRITEMASK_W) { + if (dst.writemask & BRW_WRITEMASK_W) { /* dst.w = mov src1.w */ emit_op1(c, @@ -354,22 +656,22 @@ static void precalc_dst( struct brw_wm_compile *c, static void precalc_lit( struct brw_wm_compile *c, - struct ureg_dst dst, - struct ureg_src src0 ) + struct brw_fp_dst dst, + struct brw_fp_src src0 ) { - if (dst.WriteMask & BRW_WRITEMASK_XW) { + if (dst.writemask & BRW_WRITEMASK_XW) { /* dst.xw = imm(1.0f) */ emit_op1(c, TGSI_OPCODE_MOV, - brw_saturate(brw_writemask(dst, BRW_WRITEMASK_XW), 0), - brw_imm1f(1.0f)); + dst_saturate(dst_mask(dst, BRW_WRITEMASK_XW), 0), + src_imm1f(c, 1.0f)); } - if (dst.WriteMask & BRW_WRITEMASK_YZ) { + if (dst.writemask & BRW_WRITEMASK_YZ) { emit_op1(c, TGSI_OPCODE_LIT, - brw_writemask(dst, BRW_WRITEMASK_YZ), + dst_mask(dst, BRW_WRITEMASK_YZ), src0); } } @@ -382,41 +684,42 @@ static void precalc_lit( struct brw_wm_compile *c, * instruction itself. */ static void precalc_tex( struct brw_wm_compile *c, - struct brw_dst dst, + struct brw_fp_dst dst, + unsigned target, unsigned unit, - struct brw_src src0 ) + struct brw_fp_src src0 ) { - struct ureg_src coord = src_undef(); - struct ureg_dst tmp = dst_undef(); + struct brw_fp_src coord = src_undef(); + struct brw_fp_dst tmp = dst_undef(); assert(unit < BRW_MAX_TEX_UNIT); /* Cubemap: find longest component of coord vector and normalize * it. */ - if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) { - struct ureg_src tmpsrc; + if (target == TGSI_TEXTURE_CUBE) { + struct brw_fp_src tmpsrc; tmp = get_temp(c); - tmpsrc = brw_src(tmpcoord) + tmpsrc = src_reg_from_dst(tmp); /* tmp = abs(src0) */ emit_op1(c, TGSI_OPCODE_MOV, tmp, - brw_abs(src0)); + src_abs(src0)); /* tmp.X = MAX(tmp.X, tmp.Y) */ emit_op2(c, TGSI_OPCODE_MAX, - brw_writemask(tmp, BRW_WRITEMASK_X), - src_swizzle1(tmpsrc, X), - src_swizzle1(tmpsrc, Y)); + dst_mask(tmp, BRW_WRITEMASK_X), + src_scalar(tmpsrc, X), + src_scalar(tmpsrc, Y)); /* tmp.X = MAX(tmp.X, tmp.Z) */ emit_op2(c, TGSI_OPCODE_MAX, - brw_writemask(tmp, BRW_WRITEMASK_X), + dst_mask(tmp, BRW_WRITEMASK_X), tmpsrc, - src_swizzle1(tmpsrc, Z)); + src_scalar(tmpsrc, Z)); /* tmp.X = 1 / tmp.X */ emit_op1(c, TGSI_OPCODE_RCP, @@ -427,11 +730,12 @@ static void precalc_tex( struct brw_wm_compile *c, emit_op2(c, TGSI_OPCODE_MUL, tmp, src0, - src_swizzle1(tmpsrc, SWIZZLE_X)); + src_scalar(tmpsrc, X)); coord = tmpsrc; } - else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) { + else if (target == TGSI_TEXTURE_RECT || + target == TGSI_TEXTURE_SHADOWRECT) { /* XXX: need a mechanism for internally generated constants. */ coord = src0; @@ -448,19 +752,18 @@ static void precalc_tex( struct brw_wm_compile *c, if (c->key.yuvtex_mask & (1 << unit)) { /* convert ycbcr to RGBA */ GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<DstReg; - struct ureg_dst tmp = get_temp(c); - struct ureg_src tmpsrc = src_reg_from_dst(tmp); - struct ureg_src C0 = ureg_imm4f( c->ureg, -.5, -.0625, -.5, 1.164 ); - struct ureg_src C1 = ureg_imm4f( c->ureg, 1.596, -0.813, 2.018, -.391 ); + struct brw_fp_dst tmp = get_temp(c); + struct brw_fp_src tmpsrc = src_reg_from_dst(tmp); + struct brw_fp_src C0 = src_imm4f( c, -.5, -.0625, -.5, 1.164 ); + struct brw_fp_src C1 = src_imm4f( c, 1.596, -0.813, 2.018, -.391 ); /* tmp = TEX ... */ emit_tex_op(c, TGSI_OPCODE_TEX, - brw_saturate(tmp, dst.Saturate), + dst_saturate(tmp, dst.saturate), unit, - inst->TexSrcTarget, + target, coord, src_undef(), src_undef()); @@ -477,7 +780,7 @@ static void precalc_tex( struct brw_wm_compile *c, emit_op2(c, TGSI_OPCODE_MUL, dst_mask(tmp, BRW_WRITEMASK_Y), tmpsrc, - src_swizzle1(C0, W)); + src_scalar(C0, W)); /* * if (UV swaped) @@ -492,16 +795,16 @@ static void precalc_tex( struct brw_wm_compile *c, src_swizzle(tmpsrc, Z,Z,X,X) : src_swizzle(tmpsrc, X,X,Z,Z)), C1, - src_swizzle1(tmpsrc, Y)); + src_scalar(tmpsrc, Y)); /* RGB.y = MAD YUV.z, C1.w, RGB.y */ emit_op3(c, TGSI_OPCODE_MAD, dst_mask(dst, BRW_WRITEMASK_Y), - src_swizzle1(tmpsrc, Z), - src_swizzle1(C1, W), - src_swizzle1(src_reg_from_dst(dst), Y)); + src_scalar(tmpsrc, Z), + src_scalar(C1, W), + src_scalar(src_reg_from_dst(dst), Y)); release_temp(c, tmp); } @@ -509,9 +812,9 @@ static void precalc_tex( struct brw_wm_compile *c, /* ordinary RGBA tex instruction */ emit_tex_op(c, TGSI_OPCODE_TEX, - inst->DstReg, + dst, unit, - inst->TexSrcTarget, + target, coord, src_undef(), src_undef()); @@ -523,8 +826,8 @@ static void precalc_tex( struct brw_wm_compile *c, /* Release this temp if we ended up allocating it: */ - if (!brw_dst_is_undef(tmpcoord)) - release_temp(c, tmpcoord); + if (!dst_is_undef(tmp)) + release_temp(c, tmp); } @@ -532,13 +835,9 @@ static void precalc_tex( struct brw_wm_compile *c, * Check if the given TXP instruction really needs the divide-by-W step. */ static GLboolean projtex( struct brw_wm_compile *c, - const struct prog_instruction *inst ) + unsigned target, + struct brw_fp_src src ) { - const struct ureg_src src = inst->SrcReg[0]; - GLboolean retVal; - - assert(inst->Opcode == TGSI_OPCODE_TXP); - /* Only try to detect the simplest cases. Could detect (later) * cases where we are trying to emit code like RCP {1.0}, MUL x, * {1.0}, and so on. @@ -546,16 +845,15 @@ static GLboolean projtex( struct brw_wm_compile *c, * More complex cases than this typically only arise from * user-provided fragment programs anyway: */ - if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) - retVal = GL_FALSE; /* ut2004 gun rendering !?! */ - else if (src.File == TGSI_FILE_INPUT && - GET_SWZ(src.Swizzle, W) == W && - (c->key.proj_attrib_mask & (1 << src.Index)) == 0) - retVal = GL_FALSE; - else - retVal = GL_TRUE; - - return retVal; + if (target == TGSI_TEXTURE_CUBE) + return GL_FALSE; /* ut2004 gun rendering !?! */ + + if (src.file == TGSI_FILE_INPUT && + GET_SWZ(src.swizzle, W) == W && + (c->key.proj_attrib_mask & (1 << src.index)) == 0) + return GL_FALSE; + + return GL_TRUE; } @@ -563,110 +861,168 @@ static GLboolean projtex( struct brw_wm_compile *c, * Emit code for TXP. */ static void precalc_txp( struct brw_wm_compile *c, - const struct prog_instruction *inst ) + struct brw_fp_dst dst, + unsigned target, + unsigned unit, + struct brw_fp_src src0 ) { - struct ureg_src src0 = inst->SrcReg[0]; - - if (projtex(c, inst)) { - struct ureg_dst tmp = get_temp(c); - struct prog_instruction tmp_inst; + if (projtex(c, target, src0)) { + struct brw_fp_dst tmp = get_temp(c); /* tmp0.w = RCP inst.arg[0][3] */ - emit_op(c, + emit_op1(c, TGSI_OPCODE_RCP, dst_mask(tmp, BRW_WRITEMASK_W), - src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)), - src_undef(), - src_undef()); + src_scalar(src0, W)); /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww */ - emit_op(c, - TGSI_OPCODE_MUL, - dst_mask(tmp, BRW_WRITEMASK_XYZ), - src0, - src_swizzle1(src_reg_from_dst(tmp), W), - src_undef()); + emit_op2(c, + TGSI_OPCODE_MUL, + dst_mask(tmp, BRW_WRITEMASK_XYZ), + src0, + src_scalar(src_reg_from_dst(tmp), W)); - /* dst = precalc(TEX tmp0) + /* dst = TEX tmp0 */ - tmp_inst = *inst; - tmp_inst.SrcReg[0] = src_reg_from_dst(tmp); - precalc_tex(c, &tmp_inst); + precalc_tex(c, + dst, + target, + unit, + src_reg_from_dst(tmp)); release_temp(c, tmp); } else { - /* dst = precalc(TEX src0) + /* dst = TEX src0 */ - precalc_tex(c, inst); + precalc_tex(c, dst, target, unit, src0); } } +/* XXX: note this returns a src_reg. + */ +static struct brw_fp_src +find_output_by_semantic( struct brw_wm_compile *c, + unsigned semantic, + unsigned index ) +{ + const struct tgsi_shader_info *info = &c->fp->info; + unsigned i; + + for (i = 0; i < info->num_outputs; i++) + if (info->output_semantic_name[i] == semantic && + info->output_semantic_index[i] == index) + return src_reg( TGSI_FILE_OUTPUT, i ); + + /* If not found, return some arbitrary immediate value: + */ + return src_imm1f(c, 1.0); +} + static void emit_fb_write( struct brw_wm_compile *c ) { - struct ureg_src payload_r0_depth = src_reg(TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH); - struct ureg_src outdepth = src_reg(TGSI_FILE_OUTPUT, FRAG_RESULT_DEPTH); - struct ureg_src outcolor; - struct prog_instruction *inst; + struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH); + struct brw_fp_src outdepth = find_output_by_semantic(c, TGSI_SEMANTIC_POSITION, 0); GLuint i; - /* The inst->Aux field is used for FB write target and the EOT marker */ + outdepth = src_scalar(outdepth, Z); for (i = 0 ; i < c->key.nr_cbufs; i++) { - outcolor = find_output_by_semantic(c, TGSI_SEMANTIC_COLOR, i); + struct brw_fp_src outcolor; + unsigned target = 1<key.nr_cbufs - 1) + target |= 1; + + outcolor = find_output_by_semantic(c, TGSI_SEMANTIC_COLOR, i); - inst->Aux = (i<<1); + /* Use emit_tex_op so that we can specify the inst->tex_target + * field, which is abused to contain the FB write target and the + * EOT marker + */ + emit_tex_op(c, WM_FB_WRITE, + dst_undef(), + target, + 0, + outcolor, + payload_r0_depth, + outdepth); } - - /* Set EOT flag on last inst: - */ - inst->Aux |= 1; //eot } +static struct brw_fp_dst translate_dst( struct brw_wm_compile *c, + const struct tgsi_full_dst_register *dst, + unsigned saturate ) +{ + struct brw_fp_dst out; + + out.file = dst->DstRegister.File; + out.index = dst->DstRegister.Index; + out.writemask = dst->DstRegister.WriteMask; + out.indirect = dst->DstRegister.Indirect; + out.saturate = (saturate == TGSI_SAT_ZERO_ONE); + + if (out.indirect) { + assert(dst->DstRegisterInd.File == TGSI_FILE_ADDRESS); + assert(dst->DstRegisterInd.Index == 0); + } + + return out; +} -/*********************************************************************** - * Emit INTERP instructions ahead of first use of each attrib. - */ - -static void validate_src_regs( struct brw_wm_compile *c, - const struct prog_instruction *inst ) +static struct brw_fp_src translate_src( struct brw_wm_compile *c, + const struct tgsi_full_src_register *src ) { - GLuint nr_args = brw_wm_nr_args( inst->Opcode ); - GLuint i; + struct brw_fp_src out; + + out.file = src->SrcRegister.File; + out.index = src->SrcRegister.Index; + out.indirect = src->SrcRegister.Indirect; + + out.swizzle = ((src->SrcRegister.SwizzleX << 0) | + (src->SrcRegister.SwizzleY << 2) | + (src->SrcRegister.SwizzleZ << 4) | + (src->SrcRegister.SwizzleW << 6)); + + switch (tgsi_util_get_full_src_register_sign_mode( src, 0 )) { + case TGSI_UTIL_SIGN_CLEAR: + out.abs = 1; + out.negate = 0; + break; - for (i = 0; i < nr_args; i++) { - if (inst->SrcReg[i].File == TGSI_FILE_INPUT) { - GLuint idx = inst->SrcReg[i].Index; - if (!(c->fp_interp_emitted & (1<fp_interp_emitted |= 1<DstReg.File == TGSI_FILE_OUTPUT) { - GLuint idx = inst->DstReg.Index; - if (idx == FRAG_RESULT_COLOR) - c->fp_fragcolor_emitted |= inst->DstReg.WriteMask; + + if (out.indirect) { + assert(src->SrcRegisterInd.File == TGSI_FILE_ADDRESS); + assert(src->SrcRegisterInd.Index == 0); } + + return out; } @@ -674,59 +1030,78 @@ static void validate_dst_regs( struct brw_wm_compile *c, static void emit_insn( struct brw_wm_compile *c, const struct tgsi_full_instruction *inst ) { - - switch (inst->Opcode) { + unsigned opcode = inst->Instruction.Opcode; + struct brw_fp_dst dst; + struct brw_fp_src src[3]; + int i; + + dst = translate_dst( c, &inst->FullDstRegisters[0], + inst->Instruction.Saturate ); + + for (i = 0; i < inst->Instruction.NumSrcRegs; i++) + src[i] = translate_src( c, &inst->FullSrcRegisters[0] ); + + switch (opcode) { case TGSI_OPCODE_ABS: emit_op1(c, TGSI_OPCODE_MOV, dst, - brw_abs(src[0])); + src_abs(src[0])); break; case TGSI_OPCODE_SUB: emit_op2(c, TGSI_OPCODE_ADD, dst, src[0], - brw_negate(src[1])); + src_negate(src[1])); break; case TGSI_OPCODE_SCS: emit_op1(c, TGSI_OPCODE_SCS, - brw_writemask(dst, BRW_WRITEMASK_XY), + dst_mask(dst, BRW_WRITEMASK_XY), src[0]); break; case TGSI_OPCODE_DST: - precalc_dst(c, inst); + precalc_dst(c, dst, src[0], src[1]); break; case TGSI_OPCODE_LIT: - precalc_lit(c, inst); + precalc_lit(c, dst, src[0]); break; case TGSI_OPCODE_TEX: - precalc_tex(c, inst); + precalc_tex(c, dst, + inst->InstructionExtTexture.Texture, + src[0].file, /* sampler unit */ + src[1] ); break; case TGSI_OPCODE_TXP: - precalc_txp(c, inst); + precalc_txp(c, dst, + inst->InstructionExtTexture.Texture, + src[0].file, /* sampler unit */ + src[1] ); break; case TGSI_OPCODE_TXB: - out = emit_insn(c, inst); - out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit]; - assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT); + /* XXX: TXB not done + */ + precalc_tex(c, dst, + inst->InstructionExtTexture.Texture, + src[0].file, /* sampler unit */ + src[1] ); break; case TGSI_OPCODE_XPD: emit_op2(c, TGSI_OPCODE_XPD, - brw_writemask(dst, BRW_WRITEMASK_XYZ), + dst_mask(dst, BRW_WRITEMASK_XYZ), src[0], src[1]); break; case TGSI_OPCODE_KIL: emit_op1(c, TGSI_OPCODE_KIL, - brw_writemask(dst_undef(), 0), + dst_mask(dst_undef(), 0), src[0]); break; @@ -734,10 +1109,11 @@ static void emit_insn( struct brw_wm_compile *c, emit_fb_write(c); break; default: - if (brw_wm_is_scalar_result(inst->Opcode)) + if (!c->key.has_flow_control && + brw_wm_is_scalar_result(opcode)) emit_scalar_insn(c, opcode, dst, src[0], src[1], src[2]); else - emit_op(c, opcode, dst, src[0], src[1], src[2]); + emit_op3(c, opcode, dst, src[0], src[1], src[2]); break; } } @@ -746,46 +1122,70 @@ static void emit_insn( struct brw_wm_compile *c, * Initial pass for fragment program code generation. * This function is used by both the GLSL and non-GLSL paths. */ -void brw_wm_pass_fp( struct brw_wm_compile *c ) +int brw_wm_pass_fp( struct brw_wm_compile *c ) { - struct brw_fragment_program *fp = c->fp; - GLuint insn; + struct brw_fragment_shader *fs = c->fp; + struct tgsi_parse_context parse; + struct tgsi_full_instruction *inst; + struct tgsi_full_declaration *decl; + const float *imm; + GLuint size; + GLuint i; if (BRW_DEBUG & DEBUG_WM) { debug_printf("pre-fp:\n"); - tgsi_dump(fp->tokens, 0); + tgsi_dump(fs->tokens, 0); } - c->pixel_xy = brw_src_undef(); - c->delta_xy = brw_src_undef(); - c->pixel_w = brw_src_undef(); + c->fp_pixel_xy = src_undef(); + c->fp_delta_xy = src_undef(); + c->fp_pixel_w = src_undef(); c->nr_fp_insns = 0; - c->fp->tex_units_used = 0x0; + c->nr_immediates = 0; /* Loop over all instructions doing assorted simplifications and * transformations. */ - tgsi_parse_init( &parse, tokens ); + tgsi_parse_init( &parse, fs->tokens ); while( !tgsi_parse_end_of_tokens( &parse ) ) { tgsi_parse_token( &parse ); switch( parse.FullToken.Token.Type ) { case TGSI_TOKEN_TYPE_DECLARATION: - /* If branching shader, emit preamble instructions at decl time, as - * instruction order in the shader does not correspond to the order - * instructions are executed in the wild. - * - * This is where special instructions such as WM_CINTERP, - * WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to compute - * shader inputs from varying vars. + /* Turn intput declarations into special WM_* instructions. * * XXX: For non-branching shaders, consider deferring variable * initialization as late as possible to minimize register * usage. This is how the original BRW driver worked. + * + * In a branching shader, must preamble instructions at decl + * time, as instruction order in the shader does not + * correspond to the order instructions are executed in the + * wild. + * + * This is where special instructions such as WM_CINTERP, + * WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to + * compute shader inputs from the payload registers and pixel + * position. */ - validate_src_regs(c, inst); - validate_dst_regs(c, inst); + decl = &parse.FullToken.FullDeclaration; + if( decl->Declaration.File == TGSI_FILE_INPUT ) { + unsigned first, last, mask; + unsigned attrib; + + first = decl->DeclarationRange.First; + last = decl->DeclarationRange.Last; + mask = decl->Declaration.UsageMask; + + for (attrib = first; attrib <= last; attrib++) { + emit_interp(c, + attrib, + decl->Semantic.SemanticName, + decl->Declaration.Interpolate ); + } + } + break; case TGSI_TOKEN_TYPE_IMMEDIATE: @@ -795,21 +1195,36 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) * float value per instruction. Just save the data for now * and use directly later. */ + i = c->nr_immediates++; + imm = &parse.FullToken.FullImmediate.u[i].Float; + size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; + + if (c->nr_immediates >= BRW_WM_MAX_CONST) + return PIPE_ERROR_OUT_OF_MEMORY; + + for (i = 0; i < size; i++) + c->immediate[c->nr_immediates].v[i] = imm[i]; + + for (; i < 4; i++) + c->immediate[c->nr_immediates].v[i] = 0.0; + + c->immediate[c->nr_immediates].nr = size; + c->nr_immediates++; break; case TGSI_TOKEN_TYPE_INSTRUCTION: inst = &parse.FullToken.FullInstruction; - emit_insn( c, inst ); + emit_insn(c, inst); break; } } - c->brw_program = brw_finalize( c->builder ); - if (BRW_DEBUG & DEBUG_WM) { debug_printf("pass_fp:\n"); - brw_print_program( c->brw_program ); + //brw_print_program( c->fp_brw_program ); debug_printf("\n"); } + + return c->error; } -- 2.30.2