X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_wm_glsl.c;h=41db3a805b77c93404c4c5a5530490a5c8c18f90;hb=34239862430aa38b0b1b6740c47d66f7260ae3e4;hp=cb728190f5c5ef2fc3717a4429a54f93b3bc943f;hpb=035c0cf71a5fe3beee55654e1f7148adfe626cc0;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index cb728190f5c..41db3a805b7 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -8,12 +8,17 @@ enum _subroutine { SUB_NOISE1, SUB_NOISE2, SUB_NOISE3, SUB_NOISE4 }; -/* Only guess, need a flag in gl_fragment_program later */ + +/** + * Determine if the given fragment program uses GLSL features such + * as flow conditionals, loops, subroutines. + * Some GLSL shaders may use these features, others might not. + */ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) { int i; for (i = 0; i < fp->Base.NumInstructions; i++) { - struct prog_instruction *inst = &fp->Base.Instructions[i]; + const struct prog_instruction *inst = &fp->Base.Instructions[i]; switch (inst->Opcode) { case OPCODE_IF: case OPCODE_TRUNC: @@ -36,6 +41,10 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) return GL_FALSE; } + +/** + * Record the mapping of a Mesa register to a hardware register. + */ static void set_reg(struct brw_wm_compile *c, int file, int index, int component, struct brw_reg reg) { @@ -43,6 +52,10 @@ static void set_reg(struct brw_wm_compile *c, int file, int index, c->wm_regs[file][index][component].inited = GL_TRUE; } +/** + * Examine instruction's write mask to find index of first component + * enabled for writing. + */ static int get_scalar_dst_index(struct prog_instruction *inst) { int i; @@ -62,6 +75,10 @@ static struct brw_reg alloc_tmp(struct brw_wm_compile *c) return reg; } +/** + * Save current temp register info. + * There must be a matching call to release_tmps(). + */ static int mark_tmps(struct brw_wm_compile *c) { return c->tmp_index; @@ -77,8 +94,22 @@ static void release_tmps(struct brw_wm_compile *c, int mark) c->tmp_index = mark; } +/** + * Convert Mesa src register to brw register. + * + * Since we're running in SOA mode each Mesa register corresponds to four + * hardware registers. We allocate the hardware registers as needed here. + * + * \param file register file, one of PROGRAM_x + * \param index register number + * \param component src component (X=0, Y=1, Z=2, W=3) + * \param nr not used?!? + * \param neg negate value? + * \param abs take absolute value? + */ static struct brw_reg -get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, GLuint neg, GLuint abs) +get_reg(struct brw_wm_compile *c, int file, int index, int component, + int nr, GLuint neg, GLuint abs) { struct brw_reg reg; switch (file) { @@ -89,21 +120,46 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, GL break; case PROGRAM_UNDEFINED: return brw_null_reg(); - default: + case PROGRAM_TEMPORARY: + case PROGRAM_INPUT: + case PROGRAM_OUTPUT: + case PROGRAM_PAYLOAD: break; + default: + _mesa_problem(NULL, "Unexpected file in get_reg()"); + return brw_null_reg(); } - if(c->wm_regs[file][index][component].inited) + /* see if we've already allocated a HW register for this Mesa register */ + if (c->wm_regs[file][index][component].inited) { + /* yes, re-use */ reg = c->wm_regs[file][index][component].reg; - else + } + else { + /* no, allocate new register */ reg = brw_vec8_grf(c->reg_index, 0); + } - if(!c->wm_regs[file][index][component].inited) { + /* if this is a new register allocation, record it in the table */ + if (!c->wm_regs[file][index][component].inited) { set_reg(c, file, index, component, reg); c->reg_index++; } - if (neg & (1<< component)) { + if (c->reg_index >= BRW_WM_MAX_GRF - 12) { + /* ran out of temporary registers! */ +#if 1 + /* This is a big hack for now. + * Return bad register index, just don't hang the GPU. + */ + _mesa_fprintf(stderr, "out of regs %d\n", c->reg_index); + c->reg_index = BRW_WM_MAX_GRF - 13; +#else + return brw_null_reg(); +#endif + } + + if (neg & (1 << component)) { reg = negate(reg); } if (abs) @@ -111,6 +167,12 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, GL return reg; } + +/** + * Preallocate registers. This sets up the Mesa to hardware register + * mapping for certain registers, such as constants (uniforms/state vars) + * and shader inputs. + */ static void prealloc_reg(struct brw_wm_compile *c) { int i, j; @@ -119,29 +181,55 @@ static void prealloc_reg(struct brw_wm_compile *c) GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted; for (i = 0; i < 4; i++) { - reg = (i < c->key.nr_depth_regs) - ? brw_vec8_grf(i*2, 0) : brw_vec8_grf(0, 0); + if (i < c->key.nr_depth_regs) + reg = brw_vec8_grf(i * 2, 0); + else + reg = brw_vec8_grf(0, 0); set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg); } - c->reg_index += 2*c->key.nr_depth_regs; + c->reg_index += 2 * c->key.nr_depth_regs; + + /* constants */ { - int nr_params = c->fp->program.Base.Parameters->NumParameters; - struct gl_program_parameter_list *plist = - c->fp->program.Base.Parameters; - int index = 0; - c->prog_data.nr_params = 4*nr_params; - for (i = 0; i < nr_params; i++) { - for (j = 0; j < 4; j++, index++) { - reg = brw_vec1_grf(c->reg_index + index/8, - index%8); - c->prog_data.param[index] = - &plist->ParameterValues[i][j]; - set_reg(c, PROGRAM_STATE_VAR, i, j, reg); - } - } - c->nr_creg = 2*((4*nr_params+15)/16); - c->reg_index += c->nr_creg; + const int nr_params = c->fp->program.Base.Parameters->NumParameters; + + if (1 /* XXX threshold: nr_params <= 8 */) { + const struct gl_program_parameter_list *plist = + c->fp->program.Base.Parameters; + int index = 0; + + /* number of float constants in CURBE */ + c->prog_data.nr_params = 4 * nr_params; + + /* loop over program constants (float[4]) */ + for (i = 0; i < nr_params; i++) { + /* loop over XYZW channels */ + for (j = 0; j < 4; j++, index++) { + reg = brw_vec1_grf(c->reg_index + index / 8, index % 8); + /* Save pointer to parameter/constant value. + * Constants will be copied in prepare_constant_buffer() + */ + c->prog_data.param[index] = &plist->ParameterValues[i][j]; + set_reg(c, PROGRAM_STATE_VAR, i, j, reg); + } + } + /* number of constant regs used (each reg is float[8]) */ + c->nr_creg = 2 * ((4 * nr_params + 15) / 16); + c->reg_index += c->nr_creg; + } + else { + /* number of float constants in CURBE */ + c->prog_data.nr_params = 0; + + /* When there's a lot of FP constanst we'll store them in a + * texture-like buffer instead of using the CURBE buffer. + * This means we won't use GRF registers for constants and we'll + * have to fetch constants with a dataport read. + */ + } } + + /* fragment shader inputs */ for (i = 0; i < FRAG_ATTRIB_MAX; i++) { if (inputs & (1<reg_index += 2; - } } + c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; c->prog_data.urb_read_length = nr_interp_regs * 2; c->prog_data.curb_read_length = c->nr_creg; @@ -161,28 +249,74 @@ static void prealloc_reg(struct brw_wm_compile *c) c->reg_index += 2; } + +/** + * Convert Mesa dst register to brw register. + */ static struct brw_reg get_dst_reg(struct brw_wm_compile *c, - struct prog_instruction *inst, int component, int nr) + const struct prog_instruction *inst, + GLuint component) { + const int nr = 1; return get_reg(c, inst->DstReg.File, inst->DstReg.Index, component, nr, 0, 0); } + +/** + * Convert Mesa src register to brw register. + */ static struct brw_reg get_src_reg(struct brw_wm_compile *c, - struct prog_src_register *src, int index, int nr) + const struct prog_instruction *inst, + GLuint srcRegIndex, GLuint channel) { - int component = GET_SWZ(src->Swizzle, index); + const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; + const GLuint nr = 1; + const GLuint component = GET_SWZ(src->Swizzle, channel); + return get_reg(c, src->File, src->Index, component, nr, - src->NegateBase, src->Abs); + src->NegateBase, src->Abs); +} + + +/** + * Same as \sa get_src_reg() but if the register is a literal, emit + * a brw_reg encoding the literal. + * Note that a brw instruction only allows one src operand to be a literal. + * For instructions with more than one operand, only the second can be a literal. + */ +static struct brw_reg get_src_reg_imm(struct brw_wm_compile *c, + const struct prog_instruction *inst, + GLuint srcRegIndex, GLuint channel) +{ + const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; + if (src->File == PROGRAM_CONSTANT) { + /* a literal */ + const int component = GET_SWZ(src->Swizzle, channel); + const GLfloat *param = + c->fp->program.Base.Parameters->ParameterValues[src->Index]; + GLfloat value = param[component]; + if (src->NegateBase) + value = -value; + if (src->Abs) + value = FABSF(value); + return brw_imm_f(value); + } + else { + return get_src_reg(c, inst, srcRegIndex, channel); + } } -/* Subroutines are minimal support for resusable instruction sequences. - They are implemented as simply as possible to minimise overhead: there - is no explicit support for communication between the caller and callee - other than saving the return address in a temporary register, nor is - there any automatic local storage. This implies that great care is - required before attempting reentrancy or any kind of nested - subroutine invocations. */ + +/** + * Subroutines are minimal support for resusable instruction sequences. + * They are implemented as simply as possible to minimise overhead: there + * is no explicit support for communication between the caller and callee + * other than saving the return address in a temporary register, nor is + * there any automatic local storage. This implies that great care is + * required before attempting reentrancy or any kind of nested + * subroutine invocations. + */ static void invoke_subroutine( struct brw_wm_compile *c, enum _subroutine subroutine, void (*emit)( struct brw_wm_compile * ) ) @@ -247,8 +381,8 @@ static void emit_abs( struct brw_wm_compile *c, for (i = 0; i < 4; i++) { if (inst->DstReg.WriteMask & (1<SrcReg[0], i, 1); + dst = get_dst_reg(c, inst, i); + src = get_src_reg(c, inst, 0, i); brw_MOV(p, dst, brw_abs(src)); } } @@ -265,9 +399,9 @@ static void emit_trunc( struct brw_wm_compile *c, for (i = 0; i < 4; i++) { if (mask & (1<SrcReg[0], i, 1); - brw_RNDD(p, dst, src); + dst = get_dst_reg(c, inst, i); + src = get_src_reg(c, inst, 0, i); + brw_RNDZ(p, dst, src); } } brw_set_saturate(p, 0); @@ -283,8 +417,8 @@ static void emit_mov( struct brw_wm_compile *c, for (i = 0; i < 4; i++) { if (mask & (1<SrcReg[0], i, 1); + dst = get_dst_reg(c, inst, i); + src = get_src_reg_imm(c, inst, 0, i); brw_MOV(p, dst, src); } } @@ -301,8 +435,8 @@ static void emit_pixel_xy(struct brw_wm_compile *c, struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; - dst0 = get_dst_reg(c, inst, 0, 1); - dst1 = get_dst_reg(c, inst, 1, 1); + dst0 = get_dst_reg(c, inst, 0); + dst1 = get_dst_reg(c, inst, 1); /* Calculate pixel centers by adding 1 or 0 to each of the * micro-tile coordinates passed in r1. */ @@ -319,21 +453,20 @@ static void emit_pixel_xy(struct brw_wm_compile *c, stride(suboffset(r1_uw, 5), 2, 4, 0), brw_imm_v(0x11001100)); } - } static void emit_delta_xy(struct brw_wm_compile *c, - struct prog_instruction *inst) + struct prog_instruction *inst) { struct brw_reg r1 = brw_vec1_grf(1, 0); struct brw_reg dst0, dst1, src0, src1; struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; - dst0 = get_dst_reg(c, inst, 0, 1); - dst1 = get_dst_reg(c, inst, 1, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - src1 = get_src_reg(c, &inst->SrcReg[0], 1, 1); + dst0 = get_dst_reg(c, inst, 0); + dst1 = get_dst_reg(c, inst, 1); + src0 = get_src_reg(c, inst, 0, 0); + src1 = get_src_reg(c, inst, 0, 1); /* Calc delta X,Y by subtracting origin in r1 from the pixel * centers. */ @@ -351,10 +484,8 @@ static void emit_delta_xy(struct brw_wm_compile *c, negate(suboffset(r1,1))); } - } - static void fire_fb_write( struct brw_wm_compile *c, GLuint base_reg, GLuint nr, @@ -397,33 +528,59 @@ static void emit_fb_write(struct brw_wm_compile *c, */ if (c->key.aa_dest_stencil_reg) nr += 1; - { - brw_push_insn_state(p); - for (channel = 0; channel < 4; channel++) { - src0 = get_src_reg(c, &inst->SrcReg[0], channel, 1); - /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ - /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ - brw_MOV(p, brw_message_reg(nr + channel), src0); - } - /* skip over the regs populated above: */ - nr += 8; - brw_pop_insn_state(p); + + brw_push_insn_state(p); + for (channel = 0; channel < 4; channel++) { + src0 = get_src_reg(c, inst, 0, channel); + /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ + /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ + brw_MOV(p, brw_message_reg(nr + channel), src0); } + /* skip over the regs populated above: */ + nr += 8; + brw_pop_insn_state(p); - if (c->key.source_depth_to_render_target) - { - if (c->key.computes_depth) { - src0 = get_src_reg(c, &inst->SrcReg[2], 2, 1); - brw_MOV(p, brw_message_reg(nr), src0); - } else { - src0 = get_src_reg(c, &inst->SrcReg[1], 1, 1); - brw_MOV(p, brw_message_reg(nr), src0); - } + if (c->key.source_depth_to_render_target) { + if (c->key.computes_depth) { + src0 = get_src_reg(c, inst, 2, 2); + brw_MOV(p, brw_message_reg(nr), src0); + } + else { + src0 = get_src_reg(c, inst, 1, 1); + brw_MOV(p, brw_message_reg(nr), src0); + } + + nr += 2; + } - nr += 2; + if (c->key.dest_depth_reg) { + GLuint comp = c->key.dest_depth_reg / 2; + GLuint off = c->key.dest_depth_reg % 2; + + assert(comp == 1); + assert(off == 0); +#if 0 + /* XXX do we need this code? comp always 1, off always 0, it seems */ + if (off != 0) { + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1)); + /* 2nd half? */ + brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]); + brw_pop_insn_state(p); + } + else +#endif + { + struct brw_reg src = get_src_reg(c, inst, 1, 1); + brw_MOV(p, brw_message_reg(nr), src); + } + nr += 2; } - target = inst->Sampler >> 1; - eot = inst->Sampler & 1; + + target = inst->Aux >> 1; + eot = inst->Aux & 1; fire_fb_write(c, 0, nr, target, eot); } @@ -436,10 +593,10 @@ static void emit_pixel_w( struct brw_wm_compile *c, struct brw_reg dst, src0, delta0, delta1; struct brw_reg interp3; - dst = get_dst_reg(c, inst, 3, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); - delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); + dst = get_dst_reg(c, inst, 3); + src0 = get_src_reg(c, inst, 0, 0); + delta0 = get_src_reg(c, inst, 1, 0); + delta1 = get_src_reg(c, inst, 1, 1); interp3 = brw_vec1_grf(src0.nr+1, 4); /* Calc 1/w - just linterp wpos[3] optimized by putting the @@ -465,12 +622,12 @@ static void emit_linterp(struct brw_wm_compile *c, struct brw_reg interp[4]; struct brw_reg dst, delta0, delta1; struct brw_reg src0; + GLuint nr, i; - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); - delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); - GLuint nr = src0.nr; - int i; + src0 = get_src_reg(c, inst, 0, 0); + delta0 = get_src_reg(c, inst, 1, 0); + delta1 = get_src_reg(c, inst, 1, 1); + nr = src0.nr; interp[0] = brw_vec1_grf(nr, 0); interp[1] = brw_vec1_grf(nr, 4); @@ -479,7 +636,7 @@ static void emit_linterp(struct brw_wm_compile *c, for(i = 0; i < 4; i++ ) { if (mask & (1<SrcReg[0], 0, 1); - GLuint nr = src0.nr; - int i; + src0 = get_src_reg(c, inst, 0, 0); + nr = src0.nr; interp[0] = brw_vec1_grf(nr, 0); interp[1] = brw_vec1_grf(nr, 4); @@ -506,7 +663,7 @@ static void emit_cinterp(struct brw_wm_compile *c, for(i = 0; i < 4; i++ ) { if (mask & (1<SrcReg[0], 0, 1); - delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); - delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); - w = get_src_reg(c, &inst->SrcReg[2], 3, 1); - GLuint nr = src0.nr; - int i; + src0 = get_src_reg(c, inst, 0, 0); + delta0 = get_src_reg(c, inst, 1, 0); + delta1 = get_src_reg(c, inst, 1, 1); + w = get_src_reg(c, inst, 2, 3); + nr = src0.nr; interp[0] = brw_vec1_grf(nr, 0); interp[1] = brw_vec1_grf(nr, 4); @@ -536,7 +693,7 @@ static void emit_pinterp(struct brw_wm_compile *c, for(i = 0; i < 4; i++ ) { if (mask & (1<func; + struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); + struct brw_reg dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + + for (i = 0; i < 4; i++) { + if (mask & (1<SrcReg[0], i2, 1)); - src1 = get_src_reg(c, &inst->SrcReg[1], i1, 1); + dst = get_dst_reg(c, inst, i); + src0 = negate(get_src_reg(c, inst, 0, i2)); + src1 = get_src_reg_imm(c, inst, 1, i1); brw_MUL(p, brw_null_reg(), src0, src1); - src0 = get_src_reg(c, &inst->SrcReg[0], i1, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i2, 1); + src0 = get_src_reg(c, inst, 0, i1); + src1 = get_src_reg_imm(c, inst, 1, i2); brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); brw_MAC(p, dst, src0, src1); brw_set_saturate(p, 0); @@ -577,11 +764,11 @@ static void emit_dp3(struct brw_wm_compile *c, int i; struct brw_compile *p = &c->func; for (i = 0; i < 3; i++) { - src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1); + src0[i] = get_src_reg(c, inst, 0, i); + src1[i] = get_src_reg_imm(c, inst, 1, i); } - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); brw_MUL(p, brw_null_reg(), src0[0], src1[0]); brw_MAC(p, brw_null_reg(), src0[1], src1[1]); brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); @@ -596,10 +783,10 @@ static void emit_dp4(struct brw_wm_compile *c, int i; struct brw_compile *p = &c->func; for (i = 0; i < 4; i++) { - src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1); + src0[i] = get_src_reg(c, inst, 0, i); + src1[i] = get_src_reg_imm(c, inst, 1, i); } - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); brw_MUL(p, brw_null_reg(), src0[0], src1[0]); brw_MAC(p, brw_null_reg(), src0[1], src1[1]); brw_MAC(p, brw_null_reg(), src0[2], src1[2]); @@ -615,35 +802,58 @@ static void emit_dph(struct brw_wm_compile *c, int i; struct brw_compile *p = &c->func; for (i = 0; i < 4; i++) { - src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1); + src0[i] = get_src_reg(c, inst, 0, i); + src1[i] = get_src_reg_imm(c, inst, 1, i); } - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); brw_MUL(p, brw_null_reg(), src0[0], src1[0]); brw_MAC(p, brw_null_reg(), src0[1], src1[1]); brw_MAC(p, dst, src0[2], src1[2]); brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_ADD(p, dst, src0[3], src1[3]); + brw_ADD(p, dst, dst, src1[3]); brw_set_saturate(p, 0); } +/** + * Emit a scalar instruction, like RCP, RSQ, LOG, EXP. + * Note that the result of the function is smeared across the dest + * register's X, Y, Z and W channels (subject to writemasking of course). + */ static void emit_math1(struct brw_wm_compile *c, struct prog_instruction *inst, GLuint func) { struct brw_compile *p = &c->func; - struct brw_reg src0, dst; + struct brw_reg src0, dst, tmp; + const int mark = mark_tmps( c ); + int i; - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); + tmp = alloc_tmp(c); + + /* Get first component of source register */ + src0 = get_src_reg(c, inst, 0, 0); + + /* tmp = func(src0) */ brw_MOV(p, brw_message_reg(2), src0); brw_math(p, - dst, - func, - (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, - 2, - brw_null_reg(), - BRW_MATH_DATA_VECTOR, - BRW_MATH_PRECISION_FULL); + tmp, + func, + (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, + 2, + brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); + + /*tmp.dw1.bits.swizzle = SWIZZLE_XXXX;*/ + + /* replicate tmp value across enabled dest channels */ + for (i = 0; i < 4; i++) { + if (inst->DstReg.WriteMask & (1 << i)) { + dst = get_dst_reg(c, inst, i); + brw_MOV(p, dst, tmp); + } + } + + release_tmps(c, mark); } static void emit_rcp(struct brw_wm_compile *c, @@ -692,15 +902,28 @@ static void emit_add(struct brw_wm_compile *c, brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); for (i = 0 ; i < 4; i++) { if (mask & (1<SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); + src1 = get_src_reg_imm(c, inst, 1, i); brw_ADD(p, dst, src0, src1); } } brw_set_saturate(p, 0); } +static void emit_arl(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, addr_reg; + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + addr_reg = brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_ADDRESS, 0); + src0 = get_src_reg(c, inst, 0, 0); /* channel 0 */ + brw_MOV(p, addr_reg, src0); + brw_set_saturate(p, 0); +} + static void emit_sub(struct brw_wm_compile *c, struct prog_instruction *inst) { @@ -711,9 +934,9 @@ static void emit_sub(struct brw_wm_compile *c, brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); for (i = 0 ; i < 4; i++) { if (mask & (1<SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); + src1 = get_src_reg_imm(c, inst, 1, i); brw_ADD(p, dst, src0, negate(src1)); } } @@ -730,9 +953,9 @@ static void emit_mul(struct brw_wm_compile *c, brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); for (i = 0 ; i < 4; i++) { if (mask & (1<SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); + src1 = get_src_reg_imm(c, inst, 1, i); brw_MUL(p, dst, src0, src1); } } @@ -749,8 +972,8 @@ static void emit_frc(struct brw_wm_compile *c, brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); for (i = 0 ; i < 4; i++) { if (mask & (1<SrcReg[0], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg_imm(c, inst, 0, i); brw_FRC(p, dst, src0); } } @@ -768,8 +991,8 @@ static void emit_flr(struct brw_wm_compile *c, brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); for (i = 0 ; i < 4; i++) { if (mask & (1<SrcReg[0], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg_imm(c, inst, 0, i); brw_RNDD(p, dst, src0); } } @@ -786,9 +1009,9 @@ static void emit_max(struct brw_wm_compile *c, brw_push_insn_state(p); for (i = 0; i < 4; i++) { if (mask & (1<SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); + src1 = get_src_reg_imm(c, inst, 1, i); brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); brw_MOV(p, dst, src0); brw_set_saturate(p, 0); @@ -814,9 +1037,9 @@ static void emit_min(struct brw_wm_compile *c, brw_push_insn_state(p); for (i = 0; i < 4; i++) { if (mask & (1<SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg_imm(c, inst, 0, i); + src1 = get_src_reg(c, inst, 1, i); brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); brw_MOV(p, dst, src0); brw_set_saturate(p, 0); @@ -837,9 +1060,9 @@ static void emit_pow(struct brw_wm_compile *c, { struct brw_compile *p = &c->func; struct brw_reg dst, src0, src1; - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], 0, 1); + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + src0 = get_src_reg_imm(c, inst, 0, 0); + src1 = get_src_reg_imm(c, inst, 1, 0); brw_MOV(p, brw_message_reg(2), src0); brw_MOV(p, brw_message_reg(3), src1); @@ -864,10 +1087,10 @@ static void emit_lrp(struct brw_wm_compile *c, int mark = mark_tmps(c); for (i = 0; i < 4; i++) { if (mask & (1<SrcReg[0], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + src1 = get_src_reg_imm(c, inst, 1, i); if (src1.nr == dst.nr) { tmp1 = alloc_tmp(c); @@ -875,7 +1098,7 @@ static void emit_lrp(struct brw_wm_compile *c, } else tmp1 = src1; - src2 = get_src_reg(c, &inst->SrcReg[2], i, 1); + src2 = get_src_reg(c, inst, 2, i); if (src2.nr == dst.nr) { tmp2 = alloc_tmp(c); brw_MOV(p, tmp2, src2); @@ -892,15 +1115,19 @@ static void emit_lrp(struct brw_wm_compile *c, } } +/** + * For GLSL shaders, this KIL will be unconditional. + * It may be contained inside an IF/ENDIF structure of course. + */ static void emit_kil(struct brw_wm_compile *c) { - struct brw_compile *p = &c->func; - struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK - brw_AND(p, depth, c->emit_mask_reg, depth); - brw_pop_insn_state(p); + struct brw_compile *p = &c->func; + struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK + brw_AND(p, depth, c->emit_mask_reg, depth); + brw_pop_insn_state(p); } static void emit_mad(struct brw_wm_compile *c, @@ -913,10 +1140,10 @@ static void emit_mad(struct brw_wm_compile *c, for (i = 0; i < 4; i++) { if (mask & (1<SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); - src2 = get_src_reg(c, &inst->SrcReg[2], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); + src1 = get_src_reg_imm(c, inst, 1, i); + src2 = get_src_reg_imm(c, inst, 2, i); brw_MUL(p, dst, src0, src1); brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); @@ -936,9 +1163,9 @@ static void emit_sop(struct brw_wm_compile *c, for (i = 0; i < 4; i++) { if (mask & (1<SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); + src1 = get_src_reg_imm(c, inst, 1, i); brw_push_insn_state(p); brw_CMP(p, brw_null_reg(), cond, src0, src1); brw_set_predicate_control(p, BRW_PREDICATE_NONE); @@ -995,8 +1222,8 @@ static void emit_ddx(struct brw_wm_compile *c, struct brw_reg dst; struct brw_reg src0, w; GLuint nr, i; - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - w = get_src_reg(c, &inst->SrcReg[1], 3, 1); + src0 = get_src_reg(c, inst, 0, 0); + w = get_src_reg(c, inst, 1, 3); nr = src0.nr; interp[0] = brw_vec1_grf(nr, 0); interp[1] = brw_vec1_grf(nr, 4); @@ -1005,7 +1232,7 @@ static void emit_ddx(struct brw_wm_compile *c, brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); for(i = 0; i < 4; i++ ) { if (mask & (1<SrcReg[0], 0, 1); + src0 = get_src_reg(c, inst, 0, 0); nr = src0.nr; - w = get_src_reg(c, &inst->SrcReg[1], 3, 1); + w = get_src_reg(c, inst, 1, 3); interp[0] = brw_vec1_grf(nr, 0); interp[1] = brw_vec1_grf(nr, 4); interp[2] = brw_vec1_grf(nr+1, 0); @@ -1033,7 +1260,7 @@ static void emit_ddy(struct brw_wm_compile *c, brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); for(i = 0; i < 4; i++ ) { if (mask & (1<SrcReg, 0, 1 ); + src = get_src_reg( c, inst, 0, 0 ); param = alloc_tmp( c ); @@ -1179,7 +1406,7 @@ static void emit_noise1( struct brw_wm_compile *c, brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); for (i = 0 ; i < 4; i++) { if (mask & (1<SrcReg, 0, 1 ); - src1 = get_src_reg( c, inst->SrcReg, 1, 1 ); + src0 = get_src_reg( c, inst, 0, 0 ); + src1 = get_src_reg( c, inst, 0, 1 ); param0 = alloc_tmp( c ); param1 = alloc_tmp( c ); @@ -1352,7 +1579,7 @@ static void emit_noise2( struct brw_wm_compile *c, brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); for (i = 0 ; i < 4; i++) { if (mask & (1<func; @@ -1400,21 +1629,19 @@ static void noise3_sub( struct brw_wm_compile *c ) { /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to be hashed. Also compute the remainders (offsets within the unit cube), interleaved to reduce register dependency penalties. */ - brw_RNDD( p, itmp[ 0 ], param0 ); - brw_RNDD( p, itmp[ 1 ], param1 ); - brw_RNDD( p, itmp[ 2 ], param2 ); - brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xBC8F ) ); /* constant used later */ - brw_MOV( p, itmp[ 5 ], brw_imm_ud( 0xD0BD ) ); /* constant used later */ - brw_MOV( p, itmp[ 6 ], brw_imm_ud( 0x9B93 ) ); /* constant used later */ + brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param0 ); + brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param1 ); + brw_RNDD( p, retype( itmp[ 2 ], BRW_REGISTER_TYPE_D ), param2 ); brw_FRC( p, param0, param0 ); brw_FRC( p, param1, param1 ); brw_FRC( p, param2, param2 ); /* Since we now have only 16 bits of precision in the hash, we must be more careful about thorough mixing to maintain entropy as we squash the input vector into a small scalar. */ - brw_MUL( p, brw_acc_reg(), itmp[ 4 ], itmp[ 0 ] ); - brw_MAC( p, brw_acc_reg(), itmp[ 5 ], itmp[ 1 ] ); - brw_MAC( p, itmp[ 0 ], itmp[ 6 ], itmp[ 2 ] ); + brw_MUL( p, brw_null_reg(), low_words( itmp[ 0 ] ), brw_imm_uw( 0xBC8F ) ); + brw_MAC( p, brw_null_reg(), low_words( itmp[ 1 ] ), brw_imm_uw( 0xD0BD ) ); + brw_MAC( p, low_words( itmp[ 0 ] ), low_words( itmp[ 2 ] ), + brw_imm_uw( 0x9B93 ) ); brw_ADD( p, high_words( itmp[ 0 ] ), low_words( itmp[ 0 ] ), brw_imm_uw( 0xBC8F ) ); @@ -1640,9 +1867,9 @@ static void emit_noise3( struct brw_wm_compile *c, assert( mark == 0 ); - src0 = get_src_reg( c, inst->SrcReg, 0, 1 ); - src1 = get_src_reg( c, inst->SrcReg, 1, 1 ); - src2 = get_src_reg( c, inst->SrcReg, 2, 1 ); + src0 = get_src_reg( c, inst, 0, 0 ); + src1 = get_src_reg( c, inst, 0, 1 ); + src2 = get_src_reg( c, inst, 0, 2 ); param0 = alloc_tmp( c ); param1 = alloc_tmp( c ); @@ -1658,7 +1885,433 @@ static void emit_noise3( struct brw_wm_compile *c, brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); for (i = 0 ; i < 4; i++) { if (mask & (1<SaturateMode == SATURATE_ZERO_ONE ) + brw_set_saturate( p, 0 ); + + release_tmps( c, mark ); +} + +/** + * For the four-dimensional case, the little micro-optimisation benefits + * we obtain by unrolling all the loops aren't worth the massive bloat it + * now causes. Instead, we loop twice around performing a similar operation + * to noise3, once for the w=0 cube and once for the w=1, with a bit more + * code to glue it all together. + */ +static void noise4_sub( struct brw_wm_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg param[ 4 ], + x0y0, x0y1, x1y0, x1y1, /* gradients at four of the corners */ + w0, /* noise for the w=0 cube */ + floors[ 2 ], /* integer coordinates of base corner of hypercube */ + interp[ 4 ], /* interpolation coefficients */ + t, tmp[ 8 ], /* float temporaries */ + itmp[ 8 ], /* unsigned integer temporaries (aliases of floats above) */ + wtmp[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */ + int i, j; + int mark = mark_tmps( c ); + GLuint loop, origin; + + x0y0 = alloc_tmp( c ); + x0y1 = alloc_tmp( c ); + x1y0 = alloc_tmp( c ); + x1y1 = alloc_tmp( c ); + t = alloc_tmp( c ); + w0 = alloc_tmp( c ); + floors[ 0 ] = retype( alloc_tmp( c ), BRW_REGISTER_TYPE_UD ); + floors[ 1 ] = retype( alloc_tmp( c ), BRW_REGISTER_TYPE_UD ); + + for( i = 0; i < 4; i++ ) { + param[ i ] = lookup_tmp( c, mark - 5 + i ); + interp[ i ] = alloc_tmp( c ); + } + + for( i = 0; i < 8; i++ ) { + tmp[ i ] = alloc_tmp( c ); + itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD ); + wtmp[ i ] = brw_uw16_grf( tmp[ i ].nr, 0 ); + } + + brw_set_access_mode( p, BRW_ALIGN_1 ); + + /* We only want 16 bits of precision from the integral part of each + co-ordinate, but unfortunately the RNDD semantics would saturate + at 16 bits if we performed the operation directly to a 16-bit + destination. Therefore, we round to 32-bit temporaries where + appropriate, and then store only the lower 16 bits. */ + brw_RNDD( p, retype( floors[ 0 ], BRW_REGISTER_TYPE_D ), param[ 0 ] ); + brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param[ 1 ] ); + brw_RNDD( p, retype( floors[ 1 ], BRW_REGISTER_TYPE_D ), param[ 2 ] ); + brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param[ 3 ] ); + brw_MOV( p, high_words( floors[ 0 ] ), low_words( itmp[ 0 ] ) ); + brw_MOV( p, high_words( floors[ 1 ] ), low_words( itmp[ 1 ] ) ); + + /* Modify the flag register here, because the side effect is useful + later (see below). We know for certain that all flags will be + cleared, since the FRC instruction cannot possibly generate + negative results. Even for exceptional inputs (infinities, denormals, + NaNs), the architecture guarantees that the L conditional is false. */ + brw_set_conditionalmod( p, BRW_CONDITIONAL_L ); + brw_FRC( p, param[ 0 ], param[ 0 ] ); + brw_set_predicate_control( p, BRW_PREDICATE_NONE ); + for( i = 1; i < 4; i++ ) + brw_FRC( p, param[ i ], param[ i ] ); + + /* Calculate the interpolation coefficients (6t^5 - 15t^4 + 10t^3) first + of all. */ + for( i = 0; i < 4; i++ ) + brw_MUL( p, interp[ i ], param[ i ], brw_imm_f( 6.0 ) ); + for( i = 0; i < 4; i++ ) + brw_ADD( p, interp[ i ], interp[ i ], brw_imm_f( -15.0 ) ); + for( i = 0; i < 4; i++ ) + brw_MUL( p, interp[ i ], interp[ i ], param[ i ] ); + for( i = 0; i < 4; i++ ) + brw_ADD( p, interp[ i ], interp[ i ], brw_imm_f( 10.0 ) ); + for( j = 0; j < 3; j++ ) + for( i = 0; i < 4; i++ ) + brw_MUL( p, interp[ i ], interp[ i ], param[ i ] ); + + /* Mark the current address, as it will be a jump destination. The + following code will be executed twice: first, with the flag + register clear indicating the w=0 case, and second with flags + set for w=1. */ + loop = p->nr_insn; + + /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to + be hashed. Since we have only 16 bits of precision in the hash, we + must be careful about thorough mixing to maintain entropy as we + squash the input vector into a small scalar. */ + brw_MUL( p, brw_null_reg(), low_words( floors[ 0 ] ), + brw_imm_uw( 0xBC8F ) ); + brw_MAC( p, brw_null_reg(), high_words( floors[ 0 ] ), + brw_imm_uw( 0xD0BD ) ); + brw_MAC( p, brw_null_reg(), low_words( floors[ 1 ] ), + brw_imm_uw( 0x9B93 ) ); + brw_MAC( p, low_words( itmp[ 0 ] ), high_words( floors[ 1 ] ), + brw_imm_uw( 0xA359 ) ); + brw_ADD( p, high_words( itmp[ 0 ] ), low_words( itmp[ 0 ] ), + brw_imm_uw( 0xBC8F ) ); + + /* Temporarily disable the execution mask while we work with ExecSize=16 + channels (the mask is set for ExecSize=8 and is probably incorrect). + Although this might cause execution of unwanted channels, the code + writes only to temporary registers and has no side effects, so + disabling the mask is harmless. */ + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_ADD( p, wtmp[ 1 ], wtmp[ 0 ], brw_imm_uw( 0xD0BD ) ); + brw_ADD( p, wtmp[ 2 ], wtmp[ 0 ], brw_imm_uw( 0x9B93 ) ); + brw_ADD( p, wtmp[ 3 ], wtmp[ 1 ], brw_imm_uw( 0x9B93 ) ); + + /* We're now ready to perform the hashing. The eight hashes are + interleaved for performance. The hash function used is + designed to rapidly achieve avalanche and require only 16x16 + bit multiplication, and 8-bit swizzles (which we get for + free). */ + for( i = 0; i < 4; i++ ) + brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0x28D9 ) ); + for( i = 0; i < 4; i++ ) + brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ), + odd_bytes( wtmp[ i ] ) ); + for( i = 0; i < 4; i++ ) + brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0xC6D5 ) ); + for( i = 0; i < 4; i++ ) + brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ), + odd_bytes( wtmp[ i ] ) ); + brw_pop_insn_state( p ); + + /* Now we want to initialise the four rear gradients based on the + hashes. Format conversion from signed integer to float leaves + everything scaled too high by a factor of pow( 2, 15 ), but + we correct for that right at the end. */ + /* x component */ + brw_ADD( p, t, param[ 0 ], brw_imm_f( -1.0 ) ); + brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) ); + brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) ); + brw_MOV( p, x1y0, high_words( tmp[ 0 ] ) ); + brw_MOV( p, x1y1, high_words( tmp[ 1 ] ) ); + + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) ); + brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) ); + brw_pop_insn_state( p ); + + brw_MUL( p, x1y0, x1y0, t ); + brw_MUL( p, x1y1, x1y1, t ); + brw_ADD( p, t, param[ 1 ], brw_imm_f( -1.0 ) ); + brw_MUL( p, x0y0, x0y0, param[ 0 ] ); + brw_MUL( p, x0y1, x0y1, param[ 0 ] ); + + /* y component */ + brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) ); + brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) ); + brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) ); + brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) ); + + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) ); + brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) ); + brw_pop_insn_state( p ); + + brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); + brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); + /* prepare t for the w component (used below): w the first time through + the loop; w - 1 the second time) */ + brw_set_predicate_control( p, BRW_PREDICATE_NORMAL ); + brw_ADD( p, t, param[ 3 ], brw_imm_f( -1.0 ) ); + p->current->header.predicate_inverse = 1; + brw_MOV( p, t, param[ 3 ] ); + p->current->header.predicate_inverse = 0; + brw_set_predicate_control( p, BRW_PREDICATE_NONE ); + brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 1 ] ); + brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 1 ] ); + + brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); + brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); + brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); + + /* z component */ + brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) ); + brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) ); + brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) ); + brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) ); + + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) ); + brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) ); + brw_pop_insn_state( p ); + + brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 2 ] ); + brw_MUL( p, tmp[ 5 ], tmp[ 5 ], param[ 2 ] ); + brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 2 ] ); + brw_MUL( p, tmp[ 7 ], tmp[ 7 ], param[ 2 ] ); + + brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); + brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); + brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); + + /* w component */ + brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) ); + brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) ); + brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) ); + brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) ); + + brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t ); + brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); + brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t ); + brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); + brw_ADD( p, t, param[ 0 ], brw_imm_f( -1.0 ) ); + + brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); + brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); + brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); + + /* Here we interpolate in the y dimension... */ + brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); + brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); + brw_MUL( p, x0y1, x0y1, interp[ 1 ] ); + brw_MUL( p, x1y1, x1y1, interp[ 1 ] ); + brw_ADD( p, x0y0, x0y0, x0y1 ); + brw_ADD( p, x1y0, x1y0, x1y1 ); + + /* And now in x. Leave the result in tmp[ 0 ] (see below)... */ + brw_ADD( p, x1y0, x1y0, negate( x0y0 ) ); + brw_MUL( p, x1y0, x1y0, interp[ 0 ] ); + brw_ADD( p, tmp[ 0 ], x0y0, x1y0 ); + + /* Now do the same thing for the front four gradients... */ + /* x component */ + brw_MOV( p, x0y0, low_words( tmp[ 2 ] ) ); + brw_MOV( p, x0y1, low_words( tmp[ 3 ] ) ); + brw_MOV( p, x1y0, high_words( tmp[ 2 ] ) ); + brw_MOV( p, x1y1, high_words( tmp[ 3 ] ) ); + + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) ); + brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) ); + brw_pop_insn_state( p ); + + brw_MUL( p, x1y0, x1y0, t ); + brw_MUL( p, x1y1, x1y1, t ); + brw_ADD( p, t, param[ 1 ], brw_imm_f( -1.0 ) ); + brw_MUL( p, x0y0, x0y0, param[ 0 ] ); + brw_MUL( p, x0y1, x0y1, param[ 0 ] ); + + /* y component */ + brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) ); + brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) ); + brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) ); + brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) ); + + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) ); + brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) ); + brw_pop_insn_state( p ); + + brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); + brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); + brw_ADD( p, t, param[ 2 ], brw_imm_f( -1.0 ) ); + brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 1 ] ); + brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 1 ] ); + + brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); + brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); + brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); + + /* z component */ + brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) ); + brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) ); + brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) ); + brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) ); + + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) ); + brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) ); + brw_pop_insn_state( p ); + + brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t ); + brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); + brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t ); + brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); + /* prepare t for the w component (used below): w the first time through + the loop; w - 1 the second time) */ + brw_set_predicate_control( p, BRW_PREDICATE_NORMAL ); + brw_ADD( p, t, param[ 3 ], brw_imm_f( -1.0 ) ); + p->current->header.predicate_inverse = 1; + brw_MOV( p, t, param[ 3 ] ); + p->current->header.predicate_inverse = 0; + brw_set_predicate_control( p, BRW_PREDICATE_NONE ); + + brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); + brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); + brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); + + /* w component */ + brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) ); + brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) ); + brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) ); + brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) ); + + brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t ); + brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); + brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t ); + brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); + + brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); + brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); + brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); + + /* Interpolate in the y dimension: */ + brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); + brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); + brw_MUL( p, x0y1, x0y1, interp[ 1 ] ); + brw_MUL( p, x1y1, x1y1, interp[ 1 ] ); + brw_ADD( p, x0y0, x0y0, x0y1 ); + brw_ADD( p, x1y0, x1y0, x1y1 ); + + /* And now in x. The rear face is in tmp[ 0 ] (see above), so this + time put the front face in tmp[ 1 ] and we're nearly there... */ + brw_ADD( p, x1y0, x1y0, negate( x0y0 ) ); + brw_MUL( p, x1y0, x1y0, interp[ 0 ] ); + brw_ADD( p, tmp[ 1 ], x0y0, x1y0 ); + + /* Another interpolation, in the z dimension: */ + brw_ADD( p, tmp[ 1 ], tmp[ 1 ], negate( tmp[ 0 ] ) ); + brw_MUL( p, tmp[ 1 ], tmp[ 1 ], interp[ 2 ] ); + brw_ADD( p, tmp[ 0 ], tmp[ 0 ], tmp[ 1 ] ); + + /* Exit the loop if we've computed both cubes... */ + origin = p->nr_insn; + brw_push_insn_state( p ); + brw_set_predicate_control( p, BRW_PREDICATE_NORMAL ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_ADD( p, brw_ip_reg(), brw_ip_reg(), brw_imm_d( 0 ) ); + brw_pop_insn_state( p ); + + /* Save the result for the w=0 case, and increment the w coordinate: */ + brw_MOV( p, w0, tmp[ 0 ] ); + brw_ADD( p, high_words( floors[ 1 ] ), high_words( floors[ 1 ] ), + brw_imm_uw( 1 ) ); + + /* Loop around for the other cube. Explicitly set the flag register + (unfortunately we must spend an extra instruction to do this: we + can't rely on a side effect of the previous MOV or ADD because + conditional modifiers which are normally true might be false in + exceptional circumstances, e.g. given a NaN input; the add to + brw_ip_reg() is not suitable because the IP is not an 8-vector). */ + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_MOV( p, brw_flag_reg(), brw_imm_uw( 0xFF ) ); + brw_ADD( p, brw_ip_reg(), brw_ip_reg(), + brw_imm_d( ( loop - p->nr_insn ) << 4 ) ); + brw_pop_insn_state( p ); + + /* Patch the previous conditional branch now that we know the + destination address. */ + brw_set_src1( p->store + origin, + brw_imm_d( ( p->nr_insn - origin ) << 4 ) ); + + /* The very last interpolation. */ + brw_ADD( p, tmp[ 0 ], tmp[ 0 ], negate( w0 ) ); + brw_MUL( p, tmp[ 0 ], tmp[ 0 ], interp[ 3 ] ); + brw_ADD( p, tmp[ 0 ], tmp[ 0 ], w0 ); + + /* scale by pow( 2, -15 ), as described above */ + brw_MUL( p, param[ 0 ], tmp[ 0 ], brw_imm_f( 0.000030517578125 ) ); + + release_tmps( c, mark ); +} + +static void emit_noise4( struct brw_wm_compile *c, + struct prog_instruction *inst ) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, src1, src2, src3, param0, param1, param2, param3, dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + int mark = mark_tmps( c ); + + assert( mark == 0 ); + + src0 = get_src_reg( c, inst, 0, 0 ); + src1 = get_src_reg( c, inst, 0, 1 ); + src2 = get_src_reg( c, inst, 0, 2 ); + src3 = get_src_reg( c, inst, 0, 3 ); + + param0 = alloc_tmp( c ); + param1 = alloc_tmp( c ); + param2 = alloc_tmp( c ); + param3 = alloc_tmp( c ); + + brw_MOV( p, param0, src0 ); + brw_MOV( p, param1, src1 ); + brw_MOV( p, param2, src2 ); + brw_MOV( p, param3, src3 ); + + invoke_subroutine( c, SUB_NOISE4, noise4_sub ); + + /* Fill in the result: */ + brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); + for (i = 0 ; i < 4; i++) { + if (mask & (1<DstReg.WriteMask; struct brw_reg src0[2], dst[2]; - dst[0] = get_dst_reg(c, inst, 0, 1); - dst[1] = get_dst_reg(c, inst, 1, 1); + dst[0] = get_dst_reg(c, inst, 0); + dst[1] = get_dst_reg(c, inst, 1); - src0[0] = get_src_reg(c, &inst->SrcReg[0], 0, 1); - src0[1] = get_src_reg(c, &inst->SrcReg[0], 1, 1); + src0[0] = get_src_reg(c, inst, 0, 0); + src0[1] = get_src_reg(c, inst, 0, 1); /* Calculate the pixel offset from window bottom left into destination * X and Y channels. @@ -1702,7 +2355,7 @@ static void emit_wpos_xy(struct brw_wm_compile *c, } /* TODO - BIAS on SIMD8 not workind yet... + BIAS on SIMD8 not working yet... */ static void emit_txb(struct brw_wm_compile *c, struct prog_instruction *inst) @@ -1710,19 +2363,20 @@ static void emit_txb(struct brw_wm_compile *c, struct brw_compile *p = &c->func; struct brw_reg dst[4], src[4], payload_reg; GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; - GLuint i; + payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); + for (i = 0; i < 4; i++) - dst[i] = get_dst_reg(c, inst, i, 1); + dst[i] = get_dst_reg(c, inst, i); for (i = 0; i < 4; i++) - src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); + src[i] = get_src_reg(c, inst, 0, i); switch (inst->TexSrcTarget) { case TEXTURE_1D_INDEX: - brw_MOV(p, brw_message_reg(2), src[0]); - brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); - brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(2), src[0]); /* s coord */ + brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); /* t coord */ + brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); /* r coord */ break; case TEXTURE_2D_INDEX: case TEXTURE_RECT_INDEX: @@ -1736,28 +2390,28 @@ static void emit_txb(struct brw_wm_compile *c, brw_MOV(p, brw_message_reg(4), src[2]); break; } - brw_MOV(p, brw_message_reg(5), src[3]); - brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(5), src[3]); /* bias */ + brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */ brw_SAMPLE(p, - retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), - 1, - retype(payload_reg, BRW_REGISTER_TYPE_UW), - unit + MAX_DRAW_BUFFERS, /* surface */ - unit, /* sampler */ - inst->DstReg.WriteMask, - BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, - 4, - 4, - 0); + retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */ + 1, /* msg_reg_nr */ + retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */ + unit + MAX_DRAW_BUFFERS, /* surface */ + unit, /* sampler */ + inst->DstReg.WriteMask, /* writemask */ + BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, /* msg_type */ + 4, /* response_length */ + 4, /* msg_length */ + 0); /* eot */ } + static void emit_tex(struct brw_wm_compile *c, struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg dst[4], src[4], payload_reg; GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; - GLuint msg_len; GLuint i, nr; GLuint emit; @@ -1766,10 +2420,9 @@ static void emit_tex(struct brw_wm_compile *c, payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); for (i = 0; i < 4; i++) - dst[i] = get_dst_reg(c, inst, i, 1); + dst[i] = get_dst_reg(c, inst, i); for (i = 0; i < 4; i++) - src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); - + src[i] = get_src_reg(c, inst, 0, i); switch (inst->TexSrcTarget) { case TEXTURE_1D_INDEX: @@ -1788,6 +2441,7 @@ static void emit_tex(struct brw_wm_compile *c, } msg_len = 1; + /* move/load S, T, R coords */ for (i = 0; i < nr; i++) { static const GLuint swz[4] = {0,1,2,2}; if (emit & (1<DstReg.WriteMask, - BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, - 4, - shadow ? 6 : 4, - 0); + retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */ + 1, /* msg_reg_nr */ + retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */ + unit + MAX_DRAW_BUFFERS, /* surface */ + unit, /* sampler */ + inst->DstReg.WriteMask, /* writemask */ + BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, /* msg_type */ + 4, /* response_length */ + shadow ? 6 : 4, /* msg_length */ + 0); /* eot */ if (shadow) brw_MOV(p, dst[3], brw_imm_f(1.0)); } + +static void emit_get_constant(struct brw_context *brw, + struct brw_wm_compile *c, + struct prog_instruction *inst, + GLuint constIndex) +{ + struct brw_compile *p = &c->func; + struct brw_reg dst[4]; + GLuint i; + const int mark = mark_tmps( c ); + struct brw_reg writeback_reg[4]; + + /* XXX only need 1 temp reg??? */ + for (i = 0; i < 4; i++) { + writeback_reg[i] = alloc_tmp(c); + } + + for (i = 0; i < 4; i++) { + dst[i] = get_dst_reg(c, inst, i); + } + + /* Get float[4] vector from constant buffer */ + brw_dp_READ_4(p, + writeback_reg[0], /* first writeback dest */ + 1, /* msg_reg */ + GL_FALSE, /* rel addr? */ + 16 * constIndex, /* byte offset */ + BRW_WM_MAX_SURF - 1 /* surface, binding table index */ + ); + + /* Extract the four channel values, smear across dest registers */ + for (i = 0; i < 4; i++) { + /* extract 1 float from the writeback reg */ + struct brw_reg new_src = stride(writeback_reg[0], 0, 1, 0); + new_src.subnr = i * 4; + /* and smear it into the dest register */ + brw_MOV(p, dst[i], new_src); + } + + release_tmps( c, mark ); +} + + +/** + * Resolve subroutine calls after code emit is done. + */ static void post_wm_emit( struct brw_wm_compile *c ) { - GLuint nr_insns = c->fp->program.Base.NumInstructions; - GLuint insn, target_insn; - struct prog_instruction *inst1, *inst2; - struct brw_instruction *brw_inst1, *brw_inst2; - int offset; - for (insn = 0; insn < nr_insns; insn++) { - inst1 = &c->fp->program.Base.Instructions[insn]; - brw_inst1 = inst1->Data; - switch (inst1->Opcode) { - case OPCODE_CAL: - target_insn = inst1->BranchTarget; - inst2 = &c->fp->program.Base.Instructions[target_insn]; - brw_inst2 = inst2->Data; - offset = brw_inst2 - brw_inst1; - brw_set_src1(brw_inst1, brw_imm_d(offset*16)); - break; - default: - break; - } - } + brw_resolve_cals(&c->func); } static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) @@ -1859,10 +2540,6 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) for (i = 0; i < c->nr_fp_insns; i++) { struct prog_instruction *inst = &c->prog_instructions[i]; - struct prog_instruction *orig_inst; - - if ((orig_inst = inst->Data) != 0) - orig_inst->Data = current_insn(p); if (inst->CondUpdate) brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); @@ -1894,12 +2571,18 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) case WM_FB_WRITE: emit_fb_write(c, inst); break; + case WM_FRONTFACING: + emit_frontfacing(c, inst); + break; case OPCODE_ABS: emit_abs(c, inst); break; case OPCODE_ADD: emit_add(c, inst); break; + case OPCODE_ARL: + emit_arl(c, inst); + break; case OPCODE_SUB: emit_sub(c, inst); break; @@ -1916,7 +2599,17 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) emit_trunc(c, inst); break; case OPCODE_MOV: +#if 0 + /* test hook for new constant buffer code */ + if (inst->SrcReg[0].File == PROGRAM_UNIFORM) { + emit_get_constant(brw, c, inst, inst->SrcReg[0].Index); + } + else { + emit_mov(c, inst); + } +#else emit_mov(c, inst); +#endif break; case OPCODE_DP3: emit_dp3(c, inst); @@ -1996,8 +2689,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) case OPCODE_NOISE3: emit_noise3(c, inst); break; - /* case OPCODE_NOISE4: */ - /* not yet implemented */ + case OPCODE_NOISE4: + emit_noise4(c, inst); + break; case OPCODE_TEX: emit_tex(c, inst); break; @@ -2019,7 +2713,10 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) brw_ENDIF(p, if_inst[--if_insn]); break; case OPCODE_BGNSUB: + brw_save_label(p, inst->Comment, p->nr_insn); + break; case OPCODE_ENDSUB: + /* no-op */ break; case OPCODE_CAL: brw_push_insn_state(p); @@ -2029,8 +2726,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) brw_set_access_mode(p, BRW_ALIGN_16); brw_ADD(p, get_addr_reg(stack_index), get_addr_reg(stack_index), brw_imm_d(4)); - orig_inst = inst->Data; - orig_inst->Data = &p->store[p->nr_insn]; + brw_save_call(&c->func, inst->Comment, p->nr_insn); brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); brw_pop_insn_state(p); break; @@ -2083,14 +2779,34 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) brw_set_predicate_control(p, BRW_PREDICATE_NONE); } post_wm_emit(c); - for (i = 0; i < c->fp->program.Base.NumInstructions; i++) - c->fp->program.Base.Instructions[i].Data = NULL; + + if (c->reg_index >= BRW_WM_MAX_GRF) { + _mesa_problem(NULL, "Ran out of registers in brw_wm_emit_glsl()"); + /* XXX we need to do some proper error recovery here */ + } } + +/** + * Do GPU code generation for shaders that use GLSL features such as + * flow control. Other shaders will be compiled with the + */ void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) { + if (INTEL_DEBUG & DEBUG_WM) { + _mesa_printf("brw_wm_glsl_emit:\n"); + } + + /* initial instruction translation/simplification */ brw_wm_pass_fp(c); + + /* actual code generation */ brw_wm_emit_glsl(brw, c); + + if (INTEL_DEBUG & DEBUG_WM) { + brw_wm_print_program(c, "brw_wm_glsl_emit done"); + } + c->prog_data.total_grf = c->reg_index; c->prog_data.total_scratch = 0; }