From ee32e9b4753eca62e360f96ce61ef7ff683e6bb7 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 15 Apr 2009 16:49:18 -0600 Subject: [PATCH] i965: implement relative addressing for VS constant buffer reads A scatter-read should be possible, but we're just using two READs for the time being. --- src/mesa/drivers/dri/i965/brw_eu.h | 1 + src/mesa/drivers/dri/i965/brw_eu_emit.c | 59 +++++++++++- src/mesa/drivers/dri/i965/brw_vs_emit.c | 114 ++++++++++++------------ 3 files changed, 115 insertions(+), 59 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 66f8eb840c1..896e67dbfe9 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -868,6 +868,7 @@ void brw_dp_READ_4( struct brw_compile *p, void brw_dp_READ_4_vs( struct brw_compile *p, struct brw_reg dest, GLboolean relAddr, + struct brw_reg addrReg, GLuint location, GLuint bind_table_index ); diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index c731a93a8d6..df2141660c0 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -1003,15 +1003,18 @@ void brw_dp_READ_4( struct brw_compile *p, /** - * Read float[4] constant from VS constant buffer. + * Read float[4] constant(s) from VS constant buffer. + * For relative addressing, two float[4] constants will be read into 'dest'. + * Otherwise, one float[4] constant will be read into the lower half of 'dest'. 
*/ void brw_dp_READ_4_vs(struct brw_compile *p, struct brw_reg dest, GLboolean relAddr, + struct brw_reg addrReg, GLuint location, GLuint bind_table_index) { - const GLuint msg_reg_nr = 1; + GLuint msg_reg_nr = 1; /* printf("vs const read msg, location %u, msg_reg_nr %d\n", @@ -1034,7 +1037,12 @@ void brw_dp_READ_4_vs(struct brw_compile *p, b = brw_message_reg(msg_reg_nr); b = retype(b, BRW_REGISTER_TYPE_UD); /*b = get_element_ud(b, 2);*/ - brw_MOV(p, b, brw_imm_ud(location)); + if (relAddr) { + brw_ADD(p, b, addrReg, brw_imm_ud(location)); + } + else { + brw_MOV(p, b, brw_imm_ud(location)); + } brw_pop_insn_state(p); } @@ -1053,13 +1061,56 @@ void brw_dp_READ_4_vs(struct brw_compile *p, brw_set_dp_read_message(insn, bind_table_index, - 0, /* msg_control (0 means 1 Oword) */ + 0, /* msg_control (0 means 1 Oword, lower half) */ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ 0, /* source cache = data cache */ 1, /* msg_length */ 1, /* response_length (1 Oword) */ 0); /* eot */ } + + if (relAddr) { + /* second read to get second constant */ + msg_reg_nr++; + { + /* Setup MRF[msg_reg_nr] (now MRF[2]) with location/offset into const buffer */ + struct brw_reg b; + + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + b = brw_message_reg(msg_reg_nr); + b = retype(b, BRW_REGISTER_TYPE_UD); + addrReg = suboffset(addrReg, 1); /* upper half of addrReg */ + brw_ADD(p, b, addrReg, brw_imm_ud(location)); + + brw_pop_insn_state(p); + } + + { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = BRW_PREDICATE_NONE; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.mask_control = BRW_MASK_DISABLE; + /*insn->header.access_mode = BRW_ALIGN_16;*/ + + brw_set_dest(insn, dest); + brw_set_src0(insn, brw_null_reg()); + + brw_set_dp_read_message(insn, 
+ bind_table_index, + 1, /* msg_control (1 means 1 Oword, upper half) */ + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ + 0, /* source cache = data cache */ + 1, /* msg_length */ + 1, /* response_length (1 Oword) */ + 0); /* eot */ + } + } } diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 19ead73d8ca..98fbdf5064d 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -709,7 +709,7 @@ get_constant(struct brw_vs_compile *c, assert(argIndex < 3); - if (c->current_const[argIndex].index != src->Index) { + if (c->current_const[argIndex].index != src->Index || src->RelAddr) { c->current_const[argIndex].index = src->Index; @@ -722,15 +722,18 @@ get_constant(struct brw_vs_compile *c, brw_dp_READ_4_vs(p, c->current_const[argIndex].reg, /* writeback dest */ src->RelAddr, /* relative indexing? */ + c->regs[PROGRAM_ADDRESS][0], /* address register */ 16 * src->Index, /* byte offset */ SURF_INDEX_VERT_CONST_BUFFER /* binding table index */ ); } - /* replicate lower four floats into upper four floats (to get XYZWXYZW) */ const_reg = c->current_const[argIndex].reg; - const_reg = stride(const_reg, 0, 4, 0); - const_reg.subnr = 0; + if (!src->RelAddr) { + /* replicate lower four floats into upper half (to get XYZWXYZW) */ + const_reg = stride(const_reg, 0, 4, 0); + const_reg.subnr = 0; + } return const_reg; } @@ -771,6 +774,42 @@ static struct brw_reg get_reg( struct brw_vs_compile *c, } +/** + * Indirect addressing: get reg[[arg] + offset]. 
+ */ +static struct brw_reg deref( struct brw_vs_compile *c, + struct brw_reg arg, + GLint offset) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp = vec4(get_tmp(c)); + struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0]; + struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_UW); + GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16; + struct brw_reg indirect = brw_vec4_indirect(0,0); + + { + brw_push_insn_state(p); + brw_set_access_mode(p, BRW_ALIGN_1); + + /* This is pretty clunky - load the address register twice and + * fetch each 4-dword value in turn. There must be a way to do + * this in a single pass, but I couldn't get it to work. + */ + brw_ADD(p, brw_address_reg(0), vp_address, brw_imm_d(byte_offset)); + brw_MOV(p, tmp, indirect); + + brw_ADD(p, brw_address_reg(0), suboffset(vp_address, 8), brw_imm_d(byte_offset)); + brw_MOV(p, suboffset(tmp, 4), indirect); + + brw_pop_insn_state(p); + } + + /* NOTE: tmp not released */ + return vec8(tmp); +} + + /** * Get brw reg corresponding to the instruction's [argIndex] src reg. * TODO: relative addressing! 
@@ -782,19 +821,29 @@ get_src_reg( struct brw_vs_compile *c, { const GLuint file = inst->SrcReg[argIndex].File; const GLint index = inst->SrcReg[argIndex].Index; + const GLboolean relAddr = inst->SrcReg[argIndex].RelAddr; switch (file) { case PROGRAM_TEMPORARY: case PROGRAM_INPUT: case PROGRAM_OUTPUT: - assert(c->regs[file][index].nr != 0); - return c->regs[file][index]; + if (relAddr) { + return deref(c, c->regs[file][0], index); + } + else { + assert(c->regs[file][index].nr != 0); + return c->regs[file][index]; + } + case PROGRAM_STATE_VAR: case PROGRAM_CONSTANT: case PROGRAM_UNIFORM: if (c->use_const_buffer) { return get_constant(c, inst, argIndex); } + else if (relAddr) { + return deref(c, c->regs[PROGRAM_STATE_VAR][0], index); + } else { assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0); return c->regs[PROGRAM_STATE_VAR][index]; @@ -817,42 +866,6 @@ get_src_reg( struct brw_vs_compile *c, } -/** - * Indirect addressing: get reg[[arg] + offset]. - */ -static struct brw_reg deref( struct brw_vs_compile *c, - struct brw_reg arg, - GLint offset) -{ - struct brw_compile *p = &c->func; - struct brw_reg tmp = vec4(get_tmp(c)); - struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0]; - struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_UW); - GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16; - struct brw_reg indirect = brw_vec4_indirect(0,0); - - { - brw_push_insn_state(p); - brw_set_access_mode(p, BRW_ALIGN_1); - - /* This is pretty clunky - load the address register twice and - * fetch each 4-dword value in turn. There must be a way to do - * this in a single pass, but I couldn't get it to work. 
- */ - brw_ADD(p, brw_address_reg(0), vp_address, brw_imm_d(byte_offset)); - brw_MOV(p, tmp, indirect); - - brw_ADD(p, brw_address_reg(0), suboffset(vp_address, 8), brw_imm_d(byte_offset)); - brw_MOV(p, suboffset(tmp, 4), indirect); - - brw_pop_insn_state(p); - } - - /* NOTE: tmp not released */ - return vec8(tmp); -} - - static void emit_arl( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0 ) @@ -864,8 +877,8 @@ static void emit_arl( struct brw_vs_compile *c, if (need_tmp) tmp = get_tmp(c); - brw_RNDD(p, tmp, arg0); - brw_MUL(p, dst, tmp, brw_imm_d(16)); + brw_RNDD(p, tmp, arg0); /* tmp = floor(arg0) (RNDD rounds down) */ + brw_MUL(p, dst, tmp, brw_imm_d(16)); /* dst = tmp * 16 */ if (need_tmp) release_tmp(c, tmp); @@ -888,13 +901,7 @@ static struct brw_reg get_arg( struct brw_vs_compile *c, if (src->File == PROGRAM_UNDEFINED) return brw_null_reg(); - if (src->RelAddr) { - /* XXX fix */ - reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index); - } - else { - reg = get_src_reg(c, inst, argIndex); - } + reg = get_src_reg(c, inst, argIndex); /* Convert 3-bit swizzle to 2-bit. */ @@ -989,10 +996,7 @@ static void emit_swz( struct brw_vs_compile *c, if (src_mask) { struct brw_reg arg0; - if (src.RelAddr) - arg0 = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index); - else - arg0 = get_src_reg(c, inst, argIndex); + arg0 = get_src_reg(c, inst, argIndex); arg0 = brw_swizzle(arg0, src_swz[0], src_swz[1], -- 2.30.2