From 2c485cda2062ca2b9af89ea62618515d960c7904 Mon Sep 17 00:00:00 2001
From: Tom Stellard
Date: Fri, 29 Jun 2012 16:41:32 +0000
Subject: [PATCH] radeon/llvm: Emit raw ISA for vertex fetch instructions

Describe the first 64 bits (VTX_WORD0 and VTX_WORD1_GPR) of the
VTX_READ_PARAM_eg / VTX_READ_GLOBAL_eg instructions in TableGen, and have
R600CodeEmitter emit them as raw ISA: the INSTR_VTX marker byte, the
64-bit value returned by getBinaryCodeForInstr(), then one 32-bit word
carrying the offset operand (VTX_WORD2).

On the r600g side, r600_vtx_from_byte_stream() now reads three 32-bit
words from the byte stream (assembled least-significant byte first by
the new i32_from_byte_stream() helper) and decodes the fields with the
G_SQ_VTX_WORD* macros instead of consuming one byte per field.
---
Notes (not part of the commit message):
 - i32_from_byte_stream() casts each byte to uint32_t before shifting, so
   a byte >= 0x80 is never shifted into the sign bit of a promoted int.
 - InstWord2 is masked to the 16-bit OFFSET field so the remaining WORD2
   fields stay 0, as listed in the R600Instructions.td comments.
 - Comment fixes: reserved range of VTX_WORD2 is 95-87 (bit 86 belongs to
   BUFFER_INDEX_MODE), closed parenthesis, "WORD2" spelling.
 - Whitespace was reconstructed; apply with --ignore-whitespace if needed.

 src/gallium/drivers/r600/r600_shader.c        |  60 +++++++----
 .../drivers/radeon/R600CodeEmitter.cpp        |  58 +---------
 .../drivers/radeon/R600Instructions.td        | 102 ++++++++++++++++--
 3 files changed, 139 insertions(+), 81 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 32d5a78758f..4dd424367a9 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -258,6 +258,17 @@ int r600_compute_shader_create(struct pipe_context * ctx,
 
 #endif /* HAVE_OPENCL */
 
+static uint32_t i32_from_byte_stream(unsigned char * bytes,
+		unsigned * bytes_read)
+{
+	unsigned i;
+	uint32_t out = 0;
+	for (i = 0; i < 4; i++) {
+		out |= (uint32_t)bytes[(*bytes_read)++] << (8 * i);
+	}
+	return out;
+}
+
 static unsigned r600_src_from_byte_stream(unsigned char * bytes,
 		unsigned bytes_read, struct r600_bytecode_alu * alu, unsigned src_idx)
 {
@@ -443,27 +454,36 @@ static int r600_vtx_from_byte_stream(struct r600_shader_ctx *ctx,
 	unsigned char * bytes, unsigned bytes_read)
 {
 	struct r600_bytecode_vtx vtx;
+
+	uint32_t word0 = i32_from_byte_stream(bytes, &bytes_read);
+	uint32_t word1 = i32_from_byte_stream(bytes, &bytes_read);
+	uint32_t word2 = i32_from_byte_stream(bytes, &bytes_read);
+
 	memset(&vtx, 0, sizeof(vtx));
-	vtx.inst = bytes[bytes_read++];
-	vtx.fetch_type = bytes[bytes_read++];
-	vtx.buffer_id = bytes[bytes_read++];
-	vtx.src_gpr = bytes[bytes_read++];
-	vtx.src_sel_x = bytes[bytes_read++];
-	vtx.mega_fetch_count = bytes[bytes_read++];
-	vtx.dst_gpr = bytes[bytes_read++];
-	vtx.dst_sel_x = bytes[bytes_read++];
-	vtx.dst_sel_y = bytes[bytes_read++];
-	vtx.dst_sel_z = bytes[bytes_read++];
-	vtx.dst_sel_w = bytes[bytes_read++];
-	vtx.use_const_fields = bytes[bytes_read++];
-	vtx.data_format = bytes[bytes_read++];
-	vtx.num_format_all = bytes[bytes_read++];
-	vtx.format_comp_all = bytes[bytes_read++];
-	vtx.srf_mode_all = bytes[bytes_read++];
-	/* offset is 2 bytes wide */
-	vtx.offset = bytes[bytes_read++];
-	vtx.offset |= (bytes[bytes_read++] << 8);
-	vtx.endian = bytes[bytes_read++];
+
+	/* WORD0 */
+	vtx.inst = G_SQ_VTX_WORD0_VTX_INST(word0);
+	vtx.fetch_type = G_SQ_VTX_WORD0_FETCH_TYPE(word0);
+	vtx.buffer_id = G_SQ_VTX_WORD0_BUFFER_ID(word0);
+	vtx.src_gpr = G_SQ_VTX_WORD0_SRC_GPR(word0);
+	vtx.src_sel_x = G_SQ_VTX_WORD0_SRC_SEL_X(word0);
+	vtx.mega_fetch_count = G_SQ_VTX_WORD0_MEGA_FETCH_COUNT(word0);
+
+	/* WORD1 */
+	vtx.dst_gpr = G_SQ_VTX_WORD1_GPR_DST_GPR(word1);
+	vtx.dst_sel_x = G_SQ_VTX_WORD1_DST_SEL_X(word1);
+	vtx.dst_sel_y = G_SQ_VTX_WORD1_DST_SEL_Y(word1);
+	vtx.dst_sel_z = G_SQ_VTX_WORD1_DST_SEL_Z(word1);
+	vtx.dst_sel_w = G_SQ_VTX_WORD1_DST_SEL_W(word1);
+	vtx.use_const_fields = G_SQ_VTX_WORD1_USE_CONST_FIELDS(word1);
+	vtx.data_format = G_SQ_VTX_WORD1_DATA_FORMAT(word1);
+	vtx.num_format_all = G_SQ_VTX_WORD1_NUM_FORMAT_ALL(word1);
+	vtx.format_comp_all = G_SQ_VTX_WORD1_FORMAT_COMP_ALL(word1);
+	vtx.srf_mode_all = G_SQ_VTX_WORD1_SRF_MODE_ALL(word1);
+
+	/* WORD2 */
+	vtx.offset = G_SQ_VTX_WORD2_OFFSET(word2);
+	vtx.endian = G_SQ_VTX_WORD2_ENDIAN_SWAP(word2);
 
 	if (r600_bytecode_add_vtx(ctx->bc, &vtx)) {
 		fprintf(stderr, "Error adding vtx\n");
diff --git a/src/gallium/drivers/radeon/R600CodeEmitter.cpp b/src/gallium/drivers/radeon/R600CodeEmitter.cpp
index 3042f38af8a..99964d4beb1 100644
--- a/src/gallium/drivers/radeon/R600CodeEmitter.cpp
+++ b/src/gallium/drivers/radeon/R600CodeEmitter.cpp
@@ -206,60 +206,12 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) {
         case AMDIL::VTX_READ_PARAM_eg:
         case AMDIL::VTX_READ_GLOBAL_eg:
           {
-            emitByte(INSTR_VTX);
-            // inst
-            emitByte(0);
-
-            // fetch_type
-            emitByte(2);
-
-            // buffer_id
-            emitByte(MI.getOpcode() == AMDIL::VTX_READ_PARAM_eg ? 0 : 1);
-
-            // src_gpr
-            emitByte(getHWReg(MI.getOperand(1).getReg()));
-
-            // src_sel_x
-            emitByte(TRI->getHWRegChan(MI.getOperand(1).getReg()));
-
-            // mega_fetch_count
-            emitByte(3);
-
-            // dst_gpr
-            emitByte(getHWReg(MI.getOperand(0).getReg()));
-
-            // dst_sel_x
-            emitByte(0);
-
-            // dst_sel_y
-            emitByte(7);
+            uint64_t InstWord01 = getBinaryCodeForInstr(MI);
+            uint32_t InstWord2 = MI.getOperand(2).getImm() & 0xffff; // Offset
 
-            // dst_sel_z
-            emitByte(7);
-
-            // dst_sel_w
-            emitByte(7);
-
-            // use_const_fields
-            emitByte(1);
-
-            // data_format
-            emitByte(0);
-
-            // num_format_all
-            emitByte(0);
-
-            // format_comp_all
-            emitByte(0);
-
-            // srf_mode_all
-            emitByte(0);
-
-            // offset
-            emitTwoBytes(MI.getOperand(2).getImm());
-
-            // endian
-            emitByte(0);
+            emitByte(INSTR_VTX);
+            emit(InstWord01);
+            emit(InstWord2);
             break;
           }
 
diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td
index 6c74c6cd7bd..d42e74cfae2 100644
--- a/src/gallium/drivers/radeon/R600Instructions.td
+++ b/src/gallium/drivers/radeon/R600Instructions.td
@@ -922,18 +922,104 @@ def RAT_WRITE_CACHELESS_eg : EG_CF_RAT <0x57, 0x2, 0, (outs),
 
 } // End usesCustomInserter = 1
 
-class VTX_READ_eg <int buffer_id, list<dag> pattern> : InstR600ISA <
-  (outs R600_TReg32_X:$dst),
-  (ins MEMxi:$ptr),
-  "VTX_READ_eg $dst, $ptr",
-  pattern
->;
+class VTX_READ_eg <bits<8> buffer_id, dag outs, list<dag> pattern>
+    : InstR600ISA <outs, (ins MEMxi:$ptr), "VTX_READ_eg $dst, $ptr", pattern> {
+
+  // Operands
+  bits<7> DST_GPR;
+  bits<7> SRC_GPR;
+
+  // Static fields
+  bits<5> VC_INST = 0;
+  bits<2> FETCH_TYPE = 2;
+  bits<1> FETCH_WHOLE_QUAD = 0;
+  bits<8> BUFFER_ID = buffer_id;
+  bits<1> SRC_REL = 0;
+  // XXX: We can infer this field based on the SRC_GPR. This would allow us
+  // to store vertex addresses in any channel, not just X.
+  bits<2> SRC_SEL_X = 0;
+  bits<6> MEGA_FETCH_COUNT;
+  bits<1> DST_REL = 0;
+  bits<3> DST_SEL_X;
+  bits<3> DST_SEL_Y;
+  bits<3> DST_SEL_Z;
+  bits<3> DST_SEL_W;
+  // The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL,
+  // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored,
+  // however, based on my testing if USE_CONST_FIELDS is set, then all
+  // these fields need to be set to 0.
+  bits<1> USE_CONST_FIELDS = 0;
+  bits<6> DATA_FORMAT;
+  bits<2> NUM_FORMAT_ALL = 1;
+  bits<1> FORMAT_COMP_ALL = 0;
+  bits<1> SRF_MODE_ALL = 0;
+
+  // LLVM can only encode 64-bit instructions, so these fields are manually
+  // encoded in R600CodeEmitter
+  //
+  // bits<16> OFFSET;
+  // bits<2> ENDIAN_SWAP = 0;
+  // bits<1> CONST_BUF_NO_STRIDE = 0;
+  // bits<1> MEGA_FETCH = 0;
+  // bits<1> ALT_CONST = 0;
+  // bits<2> BUFFER_INDEX_MODE = 0;
+
+  // VTX_WORD0
+  let Inst{4-0} = VC_INST;
+  let Inst{6-5} = FETCH_TYPE;
+  let Inst{7} = FETCH_WHOLE_QUAD;
+  let Inst{15-8} = BUFFER_ID;
+  let Inst{22-16} = SRC_GPR;
+  let Inst{23} = SRC_REL;
+  let Inst{25-24} = SRC_SEL_X;
+  let Inst{31-26} = MEGA_FETCH_COUNT;
+
+  // VTX_WORD1_GPR
+  let Inst{38-32} = DST_GPR;
+  let Inst{39} = DST_REL;
+  let Inst{40} = 0; // Reserved
+  let Inst{43-41} = DST_SEL_X;
+  let Inst{46-44} = DST_SEL_Y;
+  let Inst{49-47} = DST_SEL_Z;
+  let Inst{52-50} = DST_SEL_W;
+  let Inst{53} = USE_CONST_FIELDS;
+  let Inst{59-54} = DATA_FORMAT;
+  let Inst{61-60} = NUM_FORMAT_ALL;
+  let Inst{62} = FORMAT_COMP_ALL;
+  let Inst{63} = SRF_MODE_ALL;
+
+  // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
+  // is done in R600CodeEmitter)
+  //
+  // Inst{79-64} = OFFSET;
+  // Inst{81-80} = ENDIAN_SWAP;
+  // Inst{82} = CONST_BUF_NO_STRIDE;
+  // Inst{83} = MEGA_FETCH;
+  // Inst{84} = ALT_CONST;
+  // Inst{86-85} = BUFFER_INDEX_MODE;
+  // Inst{95-87} = 0; Reserved
+
+  // VTX_WORD3 (Padding)
+  //
+  // Inst{127-96} = 0;
+}
+
+class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern>
+    : VTX_READ_eg <buffer_id, (outs R600_TReg32_X:$dst), pattern> {
+
+  let MEGA_FETCH_COUNT = 4;
+  let DST_SEL_X = 0;
+  let DST_SEL_Y = 7; // Masked
+  let DST_SEL_Z = 7; // Masked
+  let DST_SEL_W = 7; // Masked
+  let DATA_FORMAT = 0xD; // COLOR_32
+}
 
-def VTX_READ_PARAM_eg : VTX_READ_eg <0,
+def VTX_READ_PARAM_eg : VTX_READ_32_eg <0,
   [(set (i32 R600_TReg32_X:$dst), (load_param ADDRVTX_READ:$ptr))]
 >;
 
-def VTX_READ_GLOBAL_eg : VTX_READ_eg <1,
+def VTX_READ_GLOBAL_eg : VTX_READ_32_eg <1,
   [(set (i32 R600_TReg32_X:$dst), (global_load ADDRVTX_READ:$ptr))]
 >;
 
-- 
2.30.2