X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fr600%2Fr700_assembler.c;h=c01b2fbb1464ccb42a85eec40a1496ebd21dba20;hb=43867acb6afc7fad26cdc2f22b2a3bb6eeefb2da;hp=6e8d1cd9270dcc73079503e05ec99ddb6aec1b56;hpb=de460871605c5575c5513dd1283cb61710b60cfe;p=mesa.git diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 6e8d1cd9270..c01b2fbb146 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -32,6 +32,7 @@ #include "main/mtypes.h" #include "main/imports.h" +#include "shader/prog_parameter.h" #include "radeon_debug.h" #include "r600_context.h" @@ -41,6 +42,39 @@ #define USE_CF_FOR_CONTINUE_BREAK 1 #define USE_CF_FOR_POP_AFTER 1 +struct prog_instruction noise1_insts[12] = { + {OPCODE_BGNSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_MOV , {{0, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 2, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_MOV , {{8, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 4, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_MOV , {{8, 0, 585, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 8, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_SGT , {{0, 0, 585, 0, 0, 0}, {8, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 1, 1, 0, 8, 1672, 0}, 1, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_IF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 7, 0, 0}, 0, 0, 0, 1, 0, 0, 0, 15, 0, 0, 0}, + {OPCODE_MOV , {{0, 0, 1755, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_ENDIF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_MOV , {{0, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_ENDSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0} +}; +float noise1_const[2][4] = { + {0.300000f, 0.900000f, 0.500000f, 0.300000f} +}; + +COMPILED_SUB noise1_presub = { + &(noise1_insts[0]), + 12, + 2, + 1, + 0, + &(noise1_const[0]), + SWIZZLE_X, + SWIZZLE_X, + SWIZZLE_X, + SWIZZLE_X, + {0,0,0}, + 0 +}; + BITS addrmode_PVSDST(PVSDST * pPVSDST) { return pPVSDST->addrmode0 | ((BITS)pPVSDST->addrmode1 << 1); @@ -330,17 +364,20 @@ GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size) return(format); } -unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm) +unsigned int r700GetNumOperands(GLuint opcode, GLuint nIsOp3) { - if(pAsm->D.dst.op3) + if(nIsOp3 > 0) { return 3; } - switch (pAsm->D.dst.opcode) + switch (opcode) { case SQ_OP2_INST_ADD: + case SQ_OP2_INST_KILLE: case SQ_OP2_INST_KILLGT: + case SQ_OP2_INST_KILLGE: + case SQ_OP2_INST_KILLNE: case SQ_OP2_INST_MUL: case SQ_OP2_INST_MAX: case SQ_OP2_INST_MIN: @@ -363,6 +400,7 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm) case SQ_OP2_INST_MOVA_FLOOR: case SQ_OP2_INST_FRACT: case SQ_OP2_INST_FLOOR: + case SQ_OP2_INST_TRUNC: case SQ_OP2_INST_EXP_IEEE: case SQ_OP2_INST_LOG_CLAMPED: case SQ_OP2_INST_LOG_IEEE: @@ -374,7 +412,7 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm) return 1; default: radeon_error( - "Need instruction operand number for %x.\n", pAsm->D.dst.opcode); + "Need instruction operand number for %x.\n", opcode); }; return 3; @@ -388,99 +426,94 @@ int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700 pAsm->pR700Shader = pShader; pAsm->currentShaderType = spt; - pAsm->cf_last_export_ptr = NULL; + pAsm->cf_last_export_ptr = NULL; - pAsm->cf_current_export_clause_ptr = NULL; - pAsm->cf_current_alu_clause_ptr = NULL; - pAsm->cf_current_tex_clause_ptr = NULL; - pAsm->cf_current_vtx_clause_ptr = NULL; - pAsm->cf_current_cf_clause_ptr = NULL; + pAsm->cf_current_export_clause_ptr = NULL; + pAsm->cf_current_alu_clause_ptr = NULL; + pAsm->cf_current_tex_clause_ptr = NULL; + pAsm->cf_current_vtx_clause_ptr = NULL; + pAsm->cf_current_cf_clause_ptr = NULL; - // No clause has been created yet - pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE; + // No clause has been created yet + pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE; - pAsm->number_of_colorandz_exports = 0; - pAsm->number_of_exports = 0; - pAsm->number_of_export_opcodes = 0; + pAsm->number_of_colorandz_exports = 0; + pAsm->number_of_exports = 0; + pAsm->number_of_export_opcodes = 0; pAsm->alu_x_opcode = 0; pAsm->D2.bits = 0; - pAsm->D.bits = 0; - pAsm->S[0].bits = 0; - pAsm->S[1].bits = 0; - pAsm->S[2].bits = 0; + pAsm->D.bits = 0; + pAsm->S[0].bits = 0; + pAsm->S[1].bits = 0; + pAsm->S[2].bits = 0; - pAsm->uLastPosUpdate = 0; + pAsm->uLastPosUpdate = 0; - *(BITS *) &pAsm->fp_stOutFmt0 = 0; + *(BITS *) &pAsm->fp_stOutFmt0 = 0; - pAsm->uIIns = 0; - pAsm->uOIns = 0; - pAsm->number_used_registers = 0; - pAsm->uUsedConsts = 256; + pAsm->uIIns = 0; + pAsm->uOIns = 0; + pAsm->number_used_registers = 0; + pAsm->uUsedConsts = 256; - // Fragment programs - pAsm->uBoolConsts = 0; - pAsm->uIntConsts = 0; - pAsm->uInsts = 0; - pAsm->uConsts = 0; + // Fragment programs + pAsm->uBoolConsts = 0; + pAsm->uIntConsts = 0; + pAsm->uInsts = 0; + pAsm->uConsts = 0; - pAsm->FCSP = 0; - pAsm->fc_stack[0].type = FC_NONE; + pAsm->FCSP = 0; + pAsm->fc_stack[0].type = FC_NONE; - pAsm->branch_depth = 0; - pAsm->max_branch_depth = 0; - - pAsm->aArgSubst[0] = - pAsm->aArgSubst[1] = - pAsm->aArgSubst[2] = - pAsm->aArgSubst[3] = (-1); - - pAsm->uOutputs = 0; - - for (i=0; icolor_export_register_number[i] = (-1); - } + pAsm->aArgSubst[0] = + pAsm->aArgSubst[1] = + pAsm->aArgSubst[2] = + pAsm->aArgSubst[3] = (-1); + pAsm->uOutputs = 0; - pAsm->depth_export_register_number = (-1); - pAsm->stencil_export_register_number = (-1); - pAsm->coverage_to_mask_export_register_number = (-1); - pAsm->mask_export_register_number = (-1); + for (i=0; icolor_export_register_number[i] = (-1); + } - pAsm->starting_export_register_number = 0; - pAsm->starting_vfetch_register_number = 0; - pAsm->starting_temp_register_number = 0; - pAsm->uFirstHelpReg = 0; + pAsm->depth_export_register_number = (-1); + pAsm->stencil_export_register_number = (-1); + pAsm->coverage_to_mask_export_register_number = (-1); + pAsm->mask_export_register_number = (-1); - pAsm->input_position_is_used = GL_FALSE; - pAsm->input_normal_is_used = GL_FALSE; + pAsm->starting_export_register_number = 0; + pAsm->starting_vfetch_register_number = 0; + pAsm->starting_temp_register_number = 0; + pAsm->uFirstHelpReg = 0; + pAsm->input_position_is_used = GL_FALSE; + pAsm->input_normal_is_used = GL_FALSE; - for (i=0; iinput_color_is_used[ i ] = GL_FALSE; - } + for (i=0; iinput_color_is_used[ i ] = GL_FALSE; + } - for (i=0; iinput_texture_unit_is_used[ i ] = GL_FALSE; - } + for (i=0; iinput_texture_unit_is_used[ i ] = GL_FALSE; + } - for (i=0; ivfetch_instruction_ptr_array[ i ] = NULL; - } + for (i=0; ivfetch_instruction_ptr_array[ i ] = NULL; + } - pAsm->number_of_inputs = 0; + pAsm->number_of_inputs = 0; - pAsm->is_tex = GL_FALSE; - pAsm->need_tex_barrier = GL_FALSE; + pAsm->is_tex = GL_FALSE; + pAsm->need_tex_barrier = GL_FALSE; pAsm->subs = NULL; pAsm->unSubArraySize = 0; @@ -494,18 +527,27 @@ int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700 pAsm->CALLSTACK[0].plstCFInstructions_local = &(pAsm->pR700Shader->lstCFInstructions); - pAsm->CALLSTACK[0].stackUsage.bits = 0; + pAsm->CALLSTACK[0].max = 0; + pAsm->CALLSTACK[0].current = 0; SetActiveCFlist(pAsm->pR700Shader, pAsm->CALLSTACK[0].plstCFInstructions_local); pAsm->unCFflags = 0; - return 0; + pAsm->presubs = NULL; + pAsm->unPresubArraySize = 0; + pAsm->unNumPresub = 0; + pAsm->unCurNumILInsts = 0; + + pAsm->unVetTexBits = 0; + + return 0; } GLboolean IsTex(gl_inst_opcode Opcode) { - if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) ) + if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) || + (OPCODE_DDX==Opcode) || (OPCODE_DDY==Opcode) ) { return GL_TRUE; } @@ -849,6 +891,7 @@ GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm, GLubyte element, GLuint _signed, GLboolean normalize, + GLenum format, VTX_FETCH_METHOD * pFetchMethod) { GLuint client_size_inbyte; @@ -897,10 +940,21 @@ GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm, vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X; vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count; - vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_X; - vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y; - vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_Z; - vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W; + if(format == GL_BGRA) + { + vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_Z; + vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y; + vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_X; + vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W; + } + else + { + vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_X; + vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y; + vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_Z; + vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W; + + } vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1; vfetch_instruction_ptr->m_Word1.f.data_format = data_format; @@ -1038,7 +1092,8 @@ GLboolean checkop2(r700_AssemblerBase* pAsm) checkop_init(pAsm); - if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) || + if( (pILInst->SrcReg[0].File == PROGRAM_UNIFORM) || + (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) || (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) || (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) || (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) ) @@ -1049,7 +1104,8 @@ GLboolean checkop2(r700_AssemblerBase* pAsm) { bSrcConst[0] = GL_FALSE; } - if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) || + if( (pILInst->SrcReg[1].File == PROGRAM_UNIFORM) || + (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) || (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) || (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) || (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) ) @@ -1082,7 +1138,8 @@ GLboolean checkop3(r700_AssemblerBase* pAsm) checkop_init(pAsm); - if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) || + if( (pILInst->SrcReg[0].File == PROGRAM_UNIFORM) || + (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) || (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) || (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) || (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) ) @@ -1093,7 +1150,8 @@ GLboolean checkop3(r700_AssemblerBase* pAsm) { bSrcConst[0] = GL_FALSE; } - if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) || + if( (pILInst->SrcReg[1].File == PROGRAM_UNIFORM) || + (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) || (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) || (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) || (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) ) @@ -1104,7 +1162,8 @@ GLboolean checkop3(r700_AssemblerBase* pAsm) { bSrcConst[1] = GL_FALSE; } - if( (pILInst->SrcReg[2].File == PROGRAM_CONSTANT) || + if( (pILInst->SrcReg[2].File == PROGRAM_UNIFORM) || + (pILInst->SrcReg[2].File == PROGRAM_CONSTANT) || (pILInst->SrcReg[2].File == PROGRAM_LOCAL_PARAM) || (pILInst->SrcReg[2].File == PROGRAM_ENV_PARAM) || (pILInst->SrcReg[2].File == PROGRAM_STATE_VAR) ) @@ -1215,10 +1274,18 @@ GLboolean assemble_src(r700_AssemblerBase *pAsm, } pAsm->S[fld].src.rtype = SRC_REG_CONSTANT; - pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index; + if(pILInst->SrcReg[src].Index < 0) + { + WARN_ONCE("Negative register offsets not supported yet!\n"); + pAsm->S[fld].src.reg = 0; + } + else + { + pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index; + } break; case PROGRAM_INPUT: - setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE); + setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE); pAsm->S[fld].src.rtype = SRC_REG_INPUT; switch (pAsm->currentShaderType) { @@ -1287,6 +1354,15 @@ GLboolean assemble_dst(r700_AssemblerBase *pAsm) pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1; pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1; + if(pILInst->SaturateMode == SATURATE_ZERO_ONE) + { + pAsm->D2.dst2.SaturateMode = 1; + } + else + { + pAsm->D2.dst2.SaturateMode = 0; + } + return GL_TRUE; } @@ -1346,6 +1422,7 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) else { switch (pILInst->SrcReg[0].File) { + case PROGRAM_UNIFORM: case PROGRAM_CONSTANT: case PROGRAM_LOCAL_PARAM: case PROGRAM_ENV_PARAM: @@ -1358,46 +1435,65 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; break; case PROGRAM_INPUT: - switch (pILInst->SrcReg[0].Index) + if(SPT_VP == pAsm->currentShaderType) + { + switch (pILInst->SrcReg[0].Index) + { + case VERT_ATTRIB_TEX0: + case VERT_ATTRIB_TEX1: + case VERT_ATTRIB_TEX2: + case VERT_ATTRIB_TEX3: + case VERT_ATTRIB_TEX4: + case VERT_ATTRIB_TEX5: + case VERT_ATTRIB_TEX6: + case VERT_ATTRIB_TEX7: + bValidTexCoord = GL_TRUE; + pAsm->S[0].src.reg = + pAsm->ucVP_AttributeMap[pILInst->SrcReg[0].Index]; + pAsm->S[0].src.rtype = SRC_REG_INPUT; + break; + } + } + else { - case FRAG_ATTRIB_WPOS: - case FRAG_ATTRIB_COL0: - case FRAG_ATTRIB_COL1: - case FRAG_ATTRIB_FOGC: - case FRAG_ATTRIB_TEX0: - case FRAG_ATTRIB_TEX1: - case FRAG_ATTRIB_TEX2: - case FRAG_ATTRIB_TEX3: - case FRAG_ATTRIB_TEX4: - case FRAG_ATTRIB_TEX5: - case FRAG_ATTRIB_TEX6: - case FRAG_ATTRIB_TEX7: - bValidTexCoord = GL_TRUE; + switch (pILInst->SrcReg[0].Index) + { + case FRAG_ATTRIB_WPOS: + case FRAG_ATTRIB_COL0: + case FRAG_ATTRIB_COL1: + case FRAG_ATTRIB_FOGC: + case FRAG_ATTRIB_TEX0: + case FRAG_ATTRIB_TEX1: + case FRAG_ATTRIB_TEX2: + case FRAG_ATTRIB_TEX3: + case FRAG_ATTRIB_TEX4: + case FRAG_ATTRIB_TEX5: + case FRAG_ATTRIB_TEX6: + case FRAG_ATTRIB_TEX7: + bValidTexCoord = GL_TRUE; + pAsm->S[0].src.reg = + pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; + pAsm->S[0].src.rtype = SRC_REG_INPUT; + break; + case FRAG_ATTRIB_FACE: + fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n"); + break; + case FRAG_ATTRIB_PNTC: + fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n"); + break; + } + + if( (pILInst->SrcReg[0].Index >= FRAG_ATTRIB_VAR0) || + (pILInst->SrcReg[0].Index < FRAG_ATTRIB_MAX) ) + { + bValidTexCoord = GL_TRUE; pAsm->S[0].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; pAsm->S[0].src.rtype = SRC_REG_INPUT; - break; - case FRAG_ATTRIB_FACE: - fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n"); - break; - case FRAG_ATTRIB_PNTC: - fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n"); - break; - case FRAG_ATTRIB_VAR0: - fprintf(stderr, "FRAG_ATTRIB_VAR0 unsupported\n"); - break; + } } - if( (pILInst->SrcReg[0].Index >= FRAG_ATTRIB_VAR0) || - (pILInst->SrcReg[0].Index < FRAG_ATTRIB_MAX) ) - { - bValidTexCoord = GL_TRUE; - pAsm->S[0].src.reg = - pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; - pAsm->S[0].src.rtype = SRC_REG_INPUT; - } - - break; + break; } } @@ -1442,8 +1538,17 @@ GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalize tex_instruction_ptr->m_Word0.f.tex_inst = pAsm->D.dst.opcode; tex_instruction_ptr->m_Word0.f.bc_frac_mode = 0x0; tex_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0; + tex_instruction_ptr->m_Word0.f.alt_const = 0; - tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg; + if(SPT_VP == pAsm->currentShaderType) + { + tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg + VERT_ATTRIB_MAX; + pAsm->unVetTexBits |= 1 << texture_unit_source->reg; + } + else + { + tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg; + } tex_instruction_ptr->m_Word1.f.lod_bias = 0x0; if (normalized) { @@ -1462,7 +1567,6 @@ GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalize tex_instruction_ptr->m_Word2.f.offset_x = 0x0; tex_instruction_ptr->m_Word2.f.offset_y = 0x0; tex_instruction_ptr->m_Word2.f.offset_z = 0x0; - tex_instruction_ptr->m_Word2.f.sampler_id = texture_unit_source->reg; // dst @@ -1721,7 +1825,7 @@ GLboolean add_alu_instruction(r700_AssemblerBase* pAsm, } else { - pAsm->cf_current_alu_clause_ptr->m_Word1.f.count++; + pAsm->cf_current_alu_clause_ptr->m_Word1.f.count += (GetInstructionSize(alu_instruction_ptr->m_ShaderInstType) / 2); } // If this clause constains any instruction that is forward dependent on a TEX instruction, @@ -1998,7 +2102,7 @@ GLboolean check_scalar(r700_AssemblerBase* pAsm, GLuint swizzle_key; - GLuint number_of_operands = r700GetNumOperands(pAsm); + GLuint number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3); for (src=0; srcD.dst.opcode, pAsm->D.dst.op3); for (src=0; srcD.dst.opcode, pAsm->D.dst.op3); //GLuint channel_swizzle, j; //GLuint chan_counter[4] = {0, 0, 0, 0}; //PVSSRC * pSource[3]; @@ -2221,23 +2329,44 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) contiguous_slots_needed = 0; - if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) ) + if(!is_single_scalar_operation) { contiguous_slots_needed = 4; } + contiguous_slots_needed += pAsm->D2.dst2.literal_slots; + initialize(pAsm); for (scalar_channel_index=0; scalar_channel_index < number_of_scalar_operations; scalar_channel_index++) { - R700ALUInstruction* alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); - if (alu_instruction_ptr == NULL) - { - return GL_FALSE; - } - Init_R700ALUInstruction(alu_instruction_ptr); + if(scalar_channel_index == (number_of_scalar_operations-1)) + { + switch(pAsm->D2.dst2.literal_slots) + { + case 0: + alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); + Init_R700ALUInstruction(alu_instruction_ptr); + break; + case 1: + alu_instruction_ptr_hl = (R700ALUInstructionHalfLiteral*) CALLOC_STRUCT(R700ALUInstructionHalfLiteral); + Init_R700ALUInstructionHalfLiteral(alu_instruction_ptr_hl, pAsm->C[0].f, pAsm->C[1].f); + alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_hl; + break; + case 2: + alu_instruction_ptr_fl = (R700ALUInstructionFullLiteral*) CALLOC_STRUCT(R700ALUInstructionFullLiteral); + Init_R700ALUInstructionFullLiteral(alu_instruction_ptr_fl,pAsm->C[0].f, pAsm->C[1].f, pAsm->C[2].f, pAsm->C[3].f); + alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_fl; + break; + }; + } + else + { + alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); + Init_R700ALUInstruction(alu_instruction_ptr); + } //src 0 current_source_index = 0; @@ -2247,7 +2376,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) current_source_index, pcurrent_source, scalar_channel_index) ) - { + { return GL_FALSE; } @@ -2261,13 +2390,13 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) current_source_index, pcurrent_source, scalar_channel_index) ) - { + { return GL_FALSE; } } //other bits - alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_AR_X; + alu_instruction_ptr->m_Word0.f.index_mode = pAsm->D2.dst2.index_mode; if( (is_single_scalar_operation == GL_TRUE) || (GL_TRUE == bSplitInst) ) @@ -2279,9 +2408,17 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ? 1 : 0; } - alu_instruction_ptr->m_Word0.f.pred_sel = 0x0; - alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0; - alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0; + alu_instruction_ptr->m_Word0.f.pred_sel = (pAsm->D.dst.pred_inv > 0) ? 1 : 0; + if(1 == pAsm->D.dst.predicated) + { + alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x1; + alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1; + } + else + { + alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0; + alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0; + } // dst if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) || @@ -2290,7 +2427,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg; } else - { + { radeon_error("Only temp destination registers supported for ALU dest regs.\n"); return GL_FALSE; } @@ -2320,7 +2457,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index; - alu_instruction_ptr->m_Word1.f.clamp = pAsm->pILInst[pAsm->uiCurInst].SaturateMode; + alu_instruction_ptr->m_Word1.f.clamp = pAsm->D2.dst2.SaturateMode; if (pAsm->D.dst.op3) { @@ -2347,8 +2484,8 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) { alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode; - alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0; - alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0; + alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = pAsm->S[0].src.abs; + alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = pAsm->S[1].src.abs; //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0; //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0; @@ -2376,8 +2513,8 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) { alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode; - alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0; - alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0; + alu_instruction_ptr->m_Word1_OP2.f.src0_abs = pAsm->S[0].src.abs; + alu_instruction_ptr->m_Word1_OP2.f.src1_abs = pAsm->S[1].src.abs; //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0; //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0; @@ -2404,7 +2541,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) } if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) ) - { + { return GL_FALSE; } @@ -2415,682 +2552,120 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) if (is_single_scalar_operation) { if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) ) - { + { return GL_FALSE; } } else { if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) ) - { - return 1; + { + return GL_FALSE; } } - contiguous_slots_needed = 0; + contiguous_slots_needed -= 1; } return GL_TRUE; } -GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm) +GLboolean next_ins(r700_AssemblerBase *pAsm) { - GLuint number_of_scalar_operations; - GLboolean is_single_scalar_operation; - GLuint scalar_channel_index; - - PVSSRC * pcurrent_source; - int current_source_index; - GLuint contiguous_slots_needed; - - GLuint uNumSrc = r700GetNumOperands(pAsm); - - GLboolean bSplitInst = GL_FALSE; + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); - if (1 == pAsm->D.dst.math) + if( GL_TRUE == pAsm->is_tex ) { - is_single_scalar_operation = GL_TRUE; - number_of_scalar_operations = 1; + if (pILInst->TexSrcTarget == TEXTURE_RECT_INDEX) { + if( GL_FALSE == assemble_tex_instruction(pAsm, GL_FALSE) ) + { + radeon_error("Error assembling TEX instruction\n"); + return GL_FALSE; + } + } else { + if( GL_FALSE == assemble_tex_instruction(pAsm, GL_TRUE) ) + { + radeon_error("Error assembling TEX instruction\n"); + return GL_FALSE; + } + } } else + { //ALU + if( GL_FALSE == assemble_alu_instruction(pAsm) ) + { + radeon_error("Error assembling ALU instruction\n"); + return GL_FALSE; + } + } + + if(pAsm->D.dst.rtype == DST_REG_OUT) { - is_single_scalar_operation = GL_FALSE; - number_of_scalar_operations = 4; + if(pAsm->D.dst.op3) + { + // There is no mask for OP3 instructions, so all channels are written + pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] = 0xF; + } + else + { + pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] + |= (unsigned char)pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask; + } } + + //reset for next inst. + pAsm->D.bits = 0; + pAsm->D2.bits = 0; + pAsm->S[0].bits = 0; + pAsm->S[1].bits = 0; + pAsm->S[2].bits = 0; + pAsm->is_tex = GL_FALSE; + pAsm->need_tex_barrier = GL_FALSE; + pAsm->D2.bits = 0; + pAsm->C[0].bits = pAsm->C[1].bits = pAsm->C[2].bits = pAsm->C[3].bits = 0; + return GL_TRUE; +} - contiguous_slots_needed = 0; +GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode) +{ + BITS tmp; + + checkop1(pAsm); + + tmp = gethelpr(pAsm); + + // opcode tmp.x, a.x + // MOV dst, tmp.x + + pAsm->D.dst.opcode = opcode; + pAsm->D.dst.math = 1; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; - if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) ) + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) { - contiguous_slots_needed = 4; + return GL_FALSE; } - initialize(pAsm); + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } - for (scalar_channel_index=0; - scalar_channel_index < number_of_scalar_operations; - scalar_channel_index++) + // Now replicate result to all necessary channels in destination + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if( GL_FALSE == assemble_dst(pAsm) ) { - R700ALUInstruction* alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); - if (alu_instruction_ptr == NULL) - { - return GL_FALSE; - } - Init_R700ALUInstruction(alu_instruction_ptr); - - //src 0 - current_source_index = 0; - pcurrent_source = &(pAsm->S[0].src); + return GL_FALSE; + } - if (GL_FALSE == assemble_alu_src(alu_instruction_ptr, - current_source_index, - pcurrent_source, - scalar_channel_index) ) - { - return GL_FALSE; - } - - if (uNumSrc > 1) - { - // Process source 1 - current_source_index = 1; - pcurrent_source = &(pAsm->S[current_source_index].src); - - if (GL_FALSE == assemble_alu_src(alu_instruction_ptr, - current_source_index, - pcurrent_source, - scalar_channel_index) ) - { - return GL_FALSE; - } - } - - //other bits - alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_LOOP; - - if( (is_single_scalar_operation == GL_TRUE) - || (GL_TRUE == bSplitInst) ) - { - alu_instruction_ptr->m_Word0.f.last = 1; - } - else - { - alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ? 1 : 0; - } - - alu_instruction_ptr->m_Word0.f.pred_sel = (pAsm->D.dst.pred_inv > 0) ? 1 : 0; - if(1 == pAsm->D.dst.predicated) - { - alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x1; - alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1; - } - else - { - alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0; - alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0; - } - - // dst - if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) || - (pAsm->D.dst.rtype == DST_REG_OUT) ) - { - alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg; - } - else - { - radeon_error("Only temp destination registers supported for ALU dest regs.\n"); - return GL_FALSE; - } - - alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; //D.rtype - - if ( is_single_scalar_operation == GL_TRUE ) - { - // Override scalar_channel_index since only one scalar value will be written - if(pAsm->D.dst.writex) - { - scalar_channel_index = 0; - } - else if(pAsm->D.dst.writey) - { - scalar_channel_index = 1; - } - else if(pAsm->D.dst.writez) - { - scalar_channel_index = 2; - } - else if(pAsm->D.dst.writew) - { - scalar_channel_index = 3; - } - } - - alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index; - - alu_instruction_ptr->m_Word1.f.clamp = pAsm->D2.dst2.SaturateMode; - - if (pAsm->D.dst.op3) - { - //op3 - - alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode; - - //There's 3rd src for op3 - current_source_index = 2; - pcurrent_source = &(pAsm->S[current_source_index].src); - - if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr, - current_source_index, - pcurrent_source, - scalar_channel_index) ) - { - return GL_FALSE; - } - } - else - { - //op2 - if (pAsm->bR6xx) - { - alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode; - - alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0; - alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0; - - //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0; - //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0; - switch (scalar_channel_index) - { - case 0: - alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writex; - break; - case 1: - alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writey; - break; - case 2: - alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writez; - break; - case 3: - alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writew; - break; - default: - alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1; //SQ_SEL_MASK; - break; - } - alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF; - } - else - { - alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode; - - alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0; - alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0; - - //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0; - //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0; - switch (scalar_channel_index) - { - case 0: - alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writex; - break; - case 1: - alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writey; - break; - case 2: - alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writez; - break; - case 3: - alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writew; - break; - default: - alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1; //SQ_SEL_MASK; - break; - } - alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF; - } - } - - if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) ) - { - return GL_FALSE; - } - - /* - * Judge the type of current instruction, is it vector or scalar - * instruction. - */ - if (is_single_scalar_operation) - { - if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) ) - { - return GL_FALSE; - } - } - else - { - if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) ) - { - return 1; - } - } - - contiguous_slots_needed = 0; - } - - return GL_TRUE; -} - -GLboolean assemble_alu_instruction_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral) -{ - R700ALUInstruction * alu_instruction_ptr; - R700ALUInstructionHalfLiteral * alu_instruction_ptr_hl; - R700ALUInstructionFullLiteral * alu_instruction_ptr_fl; - - GLuint number_of_scalar_operations; - GLboolean is_single_scalar_operation; - GLuint scalar_channel_index; - - GLuint contiguous_slots_needed; - GLuint lastInstruction; - GLuint not_masked[4]; - - GLuint uNumSrc = r700GetNumOperands(pAsm); - - GLboolean bSplitInst = GL_FALSE; - - number_of_scalar_operations = 0; - contiguous_slots_needed = 0; - - if(1 == pAsm->D.dst.writew) - { - lastInstruction = 3; - number_of_scalar_operations++; - not_masked[3] = 1; - } - else - { - not_masked[3] = 0; - } - if(1 == pAsm->D.dst.writez) - { - lastInstruction = 2; - number_of_scalar_operations++; - not_masked[2] = 1; - } - else - { - not_masked[2] = 0; - } - if(1 == pAsm->D.dst.writey) - { - lastInstruction = 1; - number_of_scalar_operations++; - not_masked[1] = 1; - } - else - { - not_masked[1] = 0; - } - if(1 == pAsm->D.dst.writex) - { - lastInstruction = 0; - number_of_scalar_operations++; - not_masked[0] = 1; - } - else - { - not_masked[0] = 0; - } - - if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) ) - { - contiguous_slots_needed = 4; - } - else - { - contiguous_slots_needed = number_of_scalar_operations; - } - - if(1 == pAsm->D2.dst2.literal) - { - contiguous_slots_needed += 1; - } - else if(2 == pAsm->D2.dst2.literal) - { - contiguous_slots_needed += 2; - } - - initialize(pAsm); - - for (scalar_channel_index=0; scalar_channel_index < 4; scalar_channel_index++) - { - if(0 == not_masked[scalar_channel_index]) - { - continue; - } - - if(scalar_channel_index == lastInstruction) - { - switch (pAsm->D2.dst2.literal) - { - case 0: - alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); - if (alu_instruction_ptr == NULL) - { - return GL_FALSE; - } - Init_R700ALUInstruction(alu_instruction_ptr); - break; - case 1: - alu_instruction_ptr_hl = (R700ALUInstructionHalfLiteral*) CALLOC_STRUCT(R700ALUInstructionHalfLiteral); - if (alu_instruction_ptr_hl == NULL) - { - return GL_FALSE; - } - Init_R700ALUInstructionHalfLiteral(alu_instruction_ptr_hl, pLiteral[0], pLiteral[1]); - alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_hl; - break; - case 2: - alu_instruction_ptr_fl = (R700ALUInstructionFullLiteral*) CALLOC_STRUCT(R700ALUInstructionFullLiteral); - if (alu_instruction_ptr_fl == NULL) - { - return GL_FALSE; - } - Init_R700ALUInstructionFullLiteral(alu_instruction_ptr_fl, pLiteral[0], pLiteral[1], pLiteral[2], pLiteral[3]); - alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_fl; - break; - default: - break; - }; - } - else - { - alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); - if (alu_instruction_ptr == NULL) - { - return GL_FALSE; - } - Init_R700ALUInstruction(alu_instruction_ptr); - } - - //src 0 - if (GL_FALSE == assemble_alu_src(alu_instruction_ptr, - 0, - &(pAsm->S[0].src), - scalar_channel_index) ) - { - return GL_FALSE; - } - - if (uNumSrc > 1) - { - // Process source 1 - if (GL_FALSE == assemble_alu_src(alu_instruction_ptr, - 1, - &(pAsm->S[1].src), - scalar_channel_index) ) - { - return GL_FALSE; - } - } - - //other bits - alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_LOOP; - - if(scalar_channel_index == lastInstruction) - { - alu_instruction_ptr->m_Word0.f.last = 1; - } - - alu_instruction_ptr->m_Word0.f.pred_sel = 0x0; - if(1 == pAsm->D.dst.predicated) - { - alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x1; - alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1; - } - else - { - alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0; - alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0; - } - - // dst - if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) || - (pAsm->D.dst.rtype == DST_REG_OUT) ) - { - alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg; - } - else - { - radeon_error("Only temp destination registers supported for ALU dest regs.\n"); - return GL_FALSE; - } - - alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; //D.rtype - - alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index; - - alu_instruction_ptr->m_Word1.f.clamp = pAsm->D2.dst2.SaturateMode; - - if (pAsm->D.dst.op3) - { - //op3 - alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode; - - //There's 3rd src for op3 - if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr, - 2, - &(pAsm->S[2].src), - scalar_channel_index) ) - { - return GL_FALSE; - } - } - else - { - //op2 - if (pAsm->bR6xx) - { - alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode; - alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0; - alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0; - alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1; - alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF; - } - else - { - alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode; - alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0; - alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0; - alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1; - alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF; - } - } - - if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) ) - { - return GL_FALSE; - } - - if (1 == number_of_scalar_operations) - { - if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) ) - { - return GL_FALSE; - } - } - else - { - if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) ) - { - return GL_FALSE; - } - } - - contiguous_slots_needed -= 2; - } - - return GL_TRUE; -} - -GLboolean next_ins(r700_AssemblerBase *pAsm) -{ - struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); - - if( GL_TRUE == pAsm->is_tex ) - { - if (pILInst->TexSrcTarget == TEXTURE_RECT_INDEX) { - if( GL_FALSE == assemble_tex_instruction(pAsm, GL_FALSE) ) - { - radeon_error("Error assembling TEX instruction\n"); - return GL_FALSE; - } - } else { - if( GL_FALSE == assemble_tex_instruction(pAsm, GL_TRUE) ) - { - radeon_error("Error assembling TEX instruction\n"); - return GL_FALSE; - } - } - } - else - { //ALU - if( GL_FALSE == assemble_alu_instruction(pAsm) ) - { - radeon_error("Error assembling ALU instruction\n"); - return GL_FALSE; - } - } - - if(pAsm->D.dst.rtype == DST_REG_OUT) - { - if(pAsm->D.dst.op3) - { - // There is no mask for OP3 instructions, so all channels are written - pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] = 0xF; - } - else - { - pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] - |= (unsigned char)pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask; - } - } - - //reset for next inst. - pAsm->D.bits = 0; - pAsm->D2.bits = 0; - pAsm->S[0].bits = 0; - pAsm->S[1].bits = 0; - pAsm->S[2].bits = 0; - pAsm->is_tex = GL_FALSE; - pAsm->need_tex_barrier = GL_FALSE; - - return GL_TRUE; -} - -GLboolean next_ins2(r700_AssemblerBase *pAsm) -{ - struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); - - //ALU - if( GL_FALSE == assemble_alu_instruction2(pAsm) ) - { - radeon_error("Error assembling ALU instruction\n"); - return GL_FALSE; - } - - if(pAsm->D.dst.rtype == DST_REG_OUT) - { - if(pAsm->D.dst.op3) - { - // There is no mask for OP3 instructions, so all channels are written - pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] = 0xF; - } - else - { - pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] - |= (unsigned char)pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask; - } - } - - //reset for next inst. - pAsm->D.bits = 0; - pAsm->D2.bits = 0; - pAsm->S[0].bits = 0; - pAsm->S[1].bits = 0; - pAsm->S[2].bits = 0; - pAsm->is_tex = GL_FALSE; - pAsm->need_tex_barrier = GL_FALSE; - - //richard nov.16 glsl - pAsm->D2.bits = 0; - - return GL_TRUE; -} - -/* not work yet */ -GLboolean next_ins_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral) -{ - struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); - - //ALU - if( GL_FALSE == assemble_alu_instruction_literal(pAsm, pLiteral) ) - { - radeon_error("Error assembling ALU instruction\n"); - return GL_FALSE; - } - - //reset for next inst. - pAsm->D.bits = 0; - pAsm->D2.bits = 0; - pAsm->S[0].bits = 0; - pAsm->S[1].bits = 0; - pAsm->S[2].bits = 0; - pAsm->is_tex = GL_FALSE; - pAsm->need_tex_barrier = GL_FALSE; - return GL_TRUE; -} - -GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode) -{ - BITS tmp; - - checkop1(pAsm); - - tmp = gethelpr(pAsm); - - // opcode tmp.x, a.x - // MOV dst, tmp.x - - pAsm->D.dst.opcode = opcode; - pAsm->D.dst.math = 1; - - setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); - pAsm->D.dst.rtype = DST_REG_TEMPORARY; - pAsm->D.dst.reg = tmp; - pAsm->D.dst.writex = 1; - - if( GL_FALSE == assemble_src(pAsm, 0, -1) ) - { - return GL_FALSE; - } - - if ( GL_FALSE == next_ins(pAsm) ) - { - return GL_FALSE; - } - - // Now replicate result to all necessary channels in destination - pAsm->D.dst.opcode = SQ_OP2_INST_MOV; - - if( GL_FALSE == assemble_dst(pAsm) ) - { - return GL_FALSE; - } - - setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - pAsm->S[0].src.rtype = DST_REG_TEMPORARY; - pAsm->S[0].src.reg = tmp; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); noneg_PVSSRC(&(pAsm->S[0].src)); @@ -3280,9 +2855,44 @@ GLboolean assemble_CMP(r700_AssemblerBase *pAsm) return GL_TRUE; } -GLboolean assemble_COS(r700_AssemblerBase *pAsm) +GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode) { - return assemble_math_function(pAsm, SQ_OP2_INST_COS); + int tmp; + checkop1(pAsm); + + tmp = gethelpr(pAsm); + + pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + + assemble_src(pAsm, 0, -1); + + pAsm->S[1].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + pAsm->D2.dst2.literal_slots = 1; + pAsm->C[0].f = 1/(3.1415926535 * 2); + pAsm->C[1].f = 0.0F; + next_ins(pAsm); + + pAsm->D.dst.opcode = opcode; + pAsm->D.dst.math = 1; + + assemble_dst(pAsm); + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + next_ins(pAsm); + + //TODO - replicate if more channels set in WriteMask + return GL_TRUE; + } GLboolean assemble_DOT(r700_AssemblerBase *pAsm) @@ -3550,13 +3160,15 @@ GLboolean assemble_FRC(r700_AssemblerBase *pAsm) return GL_TRUE; } -GLboolean assemble_KIL(r700_AssemblerBase *pAsm) -{ - /* TODO: doc says KILL has to be last(end) ALU clause */ - - checkop1(pAsm); +GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode) +{ + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); - pAsm->D.dst.opcode = SQ_OP2_INST_KILLGT; + if(pILInst->Opcode == OPCODE_KIL) + checkop1(pAsm); + + pAsm->D.dst.opcode = opcode; + //pAsm->D.dst.math = 1; setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); pAsm->D.dst.rtype = DST_REG_TEMPORARY; @@ -3569,21 +3181,34 @@ GLboolean assemble_KIL(r700_AssemblerBase *pAsm) setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; pAsm->S[0].src.reg = 0; - setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0); noneg_PVSSRC(&(pAsm->S[0].src)); - if ( GL_FALSE == assemble_src(pAsm, 0, 1) ) + if(pILInst->Opcode == OPCODE_KIL_NV) { - return GL_FALSE; + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = 0; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_1); + neg_PVSSRC(&(pAsm->S[1].src)); } - + else + { + if( GL_FALSE == assemble_src(pAsm, 0, 1) ) + { + return GL_FALSE; + } + + } + if ( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; } + /* Doc says KILL has to be last(end) ALU clause */ pAsm->pR700Shader->killIsUsed = GL_TRUE; + pAsm->alu_x_opcode = SQ_CF_INST_ALU; return GL_TRUE; } @@ -3647,6 +3272,7 @@ GLboolean assemble_LRP(r700_AssemblerBase *pAsm) { return GL_FALSE; } + if( GL_FALSE == assemble_src(pAsm, 2, -1) ) { return GL_FALSE; @@ -4382,77 +4008,67 @@ GLboolean assemble_RSQ(r700_AssemblerBase *pAsm) return assemble_math_function(pAsm, SQ_OP2_INST_RECIPSQRT_IEEE); } -GLboolean assemble_SIN(r700_AssemblerBase *pAsm) -{ - return assemble_math_function(pAsm, SQ_OP2_INST_SIN); -} - GLboolean assemble_SCS(r700_AssemblerBase *pAsm) { BITS tmp; - checkop1(pAsm); - - tmp = gethelpr(pAsm); - - // COS tmp.x, a.x - pAsm->D.dst.opcode = SQ_OP2_INST_COS; - pAsm->D.dst.math = 1; + checkop1(pAsm); - setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); - pAsm->D.dst.rtype = DST_REG_TEMPORARY; - pAsm->D.dst.reg = tmp; - pAsm->D.dst.writex = 1; + tmp = gethelpr(pAsm); + /* tmp.x = src /2*PI */ + pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; - if( GL_FALSE == assemble_src(pAsm, 0, -1) ) - { - return GL_FALSE; - } + assemble_src(pAsm, 0, -1); - if ( GL_FALSE == next_ins(pAsm) ) - { - return GL_FALSE; - } + pAsm->S[1].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + pAsm->D2.dst2.literal_slots = 1; + pAsm->C[0].f = 1/(3.1415926535 * 2); + pAsm->C[1].f = 0.0F; - // SIN tmp.y, a.x - pAsm->D.dst.opcode = SQ_OP2_INST_SIN; - pAsm->D.dst.math = 1; + next_ins(pAsm); - setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); - pAsm->D.dst.rtype = DST_REG_TEMPORARY; - pAsm->D.dst.reg = tmp; - pAsm->D.dst.writey = 1; + // COS dst.x, a.x + pAsm->D.dst.opcode = SQ_OP2_INST_COS; + pAsm->D.dst.math = 1; - if( GL_FALSE == assemble_src(pAsm, 0, -1) ) - { - return GL_FALSE; - } + assemble_dst(pAsm); + /* mask y */ + pAsm->D.dst.writey = 0; - if( GL_FALSE == next_ins(pAsm) ) - { - return GL_FALSE; - } + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); - // MOV dst.mask, tmp - pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } - if( GL_FALSE == assemble_dst(pAsm) ) - { - return GL_FALSE; - } + // SIN dst.y, a.x + pAsm->D.dst.opcode = SQ_OP2_INST_SIN; + pAsm->D.dst.math = 1; - setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - pAsm->S[0].src.rtype = DST_REG_TEMPORARY; - pAsm->S[0].src.reg = tmp; + assemble_dst(pAsm); + /* mask x */ + pAsm->D.dst.writex = 0; - noswizzle_PVSSRC(&(pAsm->S[0].src)); - pAsm->S[0].src.swizzlez = SQ_SEL_0; - pAsm->S[0].src.swizzlew = SQ_SEL_0; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); - if ( GL_FALSE == next_ins(pAsm) ) - { - return GL_FALSE; - } + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } return GL_TRUE; } @@ -4465,7 +4081,7 @@ GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode) } pAsm->D.dst.opcode = opcode; - pAsm->D.dst.math = 1; + //pAsm->D.dst.math = 1; if( GL_FALSE == assemble_dst(pAsm) ) { @@ -4492,32 +4108,34 @@ GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode) GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode) { - if( GL_FALSE == checkop2(pAsm) ) - { - return GL_FALSE; - } + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); pAsm->D.dst.opcode = opcode; pAsm->D.dst.math = 1; pAsm->D.dst.predicated = 1; - pAsm->D2.dst2.SaturateMode = pAsm->pILInst[pAsm->uiCurInst].SaturateMode; - if( GL_FALSE == assemble_dst(pAsm) ) - { - return GL_FALSE; - } + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = pAsm->uHelpReg; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0; - if( GL_FALSE == assemble_src(pAsm, 0, -1) ) - { - return GL_FALSE; - } + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = pAsm->last_cond_register + pAsm->starting_temp_register_number; + pAsm->S[0].src.swizzlex = pILInst->DstReg.CondSwizzle & 0x7; + noneg_PVSSRC(&(pAsm->S[0].src)); - if( GL_FALSE == assemble_src(pAsm, 1, -1) ) - { - return GL_FALSE; - } + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = pAsm->uHelpReg; + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[1].src)); + pAsm->S[1].src.swizzlex = SQ_SEL_0; + pAsm->S[1].src.swizzley = SQ_SEL_0; + pAsm->S[1].src.swizzlez = SQ_SEL_0; + pAsm->S[1].src.swizzlew = SQ_SEL_0; - if( GL_FALSE == next_ins2(pAsm) ) + if( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; } @@ -4603,6 +4221,7 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) switch (pAsm->pILInst[pAsm->uiCurInst].SrcReg[0].File) { + case PROGRAM_UNIFORM: case PROGRAM_CONSTANT: case PROGRAM_LOCAL_PARAM: case PROGRAM_ENV_PARAM: @@ -4623,22 +4242,6 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) need_barrier = GL_TRUE; } - switch (pAsm->pILInst[pAsm->uiCurInst].Opcode) - { - case OPCODE_TEX: - break; - case OPCODE_TXB: - radeon_error("do not support TXB yet\n"); - return GL_FALSE; - break; - case OPCODE_TXP: - break; - default: - radeon_error("Internal error: bad texture op (not TEX)\n"); - return GL_FALSE; - break; - } - if (pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP) { GLuint tmp = gethelpr(pAsm); @@ -4703,37 +4306,19 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) return GL_FALSE; } - if( GL_FALSE == assemble_src(pAsm, 0, 1) ) - { - return GL_FALSE; - } - - swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y); - swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_X, SQ_SEL_Z, SQ_SEL_Z); - - if( GL_FALSE == next_ins(pAsm) ) - { - return GL_FALSE; - } - - /* tmp1.z = ABS(tmp1.z) dont have abs support in assembler currently - * have to do explicit instruction - */ - pAsm->D.dst.opcode = SQ_OP2_INST_MAX; - setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); - pAsm->D.dst.rtype = DST_REG_TEMPORARY; - pAsm->D.dst.reg = tmp1; - pAsm->D.dst.writez = 1; - - setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; - pAsm->S[0].src.reg = tmp1; - noswizzle_PVSSRC(&(pAsm->S[0].src)); - pAsm->S[1].bits = pAsm->S[0].bits; - flipneg_PVSSRC(&(pAsm->S[1].src)); - - next_ins(pAsm); + if( GL_FALSE == assemble_src(pAsm, 0, 1) ) + { + return GL_FALSE; + } + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y); + swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_X, SQ_SEL_Z, SQ_SEL_Z); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + /* tmp1.z = RCP_e(|tmp1.z|) */ pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE; pAsm->D.dst.math = 1; @@ -4746,13 +4331,13 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; pAsm->S[0].src.reg = tmp1; pAsm->S[0].src.swizzlex = SQ_SEL_Z; + pAsm->S[0].src.abs = 1; next_ins(pAsm); /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x * muladd has no writemask, have to use another temp - * also no support for imm constants, so add 1 here */ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; pAsm->D.dst.op3 = 1; @@ -4769,30 +4354,12 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) pAsm->S[1].src.reg = tmp1; setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z); setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE); - pAsm->S[2].src.rtype = SRC_REG_TEMPORARY; + /* immediate c 1.5 */ + pAsm->D2.dst2.literal_slots = 1; + pAsm->C[0].f = 1.5F; + pAsm->S[2].src.rtype = SRC_REC_LITERAL; pAsm->S[2].src.reg = tmp1; - setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_1); - - next_ins(pAsm); - - /* ADD the remaining .5 */ - pAsm->D.dst.opcode = SQ_OP2_INST_ADD; - setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); - pAsm->D.dst.rtype = DST_REG_TEMPORARY; - pAsm->D.dst.reg = tmp2; - pAsm->D.dst.writex = 1; - pAsm->D.dst.writey = 1; - pAsm->D.dst.writez = 0; - pAsm->D.dst.writew = 0; - - setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; - pAsm->S[0].src.reg = tmp2; - noswizzle_PVSSRC(&(pAsm->S[0].src)); - setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); - pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; - pAsm->S[1].src.reg = 252; // SQ_ALU_SRC_0_5 - noswizzle_PVSSRC(&(pAsm->S[1].src)); + setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X); next_ins(pAsm); @@ -4817,14 +4384,35 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) } - pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; + switch(pAsm->pILInst[pAsm->uiCurInst].Opcode) + { + case OPCODE_DDX: + /* will these need WQM(1) on CF inst ? */ + pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_H; + break; + case OPCODE_DDY: + pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_V; + break; + case OPCODE_TXB: + pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_L; + break; + default: + if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1) + pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_C; + else + pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; + } + + pAsm->is_tex = GL_TRUE; + if ( GL_TRUE == need_barrier ) + pAsm->is_tex = GL_TRUE; if ( GL_TRUE == need_barrier ) { pAsm->need_tex_barrier = GL_TRUE; } // Set src1 to tex unit id - pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit; + pAsm->S[1].src.reg = pAsm->SamplerUnits[pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit]; pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; //No sw info from mesa compiler, so hard code here. @@ -4858,11 +4446,46 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) pAsm->S[0].src.swizzlew = SQ_SEL_Y; } + if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1) + { + /* compare value goes to w chan ? */ + pAsm->S[0].src.swizzlew = SQ_SEL_Z; + } + if ( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; } + /* add ARB shadow ambient but clamp to 0..1 */ + if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1) + { + /* ADD_SAT dst, dst, ambient[texunit] */ + pAsm->D.dst.opcode = SQ_OP2_INST_ADD; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + pAsm->D2.dst2.SaturateMode = 1; + + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = pAsm->D.dst.reg; + noswizzle_PVSSRC(&(pAsm->S[0].src)); + noneg_PVSSRC(&(pAsm->S[0].src)); + + pAsm->S[1].src.rtype = SRC_REG_CONSTANT; + pAsm->S[1].src.reg = pAsm->shadow_regs[pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit]; + noswizzle_PVSSRC(&(pAsm->S[1].src)); + noneg_PVSSRC(&(pAsm->S[1].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + } + return GL_TRUE; } @@ -4981,19 +4604,75 @@ GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm) return GL_TRUE; } -inline void checkStackDepth(r700_AssemblerBase *pAsm, GLuint uReason) +static inline void decreaseCurrent(r700_AssemblerBase *pAsm, GLuint uReason) +{ + switch (uReason) + { + case FC_PUSH_VPM: + pAsm->CALLSTACK[pAsm->CALLSP].current--; + break; + case FC_PUSH_WQM: + pAsm->CALLSTACK[pAsm->CALLSP].current -= 4; + break; + case FC_LOOP: + pAsm->CALLSTACK[pAsm->CALLSP].current -= 4; + break; + case FC_REP: + /* TODO : for 16 vp asic, should -= 2; */ + pAsm->CALLSTACK[pAsm->CALLSP].current -= 1; + break; + }; +} + +static inline void checkStackDepth(r700_AssemblerBase *pAsm, GLuint uReason, GLboolean bCheckMaxOnly) { + if(GL_TRUE == bCheckMaxOnly) + { + switch (uReason) + { + case FC_PUSH_VPM: + if((pAsm->CALLSTACK[pAsm->CALLSP].current + 1) + > pAsm->CALLSTACK[pAsm->CALLSP].max) + { + pAsm->CALLSTACK[pAsm->CALLSP].max = + pAsm->CALLSTACK[pAsm->CALLSP].current + 1; + } + break; + case FC_PUSH_WQM: + if((pAsm->CALLSTACK[pAsm->CALLSP].current + 4) + > pAsm->CALLSTACK[pAsm->CALLSP].max) + { + pAsm->CALLSTACK[pAsm->CALLSP].max = + pAsm->CALLSTACK[pAsm->CALLSP].current + 4; + } + break; + } + return; + } + switch (uReason) { case FC_PUSH_VPM: + pAsm->CALLSTACK[pAsm->CALLSP].current++; break; case FC_PUSH_WQM: + pAsm->CALLSTACK[pAsm->CALLSP].current += 4; break; case FC_LOOP: + pAsm->CALLSTACK[pAsm->CALLSP].current += 4; break; case FC_REP: + /* TODO : for 16 vp asic, should += 2; */ + pAsm->CALLSTACK[pAsm->CALLSP].current += 1; break; }; + + if(pAsm->CALLSTACK[pAsm->CALLSP].current + > pAsm->CALLSTACK[pAsm->CALLSP].max) + { + pAsm->CALLSTACK[pAsm->CALLSP].max = + pAsm->CALLSTACK[pAsm->CALLSP].current; + } } GLboolean jumpToOffest(r700_AssemblerBase *pAsm, GLuint pops, GLint offset) @@ -5044,6 +4723,11 @@ GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops) GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse) { + pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; + + assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE); + + if(GL_FALSE == add_cf_instruction(pAsm) ) { return GL_FALSE; @@ -5051,7 +4735,7 @@ GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse) if(GL_TRUE != bHasElse) { - pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; } else { @@ -5069,7 +4753,6 @@ GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse) pAsm->FCSP++; pAsm->fc_stack[pAsm->FCSP].type = FC_IF; - pAsm->fc_stack[pAsm->FCSP].bpush = 0; pAsm->fc_stack[pAsm->FCSP].mid = NULL; pAsm->fc_stack[pAsm->FCSP].midLen= 0; pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr; @@ -5081,21 +4764,13 @@ GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse) } #endif /* USE_CF_FOR_POP_AFTER */ - pAsm->branch_depth++; + checkStackDepth(pAsm, FC_PUSH_VPM, GL_FALSE); - if(pAsm->branch_depth > pAsm->max_branch_depth) - { - pAsm->max_branch_depth = pAsm->branch_depth; - } return GL_TRUE; } GLboolean assemble_ELSE(r700_AssemblerBase *pAsm) { -#ifdef USE_CF_FOR_POP_AFTER - pops(pAsm, 1); -#endif /* USE_CF_FOR_POP_AFTER */ - if(GL_FALSE == add_cf_instruction(pAsm) ) { return GL_FALSE; @@ -5130,7 +4805,7 @@ GLboolean assemble_ELSE(r700_AssemblerBase *pAsm) GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm) { #ifdef USE_CF_FOR_POP_AFTER - pops(pAsm, 1); + pops(pAsm, 1); #endif /* USE_CF_FOR_POP_AFTER */ pAsm->alu_x_opcode = SQ_CF_INST_ALU; @@ -5155,9 +4830,11 @@ GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm) radeon_error("if/endif in shader code are not paired. \n"); return GL_FALSE; } - pAsm->branch_depth--; + pAsm->FCSP--; + decreaseCurrent(pAsm, FC_PUSH_VPM); + return GL_TRUE; } @@ -5182,24 +4859,24 @@ GLboolean assemble_BGNLOOP(r700_AssemblerBase *pAsm) pAsm->FCSP++; pAsm->fc_stack[pAsm->FCSP].type = FC_LOOP; - pAsm->fc_stack[pAsm->FCSP].bpush = 1; pAsm->fc_stack[pAsm->FCSP].mid = NULL; pAsm->fc_stack[pAsm->FCSP].unNumMid = 0; pAsm->fc_stack[pAsm->FCSP].midLen = 0; pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr; - pAsm->branch_depth++; + checkStackDepth(pAsm, FC_LOOP, GL_FALSE); - if(pAsm->branch_depth > pAsm->max_branch_depth) - { - pAsm->max_branch_depth = pAsm->branch_depth; - } return GL_TRUE; } GLboolean assemble_BRK(r700_AssemblerBase *pAsm) { #ifdef USE_CF_FOR_CONTINUE_BREAK + + pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; + + assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE); + unsigned int unFCSP; for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--) { @@ -5257,6 +4934,8 @@ GLboolean assemble_BRK(r700_AssemblerBase *pAsm) pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1; + checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE); + #endif //USE_CF_FOR_CONTINUE_BREAK return GL_TRUE; } @@ -5264,6 +4943,10 @@ GLboolean assemble_BRK(r700_AssemblerBase *pAsm) GLboolean assemble_CONT(r700_AssemblerBase *pAsm) { #ifdef USE_CF_FOR_CONTINUE_BREAK + pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; + + assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE); + unsigned int unFCSP; for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--) { @@ -5321,6 +5004,8 @@ GLboolean assemble_CONT(r700_AssemblerBase *pAsm) pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1; + checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE); + #endif /* USE_CF_FOR_CONTINUE_BREAK */ return GL_TRUE; @@ -5367,32 +5052,36 @@ GLboolean assemble_ENDLOOP(r700_AssemblerBase *pAsm) return GL_FALSE; } - unsigned int unFCSP = 0; + GLuint unFCSP; + GLuint unIF = 0; if((pAsm->unCFflags & HAS_CURRENT_LOOPRET) > 0) { for(unFCSP=(pAsm->FCSP-1); unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--) { if(FC_LOOP == pAsm->fc_stack[unFCSP].type) { + breakLoopOnFlag(pAsm, unFCSP); break; } + else if(FC_IF == pAsm->fc_stack[unFCSP].type) + { + unIF++; + } } if(unFCSP <= pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry) { - unFCSP = 0; - - returnOnFlag(pAsm); +#ifdef USE_CF_FOR_POP_AFTER + returnOnFlag(pAsm, unIF); +#else + returnOnFlag(pAsm, 0); +#endif /* USE_CF_FOR_POP_AFTER */ pAsm->unCFflags &= ~HAS_CURRENT_LOOPRET; } } - pAsm->branch_depth--; pAsm->FCSP--; - if(unFCSP > 0) - { - breakLoopOnFlag(pAsm, unFCSP); - } + decreaseCurrent(pAsm, FC_LOOP); return GL_TRUE; } @@ -5401,22 +5090,22 @@ void add_return_inst(r700_AssemblerBase *pAsm) { if(GL_FALSE == add_cf_instruction(pAsm) ) { - return GL_FALSE; + return; } //pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_RETURN; pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; } -GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex) +GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex, GLuint uiIL_Shift) { /* Put in sub */ if( (pAsm->unSubArrayPointer + 1) > pAsm->unSubArraySize ) @@ -5431,16 +5120,18 @@ GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex) pAsm->unSubArraySize += 10; } - pAsm->subs[pAsm->unSubArrayPointer].subIL_Offset = nILindex; + pAsm->subs[pAsm->unSubArrayPointer].subIL_Offset = nILindex + uiIL_Shift; pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pHead=NULL; - pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pTail=NULL; - pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.uNumOfNode=0; + pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pTail=NULL; + pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.uNumOfNode=0; pAsm->CALLSP++; + pAsm->CALLSTACK[pAsm->CALLSP].subDescIndex = pAsm->unSubArrayPointer; pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry = pAsm->FCSP; pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local = &(pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local); - pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.bits = 0; + pAsm->CALLSTACK[pAsm->CALLSP].max = 0; + pAsm->CALLSTACK[pAsm->CALLSP].current = 0; SetActiveCFlist(pAsm->pR700Shader, pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local); @@ -5449,25 +5140,46 @@ GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex) /* start sub */ pAsm->alu_x_opcode = SQ_CF_INST_ALU; + pAsm->FCSP++; + pAsm->fc_stack[pAsm->FCSP].type = FC_REP; + + checkStackDepth(pAsm, FC_REP, GL_FALSE); + return GL_TRUE; } GLboolean assemble_ENDSUB(r700_AssemblerBase *pAsm) { + if(pAsm->fc_stack[pAsm->FCSP].type != FC_REP) + { + radeon_error("BGNSUB/ENDSUB in shader code are not paired. \n"); + return GL_FALSE; + } + + /* copy max to sub structure */ + pAsm->subs[pAsm->CALLSTACK[pAsm->CALLSP].subDescIndex].unStackDepthMax + = pAsm->CALLSTACK[pAsm->CALLSP].max; + + decreaseCurrent(pAsm, FC_REP); + pAsm->CALLSP--; SetActiveCFlist(pAsm->pR700Shader, pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local); pAsm->alu_x_opcode = SQ_CF_INST_ALU; + pAsm->FCSP--; + return GL_TRUE; } GLboolean assemble_RET(r700_AssemblerBase *pAsm) { + GLuint unIF = 0; + if(pAsm->CALLSP > 0) { /* in sub */ - unsigned int unFCSP; + GLuint unFCSP; for(unFCSP=pAsm->FCSP; unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--) { if(FC_LOOP == pAsm->fc_stack[unFCSP].type) @@ -5478,9 +5190,20 @@ GLboolean assemble_RET(r700_AssemblerBase *pAsm) return GL_TRUE; } + else if(FC_IF == pAsm->fc_stack[unFCSP].type) + { + unIF++; + } } } - + +#ifdef USE_CF_FOR_POP_AFTER + if(unIF > 0) + { + pops(pAsm, unIF); + } +#endif /* USE_CF_FOR_POP_AFTER */ + add_return_inst(pAsm); return GL_TRUE; @@ -5488,9 +5211,13 @@ GLboolean assemble_RET(r700_AssemblerBase *pAsm) GLboolean assemble_CAL(r700_AssemblerBase *pAsm, GLint nILindex, + GLuint uiIL_Shift, GLuint uiNumberInsts, - struct prog_instruction *pILInst) + struct prog_instruction *pILInst, + PRESUB_DESC * pPresubDesc) { + GLint uiIL_Offset; + pAsm->alu_x_opcode = SQ_CF_INST_ALU; if(GL_FALSE == add_cf_instruction(pAsm) ) @@ -5523,29 +5250,59 @@ GLboolean assemble_CAL(r700_AssemblerBase *pAsm, pAsm->unCallerArraySize += 10; } - pAsm->callers[pAsm->unCallerArrayPointer].subIL_Offset = nILindex; - pAsm->callers[pAsm->unCallerArrayPointer].cf_ptr = pAsm->cf_current_cf_clause_ptr; + uiIL_Offset = nILindex + uiIL_Shift; + pAsm->callers[pAsm->unCallerArrayPointer].subIL_Offset = uiIL_Offset; + pAsm->callers[pAsm->unCallerArrayPointer].cf_ptr = pAsm->cf_current_cf_clause_ptr; + + pAsm->callers[pAsm->unCallerArrayPointer].finale_cf_ptr = NULL; + pAsm->callers[pAsm->unCallerArrayPointer].prelude_cf_ptr = NULL; pAsm->unCallerArrayPointer++; int j; + GLuint max; + GLuint unSubID; + GLboolean bRet; for(j=0; junSubArrayPointer; j++) { - if(nILindex == pAsm->subs[j].subIL_Offset) + if(uiIL_Offset == pAsm->subs[j].subIL_Offset) { /* compiled before */ + + max = pAsm->subs[j].unStackDepthMax + + pAsm->CALLSTACK[pAsm->CALLSP].current; + if(max > pAsm->CALLSTACK[pAsm->CALLSP].max) + { + pAsm->CALLSTACK[pAsm->CALLSP].max = max; + } + pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = j; return GL_TRUE; } } pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = pAsm->unSubArrayPointer; + unSubID = pAsm->unSubArrayPointer; + + bRet = AssembleInstr(nILindex, uiIL_Shift, uiNumberInsts, pILInst, pAsm); + + if(GL_TRUE == bRet) + { + max = pAsm->subs[unSubID].unStackDepthMax + + pAsm->CALLSTACK[pAsm->CALLSP].current; + if(max > pAsm->CALLSTACK[pAsm->CALLSP].max) + { + pAsm->CALLSTACK[pAsm->CALLSP].max = max; + } + + pAsm->subs[unSubID].pPresubDesc = pPresubDesc; + } - return AssembleInstr(nILindex, uiNumberInsts, pILInst, pAsm); + return bRet; } GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue) { - GLfloat fLiteral[2] = {0.1, 0.0}; + /*GLfloat fLiteral[2] = {0.1, 0.0};*/ pAsm->D.dst.opcode = SQ_OP2_INST_MOV; pAsm->D.dst.op3 = 0; @@ -5555,9 +5312,12 @@ GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue) pAsm->D.dst.writey = 0; pAsm->D.dst.writez = 0; pAsm->D.dst.writew = 0; - pAsm->D2.dst2.literal = 1; + pAsm->D2.dst2.literal_slots = 1; pAsm->D2.dst2.SaturateMode = SATURATE_OFF; pAsm->D.dst.predicated = 0; + /* in reloc where dislink flag init inst, only one slot alu inst is handled. */ + pAsm->D.dst.math = 1; /* TODO : not math really, but one channel op, more generic alu assembler needed */ + pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! */ #if 0 pAsm->S[0].src.rtype = SRC_REC_LITERAL; //pAsm->S[0].src.reg = 0; @@ -5582,7 +5342,7 @@ GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue) pAsm->S[0].src.swizzlez = flagValue; pAsm->S[0].src.swizzlew = flagValue; - if( GL_FALSE == next_ins2(pAsm) ) + if( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; } @@ -5593,7 +5353,7 @@ GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue) GLboolean testFlag(r700_AssemblerBase *pAsm) { - GLfloat fLiteral[2] = {0.1, 0.0}; + /*GLfloat fLiteral[2] = {0.1, 0.0};*/ //Test flag GLuint tmp = gethelpr(pAsm); @@ -5607,9 +5367,10 @@ GLboolean testFlag(r700_AssemblerBase *pAsm) pAsm->D.dst.writey = 0; pAsm->D.dst.writez = 0; pAsm->D.dst.writew = 0; - pAsm->D2.dst2.literal = 1; + pAsm->D2.dst2.literal_slots = 1; pAsm->D2.dst2.SaturateMode = SATURATE_OFF; pAsm->D.dst.predicated = 1; + pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! */ pAsm->S[0].src.rtype = DST_REG_TEMPORARY; pAsm->S[0].src.reg = pAsm->flag_reg_index; @@ -5643,21 +5404,23 @@ GLboolean testFlag(r700_AssemblerBase *pAsm) pAsm->S[1].src.swizzlez = SQ_SEL_1; pAsm->S[1].src.swizzlew = SQ_SEL_1; - if( GL_FALSE == next_ins2(pAsm) ) + if( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; } #endif + checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE); + return GL_TRUE; } -GLboolean returnOnFlag(r700_AssemblerBase *pAsm) +GLboolean returnOnFlag(r700_AssemblerBase *pAsm, GLuint unIF) { testFlag(pAsm); jumpToOffest(pAsm, 1, 4); setRetInLoopFlag(pAsm, SQ_SEL_0); - pops(pAsm, 1); + pops(pAsm, unIF + 1); add_return_inst(pAsm); return GL_TRUE; @@ -5666,7 +5429,7 @@ GLboolean returnOnFlag(r700_AssemblerBase *pAsm) GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP) { testFlag(pAsm); - + //break if(GL_FALSE == add_cf_instruction(pAsm) ) { @@ -5697,6 +5460,7 @@ GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP) } GLboolean AssembleInstr(GLuint uiFirstInst, + GLuint uiIL_Shift, GLuint uiNumberInsts, struct prog_instruction *pILInst, r700_AssemblerBase *pR700AsmCode) @@ -5736,6 +5500,12 @@ GLboolean AssembleInstr(GLuint uiFirstInst, } } #endif + if(pILInst[i].CondUpdate == 1) + { + /* remember dest register used for cond evaluation */ + /* XXX also handle PROGRAM_OUTPUT registers here? */ + pR700AsmCode->last_cond_register = pILInst[i].DstReg.Index; + } switch (pILInst[i].Opcode) { @@ -5764,7 +5534,7 @@ GLboolean AssembleInstr(GLuint uiFirstInst, return GL_FALSE; break; case OPCODE_COS: - if ( GL_FALSE == assemble_COS(pR700AsmCode) ) + if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_COS) ) return GL_FALSE; break; @@ -5793,7 +5563,8 @@ GLboolean AssembleInstr(GLuint uiFirstInst, if ( GL_FALSE == assemble_FLR(pR700AsmCode) ) return GL_FALSE; break; - //case OP_FLR_INT: + //case OP_FLR_INT: ; + // if ( GL_FALSE == assemble_FLR_INT() ) // return GL_FALSE; // break; @@ -5804,7 +5575,8 @@ GLboolean AssembleInstr(GLuint uiFirstInst, break; case OPCODE_KIL: - if ( GL_FALSE == assemble_KIL(pR700AsmCode) ) + case OPCODE_KIL_NV: + if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGT) ) return GL_FALSE; break; case OPCODE_LG2: @@ -5844,6 +5616,26 @@ GLboolean AssembleInstr(GLuint uiFirstInst, case OPCODE_MUL: if ( GL_FALSE == assemble_MUL(pR700AsmCode) ) return GL_FALSE; + break; + + case OPCODE_NOISE1: + { + callPreSub(pR700AsmCode, + GLSL_NOISE1, + &noise1_presub, + pILInst->DstReg.Index + pR700AsmCode->starting_temp_register_number, + 1); + radeon_error("noise1: not yet supported shader instruction\n"); + }; + break; + case OPCODE_NOISE2: + radeon_error("noise2: not yet supported shader instruction\n"); + break; + case OPCODE_NOISE3: + radeon_error("noise3: not yet supported shader instruction\n"); + break; + case OPCODE_NOISE4: + radeon_error("noise4: not yet supported shader instruction\n"); break; case OPCODE_POW: @@ -5859,7 +5651,7 @@ GLboolean AssembleInstr(GLuint uiFirstInst, return GL_FALSE; break; case OPCODE_SIN: - if ( GL_FALSE == assemble_SIN(pR700AsmCode) ) + if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_SIN) ) return GL_FALSE; break; case OPCODE_SCS: @@ -5868,130 +5660,23 @@ GLboolean AssembleInstr(GLuint uiFirstInst, break; case OPCODE_SEQ: - if(OPCODE_IF == pILInst[i+1].Opcode) - { - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETE) ) - { - return GL_FALSE; - } - } - else if(OPCODE_BRK == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK; -#endif - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETE) ) - { - return GL_FALSE; - } - } - else if(OPCODE_CONT == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE; -#endif - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETE) ) - { - return GL_FALSE; - } - } - else - { - if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETE) ) - { - return GL_FALSE; - } - } - break; - - case OPCODE_SGT: - if(OPCODE_IF == pILInst[i+1].Opcode) - { - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) ) - { - return GL_FALSE; - } - } - else if(OPCODE_BRK == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK; -#endif - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) ) - { - return GL_FALSE; - } - } - else if(OPCODE_CONT == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE; -#endif - - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) ) - { - return GL_FALSE; - } - } - else + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETE) ) { - if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) ) - { - return GL_FALSE; - } + return GL_FALSE; } break; - case OPCODE_SGE: - if(OPCODE_IF == pILInst[i+1].Opcode) - { - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) ) - { - return GL_FALSE; - } - } - else if(OPCODE_BRK == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK; -#endif - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) ) - { - return GL_FALSE; - } - } - else if(OPCODE_CONT == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE; -#endif - - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) ) - { - return GL_FALSE; - } - } - else + case OPCODE_SGT: + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) ) { - if ( GL_FALSE == assemble_SGE(pR700AsmCode) ) - { - return GL_FALSE; - } + return GL_FALSE; + } + break; + + case OPCODE_SGE: + if ( GL_FALSE == assemble_SGE(pR700AsmCode) ) + { + return GL_FALSE; } break; @@ -6003,54 +5688,12 @@ GLboolean AssembleInstr(GLuint uiFirstInst, SrcRegSave[1] = pILInst[i].SrcReg[1]; pILInst[i].SrcReg[0] = SrcRegSave[1]; pILInst[i].SrcReg[1] = SrcRegSave[0]; - if(OPCODE_IF == pILInst[i+1].Opcode) - { - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) ) - { - pILInst[i].SrcReg[0] = SrcRegSave[0]; - pILInst[i].SrcReg[1] = SrcRegSave[1]; - return GL_FALSE; - } - } - else if(OPCODE_BRK == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK; -#endif - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) ) - { - pILInst[i].SrcReg[0] = SrcRegSave[0]; - pILInst[i].SrcReg[1] = SrcRegSave[1]; - return GL_FALSE; - } - } - else if(OPCODE_CONT == pILInst[i+1].Opcode) + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) ) { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE; -#endif - - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) ) - { - pILInst[i].SrcReg[0] = SrcRegSave[0]; - pILInst[i].SrcReg[1] = SrcRegSave[1]; - return GL_FALSE; - } + pILInst[i].SrcReg[0] = SrcRegSave[0]; + pILInst[i].SrcReg[1] = SrcRegSave[1]; + return GL_FALSE; } - else - { - if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) ) - { - pILInst[i].SrcReg[0] = SrcRegSave[0]; - pILInst[i].SrcReg[1] = SrcRegSave[1]; - return GL_FALSE; - } - } pILInst[i].SrcReg[0] = SrcRegSave[0]; pILInst[i].SrcReg[1] = SrcRegSave[1]; } @@ -6063,53 +5706,11 @@ GLboolean AssembleInstr(GLuint uiFirstInst, SrcRegSave[1] = pILInst[i].SrcReg[1]; pILInst[i].SrcReg[0] = SrcRegSave[1]; pILInst[i].SrcReg[1] = SrcRegSave[0]; - if(OPCODE_IF == pILInst[i+1].Opcode) - { - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) ) - { - pILInst[i].SrcReg[0] = SrcRegSave[0]; - pILInst[i].SrcReg[1] = SrcRegSave[1]; - return GL_FALSE; - } - } - else if(OPCODE_BRK == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK; -#endif - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) ) - { - pILInst[i].SrcReg[0] = SrcRegSave[0]; - pILInst[i].SrcReg[1] = SrcRegSave[1]; - return GL_FALSE; - } - } - else if(OPCODE_CONT == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE; -#endif - - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) ) - { - pILInst[i].SrcReg[0] = SrcRegSave[0]; - pILInst[i].SrcReg[1] = SrcRegSave[1]; - return GL_FALSE; - } - } - else + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGE) ) { - if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGE) ) - { - pILInst[i].SrcReg[0] = SrcRegSave[0]; - pILInst[i].SrcReg[1] = SrcRegSave[1]; - return GL_FALSE; - } + pILInst[i].SrcReg[0] = SrcRegSave[0]; + pILInst[i].SrcReg[1] = SrcRegSave[1]; + return GL_FALSE; } pILInst[i].SrcReg[0] = SrcRegSave[0]; pILInst[i].SrcReg[1] = SrcRegSave[1]; @@ -6117,44 +5718,9 @@ GLboolean AssembleInstr(GLuint uiFirstInst, break; case OPCODE_SNE: - if(OPCODE_IF == pILInst[i+1].Opcode) - { - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETNE) ) - { - return GL_FALSE; - } - } - else if(OPCODE_BRK == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK; -#endif - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETNE) ) - { - return GL_FALSE; - } - } - else if(OPCODE_CONT == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE; -#endif - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETNE) ) - { - return GL_FALSE; - } - } - else + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETNE) ) { - if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETNE) ) - { - return GL_FALSE; - } + return GL_FALSE; } break; @@ -6182,7 +5748,8 @@ GLboolean AssembleInstr(GLuint uiFirstInst, } } break; - + case OPCODE_DDX: + case OPCODE_DDY: case OPCODE_TEX: case OPCODE_TXB: case OPCODE_TXP: @@ -6190,16 +5757,21 @@ GLboolean AssembleInstr(GLuint uiFirstInst, return GL_FALSE; break; + case OPCODE_TRUNC: + if ( GL_FALSE == assemble_math_function(pR700AsmCode, SQ_OP2_INST_TRUNC) ) + return GL_FALSE; + break; + case OPCODE_XPD: if ( GL_FALSE == assemble_XPD(pR700AsmCode) ) return GL_FALSE; break; - case OPCODE_IF : + case OPCODE_IF: { GLboolean bHasElse = GL_FALSE; - if(pILInst[pILInst[i].BranchTarget - 1].Opcode == OPCODE_ELSE) + if(pILInst[pILInst[i].BranchTarget].Opcode == OPCODE_ELSE) { bHasElse = GL_TRUE; } @@ -6250,7 +5822,7 @@ GLboolean AssembleInstr(GLuint uiFirstInst, break; case OPCODE_BGNSUB: - if( GL_FALSE == assemble_BGNSUB(pR700AsmCode, i) ) + if( GL_FALSE == assemble_BGNSUB(pR700AsmCode, i, uiIL_Shift) ) { return GL_FALSE; } @@ -6265,9 +5837,11 @@ GLboolean AssembleInstr(GLuint uiFirstInst, case OPCODE_CAL: if( GL_FALSE == assemble_CAL(pR700AsmCode, - pILInst[i].BranchTarget, + pILInst[i].BranchTarget, + uiIL_Shift, uiNumberInsts, - pILInst) ) + pILInst, + NULL) ) { return GL_FALSE; } @@ -6300,10 +5874,11 @@ GLboolean AssembleInstr(GLuint uiFirstInst, GLboolean InitShaderProgram(r700_AssemblerBase * pAsm) { setRetInLoopFlag(pAsm, SQ_SEL_0); + pAsm->alu_x_opcode = SQ_CF_INST_ALU; return GL_TRUE; } -GLboolean RelocProgram(r700_AssemblerBase * pAsm) +GLboolean RelocProgram(r700_AssemblerBase * pAsm, struct gl_program * pILProg) { GLuint i; GLuint unCFoffset; @@ -6313,14 +5888,62 @@ GLboolean RelocProgram(r700_AssemblerBase * pAsm) R700ShaderInstruction * pInst; R700ControlFlowGenericClause * pCFInst; + R700ControlFlowALUClause * pCF_ALU; + R700ALUInstruction * pALU; + GLuint unConstOffset = 0; + GLuint unRegOffset; + GLuint unMinRegIndex; + + plstCFmain = pAsm->CALLSTACK[0].plstCFInstructions_local; + + /* remove flags init if they are not used */ + if((pAsm->unCFflags & HAS_LOOPRET) == 0) + { + R700ControlFlowALUClause * pCF_ALU; + pInst = plstCFmain->pHead; + while(pInst) + { + if(SIT_CF_ALU == pInst->m_ShaderInstType) + { + pCF_ALU = (R700ControlFlowALUClause *)pInst; + if(0 == pCF_ALU->m_Word1.f.count) + { + pCF_ALU->m_Word1.f.cf_inst = SQ_CF_INST_NOP; + } + else + { + R700ALUInstruction * pALU = pCF_ALU->m_pLinkedALUInstruction; + + pALU->m_pLinkedALUClause = NULL; + pALU = (R700ALUInstruction *)(pALU->pNextInst); + pALU->m_pLinkedALUClause = pCF_ALU; + pCF_ALU->m_pLinkedALUInstruction = pALU; + + pCF_ALU->m_Word1.f.count--; + } + break; + } + pInst = pInst->pNextInst; + }; + } + + if(pAsm->CALLSTACK[0].max > 0) + { + pAsm->pR700Shader->uStackSize = ((pAsm->CALLSTACK[0].max + 3)>>2) + 2; + } + if(0 == pAsm->unSubArrayPointer) { return GL_TRUE; } - plstCFmain = pAsm->CALLSTACK[0].plstCFInstructions_local; unCFoffset = plstCFmain->uNumOfNode; + if(NULL != pILProg->Parameters) + { + unConstOffset = pILProg->Parameters->NumParameters; + } + /* Reloc subs */ for(i=0; iunSubArrayPointer; i++) { @@ -6358,6 +5981,84 @@ GLboolean RelocProgram(r700_AssemblerBase * pAsm) pInst = pInst->pNextInst; }; + if(NULL != pAsm->subs[i].pPresubDesc) + { + GLuint uNumSrc; + + unMinRegIndex = pAsm->subs[i].pPresubDesc->pCompiledSub->MinRegIndex; + unRegOffset = pAsm->subs[i].pPresubDesc->maxStartReg; + unConstOffset += pAsm->subs[i].pPresubDesc->unConstantsStart; + + pInst = plstCFsub->pHead; + while(pInst) + { + if(SIT_CF_ALU == pInst->m_ShaderInstType) + { + pCF_ALU = (R700ControlFlowALUClause *)pInst; + + pALU = pCF_ALU->m_pLinkedALUInstruction; + for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++) + { + pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex; + + if(pALU->m_Word0.f.src0_sel < SQ_ALU_SRC_GPR_SIZE) + { + pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex; + } + else if(pALU->m_Word0.f.src0_sel >= SQ_ALU_SRC_CFILE_BASE) + { + pALU->m_Word0.f.src0_sel += unConstOffset; + } + + if( ((pALU->m_Word1.val >> SQ_ALU_WORD1_OP3_ALU_INST_SHIFT) & 0x0000001F) + >= SQ_OP3_INST_MUL_LIT ) + { /* op3 : 3 srcs */ + if(pALU->m_Word1_OP3.f.src2_sel < SQ_ALU_SRC_GPR_SIZE) + { + pALU->m_Word1_OP3.f.src2_sel = pALU->m_Word1_OP3.f.src2_sel + unRegOffset - unMinRegIndex; + } + else if(pALU->m_Word1_OP3.f.src2_sel >= SQ_ALU_SRC_CFILE_BASE) + { + pALU->m_Word1_OP3.f.src2_sel += unConstOffset; + } + if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE) + { + pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex; + } + else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE) + { + pALU->m_Word0.f.src1_sel += unConstOffset; + } + } + else + { + if(pAsm->bR6xx) + { + uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f6.alu_inst, 0); + } + else + { + uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f.alu_inst, 0); + } + if(2 == uNumSrc) + { /* 2 srcs */ + if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE) + { + pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex; + } + else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE) + { + pALU->m_Word0.f.src1_sel += unConstOffset; + } + } + } + pALU = (R700ALUInstruction*)(pALU->pNextInst); + } + } + pInst = pInst->pNextInst; + }; + } + /* Put sub into main */ plstCFmain->pTail->pNextInst = plstCFsub->pHead; plstCFmain->pTail = plstCFsub->pTail; @@ -6371,42 +6072,216 @@ GLboolean RelocProgram(r700_AssemblerBase * pAsm) { pAsm->callers[i].cf_ptr->m_Word0.f.addr = pAsm->subs[pAsm->callers[i].subDescIndex].unCFoffset; - } - /* remove flags init if they are not used */ - if((pAsm->unCFflags & HAS_LOOPRET) == 0) - { - R700ControlFlowALUClause * pCF_ALU; - pInst = plstCFmain->pHead; - while(pInst) - { - if(SIT_CF_ALU == pInst->m_ShaderInstType) - { - pCF_ALU = (R700ControlFlowALUClause *)pInst; - if(1 == pCF_ALU->m_Word1.f.count) + if(NULL != pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc) + { + unMinRegIndex = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->pCompiledSub->MinRegIndex; + unRegOffset = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->maxStartReg; + + if(NULL != pAsm->callers[i].prelude_cf_ptr) + { + pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].prelude_cf_ptr); + pALU = pCF_ALU->m_pLinkedALUInstruction; + for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++) { - pCF_ALU->m_Word1.f.cf_inst = SQ_CF_INST_NOP; + pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex; + pALU = (R700ALUInstruction*)(pALU->pNextInst); } - else + } + if(NULL != pAsm->callers[i].finale_cf_ptr) + { + pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].finale_cf_ptr); + pALU = pCF_ALU->m_pLinkedALUInstruction; + for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++) { - R700ALUInstruction * pALU = pCF_ALU->m_pLinkedALUInstruction; - - pALU->m_pLinkedALUClause = NULL; - pALU = (R700ALUInstruction *)(pALU->pNextInst); - pALU->m_pLinkedALUClause = pCF_ALU; - pCF_ALU->m_pLinkedALUInstruction = pALU; - - pCF_ALU->m_Word1.f.count--; + pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex; + pALU = (R700ALUInstruction*)(pALU->pNextInst); } - break; } - pInst = pInst->pNextInst; - }; + } } return GL_TRUE; } +GLboolean callPreSub(r700_AssemblerBase* pAsm, + LOADABLE_SCRIPT_SIGNITURE scriptSigniture, + COMPILED_SUB * pCompiledSub, + GLshort uOutReg, + GLshort uNumValidSrc) +{ + /* save assemble context */ + GLuint starting_temp_register_number_save; + GLuint number_used_registers_save; + GLuint uFirstHelpReg_save; + GLuint uHelpReg_save; + GLuint uiCurInst_save; + struct prog_instruction *pILInst_save; + PRESUB_DESC * pPresubDesc; + GLboolean bRet; + int i; + + R700ControlFlowGenericClause* prelude_cf_ptr = NULL; + + /* copy srcs to presub inputs */ + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + for(i=0; iD.dst.opcode = SQ_OP2_INST_MOV; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = pCompiledSub->srcRegIndex[i]; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 1; + pAsm->D.dst.writez = 1; + pAsm->D.dst.writew = 1; + + if( GL_FALSE == assemble_src(pAsm, i, 0) ) + { + return GL_FALSE; + } + + next_ins(pAsm); + } + if(uNumValidSrc > 0) + { + prelude_cf_ptr = pAsm->cf_current_alu_clause_ptr; + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + } + + /* browse thro existing presubs. */ + for(i=0; iunNumPresub; i++) + { + if(pAsm->presubs[i].sptSigniture == scriptSigniture) + { + break; + } + } + + if(i == pAsm->unNumPresub) + { /* not loaded yet */ + /* save assemble context */ + number_used_registers_save = pAsm->number_used_registers; + uFirstHelpReg_save = pAsm->uFirstHelpReg; + uHelpReg_save = pAsm->uHelpReg; + starting_temp_register_number_save = pAsm->starting_temp_register_number; + pILInst_save = pAsm->pILInst; + uiCurInst_save = pAsm->uiCurInst; + + /* alloc in presub */ + if( (pAsm->unNumPresub + 1) > pAsm->unPresubArraySize ) + { + pAsm->presubs = (PRESUB_DESC*)_mesa_realloc( (void *)pAsm->presubs, + sizeof(PRESUB_DESC) * pAsm->unPresubArraySize, + sizeof(PRESUB_DESC) * (pAsm->unPresubArraySize + 4) ); + if(NULL == pAsm->presubs) + { + radeon_error("No memeory to allocate built in shader function description structures. \n"); + return GL_FALSE; + } + pAsm->unPresubArraySize += 4; + } + + pPresubDesc = &(pAsm->presubs[i]); + pPresubDesc->sptSigniture = scriptSigniture; + + /* constants offsets need to be final resolved at reloc. */ + if(0 == pAsm->unNumPresub) + { + pPresubDesc->unConstantsStart = 0; + } + else + { + pPresubDesc->unConstantsStart = pAsm->presubs[i-1].unConstantsStart + + pAsm->presubs[i-1].pCompiledSub->NumParameters; + } + + pPresubDesc->pCompiledSub = pCompiledSub; + + pPresubDesc->subIL_Shift = pAsm->unCurNumILInsts; + pPresubDesc->maxStartReg = uFirstHelpReg_save; + pAsm->unCurNumILInsts += pCompiledSub->NumInstructions; + + pAsm->unNumPresub++; + + /* setup new assemble context */ + pAsm->starting_temp_register_number = 0; + pAsm->number_used_registers = pCompiledSub->NumTemporaries; + pAsm->uFirstHelpReg = pAsm->number_used_registers; + pAsm->uHelpReg = pAsm->uFirstHelpReg; + + bRet = assemble_CAL(pAsm, + 0, + pPresubDesc->subIL_Shift, + pCompiledSub->NumInstructions, + pCompiledSub->Instructions, + pPresubDesc); + + + pPresubDesc->number_used_registers = pAsm->number_used_registers; + + /* restore assemble context */ + pAsm->number_used_registers = number_used_registers_save; + pAsm->uFirstHelpReg = uFirstHelpReg_save; + pAsm->uHelpReg = uHelpReg_save; + pAsm->starting_temp_register_number = starting_temp_register_number_save; + pAsm->pILInst = pILInst_save; + pAsm->uiCurInst = uiCurInst_save; + } + else + { /* was loaded */ + pPresubDesc = &(pAsm->presubs[i]); + + bRet = assemble_CAL(pAsm, + 0, + pPresubDesc->subIL_Shift, + pCompiledSub->NumInstructions, + pCompiledSub->Instructions, + pPresubDesc); + } + + if(GL_FALSE == bRet) + { + radeon_error("Shader presub assemble failed. \n"); + } + else + { + /* copy presub output to real dst */ + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = pCompiledSub->dstRegIndex; + pAsm->S[0].src.swizzlex = pCompiledSub->outputSwizzleX; + pAsm->S[0].src.swizzley = pCompiledSub->outputSwizzleY; + pAsm->S[0].src.swizzlez = pCompiledSub->outputSwizzleZ; + pAsm->S[0].src.swizzlew = pCompiledSub->outputSwizzleW; + + next_ins(pAsm); + + pAsm->callers[pAsm->unCallerArrayPointer - 1].finale_cf_ptr = pAsm->cf_current_alu_clause_ptr; + pAsm->callers[pAsm->unCallerArrayPointer - 1].prelude_cf_ptr = prelude_cf_ptr; + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + } + + if( (pPresubDesc->number_used_registers + pAsm->uFirstHelpReg) > pAsm->number_used_registers ) + { + pAsm->number_used_registers = pPresubDesc->number_used_registers + pAsm->uFirstHelpReg; + } + if(pAsm->uFirstHelpReg > pPresubDesc->maxStartReg) + { + pPresubDesc->maxStartReg = pAsm->uFirstHelpReg; + } + + return bRet; +} + GLboolean Process_Export(r700_AssemblerBase* pAsm, GLuint type, GLuint export_starting_index, @@ -6701,7 +6576,7 @@ GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode, export_starting_index++; } } - + for(i=VERT_RESULT_VAR0; iucVP_OutputMap[i], GL_FALSE) ) - { + { return GL_FALSE; } @@ -6764,6 +6639,11 @@ GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode) FREE(pR700AsmCode->callers); } + if(NULL != pR700AsmCode->presubs) + { + FREE(pR700AsmCode->presubs); + } + return GL_TRUE; }