#include "main/mtypes.h"
#include "main/imports.h"
+#include "shader/prog_parameter.h"
#include "radeon_debug.h"
#include "r600_context.h"
#define USE_CF_FOR_CONTINUE_BREAK 1
#define USE_CF_FOR_POP_AFTER 1
+struct prog_instruction noise1_insts[12] = {
+ {OPCODE_BGNSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_MOV , {{0, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 2, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_MOV , {{8, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 4, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_MOV , {{8, 0, 585, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 8, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_SGT , {{0, 0, 585, 0, 0, 0}, {8, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 1, 1, 0, 8, 1672, 0}, 1, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_IF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 7, 0, 0}, 0, 0, 0, 1, 0, 0, 0, 15, 0, 0, 0},
+ {OPCODE_MOV , {{0, 0, 1755, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_ENDIF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_MOV , {{0, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_ENDSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}
+};
+float noise1_const[2][4] = {
+ {0.300000f, 0.900000f, 0.500000f, 0.300000f}
+};
+
+COMPILED_SUB noise1_presub = {
+ &(noise1_insts[0]),
+ 12,
+ 2,
+ 1,
+ 0,
+ &(noise1_const[0]),
+ SWIZZLE_X,
+ SWIZZLE_X,
+ SWIZZLE_X,
+ SWIZZLE_X,
+ {0,0,0},
+ 0
+};
+
BITS addrmode_PVSDST(PVSDST * pPVSDST)
{
return pPVSDST->addrmode0 | ((BITS)pPVSDST->addrmode1 << 1);
return(format);
}
-unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm)
+unsigned int r700GetNumOperands(GLuint opcode, GLuint nIsOp3)
{
- if(pAsm->D.dst.op3)
+ if(nIsOp3 > 0)
{
return 3;
}
- switch (pAsm->D.dst.opcode)
+ switch (opcode)
{
case SQ_OP2_INST_ADD:
case SQ_OP2_INST_KILLE:
return 1;
default: radeon_error(
- "Need instruction operand number for %x.\n", pAsm->D.dst.opcode);
+ "Need instruction operand number for %x.\n", opcode);
};
return 3;
pAsm->unCFflags = 0;
+ pAsm->presubs = NULL;
+ pAsm->unPresubArraySize = 0;
+ pAsm->unNumPresub = 0;
+ pAsm->unCurNumILInsts = 0;
+
+ pAsm->unVetTexBits = 0;
+
return 0;
}
GLboolean IsTex(gl_inst_opcode Opcode)
{
- if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) )
+ if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) ||
+ (OPCODE_DDX==Opcode) || (OPCODE_DDY==Opcode) )
{
return GL_TRUE;
}
GLubyte element,
GLuint _signed,
GLboolean normalize,
+ GLenum format,
VTX_FETCH_METHOD * pFetchMethod)
{
GLuint client_size_inbyte;
vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X;
vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count;
- vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_X;
- vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
- vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_Z;
- vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
+ if(format == GL_BGRA)
+ {
+ vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_Z;
+ vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
+ vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_X;
+ vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
+ }
+ else
+ {
+ vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_X;
+ vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
+ vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_Z;
+ vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
+
+ }
vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1;
vfetch_instruction_ptr->m_Word1.f.data_format = data_format;
}
pAsm->S[fld].src.rtype = SRC_REG_CONSTANT;
- pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index;
+ if(pILInst->SrcReg[src].Index < 0)
+ {
+ WARN_ONCE("Negative register offsets not supported yet!\n");
+ pAsm->S[fld].src.reg = 0;
+ }
+ else
+ {
+ pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index;
+ }
break;
case PROGRAM_INPUT:
setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
break;
case PROGRAM_INPUT:
- switch (pILInst->SrcReg[0].Index)
+ if(SPT_VP == pAsm->currentShaderType)
{
- case FRAG_ATTRIB_WPOS:
- case FRAG_ATTRIB_COL0:
- case FRAG_ATTRIB_COL1:
- case FRAG_ATTRIB_FOGC:
- case FRAG_ATTRIB_TEX0:
- case FRAG_ATTRIB_TEX1:
- case FRAG_ATTRIB_TEX2:
- case FRAG_ATTRIB_TEX3:
- case FRAG_ATTRIB_TEX4:
- case FRAG_ATTRIB_TEX5:
- case FRAG_ATTRIB_TEX6:
- case FRAG_ATTRIB_TEX7:
- bValidTexCoord = GL_TRUE;
+ switch (pILInst->SrcReg[0].Index)
+ {
+ case VERT_ATTRIB_TEX0:
+ case VERT_ATTRIB_TEX1:
+ case VERT_ATTRIB_TEX2:
+ case VERT_ATTRIB_TEX3:
+ case VERT_ATTRIB_TEX4:
+ case VERT_ATTRIB_TEX5:
+ case VERT_ATTRIB_TEX6:
+ case VERT_ATTRIB_TEX7:
+ bValidTexCoord = GL_TRUE;
+ pAsm->S[0].src.reg =
+ pAsm->ucVP_AttributeMap[pILInst->SrcReg[0].Index];
+ pAsm->S[0].src.rtype = SRC_REG_INPUT;
+ break;
+ }
+ }
+ else
+ {
+ switch (pILInst->SrcReg[0].Index)
+ {
+ case FRAG_ATTRIB_WPOS:
+ case FRAG_ATTRIB_COL0:
+ case FRAG_ATTRIB_COL1:
+ case FRAG_ATTRIB_FOGC:
+ case FRAG_ATTRIB_TEX0:
+ case FRAG_ATTRIB_TEX1:
+ case FRAG_ATTRIB_TEX2:
+ case FRAG_ATTRIB_TEX3:
+ case FRAG_ATTRIB_TEX4:
+ case FRAG_ATTRIB_TEX5:
+ case FRAG_ATTRIB_TEX6:
+ case FRAG_ATTRIB_TEX7:
+ bValidTexCoord = GL_TRUE;
+ pAsm->S[0].src.reg =
+ pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
+ pAsm->S[0].src.rtype = SRC_REG_INPUT;
+ break;
+ case FRAG_ATTRIB_FACE:
+ fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n");
+ break;
+ case FRAG_ATTRIB_PNTC:
+ fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n");
+ break;
+ }
+
+ if( (pILInst->SrcReg[0].Index >= FRAG_ATTRIB_VAR0) ||
+ (pILInst->SrcReg[0].Index < FRAG_ATTRIB_MAX) )
+ {
+ bValidTexCoord = GL_TRUE;
pAsm->S[0].src.reg =
pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
pAsm->S[0].src.rtype = SRC_REG_INPUT;
- break;
- case FRAG_ATTRIB_FACE:
- fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n");
- break;
- case FRAG_ATTRIB_PNTC:
- fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n");
- break;
- }
-
- if( (pILInst->SrcReg[0].Index >= FRAG_ATTRIB_VAR0) ||
- (pILInst->SrcReg[0].Index < FRAG_ATTRIB_MAX) )
- {
- bValidTexCoord = GL_TRUE;
- pAsm->S[0].src.reg =
- pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
- pAsm->S[0].src.rtype = SRC_REG_INPUT;
+ }
}
- break;
+ break;
}
}
tex_instruction_ptr->m_Word0.f.tex_inst = pAsm->D.dst.opcode;
tex_instruction_ptr->m_Word0.f.bc_frac_mode = 0x0;
tex_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
+ tex_instruction_ptr->m_Word0.f.alt_const = 0;
- tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg;
+ if(SPT_VP == pAsm->currentShaderType)
+ {
+ tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg + VERT_ATTRIB_MAX;
+ pAsm->unVetTexBits |= 1 << texture_unit_source->reg;
+ }
+ else
+ {
+ tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg;
+ }
tex_instruction_ptr->m_Word1.f.lod_bias = 0x0;
if (normalized) {
tex_instruction_ptr->m_Word2.f.offset_x = 0x0;
tex_instruction_ptr->m_Word2.f.offset_y = 0x0;
tex_instruction_ptr->m_Word2.f.offset_z = 0x0;
-
tex_instruction_ptr->m_Word2.f.sampler_id = texture_unit_source->reg;
// dst
GLuint swizzle_key;
- GLuint number_of_operands = r700GetNumOperands(pAsm);
+ GLuint number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
for (src=0; src<number_of_operands; src++)
{
GLuint swizzle_key;
- GLuint number_of_operands = r700GetNumOperands(pAsm);
+ GLuint number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
for (src=0; src<number_of_operands; src++)
{
int current_source_index;
GLuint contiguous_slots_needed;
- GLuint uNumSrc = r700GetNumOperands(pAsm);
+ GLuint uNumSrc = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
//GLuint channel_swizzle, j;
//GLuint chan_counter[4] = {0, 0, 0, 0};
//PVSSRC * pSource[3];
contiguous_slots_needed = 0;
- if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) )
+ if(!is_single_scalar_operation)
{
contiguous_slots_needed = 4;
}
{
BITS tmp;
- checkop1(pAsm);
-
- tmp = gethelpr(pAsm);
-
- // COS tmp.x, a.x
- pAsm->D.dst.opcode = SQ_OP2_INST_COS;
- pAsm->D.dst.math = 1;
+ checkop1(pAsm);
- setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
- pAsm->D.dst.rtype = DST_REG_TEMPORARY;
- pAsm->D.dst.reg = tmp;
- pAsm->D.dst.writex = 1;
+ tmp = gethelpr(pAsm);
+ /* tmp.x = src /2*PI */
+ pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+ pAsm->D.dst.writex = 1;
- if( GL_FALSE == assemble_src(pAsm, 0, -1) )
- {
- return GL_FALSE;
- }
+ assemble_src(pAsm, 0, -1);
- if ( GL_FALSE == next_ins(pAsm) )
- {
- return GL_FALSE;
- }
+ pAsm->S[1].src.rtype = SRC_REC_LITERAL;
+ setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
+ pAsm->D2.dst2.literal_slots = 1;
+ pAsm->C[0].f = 1/(3.1415926535 * 2);
+ pAsm->C[1].f = 0.0F;
- // SIN tmp.y, a.x
- pAsm->D.dst.opcode = SQ_OP2_INST_SIN;
- pAsm->D.dst.math = 1;
+ next_ins(pAsm);
- setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
- pAsm->D.dst.rtype = DST_REG_TEMPORARY;
- pAsm->D.dst.reg = tmp;
- pAsm->D.dst.writey = 1;
+ // COS dst.x, a.x
+ pAsm->D.dst.opcode = SQ_OP2_INST_COS;
+ pAsm->D.dst.math = 1;
- if( GL_FALSE == assemble_src(pAsm, 0, -1) )
- {
- return GL_FALSE;
- }
+ assemble_dst(pAsm);
+ /* mask y */
+ pAsm->D.dst.writey = 0;
- if( GL_FALSE == next_ins(pAsm) )
- {
- return GL_FALSE;
- }
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+ noneg_PVSSRC(&(pAsm->S[0].src));
- // MOV dst.mask, tmp
- pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+ if ( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
- if( GL_FALSE == assemble_dst(pAsm) )
- {
- return GL_FALSE;
- }
+ // SIN dst.y, a.x
+ pAsm->D.dst.opcode = SQ_OP2_INST_SIN;
+ pAsm->D.dst.math = 1;
- setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
- pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
- pAsm->S[0].src.reg = tmp;
+ assemble_dst(pAsm);
+ /* mask x */
+ pAsm->D.dst.writex = 0;
- noswizzle_PVSSRC(&(pAsm->S[0].src));
- pAsm->S[0].src.swizzlez = SQ_SEL_0;
- pAsm->S[0].src.swizzlew = SQ_SEL_0;
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+ noneg_PVSSRC(&(pAsm->S[0].src));
- if ( GL_FALSE == next_ins(pAsm) )
- {
- return GL_FALSE;
- }
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
return GL_TRUE;
}
}
- if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXB)
- {
- pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_L;
- }
- else
+ switch(pAsm->pILInst[pAsm->uiCurInst].Opcode)
{
- pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;
+ case OPCODE_DDX:
+ /* will these need WQM(1) on CF inst ? */
+ pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_H;
+ break;
+ case OPCODE_DDY:
+ pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_V;
+ break;
+ case OPCODE_TXB:
+ pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_L;
+ break;
+ default:
+ if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1)
+ pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_C;
+ else
+ pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;
}
pAsm->is_tex = GL_TRUE;
pAsm->S[0].src.swizzlew = SQ_SEL_Y;
}
+ if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1)
+ {
+ /* compare value goes to w chan ? */
+ pAsm->S[0].src.swizzlew = SQ_SEL_Z;
+ }
+
if ( GL_FALSE == next_ins(pAsm) )
{
return GL_FALSE;
}
+ /* add ARB shadow ambient but clamp to 0..1 */
+ if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1)
+ {
+ /* ADD_SAT dst, dst, ambient[texunit] */
+ pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+ pAsm->D2.dst2.SaturateMode = 1;
+
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = pAsm->D.dst.reg;
+ noswizzle_PVSSRC(&(pAsm->S[0].src));
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ pAsm->S[1].src.rtype = SRC_REG_CONSTANT;
+ pAsm->S[1].src.reg = pAsm->shadow_regs[pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit];
+ noswizzle_PVSSRC(&(pAsm->S[1].src));
+ noneg_PVSSRC(&(pAsm->S[1].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ }
+
return GL_TRUE;
}
{
if(GL_FALSE == add_cf_instruction(pAsm) )
{
- return GL_FALSE;
+ return;
}
//pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_RETURN;
pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
}
-GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex)
+GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex, GLuint uiIL_Shift)
{
/* Put in sub */
if( (pAsm->unSubArrayPointer + 1) > pAsm->unSubArraySize )
pAsm->unSubArraySize += 10;
}
- pAsm->subs[pAsm->unSubArrayPointer].subIL_Offset = nILindex;
+ pAsm->subs[pAsm->unSubArrayPointer].subIL_Offset = nILindex + uiIL_Shift;
pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pHead=NULL;
pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pTail=NULL;
pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.uNumOfNode=0;
GLboolean assemble_CAL(r700_AssemblerBase *pAsm,
GLint nILindex,
+ GLuint uiIL_Shift,
GLuint uiNumberInsts,
- struct prog_instruction *pILInst)
+ struct prog_instruction *pILInst,
+ PRESUB_DESC * pPresubDesc)
{
+ GLint uiIL_Offset;
+
pAsm->alu_x_opcode = SQ_CF_INST_ALU;
if(GL_FALSE == add_cf_instruction(pAsm) )
pAsm->unCallerArraySize += 10;
}
- pAsm->callers[pAsm->unCallerArrayPointer].subIL_Offset = nILindex;
- pAsm->callers[pAsm->unCallerArrayPointer].cf_ptr = pAsm->cf_current_cf_clause_ptr;
+ uiIL_Offset = nILindex + uiIL_Shift;
+ pAsm->callers[pAsm->unCallerArrayPointer].subIL_Offset = uiIL_Offset;
+ pAsm->callers[pAsm->unCallerArrayPointer].cf_ptr = pAsm->cf_current_cf_clause_ptr;
+
+ pAsm->callers[pAsm->unCallerArrayPointer].finale_cf_ptr = NULL;
+ pAsm->callers[pAsm->unCallerArrayPointer].prelude_cf_ptr = NULL;
pAsm->unCallerArrayPointer++;
GLboolean bRet;
for(j=0; j<pAsm->unSubArrayPointer; j++)
{
- if(nILindex == pAsm->subs[j].subIL_Offset)
+ if(uiIL_Offset == pAsm->subs[j].subIL_Offset)
{ /* compiled before */
max = pAsm->subs[j].unStackDepthMax
pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = pAsm->unSubArrayPointer;
unSubID = pAsm->unSubArrayPointer;
- bRet = AssembleInstr(nILindex, uiNumberInsts, pILInst, pAsm);
+ bRet = AssembleInstr(nILindex, uiIL_Shift, uiNumberInsts, pILInst, pAsm);
if(GL_TRUE == bRet)
{
{
pAsm->CALLSTACK[pAsm->CALLSP].max = max;
}
+
+ pAsm->subs[unSubID].pPresubDesc = pPresubDesc;
}
return bRet;
GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue)
{
- GLfloat fLiteral[2] = {0.1, 0.0};
+ /*GLfloat fLiteral[2] = {0.1, 0.0};*/
pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
pAsm->D.dst.op3 = 0;
GLboolean testFlag(r700_AssemblerBase *pAsm)
{
- GLfloat fLiteral[2] = {0.1, 0.0};
+ /*GLfloat fLiteral[2] = {0.1, 0.0};*/
//Test flag
GLuint tmp = gethelpr(pAsm);
}
GLboolean AssembleInstr(GLuint uiFirstInst,
+ GLuint uiIL_Shift,
GLuint uiNumberInsts,
struct prog_instruction *pILInst,
r700_AssemblerBase *pR700AsmCode)
case OPCODE_MUL:
if ( GL_FALSE == assemble_MUL(pR700AsmCode) )
return GL_FALSE;
+ break;
+
+ case OPCODE_NOISE1:
+ {
+ callPreSub(pR700AsmCode,
+ GLSL_NOISE1,
+ &noise1_presub,
+ pILInst->DstReg.Index + pR700AsmCode->starting_temp_register_number,
+ 1);
+ radeon_error("noise1: not yet supported shader instruction\n");
+ };
+ break;
+ case OPCODE_NOISE2:
+ radeon_error("noise2: not yet supported shader instruction\n");
+ break;
+ case OPCODE_NOISE3:
+ radeon_error("noise3: not yet supported shader instruction\n");
+ break;
+ case OPCODE_NOISE4:
+ radeon_error("noise4: not yet supported shader instruction\n");
break;
case OPCODE_POW:
}
}
break;
-
+ case OPCODE_DDX:
+ case OPCODE_DDY:
case OPCODE_TEX:
case OPCODE_TXB:
case OPCODE_TXP:
return GL_FALSE;
break;
- case OPCODE_IF :
+ case OPCODE_IF:
{
GLboolean bHasElse = GL_FALSE;
- if(pILInst[pILInst[i].BranchTarget - 1].Opcode == OPCODE_ELSE)
+ if(pILInst[pILInst[i].BranchTarget].Opcode == OPCODE_ELSE)
{
bHasElse = GL_TRUE;
}
break;
case OPCODE_BGNSUB:
- if( GL_FALSE == assemble_BGNSUB(pR700AsmCode, i) )
+ if( GL_FALSE == assemble_BGNSUB(pR700AsmCode, i, uiIL_Shift) )
{
return GL_FALSE;
}
case OPCODE_CAL:
if( GL_FALSE == assemble_CAL(pR700AsmCode,
- pILInst[i].BranchTarget,
+ pILInst[i].BranchTarget,
+ uiIL_Shift,
uiNumberInsts,
- pILInst) )
+ pILInst,
+ NULL) )
{
return GL_FALSE;
}
return GL_TRUE;
}
-GLboolean RelocProgram(r700_AssemblerBase * pAsm)
+GLboolean RelocProgram(r700_AssemblerBase * pAsm, struct gl_program * pILProg)
{
GLuint i;
GLuint unCFoffset;
R700ShaderInstruction * pInst;
R700ControlFlowGenericClause * pCFInst;
+ R700ControlFlowALUClause * pCF_ALU;
+ R700ALUInstruction * pALU;
+ GLuint unConstOffset = 0;
+ GLuint unRegOffset;
+ GLuint unMinRegIndex;
+
plstCFmain = pAsm->CALLSTACK[0].plstCFInstructions_local;
/* remove flags init if they are not used */
unCFoffset = plstCFmain->uNumOfNode;
+ if(NULL != pILProg->Parameters)
+ {
+ unConstOffset = pILProg->Parameters->NumParameters;
+ }
+
/* Reloc subs */
for(i=0; i<pAsm->unSubArrayPointer; i++)
{
pInst = pInst->pNextInst;
};
+ if(NULL != pAsm->subs[i].pPresubDesc)
+ {
+ GLuint uNumSrc;
+
+ unMinRegIndex = pAsm->subs[i].pPresubDesc->pCompiledSub->MinRegIndex;
+ unRegOffset = pAsm->subs[i].pPresubDesc->maxStartReg;
+ unConstOffset += pAsm->subs[i].pPresubDesc->unConstantsStart;
+
+ pInst = plstCFsub->pHead;
+ while(pInst)
+ {
+ if(SIT_CF_ALU == pInst->m_ShaderInstType)
+ {
+ pCF_ALU = (R700ControlFlowALUClause *)pInst;
+
+ pALU = pCF_ALU->m_pLinkedALUInstruction;
+ for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++)
+ {
+ pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex;
+
+ if(pALU->m_Word0.f.src0_sel < SQ_ALU_SRC_GPR_SIZE)
+ {
+ pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex;
+ }
+ else if(pALU->m_Word0.f.src0_sel >= SQ_ALU_SRC_CFILE_BASE)
+ {
+ pALU->m_Word0.f.src0_sel += unConstOffset;
+ }
+
+ if( ((pALU->m_Word1.val >> SQ_ALU_WORD1_OP3_ALU_INST_SHIFT) & 0x0000001F)
+ >= SQ_OP3_INST_MUL_LIT )
+ { /* op3 : 3 srcs */
+ if(pALU->m_Word1_OP3.f.src2_sel < SQ_ALU_SRC_GPR_SIZE)
+ {
+ pALU->m_Word1_OP3.f.src2_sel = pALU->m_Word1_OP3.f.src2_sel + unRegOffset - unMinRegIndex;
+ }
+ else if(pALU->m_Word1_OP3.f.src2_sel >= SQ_ALU_SRC_CFILE_BASE)
+ {
+ pALU->m_Word1_OP3.f.src2_sel += unConstOffset;
+ }
+ if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE)
+ {
+ pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex;
+ }
+ else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE)
+ {
+ pALU->m_Word0.f.src1_sel += unConstOffset;
+ }
+ }
+ else
+ {
+ if(pAsm->bR6xx)
+ {
+ uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f6.alu_inst, 0);
+ }
+ else
+ {
+ uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f.alu_inst, 0);
+ }
+ if(2 == uNumSrc)
+ { /* 2 srcs */
+ if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE)
+ {
+ pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex;
+ }
+ else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE)
+ {
+ pALU->m_Word0.f.src1_sel += unConstOffset;
+ }
+ }
+ }
+ pALU = (R700ALUInstruction*)(pALU->pNextInst);
+ }
+ }
+ pInst = pInst->pNextInst;
+ };
+ }
+
/* Put sub into main */
plstCFmain->pTail->pNextInst = plstCFsub->pHead;
plstCFmain->pTail = plstCFsub->pTail;
{
pAsm->callers[i].cf_ptr->m_Word0.f.addr
= pAsm->subs[pAsm->callers[i].subDescIndex].unCFoffset;
+
+ if(NULL != pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc)
+ {
+ unMinRegIndex = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->pCompiledSub->MinRegIndex;
+ unRegOffset = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->maxStartReg;
+
+ if(NULL != pAsm->callers[i].prelude_cf_ptr)
+ {
+ pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].prelude_cf_ptr);
+ pALU = pCF_ALU->m_pLinkedALUInstruction;
+ for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++)
+ {
+ pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex;
+ pALU = (R700ALUInstruction*)(pALU->pNextInst);
+ }
+ }
+ if(NULL != pAsm->callers[i].finale_cf_ptr)
+ {
+ pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].finale_cf_ptr);
+ pALU = pCF_ALU->m_pLinkedALUInstruction;
+ for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++)
+ {
+ pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex;
+ pALU = (R700ALUInstruction*)(pALU->pNextInst);
+ }
+ }
+ }
}
return GL_TRUE;
}
+GLboolean callPreSub(r700_AssemblerBase* pAsm,
+ LOADABLE_SCRIPT_SIGNITURE scriptSigniture,
+ COMPILED_SUB * pCompiledSub,
+ GLshort uOutReg,
+ GLshort uNumValidSrc)
+{
+ /* save assemble context */
+ GLuint starting_temp_register_number_save;
+ GLuint number_used_registers_save;
+ GLuint uFirstHelpReg_save;
+ GLuint uHelpReg_save;
+ GLuint uiCurInst_save;
+ struct prog_instruction *pILInst_save;
+ PRESUB_DESC * pPresubDesc;
+ GLboolean bRet;
+ int i;
+
+ R700ControlFlowGenericClause* prelude_cf_ptr = NULL;
+
+ /* copy srcs to presub inputs */
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+ for(i=0; i<uNumValidSrc; i++)
+ {
+ pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = pCompiledSub->srcRegIndex[i];
+ pAsm->D.dst.writex = 1;
+ pAsm->D.dst.writey = 1;
+ pAsm->D.dst.writez = 1;
+ pAsm->D.dst.writew = 1;
+
+ if( GL_FALSE == assemble_src(pAsm, i, 0) )
+ {
+ return GL_FALSE;
+ }
+
+ next_ins(pAsm);
+ }
+ if(uNumValidSrc > 0)
+ {
+ prelude_cf_ptr = pAsm->cf_current_alu_clause_ptr;
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+ }
+
+ /* browse thro existing presubs. */
+ for(i=0; i<pAsm->unNumPresub; i++)
+ {
+ if(pAsm->presubs[i].sptSigniture == scriptSigniture)
+ {
+ break;
+ }
+ }
+
+ if(i == pAsm->unNumPresub)
+ { /* not loaded yet */
+ /* save assemble context */
+ number_used_registers_save = pAsm->number_used_registers;
+ uFirstHelpReg_save = pAsm->uFirstHelpReg;
+ uHelpReg_save = pAsm->uHelpReg;
+ starting_temp_register_number_save = pAsm->starting_temp_register_number;
+ pILInst_save = pAsm->pILInst;
+ uiCurInst_save = pAsm->uiCurInst;
+
+ /* alloc in presub */
+ if( (pAsm->unNumPresub + 1) > pAsm->unPresubArraySize )
+ {
+ pAsm->presubs = (PRESUB_DESC*)_mesa_realloc( (void *)pAsm->presubs,
+ sizeof(PRESUB_DESC) * pAsm->unPresubArraySize,
+ sizeof(PRESUB_DESC) * (pAsm->unPresubArraySize + 4) );
+ if(NULL == pAsm->presubs)
+ {
+ radeon_error("No memeory to allocate built in shader function description structures. \n");
+ return GL_FALSE;
+ }
+ pAsm->unPresubArraySize += 4;
+ }
+
+ pPresubDesc = &(pAsm->presubs[i]);
+ pPresubDesc->sptSigniture = scriptSigniture;
+
+ /* constants offsets need to be final resolved at reloc. */
+ if(0 == pAsm->unNumPresub)
+ {
+ pPresubDesc->unConstantsStart = 0;
+ }
+ else
+ {
+ pPresubDesc->unConstantsStart = pAsm->presubs[i-1].unConstantsStart
+ + pAsm->presubs[i-1].pCompiledSub->NumParameters;
+ }
+
+ pPresubDesc->pCompiledSub = pCompiledSub;
+
+ pPresubDesc->subIL_Shift = pAsm->unCurNumILInsts;
+ pPresubDesc->maxStartReg = uFirstHelpReg_save;
+ pAsm->unCurNumILInsts += pCompiledSub->NumInstructions;
+
+ pAsm->unNumPresub++;
+
+ /* setup new assemble context */
+ pAsm->starting_temp_register_number = 0;
+ pAsm->number_used_registers = pCompiledSub->NumTemporaries;
+ pAsm->uFirstHelpReg = pAsm->number_used_registers;
+ pAsm->uHelpReg = pAsm->uFirstHelpReg;
+
+ bRet = assemble_CAL(pAsm,
+ 0,
+ pPresubDesc->subIL_Shift,
+ pCompiledSub->NumInstructions,
+ pCompiledSub->Instructions,
+ pPresubDesc);
+
+
+ pPresubDesc->number_used_registers = pAsm->number_used_registers;
+
+ /* restore assemble context */
+ pAsm->number_used_registers = number_used_registers_save;
+ pAsm->uFirstHelpReg = uFirstHelpReg_save;
+ pAsm->uHelpReg = uHelpReg_save;
+ pAsm->starting_temp_register_number = starting_temp_register_number_save;
+ pAsm->pILInst = pILInst_save;
+ pAsm->uiCurInst = uiCurInst_save;
+ }
+ else
+ { /* was loaded */
+ pPresubDesc = &(pAsm->presubs[i]);
+
+ bRet = assemble_CAL(pAsm,
+ 0,
+ pPresubDesc->subIL_Shift,
+ pCompiledSub->NumInstructions,
+ pCompiledSub->Instructions,
+ pPresubDesc);
+ }
+
+ if(GL_FALSE == bRet)
+ {
+ radeon_error("Shader presub assemble failed. \n");
+ }
+ else
+ {
+ /* copy presub output to real dst */
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+ pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = pCompiledSub->dstRegIndex;
+ pAsm->S[0].src.swizzlex = pCompiledSub->outputSwizzleX;
+ pAsm->S[0].src.swizzley = pCompiledSub->outputSwizzleY;
+ pAsm->S[0].src.swizzlez = pCompiledSub->outputSwizzleZ;
+ pAsm->S[0].src.swizzlew = pCompiledSub->outputSwizzleW;
+
+ next_ins(pAsm);
+
+ pAsm->callers[pAsm->unCallerArrayPointer - 1].finale_cf_ptr = pAsm->cf_current_alu_clause_ptr;
+ pAsm->callers[pAsm->unCallerArrayPointer - 1].prelude_cf_ptr = prelude_cf_ptr;
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+ }
+
+ if( (pPresubDesc->number_used_registers + pAsm->uFirstHelpReg) > pAsm->number_used_registers )
+ {
+ pAsm->number_used_registers = pPresubDesc->number_used_registers + pAsm->uFirstHelpReg;
+ }
+ if(pAsm->uFirstHelpReg > pPresubDesc->maxStartReg)
+ {
+ pPresubDesc->maxStartReg = pAsm->uFirstHelpReg;
+ }
+
+ return bRet;
+}
+
GLboolean Process_Export(r700_AssemblerBase* pAsm,
GLuint type,
GLuint export_starting_index,
FREE(pR700AsmCode->callers);
}
+ if(NULL != pR700AsmCode->presubs)
+ {
+ FREE(pR700AsmCode->presubs);
+ }
+
return GL_TRUE;
}