2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
33 #include "main/mtypes.h"
34 #include "main/imports.h"
36 #include "radeon_debug.h"
37 #include "r600_context.h"
39 #include "r700_assembler.h"
41 #define USE_CF_FOR_CONTINUE_BREAK 1
42 #define USE_CF_FOR_POP_AFTER 1
44 BITS
addrmode_PVSDST(PVSDST
* pPVSDST
)
46 return pPVSDST
->addrmode0
| ((BITS
)pPVSDST
->addrmode1
<< 1);
49 void setaddrmode_PVSDST(PVSDST
* pPVSDST
, BITS addrmode
)
51 pPVSDST
->addrmode0
= addrmode
& 1;
52 pPVSDST
->addrmode1
= (addrmode
>> 1) & 1;
55 void nomask_PVSDST(PVSDST
* pPVSDST
)
57 pPVSDST
->writex
= pPVSDST
->writey
= pPVSDST
->writez
= pPVSDST
->writew
= 1;
60 BITS
addrmode_PVSSRC(PVSSRC
* pPVSSRC
)
62 return pPVSSRC
->addrmode0
| ((BITS
)pPVSSRC
->addrmode1
<< 1);
65 void setaddrmode_PVSSRC(PVSSRC
* pPVSSRC
, BITS addrmode
)
67 pPVSSRC
->addrmode0
= addrmode
& 1;
68 pPVSSRC
->addrmode1
= (addrmode
>> 1) & 1;
/*
 * Stores the selector swz into the swizzlew field of the source operand.
 *
 * NOTE(review): the embedded line numbers jump from 72 to 77, so statements
 * are missing from this listing; presumably the x/y/z selectors are assigned
 * as well -- confirm against the upstream file before relying on this.
 */
72 void setswizzle_PVSSRC(PVSSRC
* pPVSSRC
, BITS swz
)
77 pPVSSRC
->swizzlew
= swz
;
80 void noswizzle_PVSSRC(PVSSRC
* pPVSSRC
)
82 pPVSSRC
->swizzlex
= SQ_SEL_X
;
83 pPVSSRC
->swizzley
= SQ_SEL_Y
;
84 pPVSSRC
->swizzlez
= SQ_SEL_Z
;
85 pPVSSRC
->swizzlew
= SQ_SEL_W
;
89 swizzleagain_PVSSRC(PVSSRC
* pPVSSRC
, BITS x
, BITS y
, BITS z
, BITS w
)
93 case SQ_SEL_X
: x
= pPVSSRC
->swizzlex
;
95 case SQ_SEL_Y
: x
= pPVSSRC
->swizzley
;
97 case SQ_SEL_Z
: x
= pPVSSRC
->swizzlez
;
99 case SQ_SEL_W
: x
= pPVSSRC
->swizzlew
;
106 case SQ_SEL_X
: y
= pPVSSRC
->swizzlex
;
108 case SQ_SEL_Y
: y
= pPVSSRC
->swizzley
;
110 case SQ_SEL_Z
: y
= pPVSSRC
->swizzlez
;
112 case SQ_SEL_W
: y
= pPVSSRC
->swizzlew
;
119 case SQ_SEL_X
: z
= pPVSSRC
->swizzlex
;
121 case SQ_SEL_Y
: z
= pPVSSRC
->swizzley
;
123 case SQ_SEL_Z
: z
= pPVSSRC
->swizzlez
;
125 case SQ_SEL_W
: z
= pPVSSRC
->swizzlew
;
132 case SQ_SEL_X
: w
= pPVSSRC
->swizzlex
;
134 case SQ_SEL_Y
: w
= pPVSSRC
->swizzley
;
136 case SQ_SEL_Z
: w
= pPVSSRC
->swizzlez
;
138 case SQ_SEL_W
: w
= pPVSSRC
->swizzlew
;
143 pPVSSRC
->swizzlex
= x
;
144 pPVSSRC
->swizzley
= y
;
145 pPVSSRC
->swizzlez
= z
;
146 pPVSSRC
->swizzlew
= w
;
/*
 * Takes a source operand descriptor; only the signature is visible here.
 *
 * NOTE(review): the function body is absent from this listing (embedded
 * numbering jumps from 149 to 157). The name suggests it sets the negate
 * flags of the operand -- confirm against the upstream file.
 */
149 void neg_PVSSRC(PVSSRC
* pPVSSRC
)
/*
 * Takes a source operand descriptor; only the signature is visible here.
 *
 * NOTE(review): the function body is absent from this listing (embedded
 * numbering jumps from 157 to 165). The name suggests it clears the negate
 * flags of the operand -- confirm against the upstream file.
 */
157 void noneg_PVSSRC(PVSSRC
* pPVSSRC
)
165 // negate argument (for SUB instead of ADD and alike)
166 void flipneg_PVSSRC(PVSSRC
* pPVSSRC
)
168 pPVSSRC
->negx
= !pPVSSRC
->negx
;
169 pPVSSRC
->negy
= !pPVSSRC
->negy
;
170 pPVSSRC
->negz
= !pPVSSRC
->negz
;
171 pPVSSRC
->negw
= !pPVSSRC
->negw
;
/*
 * Forces one component of a source operand to the SQ_SEL_0 selector and
 * clears that component's negate flag.
 *
 * The case labels map 0 -> x, 1 -> y, 2 -> z, 3 -> w.
 *
 * NOTE(review): the switch header is not visible in this listing; the case
 * labels are presumably switched on the parameter c -- confirm.
 */
174 void zerocomp_PVSSRC(PVSSRC
* pPVSSRC
, int c
)
178 case 0: pPVSSRC
->swizzlex
= SQ_SEL_0
; pPVSSRC
->negx
= 0; break;
179 case 1: pPVSSRC
->swizzley
= SQ_SEL_0
; pPVSSRC
->negy
= 0; break;
180 case 2: pPVSSRC
->swizzlez
= SQ_SEL_0
; pPVSSRC
->negz
= 0; break;
181 case 3: pPVSSRC
->swizzlew
= SQ_SEL_0
; pPVSSRC
->negw
= 0; break;
/*
 * Forces one component of a source operand to the SQ_SEL_1 selector and
 * clears that component's negate flag.
 *
 * The case labels map 0 -> x, 1 -> y, 2 -> z, 3 -> w.
 *
 * NOTE(review): the switch header is not visible in this listing; the case
 * labels are presumably switched on the parameter c -- confirm.
 */
186 void onecomp_PVSSRC(PVSSRC
* pPVSSRC
, int c
)
190 case 0: pPVSSRC
->swizzlex
= SQ_SEL_1
; pPVSSRC
->negx
= 0; break;
191 case 1: pPVSSRC
->swizzley
= SQ_SEL_1
; pPVSSRC
->negy
= 0; break;
192 case 2: pPVSSRC
->swizzlez
= SQ_SEL_1
; pPVSSRC
->negz
= 0; break;
193 case 3: pPVSSRC
->swizzlew
= SQ_SEL_1
; pPVSSRC
->negw
= 0; break;
198 BITS
is_misc_component_exported(VAP_OUT_VTX_FMT_0
* pOutVTXFmt0
)
200 return (pOutVTXFmt0
->point_size
|
201 pOutVTXFmt0
->edge_flag
|
202 pOutVTXFmt0
->rta_index
|
203 pOutVTXFmt0
->kill_flag
|
204 pOutVTXFmt0
->viewport_index
);
/*
 * Returns the bitwise OR of fragment output format fields; the terms visible
 * here are depth, stencil_ref and coverage_to_mask.
 *
 * NOTE(review): the embedded numbering skips line 211, so one term of the
 * OR expression may be missing from this listing -- confirm upstream.
 */
207 BITS
is_depth_component_exported(OUT_FRAGMENT_FMT_0
* pFPOutFmt
)
209 return (pFPOutFmt
->depth
|
210 pFPOutFmt
->stencil_ref
|
212 pFPOutFmt
->coverage_to_mask
);
215 GLboolean
is_reduction_opcode(PVSDWORD
* dest
)
217 if (dest
->dst
.op3
== 0)
219 if ( (dest
->dst
.opcode
== SQ_OP2_INST_DOT4
|| dest
->dst
.opcode
== SQ_OP2_INST_DOT4_IEEE
|| dest
->dst
.opcode
== SQ_OP2_INST_CUBE
) )
227 GLuint
GetSurfaceFormat(GLenum eType
, GLuint nChannels
, GLuint
* pClient_size
)
229 GLuint format
= FMT_INVALID
;
230 GLuint uiElemSize
= 0;
235 case GL_UNSIGNED_BYTE
:
240 format
= FMT_8
; break;
242 format
= FMT_8_8
; break;
244 format
= FMT_8_8_8
; break;
246 format
= FMT_8_8_8_8
; break;
252 case GL_UNSIGNED_SHORT
:
258 format
= FMT_16
; break;
260 format
= FMT_16_16
; break;
262 format
= FMT_16_16_16
; break;
264 format
= FMT_16_16_16_16
; break;
270 case GL_UNSIGNED_INT
:
276 format
= FMT_32
; break;
278 format
= FMT_32_32
; break;
280 format
= FMT_32_32_32
; break;
282 format
= FMT_32_32_32_32
; break;
293 format
= FMT_32_FLOAT
; break;
295 format
= FMT_32_32_FLOAT
; break;
297 format
= FMT_32_32_32_FLOAT
; break;
299 format
= FMT_32_32_32_32_FLOAT
; break;
309 format
= FMT_32_FLOAT
; break;
311 format
= FMT_32_32_FLOAT
; break;
313 format
= FMT_32_32_32_FLOAT
; break;
315 format
= FMT_32_32_32_32_FLOAT
; break;
322 //GL_ASSERT_NO_CASE();
325 if(NULL
!= pClient_size
)
327 *pClient_size
= uiElemSize
* nChannels
;
333 unsigned int r700GetNumOperands(r700_AssemblerBase
* pAsm
)
340 switch (pAsm
->D
.dst
.opcode
)
342 case SQ_OP2_INST_ADD
:
343 case SQ_OP2_INST_KILLE
:
344 case SQ_OP2_INST_KILLGT
:
345 case SQ_OP2_INST_KILLGE
:
346 case SQ_OP2_INST_KILLNE
:
347 case SQ_OP2_INST_MUL
:
348 case SQ_OP2_INST_MAX
:
349 case SQ_OP2_INST_MIN
:
350 //case SQ_OP2_INST_MAX_DX10:
351 //case SQ_OP2_INST_MIN_DX10:
352 case SQ_OP2_INST_SETE
:
353 case SQ_OP2_INST_SETNE
:
354 case SQ_OP2_INST_SETGT
:
355 case SQ_OP2_INST_SETGE
:
356 case SQ_OP2_INST_PRED_SETE
:
357 case SQ_OP2_INST_PRED_SETGT
:
358 case SQ_OP2_INST_PRED_SETGE
:
359 case SQ_OP2_INST_PRED_SETNE
:
360 case SQ_OP2_INST_DOT4
:
361 case SQ_OP2_INST_DOT4_IEEE
:
362 case SQ_OP2_INST_CUBE
:
365 case SQ_OP2_INST_MOV
:
366 case SQ_OP2_INST_MOVA_FLOOR
:
367 case SQ_OP2_INST_FRACT
:
368 case SQ_OP2_INST_FLOOR
:
369 case SQ_OP2_INST_TRUNC
:
370 case SQ_OP2_INST_EXP_IEEE
:
371 case SQ_OP2_INST_LOG_CLAMPED
:
372 case SQ_OP2_INST_LOG_IEEE
:
373 case SQ_OP2_INST_RECIP_IEEE
:
374 case SQ_OP2_INST_RECIPSQRT_IEEE
:
375 case SQ_OP2_INST_FLT_TO_INT
:
376 case SQ_OP2_INST_SIN
:
377 case SQ_OP2_INST_COS
:
380 default: radeon_error(
381 "Need instruction operand number for %x.\n", pAsm
->D
.dst
.opcode
);
387 int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt
, r700_AssemblerBase
* pAsm
, R700_Shader
* pShader
)
391 Init_R700_Shader(pShader
);
392 pAsm
->pR700Shader
= pShader
;
393 pAsm
->currentShaderType
= spt
;
395 pAsm
->cf_last_export_ptr
= NULL
;
397 pAsm
->cf_current_export_clause_ptr
= NULL
;
398 pAsm
->cf_current_alu_clause_ptr
= NULL
;
399 pAsm
->cf_current_tex_clause_ptr
= NULL
;
400 pAsm
->cf_current_vtx_clause_ptr
= NULL
;
401 pAsm
->cf_current_cf_clause_ptr
= NULL
;
403 // No clause has been created yet
404 pAsm
->cf_current_clause_type
= CF_EMPTY_CLAUSE
;
406 pAsm
->number_of_colorandz_exports
= 0;
407 pAsm
->number_of_exports
= 0;
408 pAsm
->number_of_export_opcodes
= 0;
410 pAsm
->alu_x_opcode
= 0;
419 pAsm
->uLastPosUpdate
= 0;
421 *(BITS
*) &pAsm
->fp_stOutFmt0
= 0;
425 pAsm
->number_used_registers
= 0;
426 pAsm
->uUsedConsts
= 256;
430 pAsm
->uBoolConsts
= 0;
431 pAsm
->uIntConsts
= 0;
436 pAsm
->fc_stack
[0].type
= FC_NONE
;
441 pAsm
->aArgSubst
[3] = (-1);
445 for (i
=0; i
<NUMBER_OF_OUTPUT_COLORS
; i
++)
447 pAsm
->color_export_register_number
[i
] = (-1);
451 pAsm
->depth_export_register_number
= (-1);
452 pAsm
->stencil_export_register_number
= (-1);
453 pAsm
->coverage_to_mask_export_register_number
= (-1);
454 pAsm
->mask_export_register_number
= (-1);
456 pAsm
->starting_export_register_number
= 0;
457 pAsm
->starting_vfetch_register_number
= 0;
458 pAsm
->starting_temp_register_number
= 0;
459 pAsm
->uFirstHelpReg
= 0;
461 pAsm
->input_position_is_used
= GL_FALSE
;
462 pAsm
->input_normal_is_used
= GL_FALSE
;
464 for (i
=0; i
<NUMBER_OF_INPUT_COLORS
; i
++)
466 pAsm
->input_color_is_used
[ i
] = GL_FALSE
;
469 for (i
=0; i
<NUMBER_OF_TEXTURE_UNITS
; i
++)
471 pAsm
->input_texture_unit_is_used
[ i
] = GL_FALSE
;
474 for (i
=0; i
<VERT_ATTRIB_MAX
; i
++)
476 pAsm
->vfetch_instruction_ptr_array
[ i
] = NULL
;
479 pAsm
->number_of_inputs
= 0;
481 pAsm
->is_tex
= GL_FALSE
;
482 pAsm
->need_tex_barrier
= GL_FALSE
;
485 pAsm
->unSubArraySize
= 0;
486 pAsm
->unSubArrayPointer
= 0;
487 pAsm
->callers
= NULL
;
488 pAsm
->unCallerArraySize
= 0;
489 pAsm
->unCallerArrayPointer
= 0;
492 pAsm
->CALLSTACK
[0].FCSP_BeforeEntry
= 0;
493 pAsm
->CALLSTACK
[0].plstCFInstructions_local
494 = &(pAsm
->pR700Shader
->lstCFInstructions
);
496 pAsm
->CALLSTACK
[0].max
= 0;
497 pAsm
->CALLSTACK
[0].current
= 0;
499 SetActiveCFlist(pAsm
->pR700Shader
, pAsm
->CALLSTACK
[0].plstCFInstructions_local
);
506 GLboolean
IsTex(gl_inst_opcode Opcode
)
508 if( (OPCODE_TEX
==Opcode
) || (OPCODE_TXP
==Opcode
) || (OPCODE_TXB
==Opcode
) )
515 GLboolean
IsAlu(gl_inst_opcode Opcode
)
517 //TODO : more for fc and ex for higher spec.
525 int check_current_clause(r700_AssemblerBase
* pAsm
,
526 CF_CLAUSE_TYPE new_clause_type
)
528 if (pAsm
->cf_current_clause_type
!= new_clause_type
)
529 { //Close last open clause
530 switch (pAsm
->cf_current_clause_type
)
533 if ( pAsm
->cf_current_alu_clause_ptr
!= NULL
)
535 pAsm
->cf_current_alu_clause_ptr
= NULL
;
539 if ( pAsm
->cf_current_vtx_clause_ptr
!= NULL
)
541 pAsm
->cf_current_vtx_clause_ptr
= NULL
;
545 if ( pAsm
->cf_current_tex_clause_ptr
!= NULL
)
547 pAsm
->cf_current_tex_clause_ptr
= NULL
;
550 case CF_EXPORT_CLAUSE
:
551 if ( pAsm
->cf_current_export_clause_ptr
!= NULL
)
553 pAsm
->cf_current_export_clause_ptr
= NULL
;
556 case CF_OTHER_CLAUSE
:
557 if ( pAsm
->cf_current_cf_clause_ptr
!= NULL
)
559 pAsm
->cf_current_cf_clause_ptr
= NULL
;
562 case CF_EMPTY_CLAUSE
:
566 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type
);
570 pAsm
->cf_current_clause_type
= CF_EMPTY_CLAUSE
;
573 switch (new_clause_type
)
576 pAsm
->cf_current_clause_type
= CF_ALU_CLAUSE
;
579 pAsm
->cf_current_clause_type
= CF_VTX_CLAUSE
;
582 pAsm
->cf_current_clause_type
= CF_TEX_CLAUSE
;
584 case CF_EXPORT_CLAUSE
:
586 R700ControlFlowSXClause
* pR700ControlFlowSXClause
587 = (R700ControlFlowSXClause
*) CALLOC_STRUCT(R700ControlFlowSXClause
);
589 // Add new export instruction to control flow program
590 if (pR700ControlFlowSXClause
!= 0)
592 pAsm
->cf_current_export_clause_ptr
= pR700ControlFlowSXClause
;
593 Init_R700ControlFlowSXClause(pR700ControlFlowSXClause
);
594 AddCFInstruction( pAsm
->pR700Shader
,
595 (R700ControlFlowInstruction
*)pR700ControlFlowSXClause
);
600 "Error allocating new EXPORT CF instruction in check_current_clause. \n");
603 pAsm
->cf_current_clause_type
= CF_EXPORT_CLAUSE
;
606 case CF_EMPTY_CLAUSE
:
608 case CF_OTHER_CLAUSE
:
609 pAsm
->cf_current_clause_type
= CF_OTHER_CLAUSE
;
613 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type
);
/*
 * Starts a new generic control-flow instruction.
 *
 * Visible steps: asks check_current_clause() to switch the assembler to
 * CF_OTHER_CLAUSE, allocates an R700ControlFlowGenericClause with
 * CALLOC_STRUCT into pAsm->cf_current_cf_clause_ptr, and on success
 * initialises it and appends it to pAsm->pR700Shader via AddCFInstruction().
 *
 * NOTE(review): brace, else and return lines are not visible in this
 * listing, so the exact return values cannot be stated from here.
 */
621 GLboolean
add_cf_instruction(r700_AssemblerBase
* pAsm
)
623 if(GL_FALSE
== check_current_clause(pAsm
, CF_OTHER_CLAUSE
))
628 pAsm
->cf_current_cf_clause_ptr
=
629 (R700ControlFlowGenericClause
*) CALLOC_STRUCT(R700ControlFlowGenericClause
);
631 if (pAsm
->cf_current_cf_clause_ptr
!= NULL
)
633 Init_R700ControlFlowGenericClause(pAsm
->cf_current_cf_clause_ptr
);
634 AddCFInstruction( pAsm
->pR700Shader
,
635 (R700ControlFlowInstruction
*)pAsm
->cf_current_cf_clause_ptr
);
/* NOTE(review): this message says "VFetch" although the clause allocated
 * here is the generic CF clause; it matches the text used in
 * add_vfetch_instruction() and looks copy-pasted -- consider correcting. */
639 radeon_error("Could not allocate a new VFetch CF instruction.\n");
646 GLboolean
add_vfetch_instruction(r700_AssemblerBase
* pAsm
,
647 R700VertexInstruction
* vertex_instruction_ptr
)
649 if( GL_FALSE
== check_current_clause(pAsm
, CF_VTX_CLAUSE
) )
654 if( pAsm
->cf_current_vtx_clause_ptr
== NULL
||
655 ( (pAsm
->cf_current_vtx_clause_ptr
!= NULL
) &&
656 (pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.count
>= GetCFMaxInstructions(pAsm
->cf_current_vtx_clause_ptr
->m_ShaderInstType
)-1)
659 // Create new Vfetch control flow instruction for this new clause
660 pAsm
->cf_current_vtx_clause_ptr
= (R700ControlFlowGenericClause
*) CALLOC_STRUCT(R700ControlFlowGenericClause
);
662 if (pAsm
->cf_current_vtx_clause_ptr
!= NULL
)
664 Init_R700ControlFlowGenericClause(pAsm
->cf_current_vtx_clause_ptr
);
665 AddCFInstruction( pAsm
->pR700Shader
,
666 (R700ControlFlowInstruction
*)pAsm
->cf_current_vtx_clause_ptr
);
670 radeon_error("Could not allocate a new VFetch CF instruction.\n");
674 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.pop_count
= 0x0;
675 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
676 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
677 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.count
= 0x0;
678 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
679 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
680 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_VTX
;
681 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
682 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
684 LinkVertexInstruction(pAsm
->cf_current_vtx_clause_ptr
, vertex_instruction_ptr
);
688 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.count
++;
691 AddVTXInstruction(pAsm
->pR700Shader
, vertex_instruction_ptr
);
696 GLboolean
add_tex_instruction(r700_AssemblerBase
* pAsm
,
697 R700TextureInstruction
* tex_instruction_ptr
)
699 if ( GL_FALSE
== check_current_clause(pAsm
, CF_TEX_CLAUSE
) )
704 if ( pAsm
->cf_current_tex_clause_ptr
== NULL
||
705 ( (pAsm
->cf_current_tex_clause_ptr
!= NULL
) &&
706 (pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.count
>= GetCFMaxInstructions(pAsm
->cf_current_tex_clause_ptr
->m_ShaderInstType
)-1)
709 // new tex cf instruction for this new clause
710 pAsm
->cf_current_tex_clause_ptr
= (R700ControlFlowGenericClause
*) CALLOC_STRUCT(R700ControlFlowGenericClause
);
712 if (pAsm
->cf_current_tex_clause_ptr
!= NULL
)
714 Init_R700ControlFlowGenericClause(pAsm
->cf_current_tex_clause_ptr
);
715 AddCFInstruction( pAsm
->pR700Shader
,
716 (R700ControlFlowInstruction
*)pAsm
->cf_current_tex_clause_ptr
);
720 radeon_error("Could not allocate a new TEX CF instruction.\n");
724 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.pop_count
= 0x0;
725 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
726 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
728 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
729 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
730 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_TEX
;
731 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
732 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.barrier
= 0x0; //0x1;
736 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.count
++;
739 // If this clause contains any TEX instruction that is dependent on a previous instruction,
740 // set the barrier bit
741 if( pAsm
->pInstDeps
[pAsm
->uiCurInst
].nDstDep
> (-1) || pAsm
->need_tex_barrier
== GL_TRUE
)
743 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
746 if(NULL
== pAsm
->cf_current_tex_clause_ptr
->m_pLinkedTEXInstruction
)
748 pAsm
->cf_current_tex_clause_ptr
->m_pLinkedTEXInstruction
= tex_instruction_ptr
;
749 tex_instruction_ptr
->m_pLinkedGenericClause
= pAsm
->cf_current_tex_clause_ptr
;
752 AddTEXInstruction(pAsm
->pR700Shader
, tex_instruction_ptr
);
757 GLboolean
assemble_vfetch_instruction(r700_AssemblerBase
* pAsm
,
759 GLuint destination_register
,
760 GLuint number_of_elements
,
761 GLenum dataElementType
,
762 VTX_FETCH_METHOD
* pFetchMethod
)
764 GLuint client_size_inbyte
;
766 GLuint mega_fetch_count
;
767 GLuint is_mega_fetch_flag
;
769 R700VertexGenericFetch
* vfetch_instruction_ptr
;
770 R700VertexGenericFetch
* assembled_vfetch_instruction_ptr
= pAsm
->vfetch_instruction_ptr_array
[ gl_client_id
];
772 if (assembled_vfetch_instruction_ptr
== NULL
)
774 vfetch_instruction_ptr
= (R700VertexGenericFetch
*) CALLOC_STRUCT(R700VertexGenericFetch
);
775 if (vfetch_instruction_ptr
== NULL
)
779 Init_R700VertexGenericFetch(vfetch_instruction_ptr
);
783 vfetch_instruction_ptr
= assembled_vfetch_instruction_ptr
;
786 data_format
= GetSurfaceFormat(dataElementType
, number_of_elements
, &client_size_inbyte
);
788 if(GL_TRUE
== pFetchMethod
->bEnableMini
) //More conditions here
794 mega_fetch_count
= MEGA_FETCH_BYTES
- 1;
795 is_mega_fetch_flag
= 0x1;
796 pFetchMethod
->mega_fetch_remainder
= MEGA_FETCH_BYTES
- client_size_inbyte
;
799 vfetch_instruction_ptr
->m_Word0
.f
.vtx_inst
= SQ_VTX_INST_FETCH
;
800 vfetch_instruction_ptr
->m_Word0
.f
.fetch_type
= SQ_VTX_FETCH_VERTEX_DATA
;
801 vfetch_instruction_ptr
->m_Word0
.f
.fetch_whole_quad
= 0x0;
803 vfetch_instruction_ptr
->m_Word0
.f
.buffer_id
= gl_client_id
;
804 vfetch_instruction_ptr
->m_Word0
.f
.src_gpr
= 0x0;
805 vfetch_instruction_ptr
->m_Word0
.f
.src_rel
= SQ_ABSOLUTE
;
806 vfetch_instruction_ptr
->m_Word0
.f
.src_sel_x
= SQ_SEL_X
;
807 vfetch_instruction_ptr
->m_Word0
.f
.mega_fetch_count
= mega_fetch_count
;
809 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_x
= (number_of_elements
< 1) ? SQ_SEL_0
: SQ_SEL_X
;
810 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_y
= (number_of_elements
< 2) ? SQ_SEL_0
: SQ_SEL_Y
;
811 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_z
= (number_of_elements
< 3) ? SQ_SEL_0
: SQ_SEL_Z
;
812 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_w
= (number_of_elements
< 4) ? SQ_SEL_1
: SQ_SEL_W
;
814 vfetch_instruction_ptr
->m_Word1
.f
.use_const_fields
= 1;
816 // Destination register
817 vfetch_instruction_ptr
->m_Word1_GPR
.f
.dst_gpr
= destination_register
;
818 vfetch_instruction_ptr
->m_Word1_GPR
.f
.dst_rel
= SQ_ABSOLUTE
;
820 vfetch_instruction_ptr
->m_Word2
.f
.offset
= 0;
821 vfetch_instruction_ptr
->m_Word2
.f
.const_buf_no_stride
= 0x0;
823 vfetch_instruction_ptr
->m_Word2
.f
.mega_fetch
= is_mega_fetch_flag
;
825 if (assembled_vfetch_instruction_ptr
== NULL
)
827 if ( GL_FALSE
== add_vfetch_instruction(pAsm
, (R700VertexInstruction
*)vfetch_instruction_ptr
) )
832 if (pAsm
->vfetch_instruction_ptr_array
[ gl_client_id
] != NULL
)
838 pAsm
->vfetch_instruction_ptr_array
[ gl_client_id
] = vfetch_instruction_ptr
;
845 GLboolean
assemble_vfetch_instruction2(r700_AssemblerBase
* pAsm
,
846 GLuint destination_register
,
852 VTX_FETCH_METHOD
* pFetchMethod
)
854 GLuint client_size_inbyte
;
856 GLuint mega_fetch_count
;
857 GLuint is_mega_fetch_flag
;
859 R700VertexGenericFetch
* vfetch_instruction_ptr
;
860 R700VertexGenericFetch
* assembled_vfetch_instruction_ptr
861 = pAsm
->vfetch_instruction_ptr_array
[element
];
863 if (assembled_vfetch_instruction_ptr
== NULL
)
865 vfetch_instruction_ptr
= (R700VertexGenericFetch
*) CALLOC_STRUCT(R700VertexGenericFetch
);
866 if (vfetch_instruction_ptr
== NULL
)
870 Init_R700VertexGenericFetch(vfetch_instruction_ptr
);
874 vfetch_instruction_ptr
= assembled_vfetch_instruction_ptr
;
877 data_format
= GetSurfaceFormat(type
, size
, &client_size_inbyte
);
879 if(GL_TRUE
== pFetchMethod
->bEnableMini
) //More conditions here
885 mega_fetch_count
= MEGA_FETCH_BYTES
- 1;
886 is_mega_fetch_flag
= 0x1;
887 pFetchMethod
->mega_fetch_remainder
= MEGA_FETCH_BYTES
- client_size_inbyte
;
890 vfetch_instruction_ptr
->m_Word0
.f
.vtx_inst
= SQ_VTX_INST_FETCH
;
891 vfetch_instruction_ptr
->m_Word0
.f
.fetch_type
= SQ_VTX_FETCH_VERTEX_DATA
;
892 vfetch_instruction_ptr
->m_Word0
.f
.fetch_whole_quad
= 0x0;
894 vfetch_instruction_ptr
->m_Word0
.f
.buffer_id
= element
;
895 vfetch_instruction_ptr
->m_Word0
.f
.src_gpr
= 0x0;
896 vfetch_instruction_ptr
->m_Word0
.f
.src_rel
= SQ_ABSOLUTE
;
897 vfetch_instruction_ptr
->m_Word0
.f
.src_sel_x
= SQ_SEL_X
;
898 vfetch_instruction_ptr
->m_Word0
.f
.mega_fetch_count
= mega_fetch_count
;
900 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_x
= (size
< 1) ? SQ_SEL_0
: SQ_SEL_X
;
901 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_y
= (size
< 2) ? SQ_SEL_0
: SQ_SEL_Y
;
902 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_z
= (size
< 3) ? SQ_SEL_0
: SQ_SEL_Z
;
903 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_w
= (size
< 4) ? SQ_SEL_1
: SQ_SEL_W
;
905 vfetch_instruction_ptr
->m_Word1
.f
.use_const_fields
= 1;
906 vfetch_instruction_ptr
->m_Word1
.f
.data_format
= data_format
;
907 vfetch_instruction_ptr
->m_Word2
.f
.endian_swap
= SQ_ENDIAN_NONE
;
911 vfetch_instruction_ptr
->m_Word1
.f
.format_comp_all
= SQ_FORMAT_COMP_SIGNED
;
915 vfetch_instruction_ptr
->m_Word1
.f
.format_comp_all
= SQ_FORMAT_COMP_UNSIGNED
;
918 if(GL_TRUE
== normalize
)
920 vfetch_instruction_ptr
->m_Word1
.f
.num_format_all
= SQ_NUM_FORMAT_NORM
;
924 vfetch_instruction_ptr
->m_Word1
.f
.num_format_all
= SQ_NUM_FORMAT_INT
;
927 // Destination register
928 vfetch_instruction_ptr
->m_Word1_GPR
.f
.dst_gpr
= destination_register
;
929 vfetch_instruction_ptr
->m_Word1_GPR
.f
.dst_rel
= SQ_ABSOLUTE
;
931 vfetch_instruction_ptr
->m_Word2
.f
.offset
= 0;
932 vfetch_instruction_ptr
->m_Word2
.f
.const_buf_no_stride
= 0x0;
934 vfetch_instruction_ptr
->m_Word2
.f
.mega_fetch
= is_mega_fetch_flag
;
936 if (assembled_vfetch_instruction_ptr
== NULL
)
938 if ( GL_FALSE
== add_vfetch_instruction(pAsm
, (R700VertexInstruction
*)vfetch_instruction_ptr
) )
943 if (pAsm
->vfetch_instruction_ptr_array
[element
] != NULL
)
949 pAsm
->vfetch_instruction_ptr_array
[element
] = vfetch_instruction_ptr
;
956 GLboolean
cleanup_vfetch_instructions(r700_AssemblerBase
* pAsm
)
959 pAsm
->cf_current_clause_type
= CF_EMPTY_CLAUSE
;
960 pAsm
->cf_current_vtx_clause_ptr
= NULL
;
962 for (i
=0; i
<VERT_ATTRIB_MAX
; i
++)
964 pAsm
->vfetch_instruction_ptr_array
[ i
] = NULL
;
967 cleanup_vfetch_shaderinst(pAsm
->pR700Shader
);
/*
 * Hands out a helper register number.
 *
 * Visible steps: the current pAsm->uHelpReg is captured in r, and
 * pAsm->number_used_registers is raised to pAsm->uHelpReg whenever
 * uHelpReg exceeds it.
 *
 * NOTE(review): the embedded numbering skips lines 975 and 979-981; the
 * statement that advances uHelpReg and the return of r are presumably on
 * those lines -- confirm against the upstream file.
 */
972 GLuint
gethelpr(r700_AssemblerBase
* pAsm
)
974 GLuint r
= pAsm
->uHelpReg
;
976 if (pAsm
->uHelpReg
> pAsm
->number_used_registers
)
978 pAsm
->number_used_registers
= pAsm
->uHelpReg
;
982 void resethelpr(r700_AssemblerBase
* pAsm
)
984 pAsm
->uHelpReg
= pAsm
->uFirstHelpReg
;
987 void checkop_init(r700_AssemblerBase
* pAsm
)
993 pAsm
->aArgSubst
[3] = -1;
996 GLboolean
mov_temp(r700_AssemblerBase
* pAsm
, int src
)
998 GLuint tmp
= gethelpr(pAsm
);
1000 //mov src to temp helper gpr.
1001 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
1003 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1005 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
1006 pAsm
->D
.dst
.reg
= tmp
;
1008 nomask_PVSDST(&(pAsm
->D
.dst
));
1010 if( GL_FALSE
== assemble_src(pAsm
, src
, 0) )
1015 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
1016 noneg_PVSSRC(&(pAsm
->S
[0].src
));
1018 if( GL_FALSE
== next_ins(pAsm
) )
1023 pAsm
->aArgSubst
[1 + src
] = tmp
;
1028 GLboolean
checkop1(r700_AssemblerBase
* pAsm
)
1034 GLboolean
checkop2(r700_AssemblerBase
* pAsm
)
1036 GLboolean bSrcConst
[2];
1037 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1041 if( (pILInst
->SrcReg
[0].File
== PROGRAM_UNIFORM
) ||
1042 (pILInst
->SrcReg
[0].File
== PROGRAM_CONSTANT
) ||
1043 (pILInst
->SrcReg
[0].File
== PROGRAM_LOCAL_PARAM
) ||
1044 (pILInst
->SrcReg
[0].File
== PROGRAM_ENV_PARAM
) ||
1045 (pILInst
->SrcReg
[0].File
== PROGRAM_STATE_VAR
) )
1047 bSrcConst
[0] = GL_TRUE
;
1051 bSrcConst
[0] = GL_FALSE
;
1053 if( (pILInst
->SrcReg
[1].File
== PROGRAM_UNIFORM
) ||
1054 (pILInst
->SrcReg
[1].File
== PROGRAM_CONSTANT
) ||
1055 (pILInst
->SrcReg
[1].File
== PROGRAM_LOCAL_PARAM
) ||
1056 (pILInst
->SrcReg
[1].File
== PROGRAM_ENV_PARAM
) ||
1057 (pILInst
->SrcReg
[1].File
== PROGRAM_STATE_VAR
) )
1059 bSrcConst
[1] = GL_TRUE
;
1063 bSrcConst
[1] = GL_FALSE
;
1066 if( (bSrcConst
[0] == GL_TRUE
) && (bSrcConst
[1] == GL_TRUE
) )
1068 if(pILInst
->SrcReg
[0].Index
!= pILInst
->SrcReg
[1].Index
)
1070 if( GL_FALSE
== mov_temp(pAsm
, 1) )
1080 GLboolean
checkop3(r700_AssemblerBase
* pAsm
)
1082 GLboolean bSrcConst
[3];
1083 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1087 if( (pILInst
->SrcReg
[0].File
== PROGRAM_UNIFORM
) ||
1088 (pILInst
->SrcReg
[0].File
== PROGRAM_CONSTANT
) ||
1089 (pILInst
->SrcReg
[0].File
== PROGRAM_LOCAL_PARAM
) ||
1090 (pILInst
->SrcReg
[0].File
== PROGRAM_ENV_PARAM
) ||
1091 (pILInst
->SrcReg
[0].File
== PROGRAM_STATE_VAR
) )
1093 bSrcConst
[0] = GL_TRUE
;
1097 bSrcConst
[0] = GL_FALSE
;
1099 if( (pILInst
->SrcReg
[1].File
== PROGRAM_UNIFORM
) ||
1100 (pILInst
->SrcReg
[1].File
== PROGRAM_CONSTANT
) ||
1101 (pILInst
->SrcReg
[1].File
== PROGRAM_LOCAL_PARAM
) ||
1102 (pILInst
->SrcReg
[1].File
== PROGRAM_ENV_PARAM
) ||
1103 (pILInst
->SrcReg
[1].File
== PROGRAM_STATE_VAR
) )
1105 bSrcConst
[1] = GL_TRUE
;
1109 bSrcConst
[1] = GL_FALSE
;
1111 if( (pILInst
->SrcReg
[2].File
== PROGRAM_UNIFORM
) ||
1112 (pILInst
->SrcReg
[2].File
== PROGRAM_CONSTANT
) ||
1113 (pILInst
->SrcReg
[2].File
== PROGRAM_LOCAL_PARAM
) ||
1114 (pILInst
->SrcReg
[2].File
== PROGRAM_ENV_PARAM
) ||
1115 (pILInst
->SrcReg
[2].File
== PROGRAM_STATE_VAR
) )
1117 bSrcConst
[2] = GL_TRUE
;
1121 bSrcConst
[2] = GL_FALSE
;
1124 if( (GL_TRUE
== bSrcConst
[0]) &&
1125 (GL_TRUE
== bSrcConst
[1]) &&
1126 (GL_TRUE
== bSrcConst
[2]) )
1128 if( GL_FALSE
== mov_temp(pAsm
, 1) )
1132 if( GL_FALSE
== mov_temp(pAsm
, 2) )
1139 else if( (GL_TRUE
== bSrcConst
[0]) &&
1140 (GL_TRUE
== bSrcConst
[1]) )
1142 if(pILInst
->SrcReg
[0].Index
!= pILInst
->SrcReg
[1].Index
)
1144 if( GL_FALSE
== mov_temp(pAsm
, 1) )
1152 else if ( (GL_TRUE
== bSrcConst
[0]) &&
1153 (GL_TRUE
== bSrcConst
[2]) )
1155 if(pILInst
->SrcReg
[0].Index
!= pILInst
->SrcReg
[2].Index
)
1157 if( GL_FALSE
== mov_temp(pAsm
, 2) )
1165 else if( (GL_TRUE
== bSrcConst
[1]) &&
1166 (GL_TRUE
== bSrcConst
[2]) )
1168 if(pILInst
->SrcReg
[1].Index
!= pILInst
->SrcReg
[2].Index
)
1170 if( GL_FALSE
== mov_temp(pAsm
, 2) )
1182 GLboolean
assemble_src(r700_AssemblerBase
*pAsm
,
1186 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1193 if(pAsm
->aArgSubst
[1+src
] >= 0)
1195 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1196 pAsm
->S
[fld
].src
.rtype
= SRC_REG_TEMPORARY
;
1197 pAsm
->S
[fld
].src
.reg
= pAsm
->aArgSubst
[1+src
];
1201 switch (pILInst
->SrcReg
[src
].File
)
1203 case PROGRAM_TEMPORARY
:
1204 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1205 pAsm
->S
[fld
].src
.rtype
= SRC_REG_TEMPORARY
;
1206 pAsm
->S
[fld
].src
.reg
= pILInst
->SrcReg
[src
].Index
+ pAsm
->starting_temp_register_number
;
1208 case PROGRAM_CONSTANT
:
1209 case PROGRAM_LOCAL_PARAM
:
1210 case PROGRAM_ENV_PARAM
:
1211 case PROGRAM_STATE_VAR
:
1212 case PROGRAM_UNIFORM
:
1213 if (1 == pILInst
->SrcReg
[src
].RelAddr
)
1215 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_RELATIVE_A0
);
1219 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1222 pAsm
->S
[fld
].src
.rtype
= SRC_REG_CONSTANT
;
1223 pAsm
->S
[fld
].src
.reg
= pILInst
->SrcReg
[src
].Index
;
1226 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1227 pAsm
->S
[fld
].src
.rtype
= SRC_REG_INPUT
;
1228 switch (pAsm
->currentShaderType
)
1231 pAsm
->S
[fld
].src
.reg
= pAsm
->uiFP_AttributeMap
[pILInst
->SrcReg
[src
].Index
];
1234 pAsm
->S
[fld
].src
.reg
= pAsm
->ucVP_AttributeMap
[pILInst
->SrcReg
[src
].Index
];
1239 radeon_error("Invalid source argument type : %d \n", pILInst
->SrcReg
[src
].File
);
1244 pAsm
->S
[fld
].src
.swizzlex
= pILInst
->SrcReg
[src
].Swizzle
& 0x7;
1245 pAsm
->S
[fld
].src
.swizzley
= (pILInst
->SrcReg
[src
].Swizzle
>> 3) & 0x7;
1246 pAsm
->S
[fld
].src
.swizzlez
= (pILInst
->SrcReg
[src
].Swizzle
>> 6) & 0x7;
1247 pAsm
->S
[fld
].src
.swizzlew
= (pILInst
->SrcReg
[src
].Swizzle
>> 9) & 0x7;
1249 pAsm
->S
[fld
].src
.negx
= pILInst
->SrcReg
[src
].Negate
& 0x1;
1250 pAsm
->S
[fld
].src
.negy
= (pILInst
->SrcReg
[src
].Negate
>> 1) & 0x1;
1251 pAsm
->S
[fld
].src
.negz
= (pILInst
->SrcReg
[src
].Negate
>> 2) & 0x1;
1252 pAsm
->S
[fld
].src
.negw
= (pILInst
->SrcReg
[src
].Negate
>> 3) & 0x1;
1257 GLboolean
assemble_dst(r700_AssemblerBase
*pAsm
)
1259 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1260 switch (pILInst
->DstReg
.File
)
1262 case PROGRAM_TEMPORARY
:
1263 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1264 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
1265 pAsm
->D
.dst
.reg
= pILInst
->DstReg
.Index
+ pAsm
->starting_temp_register_number
;
1267 case PROGRAM_ADDRESS
:
1268 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1269 pAsm
->D
.dst
.rtype
= DST_REG_A0
;
1270 pAsm
->D
.dst
.reg
= 0;
1272 case PROGRAM_OUTPUT
:
1273 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1274 pAsm
->D
.dst
.rtype
= DST_REG_OUT
;
1275 switch (pAsm
->currentShaderType
)
1278 pAsm
->D
.dst
.reg
= pAsm
->uiFP_OutputMap
[pILInst
->DstReg
.Index
];
1281 pAsm
->D
.dst
.reg
= pAsm
->ucVP_OutputMap
[pILInst
->DstReg
.Index
];
1286 radeon_error("Invalid destination output argument type\n");
1290 pAsm
->D
.dst
.writex
= pILInst
->DstReg
.WriteMask
& 0x1;
1291 pAsm
->D
.dst
.writey
= (pILInst
->DstReg
.WriteMask
>> 1) & 0x1;
1292 pAsm
->D
.dst
.writez
= (pILInst
->DstReg
.WriteMask
>> 2) & 0x1;
1293 pAsm
->D
.dst
.writew
= (pILInst
->DstReg
.WriteMask
>> 3) & 0x1;
1295 if(pILInst
->SaturateMode
== SATURATE_ZERO_ONE
)
1297 pAsm
->D2
.dst2
.SaturateMode
= 1;
1301 pAsm
->D2
.dst2
.SaturateMode
= 0;
1307 GLboolean
tex_dst(r700_AssemblerBase
*pAsm
)
1309 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1311 if(PROGRAM_TEMPORARY
== pILInst
->DstReg
.File
)
1313 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
1314 pAsm
->D
.dst
.reg
= pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.Index
+ pAsm
->starting_temp_register_number
;
1316 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1318 else if(PROGRAM_OUTPUT
== pILInst
->DstReg
.File
)
1320 pAsm
->D
.dst
.rtype
= DST_REG_OUT
;
1321 switch (pAsm
->currentShaderType
)
1324 pAsm
->D
.dst
.reg
= pAsm
->uiFP_OutputMap
[pILInst
->DstReg
.Index
];
1327 pAsm
->D
.dst
.reg
= pAsm
->ucVP_OutputMap
[pILInst
->DstReg
.Index
];
1331 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1335 radeon_error("Invalid destination output argument type\n");
1339 pAsm
->D
.dst
.writex
= pILInst
->DstReg
.WriteMask
& 0x1;
1340 pAsm
->D
.dst
.writey
= (pILInst
->DstReg
.WriteMask
>> 1) & 0x1;
1341 pAsm
->D
.dst
.writez
= (pILInst
->DstReg
.WriteMask
>> 2) & 0x1;
1342 pAsm
->D
.dst
.writew
= (pILInst
->DstReg
.WriteMask
>> 3) & 0x1;
/*
 * Fills pAsm->S[0].src (the texture-coordinate source operand) for the
 * current IL instruction pAsm->pILInst[pAsm->uiCurInst].
 *
 * Visible behaviour:
 *  - If pAsm->aArgSubst[1] >= 0, the operand is the substituted temporary
 *    register aArgSubst[1].
 *  - Otherwise the operand is chosen from SrcReg[0].File: temporaries map to
 *    Index + starting_temp_register_number, and fragment inputs map through
 *    pAsm->uiFP_AttributeMap[].
 *  - When no valid coordinate was found, radeon_error() is called.
 *  - Swizzle selectors are unpacked from SrcReg[0].Swizzle (3 bits each) and
 *    negate flags from SrcReg[0].Negate (1 bit each).
 *
 * NOTE(review): brace, break and return lines are not visible in this
 * listing, so fall-through between cases and the exact return values cannot
 * be confirmed from here.
 */
1347 GLboolean
tex_src(r700_AssemblerBase
*pAsm
)
1349 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1351 GLboolean bValidTexCoord
= GL_FALSE
;
1353 if(pAsm
->aArgSubst
[1] >= 0)
1355 bValidTexCoord
= GL_TRUE
;
1356 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
1357 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
1358 pAsm
->S
[0].src
.reg
= pAsm
->aArgSubst
[1];
1362 switch (pILInst
->SrcReg
[0].File
) {
1363 case PROGRAM_UNIFORM
:
1364 case PROGRAM_CONSTANT
:
1365 case PROGRAM_LOCAL_PARAM
:
1366 case PROGRAM_ENV_PARAM
:
1367 case PROGRAM_STATE_VAR
:
1369 case PROGRAM_TEMPORARY
:
1370 bValidTexCoord
= GL_TRUE
;
1371 pAsm
->S
[0].src
.reg
= pILInst
->SrcReg
[0].Index
+
1372 pAsm
->starting_temp_register_number
;
1373 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
1376 switch (pILInst
->SrcReg
[0].Index
)
1378 case FRAG_ATTRIB_WPOS
:
1379 case FRAG_ATTRIB_COL0
:
1380 case FRAG_ATTRIB_COL1
:
1381 case FRAG_ATTRIB_FOGC
:
1382 case FRAG_ATTRIB_TEX0
:
1383 case FRAG_ATTRIB_TEX1
:
1384 case FRAG_ATTRIB_TEX2
:
1385 case FRAG_ATTRIB_TEX3
:
1386 case FRAG_ATTRIB_TEX4
:
1387 case FRAG_ATTRIB_TEX5
:
1388 case FRAG_ATTRIB_TEX6
:
1389 case FRAG_ATTRIB_TEX7
:
1390 bValidTexCoord
= GL_TRUE
;
1391 pAsm
->S
[0].src
.reg
=
1392 pAsm
->uiFP_AttributeMap
[pILInst
->SrcReg
[0].Index
];
1393 pAsm
->S
[0].src
.rtype
= SRC_REG_INPUT
;
1395 case FRAG_ATTRIB_FACE
:
1396 fprintf(stderr
, "FRAG_ATTRIB_FACE unsupported\n");
1398 case FRAG_ATTRIB_PNTC
:
1399 fprintf(stderr
, "FRAG_ATTRIB_PNTC unsupported\n");
/* NOTE(review): with `||` this range test is true for every Index whenever
 * FRAG_ATTRIB_VAR0 < FRAG_ATTRIB_MAX (any value is either >= VAR0 or < MAX),
 * so it also accepts the FACE/PNTC inputs reported as unsupported just
 * above. `&&` (VAR0 <= Index < MAX) was probably intended -- confirm. */
1403 if( (pILInst
->SrcReg
[0].Index
>= FRAG_ATTRIB_VAR0
) ||
1404 (pILInst
->SrcReg
[0].Index
< FRAG_ATTRIB_MAX
) )
1406 bValidTexCoord
= GL_TRUE
;
1407 pAsm
->S
[0].src
.reg
=
1408 pAsm
->uiFP_AttributeMap
[pILInst
->SrcReg
[0].Index
];
1409 pAsm
->S
[0].src
.rtype
= SRC_REG_INPUT
;
1416 if(GL_TRUE
== bValidTexCoord
)
1418 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
1422 radeon_error("Invalid source texcoord for TEX instruction\n");
/* Swizzle word: four 3-bit selectors packed x (bits 0-2), y (3-5),
 * z (6-8), w (9-11). */
1426 pAsm
->S
[0].src
.swizzlex
= pILInst
->SrcReg
[0].Swizzle
& 0x7;
1427 pAsm
->S
[0].src
.swizzley
= (pILInst
->SrcReg
[0].Swizzle
>> 3) & 0x7;
1428 pAsm
->S
[0].src
.swizzlez
= (pILInst
->SrcReg
[0].Swizzle
>> 6) & 0x7;
1429 pAsm
->S
[0].src
.swizzlew
= (pILInst
->SrcReg
[0].Swizzle
>> 9) & 0x7;
/* Negate word: one bit per component, x in bit 0 through w in bit 3. */
1431 pAsm
->S
[0].src
.negx
= pILInst
->SrcReg
[0].Negate
& 0x1;
1432 pAsm
->S
[0].src
.negy
= (pILInst
->SrcReg
[0].Negate
>> 1) & 0x1;
1433 pAsm
->S
[0].src
.negz
= (pILInst
->SrcReg
[0].Negate
>> 2) & 0x1;
1434 pAsm
->S
[0].src
.negw
= (pILInst
->SrcReg
[0].Negate
>> 3) & 0x1;
1439 GLboolean
assemble_tex_instruction(r700_AssemblerBase
*pAsm
, GLboolean normalized
)
1441 PVSSRC
* texture_coordinate_source
;
1442 PVSSRC
* texture_unit_source
;
1444 R700TextureInstruction
* tex_instruction_ptr
= (R700TextureInstruction
*) CALLOC_STRUCT(R700TextureInstruction
);
1445 if (tex_instruction_ptr
== NULL
)
1449 Init_R700TextureInstruction(tex_instruction_ptr
);
1451 texture_coordinate_source
= &(pAsm
->S
[0].src
);
1452 texture_unit_source
= &(pAsm
->S
[1].src
);
1454 tex_instruction_ptr
->m_Word0
.f
.tex_inst
= pAsm
->D
.dst
.opcode
;
1455 tex_instruction_ptr
->m_Word0
.f
.bc_frac_mode
= 0x0;
1456 tex_instruction_ptr
->m_Word0
.f
.fetch_whole_quad
= 0x0;
1458 tex_instruction_ptr
->m_Word0
.f
.resource_id
= texture_unit_source
->reg
;
1460 tex_instruction_ptr
->m_Word1
.f
.lod_bias
= 0x0;
1462 tex_instruction_ptr
->m_Word1
.f
.coord_type_x
= SQ_TEX_NORMALIZED
;
1463 tex_instruction_ptr
->m_Word1
.f
.coord_type_y
= SQ_TEX_NORMALIZED
;
1464 tex_instruction_ptr
->m_Word1
.f
.coord_type_z
= SQ_TEX_NORMALIZED
;
1465 tex_instruction_ptr
->m_Word1
.f
.coord_type_w
= SQ_TEX_NORMALIZED
;
1467 /* XXX: UNNORMALIZED tex coords have limited wrap modes */
1468 tex_instruction_ptr
->m_Word1
.f
.coord_type_x
= SQ_TEX_UNNORMALIZED
;
1469 tex_instruction_ptr
->m_Word1
.f
.coord_type_y
= SQ_TEX_UNNORMALIZED
;
1470 tex_instruction_ptr
->m_Word1
.f
.coord_type_z
= SQ_TEX_UNNORMALIZED
;
1471 tex_instruction_ptr
->m_Word1
.f
.coord_type_w
= SQ_TEX_UNNORMALIZED
;
1474 tex_instruction_ptr
->m_Word2
.f
.offset_x
= 0x0;
1475 tex_instruction_ptr
->m_Word2
.f
.offset_y
= 0x0;
1476 tex_instruction_ptr
->m_Word2
.f
.offset_z
= 0x0;
1478 tex_instruction_ptr
->m_Word2
.f
.sampler_id
= texture_unit_source
->reg
;
1481 if ( (pAsm
->D
.dst
.rtype
== DST_REG_TEMPORARY
) ||
1482 (pAsm
->D
.dst
.rtype
== DST_REG_OUT
) )
1484 tex_instruction_ptr
->m_Word0
.f
.src_gpr
= texture_coordinate_source
->reg
;
1485 tex_instruction_ptr
->m_Word0
.f
.src_rel
= SQ_ABSOLUTE
;
1487 tex_instruction_ptr
->m_Word1
.f
.dst_gpr
= pAsm
->D
.dst
.reg
;
1488 tex_instruction_ptr
->m_Word1
.f
.dst_rel
= SQ_ABSOLUTE
;
1490 tex_instruction_ptr
->m_Word1
.f
.dst_sel_x
= (pAsm
->D
.dst
.writex
? texture_unit_source
->swizzlex
: SQ_SEL_MASK
);
1491 tex_instruction_ptr
->m_Word1
.f
.dst_sel_y
= (pAsm
->D
.dst
.writey
? texture_unit_source
->swizzley
: SQ_SEL_MASK
);
1492 tex_instruction_ptr
->m_Word1
.f
.dst_sel_z
= (pAsm
->D
.dst
.writez
? texture_unit_source
->swizzlez
: SQ_SEL_MASK
);
1493 tex_instruction_ptr
->m_Word1
.f
.dst_sel_w
= (pAsm
->D
.dst
.writew
? texture_unit_source
->swizzlew
: SQ_SEL_MASK
);
1496 tex_instruction_ptr
->m_Word2
.f
.src_sel_x
= texture_coordinate_source
->swizzlex
;
1497 tex_instruction_ptr
->m_Word2
.f
.src_sel_y
= texture_coordinate_source
->swizzley
;
1498 tex_instruction_ptr
->m_Word2
.f
.src_sel_z
= texture_coordinate_source
->swizzlez
;
1499 tex_instruction_ptr
->m_Word2
.f
.src_sel_w
= texture_coordinate_source
->swizzlew
;
1503 radeon_error("Only temp destination registers supported for TEX dest regs.\n");
1507 if( GL_FALSE
== add_tex_instruction(pAsm
, tex_instruction_ptr
) )
1515 void initialize(r700_AssemblerBase
*pAsm
)
1517 GLuint cycle
, component
;
1519 for (cycle
=0; cycle
<NUMBER_OF_CYCLES
; cycle
++)
1521 for (component
=0; component
<NUMBER_OF_COMPONENTS
; component
++)
1523 pAsm
->hw_gpr
[cycle
][component
] = (-1);
1526 for (component
=0; component
<NUMBER_OF_COMPONENTS
; component
++)
1528 pAsm
->hw_cfile_addr
[component
] = (-1);
1529 pAsm
->hw_cfile_chan
[component
] = (-1);
1533 GLboolean
assemble_alu_src(R700ALUInstruction
* alu_instruction_ptr
,
1536 BITS scalar_channel_index
)
1543 //--------------------------------------------------------------------------
1544 // Source for operands src0, src1.
1545 // Values [0,127] correspond to GPR[0..127].
1546 // Values [256,511] correspond to cfile constants c[0..255].
1548 //--------------------------------------------------------------------------
1549 // Other special values are shown in the list below.
1551 // 248 SQ_ALU_SRC_0: special constant 0.0.
1552 // 249 SQ_ALU_SRC_1: special constant 1.0 float.
1554 // 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
1555 // 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
1557 // 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
1558 // 253 SQ_ALU_SRC_LITERAL: literal constant.
1560 // 254 SQ_ALU_SRC_PV: previous vector result.
1561 // 255 SQ_ALU_SRC_PS: previous scalar result.
1562 //--------------------------------------------------------------------------
1564 BITS channel_swizzle
;
1565 switch (scalar_channel_index
)
1567 case 0: channel_swizzle
= pSource
->swizzlex
; break;
1568 case 1: channel_swizzle
= pSource
->swizzley
; break;
1569 case 2: channel_swizzle
= pSource
->swizzlez
; break;
1570 case 3: channel_swizzle
= pSource
->swizzlew
; break;
1571 default: channel_swizzle
= SQ_SEL_MASK
; break;
1574 if(channel_swizzle
== SQ_SEL_0
)
1576 src_sel
= SQ_ALU_SRC_0
;
1578 else if (channel_swizzle
== SQ_SEL_1
)
1580 src_sel
= SQ_ALU_SRC_1
;
1584 if ( (pSource
->rtype
== SRC_REG_TEMPORARY
) ||
1585 (pSource
->rtype
== SRC_REG_INPUT
)
1588 src_sel
= pSource
->reg
;
1590 else if (pSource
->rtype
== SRC_REG_CONSTANT
)
1592 src_sel
= pSource
->reg
+ CFILE_REGISTER_OFFSET
;
1594 else if (pSource
->rtype
== SRC_REC_LITERAL
)
1596 src_sel
= SQ_ALU_SRC_LITERAL
;
1600 radeon_error("Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.\n",
1601 source_index
, pSource
->rtype
);
1606 if( ADDR_ABSOLUTE
== addrmode_PVSSRC(pSource
) )
1608 src_rel
= SQ_ABSOLUTE
;
1612 src_rel
= SQ_RELATIVE
;
1615 switch (channel_swizzle
)
1618 src_chan
= SQ_CHAN_X
;
1621 src_chan
= SQ_CHAN_Y
;
1624 src_chan
= SQ_CHAN_Z
;
1627 src_chan
= SQ_CHAN_W
;
1631 // Does not matter since src_sel controls
1632 src_chan
= SQ_CHAN_X
;
1635 radeon_error("Unknown source select value (%d) in assemble_alu_src().\n", channel_swizzle
);
1640 switch (scalar_channel_index
)
1642 case 0: src_neg
= pSource
->negx
; break;
1643 case 1: src_neg
= pSource
->negy
; break;
1644 case 2: src_neg
= pSource
->negz
; break;
1645 case 3: src_neg
= pSource
->negw
; break;
1646 default: src_neg
= 0; break;
1649 switch (source_index
)
1652 alu_instruction_ptr
->m_Word0
.f
.src0_sel
= src_sel
;
1653 alu_instruction_ptr
->m_Word0
.f
.src0_rel
= src_rel
;
1654 alu_instruction_ptr
->m_Word0
.f
.src0_chan
= src_chan
;
1655 alu_instruction_ptr
->m_Word0
.f
.src0_neg
= src_neg
;
1658 alu_instruction_ptr
->m_Word0
.f
.src1_sel
= src_sel
;
1659 alu_instruction_ptr
->m_Word0
.f
.src1_rel
= src_rel
;
1660 alu_instruction_ptr
->m_Word0
.f
.src1_chan
= src_chan
;
1661 alu_instruction_ptr
->m_Word0
.f
.src1_neg
= src_neg
;
1664 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_sel
= src_sel
;
1665 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_rel
= src_rel
;
1666 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_chan
= src_chan
;
1667 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_neg
= src_neg
;
1670 radeon_error("Only three sources allowed in ALU opcodes.\n");
1678 GLboolean
add_alu_instruction(r700_AssemblerBase
* pAsm
,
1679 R700ALUInstruction
* alu_instruction_ptr
,
1680 GLuint contiguous_slots_needed
)
1682 if( GL_FALSE
== check_current_clause(pAsm
, CF_ALU_CLAUSE
) )
1687 if ( pAsm
->alu_x_opcode
!= 0 ||
1688 pAsm
->cf_current_alu_clause_ptr
== NULL
||
1689 ( (pAsm
->cf_current_alu_clause_ptr
!= NULL
) &&
1690 (pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
>= (GetCFMaxInstructions(pAsm
->cf_current_alu_clause_ptr
->m_ShaderInstType
)-contiguous_slots_needed
-1) )
1694 //new cf inst for this clause
1695 pAsm
->cf_current_alu_clause_ptr
= (R700ControlFlowALUClause
*) CALLOC_STRUCT(R700ControlFlowALUClause
);
1697 // link the new cf to cf segment
1698 if(NULL
!= pAsm
->cf_current_alu_clause_ptr
)
1700 Init_R700ControlFlowALUClause(pAsm
->cf_current_alu_clause_ptr
);
1701 AddCFInstruction( pAsm
->pR700Shader
,
1702 (R700ControlFlowInstruction
*)pAsm
->cf_current_alu_clause_ptr
);
1706 radeon_error("Could not allocate a new ALU CF instruction.\n");
1710 pAsm
->cf_current_alu_clause_ptr
->m_Word0
.f
.kcache_bank0
= 0x0;
1711 pAsm
->cf_current_alu_clause_ptr
->m_Word0
.f
.kcache_bank1
= 0x0;
1712 pAsm
->cf_current_alu_clause_ptr
->m_Word0
.f
.kcache_mode0
= SQ_CF_KCACHE_NOP
;
1714 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.kcache_mode1
= SQ_CF_KCACHE_NOP
;
1715 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.kcache_addr0
= 0x0;
1716 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.kcache_addr1
= 0x0;
1718 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
= 0x0;
1720 if(pAsm
->alu_x_opcode
!= 0)
1722 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.cf_inst
= pAsm
->alu_x_opcode
;
1723 pAsm
->alu_x_opcode
= 0;
1727 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_ALU
;
1730 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
1732 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
1736 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
+= (GetInstructionSize(alu_instruction_ptr
->m_ShaderInstType
) / 2);
1739 // If this clause contains any instruction that is forward dependent on a TEX instruction,
1740 // set the whole_quad_mode for this clause
1741 if ( pAsm
->pInstDeps
[pAsm
->uiCurInst
].nDstDep
> (-1) )
1743 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x1;
1746 if (pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
>= (GetCFMaxInstructions(pAsm
->cf_current_alu_clause_ptr
->m_ShaderInstType
)-1) )
1748 alu_instruction_ptr
->m_Word0
.f
.last
= 1;
1751 if(NULL
== pAsm
->cf_current_alu_clause_ptr
->m_pLinkedALUInstruction
)
1753 pAsm
->cf_current_alu_clause_ptr
->m_pLinkedALUInstruction
= alu_instruction_ptr
;
1754 alu_instruction_ptr
->m_pLinkedALUClause
= pAsm
->cf_current_alu_clause_ptr
;
1757 AddALUInstruction(pAsm
->pR700Shader
, alu_instruction_ptr
);
1762 void get_src_properties(R700ALUInstruction
* alu_instruction_ptr
,
1769 switch (source_index
)
1772 *psrc_sel
= alu_instruction_ptr
->m_Word0
.f
.src0_sel
;
1773 *psrc_rel
= alu_instruction_ptr
->m_Word0
.f
.src0_rel
;
1774 *psrc_chan
= alu_instruction_ptr
->m_Word0
.f
.src0_chan
;
1775 *psrc_neg
= alu_instruction_ptr
->m_Word0
.f
.src0_neg
;
1779 *psrc_sel
= alu_instruction_ptr
->m_Word0
.f
.src1_sel
;
1780 *psrc_rel
= alu_instruction_ptr
->m_Word0
.f
.src1_rel
;
1781 *psrc_chan
= alu_instruction_ptr
->m_Word0
.f
.src1_chan
;
1782 *psrc_neg
= alu_instruction_ptr
->m_Word0
.f
.src1_neg
;
1786 *psrc_sel
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_sel
;
1787 *psrc_rel
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_rel
;
1788 *psrc_chan
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_chan
;
1789 *psrc_neg
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_neg
;
1794 int is_cfile(BITS sel
)
1796 if (sel
> 255 && sel
< 512)
1803 int is_const(BITS sel
)
1809 else if(sel
>= SQ_ALU_SRC_0
&& sel
<= SQ_ALU_SRC_LITERAL
)
1816 int is_gpr(BITS sel
)
1818 if (sel
>= 0 && sel
< 128)
1825 const GLuint BANK_SWIZZLE_VEC
[8] = {SQ_ALU_VEC_210
, //000
1826 SQ_ALU_VEC_120
, //001
1827 SQ_ALU_VEC_102
, //010
1829 SQ_ALU_VEC_201
, //011
1830 SQ_ALU_VEC_012
, //100
1831 SQ_ALU_VEC_021
, //101
1833 SQ_ALU_VEC_012
, //110
1834 SQ_ALU_VEC_012
}; //111
1836 const GLuint BANK_SWIZZLE_SCL
[8] = {SQ_ALU_SCL_210
, //000
1837 SQ_ALU_SCL_122
, //001
1838 SQ_ALU_SCL_122
, //010
1840 SQ_ALU_SCL_221
, //011
1841 SQ_ALU_SCL_212
, //100
1842 SQ_ALU_SCL_122
, //101
1844 SQ_ALU_SCL_122
, //110
1845 SQ_ALU_SCL_122
}; //111
1847 GLboolean
reserve_cfile(r700_AssemblerBase
* pAsm
,
1851 int res_match
= (-1);
1852 int res_empty
= (-1);
1856 for (res
=3; res
>=0; res
--)
1858 if(pAsm
->hw_cfile_addr
[ res
] < 0)
1862 else if( (pAsm
->hw_cfile_addr
[res
] == (int)sel
)
1864 (pAsm
->hw_cfile_chan
[ res
] == (int) chan
) )
1872 // Read for this scalar component already reserved, nothing to do here.
1875 else if(res_empty
>= 0)
1877 pAsm
->hw_cfile_addr
[ res_empty
] = sel
;
1878 pAsm
->hw_cfile_chan
[ res_empty
] = chan
;
1882 radeon_error("All cfile read ports are used, cannot reference C$sel, channel $chan.\n");
1888 GLboolean
reserve_gpr(r700_AssemblerBase
* pAsm
, GLuint sel
, GLuint chan
, GLuint cycle
)
1890 if(pAsm
->hw_gpr
[cycle
][chan
] < 0)
1892 pAsm
->hw_gpr
[cycle
][chan
] = sel
;
1894 else if(pAsm
->hw_gpr
[cycle
][chan
] != (int)sel
)
1896 radeon_error("Another scalar operation has already used GPR read port for given channel\n");
1903 GLboolean
cycle_for_scalar_bank_swizzle(const int swiz
, const int sel
, GLuint
* pCycle
)
1907 case SQ_ALU_SCL_210
:
1909 int table
[3] = {2, 1, 0};
1910 *pCycle
= table
[sel
];
1914 case SQ_ALU_SCL_122
:
1916 int table
[3] = {1, 2, 2};
1917 *pCycle
= table
[sel
];
1921 case SQ_ALU_SCL_212
:
1923 int table
[3] = {2, 1, 2};
1924 *pCycle
= table
[sel
];
1928 case SQ_ALU_SCL_221
:
1930 int table
[3] = {2, 2, 1};
1931 *pCycle
= table
[sel
];
1936 radeon_error("Bad Scalar bank swizzle value\n");
1943 GLboolean
cycle_for_vector_bank_swizzle(const int swiz
, const int sel
, GLuint
* pCycle
)
1947 case SQ_ALU_VEC_012
:
1949 int table
[3] = {0, 1, 2};
1950 *pCycle
= table
[sel
];
1953 case SQ_ALU_VEC_021
:
1955 int table
[3] = {0, 2, 1};
1956 *pCycle
= table
[sel
];
1959 case SQ_ALU_VEC_120
:
1961 int table
[3] = {1, 2, 0};
1962 *pCycle
= table
[sel
];
1965 case SQ_ALU_VEC_102
:
1967 int table
[3] = {1, 0, 2};
1968 *pCycle
= table
[sel
];
1971 case SQ_ALU_VEC_201
:
1973 int table
[3] = {2, 0, 1};
1974 *pCycle
= table
[sel
];
1977 case SQ_ALU_VEC_210
:
1979 int table
[3] = {2, 1, 0};
1980 *pCycle
= table
[sel
];
1984 radeon_error("Bad Vec bank swizzle value\n");
1992 GLboolean
check_scalar(r700_AssemblerBase
* pAsm
,
1993 R700ALUInstruction
* alu_instruction_ptr
)
1996 GLuint bank_swizzle
;
1997 GLuint const_count
= 0;
2006 BITS src_sel
[3] = {0,0,0};
2007 BITS src_chan
[3] = {0,0,0};
2008 BITS src_rel
[3] = {0,0,0};
2009 BITS src_neg
[3] = {0,0,0};
2013 GLuint number_of_operands
= r700GetNumOperands(pAsm
);
2015 for (src
=0; src
<number_of_operands
; src
++)
2017 get_src_properties(alu_instruction_ptr
,
2026 swizzle_key
= ( (is_const( src_sel
[0] ) ? 4 : 0) +
2027 (is_const( src_sel
[1] ) ? 2 : 0) +
2028 (is_const( src_sel
[2] ) ? 1 : 0) );
2030 alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
= BANK_SWIZZLE_SCL
[ swizzle_key
];
2032 for (src
=0; src
<number_of_operands
; src
++)
2034 sel
= src_sel
[src
];
2035 chan
= src_chan
[src
];
2036 rel
= src_rel
[src
];
2037 neg
= src_neg
[src
];
2039 if (is_const( sel
))
2041 // Any constant, including literal and inline constants
2044 if (is_cfile( sel
))
2046 reserve_cfile(pAsm
, sel
, chan
);
2052 for (src
=0; src
<number_of_operands
; src
++)
2054 sel
= src_sel
[src
];
2055 chan
= src_chan
[src
];
2056 rel
= src_rel
[src
];
2057 neg
= src_neg
[src
];
2061 bank_swizzle
= alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
;
2063 if( GL_FALSE
== cycle_for_scalar_bank_swizzle(bank_swizzle
, src
, &cycle
) )
2068 if(cycle
< const_count
)
2070 if( GL_FALSE
== reserve_gpr(pAsm
, sel
, chan
, cycle
) )
2081 GLboolean
check_vector(r700_AssemblerBase
* pAsm
,
2082 R700ALUInstruction
* alu_instruction_ptr
)
2085 GLuint bank_swizzle
;
2086 GLuint const_count
= 0;
2095 BITS src_sel
[3] = {0,0,0};
2096 BITS src_chan
[3] = {0,0,0};
2097 BITS src_rel
[3] = {0,0,0};
2098 BITS src_neg
[3] = {0,0,0};
2102 GLuint number_of_operands
= r700GetNumOperands(pAsm
);
2104 for (src
=0; src
<number_of_operands
; src
++)
2106 get_src_properties(alu_instruction_ptr
,
2115 swizzle_key
= ( (is_const( src_sel
[0] ) ? 4 : 0) +
2116 (is_const( src_sel
[1] ) ? 2 : 0) +
2117 (is_const( src_sel
[2] ) ? 1 : 0)
2120 alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
= BANK_SWIZZLE_VEC
[swizzle_key
];
2122 for (src
=0; src
<number_of_operands
; src
++)
2124 sel
= src_sel
[src
];
2125 chan
= src_chan
[src
];
2126 rel
= src_rel
[src
];
2127 neg
= src_neg
[src
];
2130 bank_swizzle
= alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
;
2134 if( GL_FALSE
== cycle_for_vector_bank_swizzle(bank_swizzle
, src
, &cycle
) )
2140 (sel
== src_sel
[0]) &&
2141 (chan
== src_chan
[0]) )
2146 if( GL_FALSE
== reserve_gpr(pAsm
, sel
, chan
, cycle
) )
2152 else if( is_const(sel
) )
2158 if( GL_FALSE
== reserve_cfile(pAsm
, sel
, chan
) )
2169 GLboolean
assemble_alu_instruction(r700_AssemblerBase
*pAsm
)
2171 R700ALUInstruction
* alu_instruction_ptr
;
2172 R700ALUInstructionHalfLiteral
* alu_instruction_ptr_hl
;
2173 R700ALUInstructionFullLiteral
* alu_instruction_ptr_fl
;
2175 GLuint number_of_scalar_operations
;
2176 GLboolean is_single_scalar_operation
;
2177 GLuint scalar_channel_index
;
2179 PVSSRC
* pcurrent_source
;
2180 int current_source_index
;
2181 GLuint contiguous_slots_needed
;
2183 GLuint uNumSrc
= r700GetNumOperands(pAsm
);
2184 //GLuint channel_swizzle, j;
2185 //GLuint chan_counter[4] = {0, 0, 0, 0};
2186 //PVSSRC * pSource[3];
2187 GLboolean bSplitInst
= GL_FALSE
;
2189 if (1 == pAsm
->D
.dst
.math
)
2191 is_single_scalar_operation
= GL_TRUE
;
2192 number_of_scalar_operations
= 1;
2196 is_single_scalar_operation
= GL_FALSE
;
2197 number_of_scalar_operations
= 4;
2199 /* current assembler doesn't do more than 1 register per source */
2201 /* check read port, only very preliminary algorithm, not count in
2202 src0/1 same comp case and prev slot repeat case; also not count relative
2203 addressing. TODO: improve performance. */
2204 for(j
=0; j
<uNumSrc
; j
++)
2206 pSource
[j
] = &(pAsm
->S
[j
].src
);
2208 for(scalar_channel_index
=0; scalar_channel_index
<4; scalar_channel_index
++)
2210 for(j
=0; j
<uNumSrc
; j
++)
2212 switch (scalar_channel_index
)
2214 case 0: channel_swizzle
= pSource
[j
]->swizzlex
; break;
2215 case 1: channel_swizzle
= pSource
[j
]->swizzley
; break;
2216 case 2: channel_swizzle
= pSource
[j
]->swizzlez
; break;
2217 case 3: channel_swizzle
= pSource
[j
]->swizzlew
; break;
2218 default: channel_swizzle
= SQ_SEL_MASK
; break;
2220 if ( ((pSource
[j
]->rtype
== SRC_REG_TEMPORARY
) ||
2221 (pSource
[j
]->rtype
== SRC_REG_INPUT
))
2222 && (channel_swizzle
<= SQ_SEL_W
) )
2224 chan_counter
[channel_swizzle
]++;
2228 if( (chan_counter
[SQ_SEL_X
] > 3)
2229 || (chan_counter
[SQ_SEL_Y
] > 3)
2230 || (chan_counter
[SQ_SEL_Z
] > 3)
2231 || (chan_counter
[SQ_SEL_W
] > 3) ) /* each chan bank has only 3 ports. */
2233 bSplitInst
= GL_TRUE
;
2238 contiguous_slots_needed
= 0;
2240 if(GL_TRUE
== is_reduction_opcode(&(pAsm
->D
)) )
2242 contiguous_slots_needed
= 4;
2245 contiguous_slots_needed
+= pAsm
->D2
.dst2
.literal_slots
;
2249 for (scalar_channel_index
=0;
2250 scalar_channel_index
< number_of_scalar_operations
;
2251 scalar_channel_index
++)
2253 if(scalar_channel_index
== (number_of_scalar_operations
-1))
2255 switch(pAsm
->D2
.dst2
.literal_slots
)
2258 alu_instruction_ptr
= (R700ALUInstruction
*) CALLOC_STRUCT(R700ALUInstruction
);
2259 Init_R700ALUInstruction(alu_instruction_ptr
);
2262 alu_instruction_ptr_hl
= (R700ALUInstructionHalfLiteral
*) CALLOC_STRUCT(R700ALUInstructionHalfLiteral
);
2263 Init_R700ALUInstructionHalfLiteral(alu_instruction_ptr_hl
, pAsm
->C
[0].f
, pAsm
->C
[1].f
);
2264 alu_instruction_ptr
= (R700ALUInstruction
*)alu_instruction_ptr_hl
;
2267 alu_instruction_ptr_fl
= (R700ALUInstructionFullLiteral
*) CALLOC_STRUCT(R700ALUInstructionFullLiteral
);
2268 Init_R700ALUInstructionFullLiteral(alu_instruction_ptr_fl
,pAsm
->C
[0].f
, pAsm
->C
[1].f
, pAsm
->C
[2].f
, pAsm
->C
[3].f
);
2269 alu_instruction_ptr
= (R700ALUInstruction
*)alu_instruction_ptr_fl
;
2275 alu_instruction_ptr
= (R700ALUInstruction
*) CALLOC_STRUCT(R700ALUInstruction
);
2276 Init_R700ALUInstruction(alu_instruction_ptr
);
2280 current_source_index
= 0;
2281 pcurrent_source
= &(pAsm
->S
[0].src
);
2283 if (GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2284 current_source_index
,
2286 scalar_channel_index
) )
2294 current_source_index
= 1;
2295 pcurrent_source
= &(pAsm
->S
[current_source_index
].src
);
2297 if (GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2298 current_source_index
,
2300 scalar_channel_index
) )
2307 alu_instruction_ptr
->m_Word0
.f
.index_mode
= pAsm
->D2
.dst2
.index_mode
;
2309 if( (is_single_scalar_operation
== GL_TRUE
)
2310 || (GL_TRUE
== bSplitInst
) )
2312 alu_instruction_ptr
->m_Word0
.f
.last
= 1;
2316 alu_instruction_ptr
->m_Word0
.f
.last
= (scalar_channel_index
== 3) ? 1 : 0;
2319 alu_instruction_ptr
->m_Word0
.f
.pred_sel
= (pAsm
->D
.dst
.pred_inv
> 0) ? 1 : 0;
2320 if(1 == pAsm
->D
.dst
.predicated
)
2322 alu_instruction_ptr
->m_Word1_OP2
.f
.update_pred
= 0x1;
2323 alu_instruction_ptr
->m_Word1_OP2
.f
.update_execute_mask
= 0x1;
2327 alu_instruction_ptr
->m_Word1_OP2
.f
.update_pred
= 0x0;
2328 alu_instruction_ptr
->m_Word1_OP2
.f
.update_execute_mask
= 0x0;
2332 if( (pAsm
->D
.dst
.rtype
== DST_REG_TEMPORARY
) ||
2333 (pAsm
->D
.dst
.rtype
== DST_REG_OUT
) )
2335 alu_instruction_ptr
->m_Word1
.f
.dst_gpr
= pAsm
->D
.dst
.reg
;
2339 radeon_error("Only temp destination registers supported for ALU dest regs.\n");
2343 alu_instruction_ptr
->m_Word1
.f
.dst_rel
= SQ_ABSOLUTE
; //D.rtype
2345 if ( is_single_scalar_operation
== GL_TRUE
)
2347 // Override scalar_channel_index since only one scalar value will be written
2348 if(pAsm
->D
.dst
.writex
)
2350 scalar_channel_index
= 0;
2352 else if(pAsm
->D
.dst
.writey
)
2354 scalar_channel_index
= 1;
2356 else if(pAsm
->D
.dst
.writez
)
2358 scalar_channel_index
= 2;
2360 else if(pAsm
->D
.dst
.writew
)
2362 scalar_channel_index
= 3;
2366 alu_instruction_ptr
->m_Word1
.f
.dst_chan
= scalar_channel_index
;
2368 alu_instruction_ptr
->m_Word1
.f
.clamp
= pAsm
->D2
.dst2
.SaturateMode
;
2370 if (pAsm
->D
.dst
.op3
)
2374 alu_instruction_ptr
->m_Word1_OP3
.f
.alu_inst
= pAsm
->D
.dst
.opcode
;
2376 //There's 3rd src for op3
2377 current_source_index
= 2;
2378 pcurrent_source
= &(pAsm
->S
[current_source_index
].src
);
2380 if ( GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2381 current_source_index
,
2383 scalar_channel_index
) )
2393 alu_instruction_ptr
->m_Word1_OP2
.f6
.alu_inst
= pAsm
->D
.dst
.opcode
;
2395 alu_instruction_ptr
->m_Word1_OP2
.f6
.src0_abs
= pAsm
->S
[0].src
.abs
;
2396 alu_instruction_ptr
->m_Word1_OP2
.f6
.src1_abs
= pAsm
->S
[1].src
.abs
;
2398 //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
2399 //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0;
2400 switch (scalar_channel_index
)
2403 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writex
;
2406 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writey
;
2409 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writez
;
2412 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writew
;
2415 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= 1; //SQ_SEL_MASK;
2418 alu_instruction_ptr
->m_Word1_OP2
.f6
.omod
= SQ_ALU_OMOD_OFF
;
2422 alu_instruction_ptr
->m_Word1_OP2
.f
.alu_inst
= pAsm
->D
.dst
.opcode
;
2424 alu_instruction_ptr
->m_Word1_OP2
.f
.src0_abs
= pAsm
->S
[0].src
.abs
;
2425 alu_instruction_ptr
->m_Word1_OP2
.f
.src1_abs
= pAsm
->S
[1].src
.abs
;
2427 //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2428 //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2429 switch (scalar_channel_index
)
2432 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writex
;
2435 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writey
;
2438 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writez
;
2441 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writew
;
2444 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= 1; //SQ_SEL_MASK;
2447 alu_instruction_ptr
->m_Word1_OP2
.f
.omod
= SQ_ALU_OMOD_OFF
;
2451 if(GL_FALSE
== add_alu_instruction(pAsm
, alu_instruction_ptr
, contiguous_slots_needed
) )
2457 * Judge the type of current instruction, is it vector or scalar
2460 if (is_single_scalar_operation
)
2462 if(GL_FALSE
== check_scalar(pAsm
, alu_instruction_ptr
) )
2469 if(GL_FALSE
== check_vector(pAsm
, alu_instruction_ptr
) )
2475 contiguous_slots_needed
-= 1;
2481 GLboolean
next_ins(r700_AssemblerBase
*pAsm
)
2483 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
2485 if( GL_TRUE
== pAsm
->is_tex
)
2487 if (pILInst
->TexSrcTarget
== TEXTURE_RECT_INDEX
) {
2488 if( GL_FALSE
== assemble_tex_instruction(pAsm
, GL_FALSE
) )
2490 radeon_error("Error assembling TEX instruction\n");
2494 if( GL_FALSE
== assemble_tex_instruction(pAsm
, GL_TRUE
) )
2496 radeon_error("Error assembling TEX instruction\n");
2503 if( GL_FALSE
== assemble_alu_instruction(pAsm
) )
2505 radeon_error("Error assembling ALU instruction\n");
2510 if(pAsm
->D
.dst
.rtype
== DST_REG_OUT
)
2514 // There is no mask for OP3 instructions, so all channels are written
2515 pAsm
->pucOutMask
[pAsm
->D
.dst
.reg
- pAsm
->starting_export_register_number
] = 0xF;
2519 pAsm
->pucOutMask
[pAsm
->D
.dst
.reg
- pAsm
->starting_export_register_number
]
2520 |= (unsigned char)pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
;
2524 //reset for next inst.
2527 pAsm
->S
[0].bits
= 0;
2528 pAsm
->S
[1].bits
= 0;
2529 pAsm
->S
[2].bits
= 0;
2530 pAsm
->is_tex
= GL_FALSE
;
2531 pAsm
->need_tex_barrier
= GL_FALSE
;
2533 pAsm
->C
[0].bits
= pAsm
->C
[1].bits
= pAsm
->C
[2].bits
= pAsm
->C
[3].bits
= 0;
2537 GLboolean
assemble_math_function(r700_AssemblerBase
* pAsm
, BITS opcode
)
2543 tmp
= gethelpr(pAsm
);
2545 // opcode tmp.x, a.x
2548 pAsm
->D
.dst
.opcode
= opcode
;
2549 pAsm
->D
.dst
.math
= 1;
2551 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2552 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2553 pAsm
->D
.dst
.reg
= tmp
;
2554 pAsm
->D
.dst
.writex
= 1;
2556 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2561 if ( GL_FALSE
== next_ins(pAsm
) )
2566 // Now replicate result to all necessary channels in destination
2567 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
2569 if( GL_FALSE
== assemble_dst(pAsm
) )
2574 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2575 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
2576 pAsm
->S
[0].src
.reg
= tmp
;
2578 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
2579 noneg_PVSSRC(&(pAsm
->S
[0].src
));
2581 if( GL_FALSE
== next_ins(pAsm
) )
2589 GLboolean
assemble_ABS(r700_AssemblerBase
*pAsm
)
2593 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
2595 if( GL_FALSE
== assemble_dst(pAsm
) )
2599 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2604 pAsm
->S
[1].bits
= pAsm
->S
[0].bits
;
2605 flipneg_PVSSRC(&(pAsm
->S
[1].src
));
2607 if ( GL_FALSE
== next_ins(pAsm
) )
2615 GLboolean
assemble_ADD(r700_AssemblerBase
*pAsm
)
2617 if( GL_FALSE
== checkop2(pAsm
) )
2622 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_ADD
;
2624 if( GL_FALSE
== assemble_dst(pAsm
) )
2629 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2634 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
2639 if(pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_SUB
)
2641 flipneg_PVSSRC(&(pAsm
->S
[1].src
));
2644 if( GL_FALSE
== next_ins(pAsm
) )
2652 GLboolean
assemble_ARL(r700_AssemblerBase
*pAsm
)
2653 { /* TODO: ar values dont' persist between clauses */
2654 if( GL_FALSE
== checkop1(pAsm
) )
2659 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOVA_FLOOR
;
2660 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2661 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2662 pAsm
->D
.dst
.reg
= 0;
2663 pAsm
->D
.dst
.writex
= 0;
2664 pAsm
->D
.dst
.writey
= 0;
2665 pAsm
->D
.dst
.writez
= 0;
2666 pAsm
->D
.dst
.writew
= 0;
2668 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2673 if( GL_FALSE
== next_ins(pAsm
) )
2681 GLboolean
assemble_BAD(char *opcode_str
)
2683 radeon_error("Not yet implemented instruction (%s)\n", opcode_str
);
2687 GLboolean
assemble_CMP(r700_AssemblerBase
*pAsm
)
2691 if( GL_FALSE
== checkop3(pAsm
) )
2696 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_CNDGE
;
2697 pAsm
->D
.dst
.op3
= 1;
2701 if(0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
2703 //OP3 has no support for write mask
2704 tmp
= gethelpr(pAsm
);
2706 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2707 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2708 pAsm
->D
.dst
.reg
= tmp
;
2710 nomask_PVSDST(&(pAsm
->D
.dst
));
2714 if( GL_FALSE
== assemble_dst(pAsm
) )
2720 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2725 if( GL_FALSE
== assemble_src(pAsm
, 2, 1) )
2730 if( GL_FALSE
== assemble_src(pAsm
, 1, 2) )
2735 if ( GL_FALSE
== next_ins(pAsm
) )
2740 if (0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
2742 if( GL_FALSE
== assemble_dst(pAsm
) )
2747 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
2750 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2751 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
2752 pAsm
->S
[0].src
.reg
= tmp
;
2754 noneg_PVSSRC(&(pAsm
->S
[0].src
));
2755 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
2757 if( GL_FALSE
== next_ins(pAsm
) )
2766 GLboolean
assemble_TRIG(r700_AssemblerBase
*pAsm
, BITS opcode
)
2771 tmp
= gethelpr(pAsm
);
2773 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
2774 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2775 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2776 pAsm
->D
.dst
.reg
= tmp
;
2777 pAsm
->D
.dst
.writex
= 1;
2779 assemble_src(pAsm
, 0, -1);
2781 pAsm
->S
[1].src
.rtype
= SRC_REC_LITERAL
;
2782 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_X
);
2783 pAsm
->D2
.dst2
.literal_slots
= 1;
2784 pAsm
->C
[0].f
= 1/(3.1415926535 * 2);
2785 pAsm
->C
[1].f
= 0.0F
;
2788 pAsm
->D
.dst
.opcode
= opcode
;
2789 pAsm
->D
.dst
.math
= 1;
2793 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2794 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
2795 pAsm
->S
[0].src
.reg
= tmp
;
2796 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
2797 noneg_PVSSRC(&(pAsm
->S
[0].src
));
2801 //TODO - replicate if more channels set in WriteMask
2806 GLboolean
assemble_DOT(r700_AssemblerBase
*pAsm
)
2808 if( GL_FALSE
== checkop2(pAsm
) )
2813 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_DOT4
;
2815 if( GL_FALSE
== assemble_dst(pAsm
) )
2820 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2825 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
2830 if(OPCODE_DP3
== pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
)
2832 zerocomp_PVSSRC(&(pAsm
->S
[0].src
), 3);
2833 zerocomp_PVSSRC(&(pAsm
->S
[1].src
), 3);
2835 else if(pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_DPH
)
2837 onecomp_PVSSRC(&(pAsm
->S
[0].src
), 3);
2840 if ( GL_FALSE
== next_ins(pAsm
) )
2848 GLboolean
assemble_DST(r700_AssemblerBase
*pAsm
)
2850 if( GL_FALSE
== checkop2(pAsm
) )
2855 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
2857 if( GL_FALSE
== assemble_dst(pAsm
) )
2862 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2867 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
2872 onecomp_PVSSRC(&(pAsm
->S
[0].src
), 0);
2873 onecomp_PVSSRC(&(pAsm
->S
[0].src
), 3);
2875 onecomp_PVSSRC(&(pAsm
->S
[1].src
), 0);
2876 onecomp_PVSSRC(&(pAsm
->S
[1].src
), 2);
2878 if ( GL_FALSE
== next_ins(pAsm
) )
2886 GLboolean
assemble_EX2(r700_AssemblerBase
*pAsm
)
2888 return assemble_math_function(pAsm
, SQ_OP2_INST_EXP_IEEE
);
2891 GLboolean
assemble_EXP(r700_AssemblerBase
*pAsm
)
2897 tmp
= gethelpr(pAsm
);
2902 if (pAsm
->pILInst
->DstReg
.WriteMask
& 0x1) {
2903 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FLOOR
;
2905 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2906 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2907 pAsm
->D
.dst
.reg
= tmp
;
2908 pAsm
->D
.dst
.writex
= 1;
2910 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2915 if( GL_FALSE
== next_ins(pAsm
) )
2920 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
2921 pAsm
->D
.dst
.math
= 1;
2923 if( GL_FALSE
== assemble_dst(pAsm
) )
2928 pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writez
= pAsm
->D
.dst
.writew
= 0;
2930 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2931 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
2932 pAsm
->S
[0].src
.reg
= tmp
;
2934 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
2935 noneg_PVSSRC(&(pAsm
->S
[0].src
));
2937 if( GL_FALSE
== next_ins(pAsm
) )
2945 if ((pAsm
->pILInst
->DstReg
.WriteMask
>> 1) & 0x1) {
2946 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FRACT
;
2948 if( GL_FALSE
== assemble_dst(pAsm
) )
2953 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2958 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writez
= pAsm
->D
.dst
.writew
= 0;
2960 if( GL_FALSE
== next_ins(pAsm
) )
2968 if ((pAsm
->pILInst
->DstReg
.WriteMask
>> 2) & 0x1) {
2969 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
2970 pAsm
->D
.dst
.math
= 1;
2972 if( GL_FALSE
== assemble_dst(pAsm
) )
2977 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2982 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writew
= 0;
2984 if( GL_FALSE
== next_ins(pAsm
) )
2992 if ((pAsm
->pILInst
->DstReg
.WriteMask
>> 3) & 0x1) {
2993 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
2995 if( GL_FALSE
== assemble_dst(pAsm
) )
3000 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writez
= 0;
3002 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3003 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3004 pAsm
->S
[0].src
.reg
= tmp
;
3006 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_1
);
3007 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3009 if( GL_FALSE
== next_ins(pAsm
) )
3018 GLboolean
assemble_FLR(r700_AssemblerBase
*pAsm
)
3022 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FLOOR
;
3024 if ( GL_FALSE
== assemble_dst(pAsm
) )
3029 if ( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3034 if ( GL_FALSE
== next_ins(pAsm
) )
3042 GLboolean
assemble_FLR_INT(r700_AssemblerBase
*pAsm
)
3044 return assemble_math_function(pAsm
, SQ_OP2_INST_FLT_TO_INT
);
3047 GLboolean
assemble_FRC(r700_AssemblerBase
*pAsm
)
3051 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FRACT
;
3053 if ( GL_FALSE
== assemble_dst(pAsm
) )
3058 if ( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3063 if ( GL_FALSE
== next_ins(pAsm
) )
/*
 * Emit a kill (fragment discard) ALU instruction.
 *
 * pAsm   - assembler state.
 * opcode - hardware opcode stored into D.dst.opcode.
 *
 * The destination is temporary register 0 with every write-enable cleared.
 * Source 0 is preset to temporary 0 with swizzle SQ_SEL_0.  For
 * OPCODE_KIL_NV source 1 is a negated SQ_SEL_1 on temporary 0; the other
 * visible path assembles the instruction's source 0 into slot 1.  After
 * next_ins() the shader is flagged with killIsUsed and alu_x_opcode is set
 * to SQ_CF_INST_ALU.
 *
 * NOTE(review): the statement guarded by the OPCODE_KIL test and the
 * keyword joining the KIL_NV block to the assemble_src() call are not
 * visible here -- confirm against the complete function.
 */
3071 GLboolean
assemble_KIL(r700_AssemblerBase
*pAsm
, GLuint opcode
)
3073 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
3075 if(pILInst
->Opcode
== OPCODE_KIL
)
3078 pAsm
->D
.dst
.opcode
= opcode
;
3079 //pAsm->D.dst.math = 1;
/* Destination: temporary 0, nothing written (the kill has no result) */
3081 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3082 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3083 pAsm
->D
.dst
.reg
= 0;
3084 pAsm
->D
.dst
.writex
= 0;
3085 pAsm
->D
.dst
.writey
= 0;
3086 pAsm
->D
.dst
.writez
= 0;
3087 pAsm
->D
.dst
.writew
= 0;
/* Source 0: SQ_SEL_0 on temporary 0, not negated */
3089 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3090 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3091 pAsm
->S
[0].src
.reg
= 0;
3092 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_0
);
3093 noneg_PVSSRC(&(pAsm
->S
[0].src
));
/* KIL_NV: source 1 is a negated SQ_SEL_1 */
3095 if(pILInst
->Opcode
== OPCODE_KIL_NV
)
3097 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
3098 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
3099 pAsm
->S
[1].src
.reg
= 0;
3100 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_1
);
3101 neg_PVSSRC(&(pAsm
->S
[1].src
));
/* Otherwise the instruction's own source 0 is assembled into slot 1 */
3105 if( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
3112 if ( GL_FALSE
== next_ins(pAsm
) )
3117 /* Doc says KILL has to be last(end) ALU clause */
3118 pAsm
->pR700Shader
->killIsUsed
= GL_TRUE
;
3119 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
3124 GLboolean
assemble_LG2(r700_AssemblerBase
*pAsm
)
3126 return assemble_math_function(pAsm
, SQ_OP2_INST_LOG_IEEE
);
3129 GLboolean
assemble_LRP(r700_AssemblerBase
*pAsm
)
3133 if( GL_FALSE
== checkop3(pAsm
) )
3138 tmp
= gethelpr(pAsm
);
3140 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_ADD
;
3142 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3143 pAsm
->D
.dst
.reg
= tmp
;
3144 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3145 nomask_PVSDST(&(pAsm
->D
.dst
));
3148 if( GL_FALSE
== assemble_src(pAsm
, 1, 0) )
3153 if ( GL_FALSE
== assemble_src(pAsm
, 2, 1) )
3158 neg_PVSSRC(&(pAsm
->S
[1].src
));
3160 if( GL_FALSE
== next_ins(pAsm
) )
3165 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
3166 pAsm
->D
.dst
.op3
= 1;
3168 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3169 pAsm
->D
.dst
.reg
= tmp
;
3170 nomask_PVSDST(&(pAsm
->D
.dst
));
3171 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3173 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3174 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3175 pAsm
->S
[0].src
.reg
= tmp
;
3176 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3179 if( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
3184 if( GL_FALSE
== assemble_src(pAsm
, 2, -1) )
3189 if( GL_FALSE
== next_ins(pAsm
) )
3194 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3196 if( GL_FALSE
== assemble_dst(pAsm
) )
3201 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3202 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3203 pAsm
->S
[0].src
.reg
= tmp
;
3204 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3206 if( GL_FALSE
== next_ins(pAsm
) )
3214 GLboolean
assemble_LOG(r700_AssemblerBase
*pAsm
)
3216 BITS tmp1
, tmp2
, tmp3
;
3220 tmp1
= gethelpr(pAsm
);
3221 tmp2
= gethelpr(pAsm
);
3222 tmp3
= gethelpr(pAsm
);
3224 // FIXME: The hardware can do fabs() directly on input
3225 // elements, but the compiler doesn't have the
3226 // capability to use that.
3228 // MAX tmp1.x, a.x, -a.x (fabs(a.x))
3230 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
3232 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3233 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3234 pAsm
->D
.dst
.reg
= tmp1
;
3235 pAsm
->D
.dst
.writex
= 1;
3237 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3242 pAsm
->S
[1].bits
= pAsm
->S
[0].bits
;
3243 flipneg_PVSSRC(&(pAsm
->S
[1].src
));
3245 if ( GL_FALSE
== next_ins(pAsm
) )
3252 // LG2 tmp2.x, tmp1.x
3253 // FLOOR tmp3.x, tmp2.x
3254 // MOV dst.x, tmp3.x
3255 // ADD tmp3.x, tmp2.x, -tmp3.x
3256 // EX2 dst.y, tmp3.x
3257 // MOV dst.z, tmp2.x
3260 // LG2 tmp2.x, tmp1.x
3261 // FLOOR tmp3.x, tmp2.x
3263 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_LOG_IEEE
;
3264 pAsm
->D
.dst
.math
= 1;
3266 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3267 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3268 pAsm
->D
.dst
.reg
= tmp2
;
3269 pAsm
->D
.dst
.writex
= 1;
3271 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3272 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3273 pAsm
->S
[0].src
.reg
= tmp1
;
3275 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3276 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3278 if( GL_FALSE
== next_ins(pAsm
) )
3283 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FLOOR
;
3285 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3286 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3287 pAsm
->D
.dst
.reg
= tmp3
;
3288 pAsm
->D
.dst
.writex
= 1;
3290 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3291 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3292 pAsm
->S
[0].src
.reg
= tmp2
;
3294 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3295 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3297 if( GL_FALSE
== next_ins(pAsm
) )
3302 // MOV dst.x, tmp3.x
3304 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3306 if( GL_FALSE
== assemble_dst(pAsm
) )
3311 pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writez
= pAsm
->D
.dst
.writew
= 0;
3313 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3314 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3315 pAsm
->S
[0].src
.reg
= tmp3
;
3317 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3318 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3320 if( GL_FALSE
== next_ins(pAsm
) )
3325 // ADD tmp3.x, tmp2.x, -tmp3.x
3326 // EX2 dst.y, tmp3.x
3328 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_ADD
;
3330 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3331 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3332 pAsm
->D
.dst
.reg
= tmp3
;
3333 pAsm
->D
.dst
.writex
= 1;
3335 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3336 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3337 pAsm
->S
[0].src
.reg
= tmp2
;
3339 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3340 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3342 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
3343 pAsm
->S
[1].src
.rtype
= DST_REG_TEMPORARY
;
3344 pAsm
->S
[1].src
.reg
= tmp3
;
3346 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_X
);
3347 neg_PVSSRC(&(pAsm
->S
[1].src
));
3349 if( GL_FALSE
== next_ins(pAsm
) )
3354 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
3355 pAsm
->D
.dst
.math
= 1;
3357 if( GL_FALSE
== assemble_dst(pAsm
) )
3362 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writez
= pAsm
->D
.dst
.writew
= 0;
3364 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3365 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3366 pAsm
->S
[0].src
.reg
= tmp3
;
3368 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3369 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3371 if( GL_FALSE
== next_ins(pAsm
) )
3376 // MOV dst.z, tmp2.x
3378 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3380 if( GL_FALSE
== assemble_dst(pAsm
) )
3385 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writew
= 0;
3387 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3388 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3389 pAsm
->S
[0].src
.reg
= tmp2
;
3391 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3392 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3394 if( GL_FALSE
== next_ins(pAsm
) )
3401 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3403 if( GL_FALSE
== assemble_dst(pAsm
) )
3408 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writez
= 0;
3410 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3411 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3412 pAsm
->S
[0].src
.reg
= tmp1
;
3414 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_1
);
3415 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3417 if( GL_FALSE
== next_ins(pAsm
) )
3425 GLboolean
assemble_MAD(struct r700_AssemblerBase
*pAsm
)
3428 GLboolean bReplaceDst
= GL_FALSE
;
3429 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
3431 if( GL_FALSE
== checkop3(pAsm
) )
3436 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
3437 pAsm
->D
.dst
.op3
= 1;
3441 if(PROGRAM_TEMPORARY
== pILInst
->DstReg
.File
)
3442 { /* TODO : more investigation on MAD src and dst using same register */
3443 for(ii
=0; ii
<3; ii
++)
3445 if( (PROGRAM_TEMPORARY
== pILInst
->SrcReg
[ii
].File
)
3446 && (pILInst
->DstReg
.Index
== pILInst
->SrcReg
[ii
].Index
) )
3448 bReplaceDst
= GL_TRUE
;
3453 if(0xF != pILInst
->DstReg
.WriteMask
)
3454 { /* OP3 has no support for write mask */
3455 bReplaceDst
= GL_TRUE
;
3458 if(GL_TRUE
== bReplaceDst
)
3460 tmp
= gethelpr(pAsm
);
3462 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3463 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3464 pAsm
->D
.dst
.reg
= tmp
;
3466 nomask_PVSDST(&(pAsm
->D
.dst
));
3470 if( GL_FALSE
== assemble_dst(pAsm
) )
3476 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3481 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3486 if( GL_FALSE
== assemble_src(pAsm
, 2, -1) )
3491 if ( GL_FALSE
== next_ins(pAsm
) )
3496 if (GL_TRUE
== bReplaceDst
)
3498 if( GL_FALSE
== assemble_dst(pAsm
) )
3503 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3506 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3507 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3508 pAsm
->S
[0].src
.reg
= tmp
;
3510 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3511 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3513 if( GL_FALSE
== next_ins(pAsm
) )
3523 GLboolean
assemble_LIT(r700_AssemblerBase
*pAsm
)
3525 unsigned int dstReg
;
3526 unsigned int dstType
;
3527 unsigned int srcReg
;
3528 unsigned int srcType
;
3530 int tmp
= gethelpr(pAsm
);
3532 if( GL_FALSE
== assemble_dst(pAsm
) )
3536 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3540 dstReg
= pAsm
->D
.dst
.reg
;
3541 dstType
= pAsm
->D
.dst
.rtype
;
3542 srcReg
= pAsm
->S
[0].src
.reg
;
3543 srcType
= pAsm
->S
[0].src
.rtype
;
3545 /* dst.xw, <- 1.0 */
3546 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3547 pAsm
->D
.dst
.rtype
= dstType
;
3548 pAsm
->D
.dst
.reg
= dstReg
;
3549 pAsm
->D
.dst
.writex
= 1;
3550 pAsm
->D
.dst
.writey
= 0;
3551 pAsm
->D
.dst
.writez
= 0;
3552 pAsm
->D
.dst
.writew
= 1;
3553 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3554 pAsm
->S
[0].src
.reg
= tmp
;
3555 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3556 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3557 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_1
;
3558 pAsm
->S
[0].src
.swizzley
= SQ_SEL_1
;
3559 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_1
;
3560 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_1
;
3561 if( GL_FALSE
== next_ins(pAsm
) )
3566 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3571 /* dst.y = max(src.x, 0.0) */
3572 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
3573 pAsm
->D
.dst
.rtype
= dstType
;
3574 pAsm
->D
.dst
.reg
= dstReg
;
3575 pAsm
->D
.dst
.writex
= 0;
3576 pAsm
->D
.dst
.writey
= 1;
3577 pAsm
->D
.dst
.writez
= 0;
3578 pAsm
->D
.dst
.writew
= 0;
3579 pAsm
->S
[0].src
.rtype
= srcType
;
3580 pAsm
->S
[0].src
.reg
= srcReg
;
3581 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3582 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
);
3583 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
3584 pAsm
->S
[1].src
.reg
= tmp
;
3585 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
3586 noneg_PVSSRC(&(pAsm
->S
[1].src
));
3587 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_0
;
3588 pAsm
->S
[1].src
.swizzley
= SQ_SEL_0
;
3589 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_0
;
3590 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_0
;
3591 if( GL_FALSE
== next_ins(pAsm
) )
3596 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3601 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Y
, SQ_SEL_Y
, SQ_SEL_Y
, SQ_SEL_Y
);
3603 /* dst.z = log(src.y) */
3604 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_LOG_CLAMPED
;
3605 pAsm
->D
.dst
.math
= 1;
3606 pAsm
->D
.dst
.rtype
= dstType
;
3607 pAsm
->D
.dst
.reg
= dstReg
;
3608 pAsm
->D
.dst
.writex
= 0;
3609 pAsm
->D
.dst
.writey
= 0;
3610 pAsm
->D
.dst
.writez
= 1;
3611 pAsm
->D
.dst
.writew
= 0;
3612 pAsm
->S
[0].src
.rtype
= srcType
;
3613 pAsm
->S
[0].src
.reg
= srcReg
;
3614 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3615 if( GL_FALSE
== next_ins(pAsm
) )
3620 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3625 if( GL_FALSE
== assemble_src(pAsm
, 0, 2) )
3630 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
);
3632 swizzleagain_PVSSRC(&(pAsm
->S
[2].src
), SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
);
3634 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
3635 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MUL_LIT
;
3636 pAsm
->D
.dst
.math
= 1;
3637 pAsm
->D
.dst
.op3
= 1;
3638 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3639 pAsm
->D
.dst
.reg
= tmp
;
3640 pAsm
->D
.dst
.writex
= 1;
3641 pAsm
->D
.dst
.writey
= 0;
3642 pAsm
->D
.dst
.writez
= 0;
3643 pAsm
->D
.dst
.writew
= 0;
3645 pAsm
->S
[0].src
.rtype
= srcType
;
3646 pAsm
->S
[0].src
.reg
= srcReg
;
3647 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3649 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
3650 pAsm
->S
[1].src
.reg
= dstReg
;
3651 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
3652 noneg_PVSSRC(&(pAsm
->S
[1].src
));
3653 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_Z
;
3654 pAsm
->S
[1].src
.swizzley
= SQ_SEL_Z
;
3655 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_Z
;
3656 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_Z
;
3658 pAsm
->S
[2].src
.rtype
= srcType
;
3659 pAsm
->S
[2].src
.reg
= srcReg
;
3660 setaddrmode_PVSSRC(&(pAsm
->S
[2].src
), ADDR_ABSOLUTE
);
3662 if( GL_FALSE
== next_ins(pAsm
) )
3667 /* dst.z = exp(tmp.x) */
3668 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
3669 pAsm
->D
.dst
.math
= 1;
3670 pAsm
->D
.dst
.rtype
= dstType
;
3671 pAsm
->D
.dst
.reg
= dstReg
;
3672 pAsm
->D
.dst
.writex
= 0;
3673 pAsm
->D
.dst
.writey
= 0;
3674 pAsm
->D
.dst
.writez
= 1;
3675 pAsm
->D
.dst
.writew
= 0;
3677 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3678 pAsm
->S
[0].src
.reg
= tmp
;
3679 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3680 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3681 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_X
;
3682 pAsm
->S
[0].src
.swizzley
= SQ_SEL_X
;
3683 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_X
;
3684 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_X
;
3686 if( GL_FALSE
== next_ins(pAsm
) )
3694 GLboolean
assemble_MAX(r700_AssemblerBase
*pAsm
)
3696 if( GL_FALSE
== checkop2(pAsm
) )
3701 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
3703 if( GL_FALSE
== assemble_dst(pAsm
) )
3708 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3713 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3718 if( GL_FALSE
== next_ins(pAsm
) )
3726 GLboolean
assemble_MIN(r700_AssemblerBase
*pAsm
)
3728 if( GL_FALSE
== checkop2(pAsm
) )
3733 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MIN
;
3735 if( GL_FALSE
== assemble_dst(pAsm
) )
3740 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3745 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3750 if( GL_FALSE
== next_ins(pAsm
) )
3758 GLboolean
assemble_MOV(r700_AssemblerBase
*pAsm
)
3762 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3764 if (GL_FALSE
== assemble_dst(pAsm
))
3769 if (GL_FALSE
== assemble_src(pAsm
, 0, -1))
3774 if ( GL_FALSE
== next_ins(pAsm
) )
3782 GLboolean
assemble_MUL(r700_AssemblerBase
*pAsm
)
3784 if( GL_FALSE
== checkop2(pAsm
) )
3789 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
3791 if( GL_FALSE
== assemble_dst(pAsm
) )
3796 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3801 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3806 if( GL_FALSE
== next_ins(pAsm
) )
3814 GLboolean
assemble_POW(r700_AssemblerBase
*pAsm
)
3820 tmp
= gethelpr(pAsm
);
3822 // LG2 tmp.x, a.swizzle
3823 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_LOG_IEEE
;
3824 pAsm
->D
.dst
.math
= 1;
3826 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3827 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3828 pAsm
->D
.dst
.reg
= tmp
;
3829 nomask_PVSDST(&(pAsm
->D
.dst
));
3831 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3836 if( GL_FALSE
== next_ins(pAsm
) )
3841 // MUL tmp.x, tmp.x, b.swizzle
3842 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
3844 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3845 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3846 pAsm
->D
.dst
.reg
= tmp
;
3847 nomask_PVSDST(&(pAsm
->D
.dst
));
3849 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3850 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3851 pAsm
->S
[0].src
.reg
= tmp
;
3852 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3853 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3855 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3860 if( GL_FALSE
== next_ins(pAsm
) )
3865 // EX2 dst.mask, tmp.x
3867 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
3868 pAsm
->D
.dst
.math
= 1;
3870 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3871 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3872 pAsm
->D
.dst
.reg
= tmp
;
3873 nomask_PVSDST(&(pAsm
->D
.dst
));
3875 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3876 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3877 pAsm
->S
[0].src
.reg
= tmp
;
3878 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3879 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3881 if( GL_FALSE
== next_ins(pAsm
) )
3886 // Now replicate result to all necessary channels in destination
3887 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3889 if( GL_FALSE
== assemble_dst(pAsm
) )
3894 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3895 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3896 pAsm
->S
[0].src
.reg
= tmp
;
3898 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3899 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3901 if( GL_FALSE
== next_ins(pAsm
) )
3909 GLboolean
assemble_RCP(r700_AssemblerBase
*pAsm
)
3911 return assemble_math_function(pAsm
, SQ_OP2_INST_RECIP_IEEE
);
3914 GLboolean
assemble_RSQ(r700_AssemblerBase
*pAsm
)
3916 return assemble_math_function(pAsm
, SQ_OP2_INST_RECIPSQRT_IEEE
);
3919 GLboolean
assemble_SCS(r700_AssemblerBase
*pAsm
)
3925 tmp
= gethelpr(pAsm
);
3928 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_COS
;
3929 pAsm
->D
.dst
.math
= 1;
3931 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3932 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3933 pAsm
->D
.dst
.reg
= tmp
;
3934 pAsm
->D
.dst
.writex
= 1;
3936 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3941 if ( GL_FALSE
== next_ins(pAsm
) )
3947 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_SIN
;
3948 pAsm
->D
.dst
.math
= 1;
3950 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3951 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3952 pAsm
->D
.dst
.reg
= tmp
;
3953 pAsm
->D
.dst
.writey
= 1;
3955 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3960 if( GL_FALSE
== next_ins(pAsm
) )
3965 // MOV dst.mask, tmp
3966 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3968 if( GL_FALSE
== assemble_dst(pAsm
) )
3973 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3974 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3975 pAsm
->S
[0].src
.reg
= tmp
;
3977 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3978 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_0
;
3979 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_0
;
3981 if ( GL_FALSE
== next_ins(pAsm
) )
3989 GLboolean
assemble_LOGIC(r700_AssemblerBase
*pAsm
, BITS opcode
)
3991 if( GL_FALSE
== checkop2(pAsm
) )
3996 pAsm
->D
.dst
.opcode
= opcode
;
3997 //pAsm->D.dst.math = 1;
3999 if( GL_FALSE
== assemble_dst(pAsm
) )
4004 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4009 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4014 if( GL_FALSE
== next_ins(pAsm
) )
4022 GLboolean
assemble_LOGIC_PRED(r700_AssemblerBase
*pAsm
, BITS opcode
)
4024 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
4026 pAsm
->D
.dst
.opcode
= opcode
;
4027 pAsm
->D
.dst
.math
= 1;
4028 pAsm
->D
.dst
.predicated
= 1;
4030 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4031 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4032 pAsm
->D
.dst
.reg
= pAsm
->uHelpReg
;
4033 pAsm
->D
.dst
.writex
= 1;
4034 pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writez
= pAsm
->D
.dst
.writew
= 0;
4036 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4037 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4038 pAsm
->S
[0].src
.reg
= pAsm
->last_cond_register
+ pAsm
->starting_temp_register_number
;
4039 pAsm
->S
[0].src
.swizzlex
= pILInst
->DstReg
.CondSwizzle
& 0x7;
4040 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4042 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
4043 pAsm
->S
[1].src
.reg
= pAsm
->uHelpReg
;
4044 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
4045 noneg_PVSSRC(&(pAsm
->S
[1].src
));
4046 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_0
;
4047 pAsm
->S
[1].src
.swizzley
= SQ_SEL_0
;
4048 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_0
;
4049 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_0
;
4051 if( GL_FALSE
== next_ins(pAsm
) )
4059 GLboolean
assemble_SGE(r700_AssemblerBase
*pAsm
)
4061 if( GL_FALSE
== checkop2(pAsm
) )
4066 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_SETGE
;
4068 if( GL_FALSE
== assemble_dst(pAsm
) )
4073 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4078 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4083 if( GL_FALSE
== next_ins(pAsm
) )
4091 GLboolean
assemble_SLT(r700_AssemblerBase
*pAsm
)
4093 if( GL_FALSE
== checkop2(pAsm
) )
4098 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_SETGT
;
4100 if( GL_FALSE
== assemble_dst(pAsm
) )
4105 if( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
4110 if( GL_FALSE
== assemble_src(pAsm
, 1, 0) )
4115 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * Emitter entry point for the STP opcode.
 *
 * NOTE(review): no statements of the body are visible here; presumably a
 * stub -- confirm what it returns.
 */
4123 GLboolean
assemble_STP(r700_AssemblerBase
*pAsm
)
/*
 * Emit a texture sample for the current instruction.
 *
 * Visible stages:
 *   - classify source 0's register file; constant-like files set src_const,
 *     in which case mov_temp(pAsm, 0) is called and a barrier is requested;
 *   - OPCODE_TXP: RECIP_IEEE tmp.w = src.w, then MUL tmp.xyz = src * tmp.w,
 *     and aArgSubst[1] is pointed at tmp;
 *   - TEXTURE_CUBE_INDEX: CUBE into tmp1, RECIP_IEEE of |tmp1.z|, MULADD
 *     into tmp2 with a 1.5 literal, MOV tmp1.xy = tmp2.xy, and aArgSubst[1]
 *     is pointed at tmp1;
 *   - opcode selection (SQ_TEX_INST_SAMPLE_L for OPCODE_TXB, otherwise
 *     SQ_TEX_INST_SAMPLE), is_tex / need_tex_barrier flags, sampler unit in
 *     S[1], then tex_dst(), tex_src() and next_ins().
 *
 * NOTE(review): several pieces are not visible here and must be confirmed
 * against the complete function: the remaining arms of the switch, the
 * keyword joining the TXB / non-TXB opcode assignments, and any next_ins()
 * calls separating the cube RCP, MULADD and MOV setups.
 * NOTE(review): confirm SQ_TEX_INST_SAMPLE_L is the intended opcode for the
 * bias variant OPCODE_TXB.
 */
4128 GLboolean
assemble_TEX(r700_AssemblerBase
*pAsm
)
4130 GLboolean src_const
;
4131 GLboolean need_barrier
= GL_FALSE
;
/* Classify the register file of source 0 */
4135 switch (pAsm
->pILInst
[pAsm
->uiCurInst
].SrcReg
[0].File
)
4137 case PROGRAM_UNIFORM
:
4138 case PROGRAM_CONSTANT
:
4139 case PROGRAM_LOCAL_PARAM
:
4140 case PROGRAM_ENV_PARAM
:
4141 case PROGRAM_STATE_VAR
:
4142 src_const
= GL_TRUE
;
4144 case PROGRAM_TEMPORARY
:
4147 src_const
= GL_FALSE
;
/* Constant-like sources go through mov_temp() and request a barrier */
4151 if (GL_TRUE
== src_const
)
4153 if ( GL_FALSE
== mov_temp(pAsm
, 0) )
4155 need_barrier
= GL_TRUE
;
/* TXP: tmp.w = 1 / src.w, then tmp.xyz = src * tmp.w */
4158 if (pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_TXP
)
4160 GLuint tmp
= gethelpr(pAsm
);
4161 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_RECIP_IEEE
;
4162 pAsm
->D
.dst
.math
= 1;
4163 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4164 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4165 pAsm
->D
.dst
.reg
= tmp
;
4166 pAsm
->D
.dst
.writew
= 1;
4168 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4172 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
);
4173 if( GL_FALSE
== next_ins(pAsm
) )
4178 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
4179 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4180 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4181 pAsm
->D
.dst
.reg
= tmp
;
4182 pAsm
->D
.dst
.writex
= 1;
4183 pAsm
->D
.dst
.writey
= 1;
4184 pAsm
->D
.dst
.writez
= 1;
4185 pAsm
->D
.dst
.writew
= 0;
4187 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4191 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
4192 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
4193 pAsm
->S
[1].src
.reg
= tmp
;
4194 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_W
);
4196 if( GL_FALSE
== next_ins(pAsm
) )
4201 pAsm
->aArgSubst
[1] = tmp
;
4202 need_barrier
= GL_TRUE
;
/* Cube maps: derive the face coordinates in tmp1 (tmp2 is scratch) */
4205 if (pAsm
->pILInst
[pAsm
->uiCurInst
].TexSrcTarget
== TEXTURE_CUBE_INDEX
)
4207 GLuint tmp1
= gethelpr(pAsm
);
4208 GLuint tmp2
= gethelpr(pAsm
);
4210 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
4211 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_CUBE
;
4212 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4213 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4214 pAsm
->D
.dst
.reg
= tmp1
;
4215 nomask_PVSDST(&(pAsm
->D
.dst
));
4217 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4222 if( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
4227 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Z
, SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_Y
);
4228 swizzleagain_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Y
, SQ_SEL_X
, SQ_SEL_Z
, SQ_SEL_Z
);
4230 if( GL_FALSE
== next_ins(pAsm
) )
4235 /* tmp1.z = RCP_e(|tmp1.z|) */
4236 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_RECIP_IEEE
;
4237 pAsm
->D
.dst
.math
= 1;
4238 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4239 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4240 pAsm
->D
.dst
.reg
= tmp1
;
4241 pAsm
->D
.dst
.writez
= 1;
4243 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4244 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4245 pAsm
->S
[0].src
.reg
= tmp1
;
4246 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_Z
;
4247 pAsm
->S
[0].src
.abs
= 1;
4251 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
4252 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
4253 * muladd has no writemask, have to use another temp
4255 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
4256 pAsm
->D
.dst
.op3
= 1;
4257 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4258 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4259 pAsm
->D
.dst
.reg
= tmp2
;
4261 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4262 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4263 pAsm
->S
[0].src
.reg
= tmp1
;
4264 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
4265 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
4266 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
4267 pAsm
->S
[1].src
.reg
= tmp1
;
4268 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Z
);
4269 setaddrmode_PVSSRC(&(pAsm
->S
[2].src
), ADDR_ABSOLUTE
);
4270 /* immediate c 1.5 */
4271 pAsm
->D2
.dst2
.literal_slots
= 1;
4272 pAsm
->C
[0].f
= 1.5F
;
4273 pAsm
->S
[2].src
.rtype
= SRC_REC_LITERAL
;
4274 pAsm
->S
[2].src
.reg
= tmp1
;
4275 setswizzle_PVSSRC(&(pAsm
->S
[2].src
), SQ_SEL_X
);
4279 /* tmp1.xy = temp2.xy */
4280 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
4281 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4282 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4283 pAsm
->D
.dst
.reg
= tmp1
;
4284 pAsm
->D
.dst
.writex
= 1;
4285 pAsm
->D
.dst
.writey
= 1;
4286 pAsm
->D
.dst
.writez
= 0;
4287 pAsm
->D
.dst
.writew
= 0;
4289 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4290 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4291 pAsm
->S
[0].src
.reg
= tmp2
;
4292 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
4295 pAsm
->aArgSubst
[1] = tmp1
;
4296 need_barrier
= GL_TRUE
;
/* Select the sampling opcode */
4300 if(pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_TXB
)
4302 pAsm
->D
.dst
.opcode
= SQ_TEX_INST_SAMPLE_L
;
4306 pAsm
->D
.dst
.opcode
= SQ_TEX_INST_SAMPLE
;
/* NOTE(review): the is_tex assignment and the need_barrier test appear
 * twice in a row; the first test has no visible body of its own, so it only
 * guards the repeated assignment.  Looks like a duplicated paste -- confirm
 * and collapse. */
4309 pAsm
->is_tex
= GL_TRUE
;
4310 if ( GL_TRUE
== need_barrier
)
4312 pAsm
->is_tex
= GL_TRUE
;
4313 if ( GL_TRUE
== need_barrier
)
4315 pAsm
->need_tex_barrier
= GL_TRUE
;
4317 // Set src1 to tex unit id
4318 pAsm
->S
[1].src
.reg
= pAsm
->SamplerUnits
[pAsm
->pILInst
[pAsm
->uiCurInst
].TexSrcUnit
];
4319 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
4321 //No sw info from mesa compiler, so hard code here.
4322 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_X
;
4323 pAsm
->S
[1].src
.swizzley
= SQ_SEL_Y
;
4324 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_Z
;
4325 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_W
;
4327 if( GL_FALSE
== tex_dst(pAsm
) )
4332 if( GL_FALSE
== tex_src(pAsm
) )
4337 if(pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_TXP
)
4339 /* hopefully did swizzles before */
4340 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
4343 if(pAsm
->pILInst
[pAsm
->uiCurInst
].TexSrcTarget
== TEXTURE_CUBE_INDEX
)
4345 /* SAMPLE dst, tmp.yxwy, CUBE */
4346 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_Y
;
4347 pAsm
->S
[0].src
.swizzley
= SQ_SEL_X
;
4348 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_W
;
4349 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_Y
;
4352 if ( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_XPD: emit the ALU sequence for the IL cross-product instruction.
 *
 * Visible steps:
 *  1. After checkop2(), a MUL of src0.zxy0 * src1.yzx0 is written (all
 *     channels) to the temporary returned by gethelpr().
 *  2. A three-operand MULADD computes src0.yzx0 * src1.zxy0 plus the negated
 *     temporary from step 1 (third source).
 *  3. When the IL destination write mask is not 0xF, tmp is re-fetched from
 *     gethelpr() and used as the MULADD destination, and a final MOV from
 *     tmp is emitted after assemble_dst().
 *
 * NOTE(review): this listing looks incomplete (no braces, return statements
 * or else keywords are visible), so the exact branch structure and the
 * returned values should be confirmed against the full file.
 */
4360 GLboolean
assemble_XPD(r700_AssemblerBase
*pAsm
)
4364 if( GL_FALSE
== checkop2(pAsm
) )
4369 tmp
= gethelpr(pAsm
);
/* First instruction: MUL into the helper temporary. */
4371 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
4373 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4374 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4375 pAsm
->D
.dst
.reg
= tmp
;
4376 nomask_PVSDST(&(pAsm
->D
.dst
));
4378 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4383 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4388 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_Y
, SQ_SEL_0
);
4389 swizzleagain_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Y
, SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_0
);
4391 if( GL_FALSE
== next_ins(pAsm
) )
/* Second instruction: three-operand MULADD. */
4396 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
4397 pAsm
->D
.dst
.op3
= 1;
/* Partial write mask: send the MULADD result to tmp instead of the real dst. */
4399 if(0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
4401 tmp
= gethelpr(pAsm
);
4403 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4404 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4405 pAsm
->D
.dst
.reg
= tmp
;
4407 nomask_PVSDST(&(pAsm
->D
.dst
));
/* NOTE(review): presumably the else branch (full write mask) - confirm. */
4411 if( GL_FALSE
== assemble_dst(pAsm
) )
4417 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4422 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4427 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Y
, SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_0
);
4428 swizzleagain_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_Y
, SQ_SEL_0
);
4430 // result1 + (neg) result0
4431 setaddrmode_PVSSRC(&(pAsm
->S
[2].src
),ADDR_ABSOLUTE
);
4432 pAsm
->S
[2].src
.rtype
= SRC_REG_TEMPORARY
;
4433 pAsm
->S
[2].src
.reg
= tmp
;
4435 neg_PVSSRC(&(pAsm
->S
[2].src
));
4436 noswizzle_PVSSRC(&(pAsm
->S
[2].src
));
4438 if( GL_FALSE
== next_ins(pAsm
) )
/* Partial write mask: copy tmp to the real destination with a MOV. */
4444 if(0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
4446 if( GL_FALSE
== assemble_dst(pAsm
) )
4451 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
4453 // Use tmp as source
4454 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4455 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4456 pAsm
->S
[0].src
.reg
= tmp
;
4458 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4459 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
4461 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_EXPORT: takes the assembler state and returns a GLboolean.
 * NOTE(review): no body statements are visible in this listing; confirm
 * what (if anything) the function does against the full file.
 */
4470 GLboolean
assemble_EXPORT(r700_AssemblerBase
*pAsm
)
/*
 * decreaseCurrent: lower the running stack-depth counter
 * CALLSTACK[CALLSP].current of the active call frame.
 *
 * The visible decrements are 1, 4, 4 and 1.
 * NOTE(review): the labels that select among them are not visible here;
 * presumably a switch on uReason - confirm which reason maps to which step.
 */
4475 static inline void decreaseCurrent(r700_AssemblerBase
*pAsm
, GLuint uReason
)
4480 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
--;
4483 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
-= 4;
4486 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
-= 4;
4489 /* TODO : for 16 vp asic, should -= 2; */
4490 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
-= 1;
/*
 * checkStackDepth: update the stack-depth bookkeeping of the active call
 * frame (CALLSTACK[CALLSP]).
 *
 * When bCheckMaxOnly is GL_TRUE only .max is touched: it is raised to
 * current + 1 or current + 4 if that value exceeds it.  Otherwise .current
 * is increased (visible steps: 1, 4, 4 and 1) and .max is raised to
 * .current when .current exceeds it.
 *
 * NOTE(review): the labels that pick a step for a given uReason are not
 * visible in this listing - confirm the mapping against the full file.
 */
4495 static inline void checkStackDepth(r700_AssemblerBase
*pAsm
, GLuint uReason
, GLboolean bCheckMaxOnly
)
/* Max-only mode: peek at the would-be depth without changing .current. */
4497 if(GL_TRUE
== bCheckMaxOnly
)
4502 if((pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+ 1)
4503 > pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
)
4505 pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
=
4506 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+ 1;
4510 if((pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+ 4)
4511 > pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
)
4513 pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
=
4514 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+ 4;
/* Normal mode: grow .current, then track the high-water mark in .max. */
4524 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
++;
4527 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+= 4;
4530 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+= 4;
4533 /* TODO : for 16 vp asic, should += 2; */
4534 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+= 1;
4538 if(pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
4539 > pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
)
4541 pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
=
4542 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
;
/*
 * jumpToOffest: append a control-flow JUMP instruction.
 *
 * After add_cf_instruction() the new clause gets pop_count = pops,
 * cond = SQ_CF_COND_ACTIVE, barrier = 1 and a target address equal to the
 * clause's own m_uIndex plus offset; the remaining Word1 fields are zeroed.
 *
 * NOTE(review): the failure path and the return value are not visible in
 * this listing.
 */
4546 GLboolean
jumpToOffest(r700_AssemblerBase
*pAsm
, GLuint pops
, GLint offset
)
4548 if(GL_FALSE
== add_cf_instruction(pAsm
) )
4553 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= pops
;
4554 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
4555 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
4557 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
4558 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
4559 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_JUMP
;
4560 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
4562 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Target is relative to this clause's own index. */
4564 pAsm
->cf_current_cf_clause_ptr
->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
+ offset
;
/*
 * pops: append a control-flow POP instruction.
 *
 * The new clause gets pop_count = pops, cond = SQ_CF_COND_ACTIVE,
 * barrier = 1 and an address of its own m_uIndex + 1 (the next CF
 * instruction); the remaining Word1 fields are zeroed.
 *
 * NOTE(review): the failure path and the return value are not visible in
 * this listing.
 */
4569 GLboolean
pops(r700_AssemblerBase
*pAsm
, GLuint pops
)
4571 if(GL_FALSE
== add_cf_instruction(pAsm
) )
4576 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= pops
;
4577 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
4578 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
4580 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
4581 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
4582 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_POP
;
4584 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
4586 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
4587 pAsm
->cf_current_cf_clause_ptr
->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
+ 1;
/*
 * assemble_IF: open an IF construct.
 *
 * Visible steps: switch the ALU clause opcode to ALU_PUSH_BEFORE, emit the
 * predicate via assemble_LOGIC_PRED(PRED_SETNE), append a CF JUMP whose
 * pop_count is 1 when there is no ELSE and 0 otherwise, and record the JUMP
 * in fc_stack[FCSP] (type FC_IF, no mid entries, first = the JUMP) so its
 * address can be patched later.  Finally the stack depth is bumped with
 * checkStackDepth(FC_PUSH_VPM).
 *
 * bHasElse: GL_TRUE when the matching IL IF has an ELSE branch.
 *
 * NOTE(review): braces, else keywords, return statements and any FCSP
 * update are not visible in this listing - confirm against the full file.
 */
4592 GLboolean
assemble_IF(r700_AssemblerBase
*pAsm
, GLboolean bHasElse
)
4594 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
4596 assemble_LOGIC_PRED(pAsm
, SQ_OP2_INST_PRED_SETNE
);
4599 if(GL_FALSE
== add_cf_instruction(pAsm
) )
4604 if(GL_TRUE
!= bHasElse
)
4606 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
4610 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 0;
4612 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
4613 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
4615 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
4616 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
4617 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_JUMP
;
4618 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
4620 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Remember the JUMP so ELSE/ENDIF can patch its target address. */
4623 pAsm
->fc_stack
[pAsm
->FCSP
].type
= FC_IF
;
4624 pAsm
->fc_stack
[pAsm
->FCSP
].mid
= NULL
;
4625 pAsm
->fc_stack
[pAsm
->FCSP
].midLen
= 0;
4626 pAsm
->fc_stack
[pAsm
->FCSP
].first
= pAsm
->cf_current_cf_clause_ptr
;
4628 #ifndef USE_CF_FOR_POP_AFTER
4629 if(GL_TRUE
!= bHasElse
)
4631 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU_POP_AFTER
;
4633 #endif /* USE_CF_FOR_POP_AFTER */
4635 checkStackDepth(pAsm
, FC_PUSH_VPM
, GL_FALSE
);
/*
 * assemble_ELSE: emit the ELSE of the IF currently on top of fc_stack.
 *
 * Visible steps: append a CF ELSE (pop_count 1), (re)allocate the mid array
 * of fc_stack[FCSP] and store the ELSE clause in mid[0], then patch the
 * address of the IF's JUMP (fc_stack[FCSP].first) to the active CF list's
 * uNumOfNode - 1.
 *
 * NOTE(review): in the visible lines the _mesa_realloc() result is
 * dereferenced without a NULL check; one argument line of that call and the
 * function's return statements are also not visible in this listing.
 */
4640 GLboolean
assemble_ELSE(r700_AssemblerBase
*pAsm
)
4642 if(GL_FALSE
== add_cf_instruction(pAsm
) )
4647 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1; ///
4648 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
4649 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
4651 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
4652 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
4653 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_ELSE
;
4654 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
4656 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Keep the ELSE clause in mid[0]; ENDIF patches its address later. */
4658 pAsm
->fc_stack
[pAsm
->FCSP
].mid
= (R700ControlFlowGenericClause
**)_mesa_realloc( (void *)pAsm
->fc_stack
[pAsm
->FCSP
].mid
,
4660 sizeof(R700ControlFlowGenericClause
*) );
4661 pAsm
->fc_stack
[pAsm
->FCSP
].mid
[0] = pAsm
->cf_current_cf_clause_ptr
;
4662 //pAsm->fc_stack[pAsm->FCSP].unNumMid = 1;
4664 #ifndef USE_CF_FOR_POP_AFTER
4665 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU_POP_AFTER
;
4666 #endif /* USE_CF_FOR_POP_AFTER */
/* Point the IF's JUMP at the ELSE clause just added. */
4668 pAsm
->fc_stack
[pAsm
->FCSP
].first
->m_Word0
.f
.addr
= pAsm
->pR700Shader
->plstCFInstructions_active
->uNumOfNode
- 1;
/*
 * assemble_ENDIF: close the IF construct on top of fc_stack.
 *
 * Visible steps: reset the ALU clause opcode to SQ_CF_INST_ALU, patch the
 * pending branch address to the active CF list's current uNumOfNode (the
 * IF's JUMP when there was no ELSE, otherwise mid[0], i.e. the ELSE), free
 * the mid array if one was allocated, report an error when the stack entry
 * is not FC_IF, and undo the depth accounting with
 * decreaseCurrent(FC_PUSH_VPM).
 *
 * NOTE(review): the body of the USE_CF_FOR_POP_AFTER section, the else
 * keyword, the error path and any FCSP update are not visible in this
 * listing.  Also note that the entry type is checked only after the entry
 * has already been used.
 */
4673 GLboolean
assemble_ENDIF(r700_AssemblerBase
*pAsm
)
4675 #ifdef USE_CF_FOR_POP_AFTER
4677 #endif /* USE_CF_FOR_POP_AFTER */
4679 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
4681 if(NULL
== pAsm
->fc_stack
[pAsm
->FCSP
].mid
)
4683 /* no else in between */
4684 pAsm
->fc_stack
[pAsm
->FCSP
].first
->m_Word0
.f
.addr
= pAsm
->pR700Shader
->plstCFInstructions_active
->uNumOfNode
;
4688 pAsm
->fc_stack
[pAsm
->FCSP
].mid
[0]->m_Word0
.f
.addr
= pAsm
->pR700Shader
->plstCFInstructions_active
->uNumOfNode
;
4691 if(NULL
!= pAsm
->fc_stack
[pAsm
->FCSP
].mid
)
4693 FREE(pAsm
->fc_stack
[pAsm
->FCSP
].mid
);
4696 if(pAsm
->fc_stack
[pAsm
->FCSP
].type
!= FC_IF
)
4698 radeon_error("if/endif in shader code are not paired. \n");
4704 decreaseCurrent(pAsm
, FC_PUSH_VPM
);
/*
 * assemble_BGNLOOP: open a loop.
 *
 * Visible steps: append a CF LOOP_START_NO_AL instruction (pop_count 0,
 * barrier 1), initialise fc_stack[FCSP] as an FC_LOOP entry with an empty
 * mid list and first = the loop-start clause, then account for the stack
 * usage with checkStackDepth(FC_LOOP).
 *
 * NOTE(review): the failure path, return statements and any FCSP update
 * are not visible in this listing.
 */
4709 GLboolean
assemble_BGNLOOP(r700_AssemblerBase
*pAsm
)
4711 if(GL_FALSE
== add_cf_instruction(pAsm
) )
4717 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 0;
4718 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
4719 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
4721 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
4722 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
4723 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_LOOP_START_NO_AL
;
4724 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
4726 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Record the loop start; break/continue clauses are collected in mid. */
4729 pAsm
->fc_stack
[pAsm
->FCSP
].type
= FC_LOOP
;
4730 pAsm
->fc_stack
[pAsm
->FCSP
].mid
= NULL
;
4731 pAsm
->fc_stack
[pAsm
->FCSP
].unNumMid
= 0;
4732 pAsm
->fc_stack
[pAsm
->FCSP
].midLen
= 0;
4733 pAsm
->fc_stack
[pAsm
->FCSP
].first
= pAsm
->cf_current_cf_clause_ptr
;
4735 checkStackDepth(pAsm
, FC_LOOP
, GL_FALSE
);
/*
 * assemble_BRK: emit a conditional loop break (CF-instruction variant,
 * compiled when USE_CF_FOR_CONTINUE_BREAK is defined).
 *
 * Visible steps: set the ALU clause opcode to ALU_PUSH_BEFORE and emit the
 * predicate (PRED_SETNE); walk fc_stack from FCSP downwards looking for an
 * FC_LOOP entry and report an error when the break is not inside a loop;
 * append a CF LOOP_BREAK (pop_count 1) and add it to that loop's mid array
 * (grown by one slot with _mesa_realloc) so its address can be patched
 * later; append a CF POP (pop_count 1, addr = own index + 1); finally
 * raise the recorded maximum depth via checkStackDepth(FC_PUSH_VPM, GL_TRUE).
 *
 * NOTE(review): loop bodies, error/return paths and braces are not visible
 * in this listing, and the realloc result is used without a visible NULL
 * check.
 */
4740 GLboolean
assemble_BRK(r700_AssemblerBase
*pAsm
)
4742 #ifdef USE_CF_FOR_CONTINUE_BREAK
4744 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
4746 assemble_LOGIC_PRED(pAsm
, SQ_OP2_INST_PRED_SETNE
);
/* Find the innermost enclosing loop entry. */
4748 unsigned int unFCSP
;
4749 for(unFCSP
=pAsm
->FCSP
; unFCSP
>0; unFCSP
--)
4751 if(FC_LOOP
== pAsm
->fc_stack
[unFCSP
].type
)
4758 radeon_error("Break is not inside loop/endloop pair.\n");
4762 if(GL_FALSE
== add_cf_instruction(pAsm
) )
4768 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
4769 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
4770 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
4772 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
4773 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
4774 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_LOOP_BREAK
;
4776 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
4778 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Queue the LOOP_BREAK on the loop entry for later address patching. */
4780 pAsm
->fc_stack
[unFCSP
].mid
= (R700ControlFlowGenericClause
**)_mesa_realloc(
4781 (void *)pAsm
->fc_stack
[unFCSP
].mid
,
4782 sizeof(R700ControlFlowGenericClause
*) * pAsm
->fc_stack
[unFCSP
].unNumMid
,
4783 sizeof(R700ControlFlowGenericClause
*) * (pAsm
->fc_stack
[unFCSP
].unNumMid
+ 1) );
4784 pAsm
->fc_stack
[unFCSP
].mid
[pAsm
->fc_stack
[unFCSP
].unNumMid
] = pAsm
->cf_current_cf_clause_ptr
;
4785 pAsm
->fc_stack
[unFCSP
].unNumMid
++;
/* Follow-up POP instruction. */
4787 if(GL_FALSE
== add_cf_instruction(pAsm
) )
4792 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
4793 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
4794 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
4796 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
4797 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
4798 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_POP
;
4800 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
4802 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
4803 pAsm
->cf_current_cf_clause_ptr
->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
+ 1;
4805 checkStackDepth(pAsm
, FC_PUSH_VPM
, GL_TRUE
);
4807 #endif //USE_CF_FOR_CONTINUE_BREAK
/*
 * assemble_CONT: emit a conditional loop continue (CF-instruction variant,
 * compiled when USE_CF_FOR_CONTINUE_BREAK is defined).
 *
 * Mirrors the visible structure of assemble_BRK with LOOP_CONTINUE in place
 * of LOOP_BREAK: predicate with ALU_PUSH_BEFORE, search fc_stack downwards
 * for the enclosing FC_LOOP (error if none), append a CF LOOP_CONTINUE
 * (pop_count 1) and queue it in the loop's mid array, append a CF POP
 * (pop_count 1, addr = own index + 1), then
 * checkStackDepth(FC_PUSH_VPM, GL_TRUE).
 *
 * NOTE(review): loop bodies, error/return paths and braces are not visible
 * in this listing, and the realloc result is used without a visible NULL
 * check.
 */
4811 GLboolean
assemble_CONT(r700_AssemblerBase
*pAsm
)
4813 #ifdef USE_CF_FOR_CONTINUE_BREAK
4814 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
4816 assemble_LOGIC_PRED(pAsm
, SQ_OP2_INST_PRED_SETNE
);
/* Find the innermost enclosing loop entry. */
4818 unsigned int unFCSP
;
4819 for(unFCSP
=pAsm
->FCSP
; unFCSP
>0; unFCSP
--)
4821 if(FC_LOOP
== pAsm
->fc_stack
[unFCSP
].type
)
4828 radeon_error("Continue is not inside loop/endloop pair.\n");
4832 if(GL_FALSE
== add_cf_instruction(pAsm
) )
4838 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
4839 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
4840 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
4842 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
4843 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
4844 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_LOOP_CONTINUE
;
4846 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
4848 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Queue the LOOP_CONTINUE on the loop entry for later address patching. */
4850 pAsm
->fc_stack
[unFCSP
].mid
= (R700ControlFlowGenericClause
**)_mesa_realloc(
4851 (void *)pAsm
->fc_stack
[unFCSP
].mid
,
4852 sizeof(R700ControlFlowGenericClause
*) * pAsm
->fc_stack
[unFCSP
].unNumMid
,
4853 sizeof(R700ControlFlowGenericClause
*) * (pAsm
->fc_stack
[unFCSP
].unNumMid
+ 1) );
4854 pAsm
->fc_stack
[unFCSP
].mid
[pAsm
->fc_stack
[unFCSP
].unNumMid
] = pAsm
->cf_current_cf_clause_ptr
;
4855 pAsm
->fc_stack
[unFCSP
].unNumMid
++;
/* Follow-up POP instruction. */
4857 if(GL_FALSE
== add_cf_instruction(pAsm
) )
4862 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
4863 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
4864 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
4866 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
4867 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
4868 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_POP
;
4870 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
4872 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
4873 pAsm
->cf_current_cf_clause_ptr
->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
+ 1;
4875 checkStackDepth(pAsm
, FC_PUSH_VPM
, GL_TRUE
);
4877 #endif /* USE_CF_FOR_CONTINUE_BREAK */
/*
 * assemble_ENDLOOP: close the loop on top of fc_stack.
 *
 * Visible steps:
 *  - append a CF LOOP_END whose address is the loop start's index + 1, and
 *    patch the loop start's address to the LOOP_END's index + 1;
 *  - (USE_CF_FOR_CONTINUE_BREAK) point every queued mid clause
 *    (break/continue) at the LOOP_END's index, then free the mid array;
 *  - report an error when the stack entry is not FC_LOOP;
 *  - when HAS_CURRENT_LOOPRET is set in unCFflags, walk the outer fc_stack
 *    entries down to the current call frame's FCSP_BeforeEntry, calling
 *    breakLoopOnFlag() for an enclosing FC_LOOP; if the walk reaches the
 *    frame boundary, emit returnOnFlag() and clear HAS_CURRENT_LOOPRET;
 *  - undo the depth accounting with decreaseCurrent(FC_LOOP).
 *
 * NOTE(review): local declarations, braces, the body of the FC_IF branch,
 * the #else of the USE_CF_FOR_POP_AFTER section and return statements are
 * not visible in this listing - confirm the control flow in the full file.
 */
4882 GLboolean
assemble_ENDLOOP(r700_AssemblerBase
*pAsm
)
4886 if(GL_FALSE
== add_cf_instruction(pAsm
) )
4892 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 0;
4893 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
4894 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
4896 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
4897 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
4898 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_LOOP_END
;
4899 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
4901 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Cross-link LOOP_END and the loop start. */
4903 pAsm
->cf_current_cf_clause_ptr
->m_Word0
.f
.addr
= pAsm
->fc_stack
[pAsm
->FCSP
].first
->m_uIndex
+ 1;
4904 pAsm
->fc_stack
[pAsm
->FCSP
].first
->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
+ 1;
4906 #ifdef USE_CF_FOR_CONTINUE_BREAK
/* Resolve the queued break/continue clauses to the LOOP_END. */
4907 for(i
=0; i
<pAsm
->fc_stack
[pAsm
->FCSP
].unNumMid
; i
++)
4909 pAsm
->fc_stack
[pAsm
->FCSP
].mid
[i
]->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
;
4911 if(NULL
!= pAsm
->fc_stack
[pAsm
->FCSP
].mid
)
4913 FREE(pAsm
->fc_stack
[pAsm
->FCSP
].mid
);
4917 if(pAsm
->fc_stack
[pAsm
->FCSP
].type
!= FC_LOOP
)
4919 radeon_error("loop/endloop in shader code are not paired. \n");
/* A RET was seen inside this loop: propagate it outwards. */
4925 if((pAsm
->unCFflags
& HAS_CURRENT_LOOPRET
) > 0)
4927 for(unFCSP
=(pAsm
->FCSP
-1); unFCSP
>pAsm
->CALLSTACK
[pAsm
->CALLSP
].FCSP_BeforeEntry
; unFCSP
--)
4929 if(FC_LOOP
== pAsm
->fc_stack
[unFCSP
].type
)
4931 breakLoopOnFlag(pAsm
, unFCSP
);
4934 else if(FC_IF
== pAsm
->fc_stack
[unFCSP
].type
)
4939 if(unFCSP
<= pAsm
->CALLSTACK
[pAsm
->CALLSP
].FCSP_BeforeEntry
)
4941 #ifdef USE_CF_FOR_POP_AFTER
4942 returnOnFlag(pAsm
, unIF
);
4944 returnOnFlag(pAsm
, 0);
4945 #endif /* USE_CF_FOR_POP_AFTER */
4946 pAsm
->unCFflags
&= ~HAS_CURRENT_LOOPRET
;
4952 decreaseCurrent(pAsm
, FC_LOOP
);
/*
 * add_return_inst: append a control-flow RETURN instruction with
 * pop_count 0, cond = SQ_CF_COND_ACTIVE and barrier 1.
 *
 * NOTE(review): what happens when add_cf_instruction() fails is not visible
 * in this listing.
 */
4957 void add_return_inst(r700_AssemblerBase
*pAsm
)
4959 if(GL_FALSE
== add_cf_instruction(pAsm
) )
4963 //pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
4964 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 0;
4965 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
4966 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
4968 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
4969 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
4970 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_RETURN
;
4971 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
4973 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/*
 * assemble_BGNSUB: start assembling a subroutine.
 *
 * Visible steps: grow pAsm->subs by 10 entries when it is full; initialise
 * the next subs entry (IL offset = nILindex, empty local CF list); fill the
 * call frame CALLSTACK[CALLSP] (sub index, FCSP at entry, pointer to the
 * sub's local CF list, max/current depth reset to 0); make that local list
 * the active CF list; advance unSubArrayPointer; reset the ALU clause
 * opcode; mark fc_stack[FCSP] as FC_REP and account for it with
 * checkStackDepth(FC_REP).
 *
 * nILindex: index of the subroutine's first IL instruction.
 *
 * NOTE(review): the allocation-failure path, any CALLSP/FCSP updates and
 * the return statements are not visible in this listing.  The realloc
 * result overwrites pAsm->subs directly, so the old array is lost if the
 * call fails.
 */
4976 GLboolean
assemble_BGNSUB(r700_AssemblerBase
*pAsm
, GLint nILindex
)
4979 if( (pAsm
->unSubArrayPointer
+ 1) > pAsm
->unSubArraySize
)
4981 pAsm
->subs
= (SUB_OFFSET
*)_mesa_realloc( (void *)pAsm
->subs
,
4982 sizeof(SUB_OFFSET
) * pAsm
->unSubArraySize
,
4983 sizeof(SUB_OFFSET
) * (pAsm
->unSubArraySize
+ 10) );
4984 if(NULL
== pAsm
->subs
)
4988 pAsm
->unSubArraySize
+= 10;
/* Describe the new subroutine. */
4991 pAsm
->subs
[pAsm
->unSubArrayPointer
].subIL_Offset
= nILindex
;
4992 pAsm
->subs
[pAsm
->unSubArrayPointer
].lstCFInstructions_local
.pHead
=NULL
;
4993 pAsm
->subs
[pAsm
->unSubArrayPointer
].lstCFInstructions_local
.pTail
=NULL
;
4994 pAsm
->subs
[pAsm
->unSubArrayPointer
].lstCFInstructions_local
.uNumOfNode
=0;
/* Set up the call frame and switch CF emission to the sub's own list. */
4997 pAsm
->CALLSTACK
[pAsm
->CALLSP
].subDescIndex
= pAsm
->unSubArrayPointer
;
4998 pAsm
->CALLSTACK
[pAsm
->CALLSP
].FCSP_BeforeEntry
= pAsm
->FCSP
;
4999 pAsm
->CALLSTACK
[pAsm
->CALLSP
].plstCFInstructions_local
5000 = &(pAsm
->subs
[pAsm
->unSubArrayPointer
].lstCFInstructions_local
);
5001 pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
= 0;
5002 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
= 0;
5003 SetActiveCFlist(pAsm
->pR700Shader
,
5004 pAsm
->CALLSTACK
[pAsm
->CALLSP
].plstCFInstructions_local
);
5006 pAsm
->unSubArrayPointer
++;
5009 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
5012 pAsm
->fc_stack
[pAsm
->FCSP
].type
= FC_REP
;
5014 checkStackDepth(pAsm
, FC_REP
, GL_FALSE
);
/*
 * assemble_ENDSUB: finish the subroutine being assembled.
 *
 * Visible steps: report an error when the fc_stack entry is not FC_REP;
 * copy the call frame's maximum stack depth into the sub descriptor
 * (subs[...].unStackDepthMax); undo the depth accounting with
 * decreaseCurrent(FC_REP); make CALLSTACK[CALLSP].plstCFInstructions_local
 * the active CF list; reset the ALU clause opcode.
 *
 * NOTE(review): any CALLSP/FCSP updates between the visible statements, the
 * error path and the return statements are not visible in this listing.
 */
5019 GLboolean
assemble_ENDSUB(r700_AssemblerBase
*pAsm
)
5021 if(pAsm
->fc_stack
[pAsm
->FCSP
].type
!= FC_REP
)
5023 radeon_error("BGNSUB/ENDSUB in shader code are not paired. \n");
5027 /* copy max to sub structure */
5028 pAsm
->subs
[pAsm
->CALLSTACK
[pAsm
->CALLSP
].subDescIndex
].unStackDepthMax
5029 = pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
;
5031 decreaseCurrent(pAsm
, FC_REP
);
5034 SetActiveCFlist(pAsm
->pR700Shader
,
5035 pAsm
->CALLSTACK
[pAsm
->CALLSP
].plstCFInstructions_local
);
5037 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
/*
 * assemble_RET: emit a subroutine return.
 *
 * Visible steps: when inside a call (CALLSP > 0), walk fc_stack from FCSP
 * down to the call frame's FCSP_BeforeEntry; for an enclosing FC_LOOP the
 * return-in-loop flag is set (setRetInLoopFlag with SQ_SEL_1), the loop is
 * broken on that flag (breakLoopOnFlag) and LOOPRET_FLAGS is OR-ed into
 * unCFflags.  A CF RETURN is added with add_return_inst().
 *
 * NOTE(review): local declarations, the FC_IF branch body, the contents of
 * the USE_CF_FOR_POP_AFTER section, braces and return statements are not
 * visible in this listing, so it cannot be told from here whether
 * add_return_inst() is reached on every path.
 */
5044 GLboolean
assemble_RET(r700_AssemblerBase
*pAsm
)
5048 if(pAsm
->CALLSP
> 0)
5051 for(unFCSP
=pAsm
->FCSP
; unFCSP
>pAsm
->CALLSTACK
[pAsm
->CALLSP
].FCSP_BeforeEntry
; unFCSP
--)
5053 if(FC_LOOP
== pAsm
->fc_stack
[unFCSP
].type
)
5055 setRetInLoopFlag(pAsm
, SQ_SEL_1
);
5056 breakLoopOnFlag(pAsm
, unFCSP
);
5057 pAsm
->unCFflags
|= LOOPRET_FLAGS
;
5061 else if(FC_IF
== pAsm
->fc_stack
[unFCSP
].type
)
5068 #ifdef USE_CF_FOR_POP_AFTER
5073 #endif /* USE_CF_FOR_POP_AFTER */
5075 add_return_inst(pAsm
);
/*
 * assemble_CAL: emit a subroutine call and make sure the callee is
 * assembled.
 *
 * Visible steps:
 *  - reset the ALU clause opcode and append a CF CALL (call_count 1,
 *    pop_count 0, barrier 1);
 *  - grow pAsm->callers by 10 entries when full and record this call site
 *    (target IL offset nILindex and the CALL clause) so its address can be
 *    filled in later;
 *  - search the already known subs for nILindex; if found, raise the
 *    current frame's max depth to sub max + current depth and store the sub
 *    index in the caller record;
 *  - otherwise use the next sub index, assemble the subroutine with
 *    AssembleInstr(nILindex, ...) and then apply the same max-depth update.
 *
 * uiNumberInsts / pILInst: forwarded to AssembleInstr().
 *
 * NOTE(review): one parameter line (nILindex is used but its declaration is
 * not shown), local declarations, braces, loop exits, CALLSP updates and
 * return statements are not visible in this listing.  The realloc result
 * overwrites pAsm->callers directly, so the old array is lost on failure.
 */
5080 GLboolean
assemble_CAL(r700_AssemblerBase
*pAsm
,
5082 GLuint uiNumberInsts
,
5083 struct prog_instruction
*pILInst
)
5085 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
5087 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5092 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.call_count
= 1;
5093 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 0;
5094 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5095 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5097 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5098 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5099 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_CALL
;
5100 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5102 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Record the call site for later address fix-up. */
5105 if( (pAsm
->unCallerArrayPointer
+ 1) > pAsm
->unCallerArraySize
)
5107 pAsm
->callers
= (CALLER_POINTER
*)_mesa_realloc( (void *)pAsm
->callers
,
5108 sizeof(CALLER_POINTER
) * pAsm
->unCallerArraySize
,
5109 sizeof(CALLER_POINTER
) * (pAsm
->unCallerArraySize
+ 10) );
5110 if(NULL
== pAsm
->callers
)
5114 pAsm
->unCallerArraySize
+= 10;
5117 pAsm
->callers
[pAsm
->unCallerArrayPointer
].subIL_Offset
= nILindex
;
5118 pAsm
->callers
[pAsm
->unCallerArrayPointer
].cf_ptr
= pAsm
->cf_current_cf_clause_ptr
;
5120 pAsm
->unCallerArrayPointer
++;
/* Has this subroutine been assembled already? */
5126 for(j
=0; j
<pAsm
->unSubArrayPointer
; j
++)
5128 if(nILindex
== pAsm
->subs
[j
].subIL_Offset
)
5129 { /* compiled before */
5131 max
= pAsm
->subs
[j
].unStackDepthMax
5132 + pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
;
5133 if(max
> pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
)
5135 pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
= max
;
5138 pAsm
->callers
[pAsm
->unCallerArrayPointer
- 1].subDescIndex
= j
;
/* Not assembled yet: assemble it now under the next free sub index. */
5143 pAsm
->callers
[pAsm
->unCallerArrayPointer
- 1].subDescIndex
= pAsm
->unSubArrayPointer
;
5144 unSubID
= pAsm
->unSubArrayPointer
;
5146 bRet
= AssembleInstr(nILindex
, uiNumberInsts
, pILInst
, pAsm
);
5150 max
= pAsm
->subs
[unSubID
].unStackDepthMax
5151 + pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
;
5152 if(max
> pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
)
5154 pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
= max
;
/*
 * setRetInLoopFlag: emit a MOV that writes the x channel of the flag
 * register (pAsm->flag_reg_index) - the flag tested by testFlag().
 *
 * The destination is set up as a non-predicated, x-only write with
 * math = 1 (single-slot op, see the TODO below).  Two source set-ups are
 * visible: a literal source emitted with next_ins_literal(fLiteral), and a
 * temporary-register source (reg 0) whose four swizzles are all flagValue,
 * emitted with next_ins().
 *
 * flagValue: swizzle selector used for the source channels; callers in this
 * file pass SQ_SEL_0 or SQ_SEL_1.
 *
 * NOTE(review): this listing shows both variants back to back; presumably
 * only one is compiled (conditional-compilation lines may be missing) -
 * confirm against the full file.  Return statements are not visible either.
 */
5161 GLboolean
setRetInLoopFlag(r700_AssemblerBase
*pAsm
, GLuint flagValue
)
5163 GLfloat fLiteral
[2] = {0.1, 0.0};
5165 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
5166 pAsm
->D
.dst
.op3
= 0;
5167 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
5168 pAsm
->D
.dst
.reg
= pAsm
->flag_reg_index
;
5169 pAsm
->D
.dst
.writex
= 1;
5170 pAsm
->D
.dst
.writey
= 0;
5171 pAsm
->D
.dst
.writez
= 0;
5172 pAsm
->D
.dst
.writew
= 0;
5173 pAsm
->D2
.dst2
.literal_slots
= 1;
5174 pAsm
->D2
.dst2
.SaturateMode
= SATURATE_OFF
;
5175 pAsm
->D
.dst
.predicated
= 0;
5176 /* in reloc where dislink flag init inst, only one slot alu inst is handled. */
5177 pAsm
->D
.dst
.math
= 1; /* TODO : not math really, but one channel op, more generic alu assembler needed */
5178 pAsm
->D2
.dst2
.index_mode
= SQ_INDEX_LOOP
; /* Check this ! */
/* Variant 1: literal source. */
5180 pAsm
->S
[0].src
.rtype
= SRC_REC_LITERAL
;
5181 //pAsm->S[0].src.reg = 0;
5182 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
5183 noneg_PVSSRC(&(pAsm
->S
[0].src
));
5184 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_X
;
5185 pAsm
->S
[0].src
.swizzley
= SQ_SEL_Y
;
5186 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_Z
;
5187 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_W
;
5189 if( GL_FALSE
== next_ins_literal(pAsm
, &(fLiteral
[0])) )
/* Variant 2: register source with every swizzle set to flagValue. */
5194 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
5195 pAsm
->S
[0].src
.reg
= 0;
5196 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
5197 noneg_PVSSRC(&(pAsm
->S
[0].src
));
5198 pAsm
->S
[0].src
.swizzlex
= flagValue
;
5199 pAsm
->S
[0].src
.swizzley
= flagValue
;
5200 pAsm
->S
[0].src
.swizzlez
= flagValue
;
5201 pAsm
->S
[0].src
.swizzlew
= flagValue
;
5203 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * testFlag: emit a predicate-setting compare of the flag register.
 *
 * Visible steps: fetch a helper temporary, switch the ALU clause opcode to
 * ALU_PUSH_BEFORE, and set up a predicated PRED_SETE writing tmp.x with
 * source 0 = the flag register (pAsm->flag_reg_index).  Two set-ups for
 * source 1 are visible: a literal source emitted with
 * next_ins_literal(fLiteral), and a temporary-register source (reg 0) with
 * all swizzles SQ_SEL_1 emitted with next_ins().  The maximum stack depth
 * is then updated via checkStackDepth(FC_PUSH_VPM, GL_TRUE).
 *
 * NOTE(review): this listing shows both source-1 variants back to back;
 * presumably only one is compiled (conditional-compilation lines may be
 * missing) - confirm against the full file.  Return statements are not
 * visible either.
 */
5212 GLboolean
testFlag(r700_AssemblerBase
*pAsm
)
5214 GLfloat fLiteral
[2] = {0.1, 0.0};
5217 GLuint tmp
= gethelpr(pAsm
);
5218 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
5220 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_PRED_SETE
;
5221 pAsm
->D
.dst
.math
= 1;
5222 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
5223 pAsm
->D
.dst
.reg
= tmp
;
5224 pAsm
->D
.dst
.writex
= 1;
5225 pAsm
->D
.dst
.writey
= 0;
5226 pAsm
->D
.dst
.writez
= 0;
5227 pAsm
->D
.dst
.writew
= 0;
5228 pAsm
->D2
.dst2
.literal_slots
= 1;
5229 pAsm
->D2
.dst2
.SaturateMode
= SATURATE_OFF
;
5230 pAsm
->D
.dst
.predicated
= 1;
5231 pAsm
->D2
.dst2
.index_mode
= SQ_INDEX_LOOP
; /* Check this ! */
/* Source 0: the flag register. */
5233 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
5234 pAsm
->S
[0].src
.reg
= pAsm
->flag_reg_index
;
5235 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
5236 noneg_PVSSRC(&(pAsm
->S
[0].src
));
5237 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_X
;
5238 pAsm
->S
[0].src
.swizzley
= SQ_SEL_Y
;
5239 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_Z
;
5240 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_W
;
/* Source 1, variant 1: literal. */
5242 pAsm
->S
[1].src
.rtype
= SRC_REC_LITERAL
;
5243 //pAsm->S[1].src.reg = 0;
5244 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
5245 noneg_PVSSRC(&(pAsm
->S
[1].src
));
5246 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_X
;
5247 pAsm
->S
[1].src
.swizzley
= SQ_SEL_Y
;
5248 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_Z
;
5249 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_W
;
5251 if( GL_FALSE
== next_ins_literal(pAsm
, &(fLiteral
[0])) )
/* Source 1, variant 2: register source with all swizzles SQ_SEL_1. */
5256 pAsm
->S
[1].src
.rtype
= DST_REG_TEMPORARY
;
5257 pAsm
->S
[1].src
.reg
= 0;
5258 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
5259 noneg_PVSSRC(&(pAsm
->S
[1].src
));
5260 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_1
;
5261 pAsm
->S
[1].src
.swizzley
= SQ_SEL_1
;
5262 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_1
;
5263 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_1
;
5265 if( GL_FALSE
== next_ins(pAsm
) )
5271 checkStackDepth(pAsm
, FC_PUSH_VPM
, GL_TRUE
);
/*
 * returnOnFlag: emit the "return if the return-in-loop flag is set"
 * sequence.
 *
 * Visible calls, in order: a JUMP with pop_count 1 and offset 4
 * (jumpToOffest), a MOV that clears the flag (setRetInLoopFlag with
 * SQ_SEL_0), a POP of unIF + 1 levels, and a CF RETURN.
 *
 * unIF: number of additional levels to pop before returning.
 *
 * NOTE(review): the helper return values are ignored in the visible lines,
 * and statements before the first call and the final return are not
 * visible in this listing.
 */
5276 GLboolean
returnOnFlag(r700_AssemblerBase
*pAsm
, GLuint unIF
)
5279 jumpToOffest(pAsm
, 1, 4);
5280 setRetInLoopFlag(pAsm
, SQ_SEL_0
);
5281 pops(pAsm
, unIF
+ 1);
5282 add_return_inst(pAsm
);
/*
 * breakLoopOnFlag: append a CF LOOP_BREAK (pop_count 1, barrier 1) and
 * queue it in the mid array of the loop entry fc_stack[unFCSP], growing the
 * array by one slot, so that its address can be patched later.
 *
 * unFCSP: index into fc_stack of the loop to break out of.
 *
 * NOTE(review): statements before add_cf_instruction(), the failure path
 * and return statements are not visible in this listing, and the realloc
 * result is used without a visible NULL check.
 */
5287 GLboolean
breakLoopOnFlag(r700_AssemblerBase
*pAsm
, GLuint unFCSP
)
5292 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5297 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
5298 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5299 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5301 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5302 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5303 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_LOOP_BREAK
;
5304 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5306 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Queue the LOOP_BREAK on the loop entry for later address patching. */
5308 pAsm
->fc_stack
[unFCSP
].mid
= (R700ControlFlowGenericClause
**)_mesa_realloc(
5309 (void *)pAsm
->fc_stack
[unFCSP
].mid
,
5310 sizeof(R700ControlFlowGenericClause
*) * pAsm
->fc_stack
[unFCSP
].unNumMid
,
5311 sizeof(R700ControlFlowGenericClause
*) * (pAsm
->fc_stack
[unFCSP
].unNumMid
+ 1) );
5312 pAsm
->fc_stack
[unFCSP
].mid
[pAsm
->fc_stack
[unFCSP
].unNumMid
] = pAsm
->cf_current_cf_clause_ptr
;
5313 pAsm
->fc_stack
[unFCSP
].unNumMid
++;
/*
 * AssembleInstr: translate a range of IL (Mesa prog_instruction)
 * instructions into R700 machine instructions.
 *
 * The loop runs i from uiFirstInst while i < uiNumberInsts, stores i in
 * pR700AsmCode->uiCurInst, remembers DstReg.Index as last_cond_register
 * when CondUpdate == 1, and dispatches on pILInst[i].Opcode to the matching
 * assemble_* helper.  For the two comparison cases that have no direct
 * hardware op, the two source registers are swapped in place, assemble_LOGIC
 * is called with SETGT / SETGE, and the sources are restored afterwards.
 * The visible ENDSUB handling returns assemble_ENDSUB() directly.
 *
 * uiFirstInst:   index of the first IL instruction to assemble.
 * uiNumberInsts: upper bound (exclusive) of the instruction index.
 * pILInst:       IL instruction array; temporarily modified for the
 *                swapped-source comparisons.
 * pR700AsmCode:  assembler state.
 *
 * NOTE(review): most case labels, break/return statements and braces are
 * not visible in this listing, so which opcode reaches which helper should
 * be confirmed against the full file.  The code under
 * #ifndef USE_CF_FOR_CONTINUE_BREAK reads pILInst[i+1] with no visible
 * bounds check.
 */
5320 GLboolean
AssembleInstr(GLuint uiFirstInst
,
5321 GLuint uiNumberInsts
,
5322 struct prog_instruction
*pILInst
,
5323 r700_AssemblerBase
*pR700AsmCode
)
5327 pR700AsmCode
->pILInst
= pILInst
;
5328 for(i
=uiFirstInst
; i
<uiNumberInsts
; i
++)
5330 pR700AsmCode
->uiCurInst
= i
;
5332 #ifndef USE_CF_FOR_CONTINUE_BREAK
5333 if(OPCODE_BRK
== pILInst
[i
+1].Opcode
)
5335 switch(pILInst
[i
].Opcode
)
5338 pILInst
[i
].Opcode
= OPCODE_SGT
;
5341 pILInst
[i
].Opcode
= OPCODE_SGE
;
5344 pILInst
[i
].Opcode
= OPCODE_SLT
;
5347 pILInst
[i
].Opcode
= OPCODE_SLE
;
5350 pILInst
[i
].Opcode
= OPCODE_SNE
;
5353 pILInst
[i
].Opcode
= OPCODE_SEQ
;
5360 if(pILInst
[i
].CondUpdate
== 1)
5362 /* remember dest register used for cond evaluation */
5363 /* XXX also handle PROGRAM_OUTPUT registers here? */
5364 pR700AsmCode
->last_cond_register
= pILInst
[i
].DstReg
.Index
;
5367 switch (pILInst
[i
].Opcode
)
5370 if ( GL_FALSE
== assemble_ABS(pR700AsmCode
) )
5375 if ( GL_FALSE
== assemble_ADD(pR700AsmCode
) )
5380 if ( GL_FALSE
== assemble_ARL(pR700AsmCode
) )
5384 radeon_error("Not yet implemented instruction OPCODE_ARR \n");
5385 //if ( GL_FALSE == assemble_BAD("ARR") )
5390 if ( GL_FALSE
== assemble_CMP(pR700AsmCode
) )
5394 if ( GL_FALSE
== assemble_TRIG(pR700AsmCode
, SQ_OP2_INST_COS
) )
5401 if ( GL_FALSE
== assemble_DOT(pR700AsmCode
) )
5406 if ( GL_FALSE
== assemble_DST(pR700AsmCode
) )
5411 if ( GL_FALSE
== assemble_EX2(pR700AsmCode
) )
5415 if ( GL_FALSE
== assemble_EXP(pR700AsmCode
) )
5420 if ( GL_FALSE
== assemble_FLR(pR700AsmCode
) )
5423 //case OP_FLR_INT: ;
5425 // if ( GL_FALSE == assemble_FLR_INT() )
5430 if ( GL_FALSE
== assemble_FRC(pR700AsmCode
) )
5436 if ( GL_FALSE
== assemble_KIL(pR700AsmCode
, SQ_OP2_INST_KILLGT
) )
5440 if ( GL_FALSE
== assemble_LG2(pR700AsmCode
) )
5444 if ( GL_FALSE
== assemble_LIT(pR700AsmCode
) )
5448 if ( GL_FALSE
== assemble_LRP(pR700AsmCode
) )
5452 if ( GL_FALSE
== assemble_LOG(pR700AsmCode
) )
5457 if ( GL_FALSE
== assemble_MAD(pR700AsmCode
) )
5461 if ( GL_FALSE
== assemble_MAX(pR700AsmCode
) )
5465 if ( GL_FALSE
== assemble_MIN(pR700AsmCode
) )
5470 if ( GL_FALSE
== assemble_MOV(pR700AsmCode
) )
5474 if ( GL_FALSE
== assemble_MUL(pR700AsmCode
) )
5479 if ( GL_FALSE
== assemble_POW(pR700AsmCode
) )
5483 if ( GL_FALSE
== assemble_RCP(pR700AsmCode
) )
5487 if ( GL_FALSE
== assemble_RSQ(pR700AsmCode
) )
5491 if ( GL_FALSE
== assemble_TRIG(pR700AsmCode
, SQ_OP2_INST_SIN
) )
5495 if ( GL_FALSE
== assemble_SCS(pR700AsmCode
) )
5500 if ( GL_FALSE
== assemble_LOGIC(pR700AsmCode
, SQ_OP2_INST_SETE
) )
5507 if ( GL_FALSE
== assemble_LOGIC(pR700AsmCode
, SQ_OP2_INST_SETGT
) )
5514 if ( GL_FALSE
== assemble_SGE(pR700AsmCode
) )
5520 /* NO LT, LE, TODO : use GE => LE, GT => LT : reverse 2 src order would be simplest. Or use SQ_CF_COND_FALSE for SQ_CF_COND_ACTIVE.*/
/* Swap the two sources, emit SETGT, then restore the IL instruction. */
5523 struct prog_src_register SrcRegSave
[2];
5524 SrcRegSave
[0] = pILInst
[i
].SrcReg
[0];
5525 SrcRegSave
[1] = pILInst
[i
].SrcReg
[1];
5526 pILInst
[i
].SrcReg
[0] = SrcRegSave
[1];
5527 pILInst
[i
].SrcReg
[1] = SrcRegSave
[0];
5528 if ( GL_FALSE
== assemble_LOGIC(pR700AsmCode
, SQ_OP2_INST_SETGT
) )
5530 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
5531 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
5534 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
5535 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
/* Same swap-and-restore pattern with SETGE. */
5541 struct prog_src_register SrcRegSave
[2];
5542 SrcRegSave
[0] = pILInst
[i
].SrcReg
[0];
5543 SrcRegSave
[1] = pILInst
[i
].SrcReg
[1];
5544 pILInst
[i
].SrcReg
[0] = SrcRegSave
[1];
5545 pILInst
[i
].SrcReg
[1] = SrcRegSave
[0];
5546 if ( GL_FALSE
== assemble_LOGIC(pR700AsmCode
, SQ_OP2_INST_SETGE
) )
5548 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
5549 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
5552 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
5553 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
5558 if ( GL_FALSE
== assemble_LOGIC(pR700AsmCode
, SQ_OP2_INST_SETNE
) )
5565 // if ( GL_FALSE == assemble_STP(pR700AsmCode) )
5570 if ( GL_FALSE
== assemble_MOV(pR700AsmCode
) )
/* If a texture instruction follows, mark its dependency on this one. */
5576 if( (i
+1)<uiNumberInsts
)
5578 if(OPCODE_END
!= pILInst
[i
+1].Opcode
)
5580 if( GL_TRUE
== IsTex(pILInst
[i
+1].Opcode
) )
5582 pR700AsmCode
->pInstDeps
[i
+1].nDstDep
= i
+1; //=1?
5592 if ( GL_FALSE
== assemble_TEX(pR700AsmCode
) )
5597 if ( GL_FALSE
== assemble_math_function(pR700AsmCode
, SQ_OP2_INST_TRUNC
) )
5602 if ( GL_FALSE
== assemble_XPD(pR700AsmCode
) )
/* IF: look at the instruction before the branch target to detect an ELSE. */
5608 GLboolean bHasElse
= GL_FALSE
;
5610 if(pILInst
[pILInst
[i
].BranchTarget
- 1].Opcode
== OPCODE_ELSE
)
5615 if ( GL_FALSE
== assemble_IF(pR700AsmCode
, bHasElse
) )
5623 if ( GL_FALSE
== assemble_ELSE(pR700AsmCode
) )
5628 if ( GL_FALSE
== assemble_ENDIF(pR700AsmCode
) )
5632 case OPCODE_BGNLOOP
:
5633 if( GL_FALSE
== assemble_BGNLOOP(pR700AsmCode
) )
5640 if( GL_FALSE
== assemble_BRK(pR700AsmCode
) )
5647 if( GL_FALSE
== assemble_CONT(pR700AsmCode
) )
5653 case OPCODE_ENDLOOP
:
5654 if( GL_FALSE
== assemble_ENDLOOP(pR700AsmCode
) )
5661 if( GL_FALSE
== assemble_BGNSUB(pR700AsmCode
, i
) )
5668 if( GL_FALSE
== assemble_RET(pR700AsmCode
) )
5675 if( GL_FALSE
== assemble_CAL(pR700AsmCode
,
5676 pILInst
[i
].BranchTarget
,
5684 //case OPCODE_EXPORT:
5685 // if ( GL_FALSE == assemble_EXPORT() )
5690 return assemble_ENDSUB(pR700AsmCode
);
5693 //pR700AsmCode->uiCurInst = i;
5694 //This is to remind that if in later export there is depth/stencil
5695 //export, we need a mov to re-arrange DST channel, where using a
5696 //pseudo inst, we will use this end inst to do it.
5700 radeon_error("internal: unknown instruction\n");
/*
 * InitShaderProgram: emit the initial instruction that clears the
 * return-in-loop flag (setRetInLoopFlag with SQ_SEL_0) and reset the ALU
 * clause opcode to SQ_CF_INST_ALU.
 *
 * NOTE(review): the return value of setRetInLoopFlag() is ignored in the
 * visible lines; the function's own return statement is not visible in
 * this listing.
 */
5708 GLboolean
InitShaderProgram(r700_AssemblerBase
* pAsm
)
5710 setRetInLoopFlag(pAsm
, SQ_SEL_0
);
5711 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
/*
 * RelocProgram: final fix-up pass that merges subroutine CF lists into the
 * main program and resolves addresses.
 *
 * Visible steps:
 *  - if HAS_LOOPRET is not set in unCFflags, neutralise the flag-init
 *    instruction: for an SIT_CF_ALU clause in the main list, either turn it
 *    into a NOP when its count is 0, or unlink its first ALU instruction
 *    (re-linking the clause to the next one) and decrement count;
 *  - when CALLSTACK[0].max > 0, set the shader's uStackSize to
 *    ((max + 3) >> 2) + 2;
 *  - for each subroutine: record its CF offset, add that offset to the
 *    address field of the listed branch-type CF instructions and to every
 *    instruction's m_uIndex, then append the sub's list to the main list;
 *  - patch every recorded call site with the CF offset of its subroutine.
 *
 * NOTE(review): loop headers over the instruction lists, early exits,
 * braces, local declarations (i, unCFoffset) and return statements are not
 * visible in this listing - confirm the control flow in the full file.
 * The unlink path dereferences pALU->pNextInst with no visible NULL check.
 */
5715 GLboolean
RelocProgram(r700_AssemblerBase
* pAsm
)
5719 TypedShaderList
* plstCFmain
;
5720 TypedShaderList
* plstCFsub
;
5722 R700ShaderInstruction
* pInst
;
5723 R700ControlFlowGenericClause
* pCFInst
;
5725 plstCFmain
= pAsm
->CALLSTACK
[0].plstCFInstructions_local
;
5727 /* remove flags init if they are not used */
5728 if((pAsm
->unCFflags
& HAS_LOOPRET
) == 0)
5730 R700ControlFlowALUClause
* pCF_ALU
;
5731 pInst
= plstCFmain
->pHead
;
5734 if(SIT_CF_ALU
== pInst
->m_ShaderInstType
)
5736 pCF_ALU
= (R700ControlFlowALUClause
*)pInst
;
5737 if(0 == pCF_ALU
->m_Word1
.f
.count
)
5739 pCF_ALU
->m_Word1
.f
.cf_inst
= SQ_CF_INST_NOP
;
/* Otherwise drop the clause's first ALU instruction and re-link to the next. */
5743 R700ALUInstruction
* pALU
= pCF_ALU
->m_pLinkedALUInstruction
;
5745 pALU
->m_pLinkedALUClause
= NULL
;
5746 pALU
= (R700ALUInstruction
*)(pALU
->pNextInst
);
5747 pALU
->m_pLinkedALUClause
= pCF_ALU
;
5748 pCF_ALU
->m_pLinkedALUInstruction
= pALU
;
5750 pCF_ALU
->m_Word1
.f
.count
--;
5754 pInst
= pInst
->pNextInst
;
5758 if(pAsm
->CALLSTACK
[0].max
> 0)
5760 pAsm
->pR700Shader
->uStackSize
= ((pAsm
->CALLSTACK
[0].max
+ 3)>>2) + 2;
5763 if(0 == pAsm
->unSubArrayPointer
)
/* Subroutines are placed after the main program's CF instructions. */
5768 unCFoffset
= plstCFmain
->uNumOfNode
;
5771 for(i
=0; i
<pAsm
->unSubArrayPointer
; i
++)
5773 pAsm
->subs
[i
].unCFoffset
= unCFoffset
;
5774 plstCFsub
= &(pAsm
->subs
[i
].lstCFInstructions_local
);
5776 pInst
= plstCFsub
->pHead
;
5778 /* reloc instructions */
5781 if(SIT_CF_GENERIC
== pInst
->m_ShaderInstType
)
5783 pCFInst
= (R700ControlFlowGenericClause
*)pInst
;
5785 switch (pCFInst
->m_Word1
.f
.cf_inst
)
5787 case SQ_CF_INST_POP
:
5788 case SQ_CF_INST_JUMP
:
5789 case SQ_CF_INST_ELSE
:
5790 case SQ_CF_INST_LOOP_END
:
5791 case SQ_CF_INST_LOOP_START
:
5792 case SQ_CF_INST_LOOP_START_NO_AL
:
5793 case SQ_CF_INST_LOOP_CONTINUE
:
5794 case SQ_CF_INST_LOOP_BREAK
:
5795 pCFInst
->m_Word0
.f
.addr
+= unCFoffset
;
5802 pInst
->m_uIndex
+= unCFoffset
;
5804 pInst
= pInst
->pNextInst
;
5807 /* Put sub into main */
5808 plstCFmain
->pTail
->pNextInst
= plstCFsub
->pHead
;
5809 plstCFmain
->pTail
= plstCFsub
->pTail
;
5810 plstCFmain
->uNumOfNode
+= plstCFsub
->uNumOfNode
;
5812 unCFoffset
+= plstCFsub
->uNumOfNode
;
/* Resolve each CALL to the final CF offset of its subroutine. */
5816 for(i
=0; i
<pAsm
->unCallerArrayPointer
; i
++)
5818 pAsm
->callers
[i
].cf_ptr
->m_Word0
.f
.addr
5819 = pAsm
->subs
[pAsm
->callers
[i
].subDescIndex
].unCFoffset
;
/* Fill in one export CF clause.
 *
 * pAsm                      assembler state; the clause written is
 *                           pAsm->cf_current_export_clause_ptr.
 * export_starting_index     added to the base slot for the MRT, position and
 *                           parameter cases.
 * export_count              number of registers exported; burst_count is set
 *                           to export_count - 1.
 * starting_register_number  stored in rw_gpr.
 * is_depth_export           GL_TRUE selects SQ_CF_PIXEL_Z as array_base in
 *                           the pixel case.
 *
 * The body also uses a value named `type` (stored in the clause's type field
 * and printed by the "Unknown export type" error) -- NOTE(review): its
 * declaration is not visible here; confirm against the full signature.
 *
 * On completion the clause is remembered in pAsm->cf_last_export_ptr. */
5825 GLboolean
Process_Export(r700_AssemblerBase
* pAsm
,
5827 GLuint export_starting_index
,
5828 GLuint export_count
,
5829 GLuint starting_register_number
,
5830 GLboolean is_depth_export
)
5832 unsigned char ucWriteMask
;
5834 check_current_clause(pAsm
, CF_EMPTY_CLAUSE
);
5835 check_current_clause(pAsm
, CF_EXPORT_CLAUSE
); //alloc the cf_current_export_clause_ptr
5837 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.type
= type
;
/* array_base depends on the export kind: pixel exports go to SQ_CF_PIXEL_Z
 * (depth) or SQ_CF_PIXEL_MRT0 + index, other visible cases use
 * SQ_CF_POS_0 + index or 0 + index. */
5841 case SQ_EXPORT_PIXEL
:
5842 if(GL_TRUE
== is_depth_export
)
5844 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= SQ_CF_PIXEL_Z
;
5848 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= SQ_CF_PIXEL_MRT0
+ export_starting_index
;
5853 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= SQ_CF_POS_0
+ export_starting_index
;
5856 case SQ_EXPORT_PARAM
:
5857 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= 0x0 + export_starting_index
;
5861 radeon_error("Unknown export type: %d\n", type
);
/* Source register and fixed addressing fields of word 0. */
5866 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.rw_gpr
= starting_register_number
;
5868 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.rw_rel
= SQ_ABSOLUTE
;
5869 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.index_gpr
= 0x0;
5870 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.elem_size
= 0x3;
/* Word 1: a plain EXPORT (not EXPORT_DONE) that does not end the program;
 * callers patch cf_inst / end_of_program afterwards via cf_last_export_ptr. */
5872 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.burst_count
= (export_count
- 1);
5873 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5874 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5875 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT
; // _DONE
5876 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5877 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Single-register export: the swizzle is built from the register's write
 * mask, looked up in pucOutMask relative to starting_export_register_number.
 * Bit 0..3 of the mask select x..w; the alternative value for each channel
 * is SQ_SEL_MASK. */
5879 if (export_count
== 1)
5881 ucWriteMask
= pAsm
->pucOutMask
[starting_register_number
- pAsm
->starting_export_register_number
];
5882 /* exports Z as a float into Red channel */
5883 if (GL_TRUE
== is_depth_export
)
5886 if( (ucWriteMask
& 0x1) != 0)
5888 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_X
;
5892 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_MASK
;
5894 if( ((ucWriteMask
>>1) & 0x1) != 0)
5896 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_Y
;
5900 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_MASK
;
5902 if( ((ucWriteMask
>>2) & 0x1) != 0)
5904 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_Z
;
5908 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_MASK
;
5910 if( ((ucWriteMask
>>3) & 0x1) != 0)
5912 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_W
;
5916 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_MASK
;
/* Identity swizzle (x, y, z, w). */
5921 // This should only be used if all components for all registers have been written
5922 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_X
;
5923 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_Y
;
5924 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_Z
;
5925 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_W
;
/* Remember this clause so callers can mark it EXPORT_DONE / end of program. */
5928 pAsm
->cf_last_export_ptr
= pAsm
->cf_current_export_clause_ptr
;
/* Emit a MOV that copies one channel of the depth export register into its
 * x channel.
 *
 * pAsm                  assembler state; the register used for both source
 *                       and destination is pAsm->depth_export_register_number.
 * depth_channel_select  swizzle applied to the source operand.
 *
 * The opcode of the current IL instruction (pILInst[uiCurInst]) is saved,
 * temporarily replaced by OPCODE_MOV while the instruction is emitted through
 * next_ins(), and then restored. */
5933 GLboolean
Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase
*pAsm
, BITS depth_channel_select
)
5935 gl_inst_opcode Opcode_save
= pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
; //Should be OPCODE_END
5936 pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
= OPCODE_MOV
;
5938 // MOV depth_export_register.hw_depth_channel, depth_export_register.depth_channel_select
5940 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
/* Destination: the depth export register, absolute addressing, x written. */
5942 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
5943 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
5944 pAsm
->D
.dst
.reg
= pAsm
->depth_export_register_number
;
5946 pAsm
->D
.dst
.writex
= 1; // depth goes in R channel for HW
/* Source: the same register, swizzled by depth_channel_select, no negate. */
5948 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
5949 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
5950 pAsm
->S
[0].src
.reg
= pAsm
->depth_export_register_number
;
5952 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), depth_channel_select
);
5954 noneg_PVSSRC(&(pAsm
->S
[0].src
));
5956 if( GL_FALSE
== next_ins(pAsm
) )
/* Put back the opcode that was overwritten above. */
5961 pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
= Opcode_save
;
/* Emit the export clauses of a fragment program.
 *
 * pR700AsmCode    assembler state.
 * OutputsWritten  bitfield tested against (1 << FRAG_RESULT_COLOR) and
 *                 (1 << FRAG_RESULT_DEPTH).
 *
 * Visible steps: when a depth export register is assigned
 * (depth_export_register_number >= 0) the depth value is first moved with
 * Move_Depth_Exports_To_Correct_Channels(SQ_SEL_Z); colour and depth are then
 * exported through Process_Export() using the registers in uiFP_OutputMap;
 * if nothing was exported a dummy pixel export is emitted; finally the last
 * export clause is turned into EXPORT_DONE with end_of_program set. */
5966 GLboolean
Process_Fragment_Exports(r700_AssemblerBase
*pR700AsmCode
,
5967 GLbitfield OutputsWritten
)
5970 GLuint export_count
= 0;
5972 if(pR700AsmCode
->depth_export_register_number
>= 0)
5974 if( GL_FALSE
== Move_Depth_Exports_To_Correct_Channels(pR700AsmCode
, SQ_SEL_Z
) ) // depth
/* Colour output. */
5980 unBit
= 1 << FRAG_RESULT_COLOR
;
5981 if(OutputsWritten
& unBit
)
5983 if( GL_FALSE
== Process_Export(pR700AsmCode
,
5987 pR700AsmCode
->uiFP_OutputMap
[FRAG_RESULT_COLOR
],
/* Depth output. */
5994 unBit
= 1 << FRAG_RESULT_DEPTH
;
5995 if(OutputsWritten
& unBit
)
5997 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6001 pR700AsmCode
->uiFP_OutputMap
[FRAG_RESULT_DEPTH
],
6008 /* Need to export something, otherwise we'll hang
6009 * results are undefined anyway */
6010 if(export_count
== 0)
6012 Process_Export(pR700AsmCode
, SQ_EXPORT_PIXEL
, 0, 1, 0, GL_FALSE
);
/* The final export clause terminates the program. */
6015 if(pR700AsmCode
->cf_last_export_ptr
!= NULL
)
6017 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
6018 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.end_of_program
= 0x1;
/* Emit the export clauses of a vertex program.
 *
 * pR700AsmCode    assembler state.
 * OutputsWritten  bitfield tested against one bit per VERT_RESULT_* output.
 *
 * Visible steps: the position (VERT_RESULT_HPOS) is exported and its clause
 * marked EXPORT_DONE; then COL0, COL1, FOGC, the TEX0 + i outputs and the
 * outputs from VERT_RESULT_VAR0 up to VERT_RESULT_MAX are exported through
 * Process_Export() from the registers in ucVP_OutputMap, advancing
 * export_starting_index after each; a further export with swizzle
 * (0, 0, 0, 1) is set up near the end -- NOTE(review): the condition guarding
 * it is not visible here, presumably the "no parameter exported" case; the
 * last export clause is marked EXPORT_DONE with end_of_program set. */
6024 GLboolean
Process_Vertex_Exports(r700_AssemblerBase
*pR700AsmCode
,
6025 GLbitfield OutputsWritten
)
6030 GLuint export_starting_index
= 0;
6031 GLuint export_count
= pR700AsmCode
->number_of_exports
;
/* Position export. */
6033 unBit
= 1 << VERT_RESULT_HPOS
;
6034 if(OutputsWritten
& unBit
)
6036 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6038 export_starting_index
,
6040 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_HPOS
],
6048 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
6051 pR700AsmCode
->number_of_exports
= export_count
;
/* Primary colour. */
6053 unBit
= 1 << VERT_RESULT_COL0
;
6054 if(OutputsWritten
& unBit
)
6056 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6058 export_starting_index
,
6060 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_COL0
],
6066 export_starting_index
++;
/* Secondary colour. */
6069 unBit
= 1 << VERT_RESULT_COL1
;
6070 if(OutputsWritten
& unBit
)
6072 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6074 export_starting_index
,
6076 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_COL1
],
6082 export_starting_index
++;
/* Fog coordinate. */
6085 unBit
= 1 << VERT_RESULT_FOGC
;
6086 if(OutputsWritten
& unBit
)
6088 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6090 export_starting_index
,
6092 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_FOGC
],
6098 export_starting_index
++;
/* Texture coordinate outputs, indexed by i from VERT_RESULT_TEX0. */
6103 unBit
= 1 << (VERT_RESULT_TEX0
+ i
);
6104 if(OutputsWritten
& unBit
)
6106 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6108 export_starting_index
,
6110 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_TEX0
+ i
],
6116 export_starting_index
++;
/* Remaining outputs from VERT_RESULT_VAR0 up to VERT_RESULT_MAX. */
6120 for(i
=VERT_RESULT_VAR0
; i
<VERT_RESULT_MAX
; i
++)
6123 if(OutputsWritten
& unBit
)
6125 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6127 export_starting_index
,
6129 pR700AsmCode
->ucVP_OutputMap
[i
],
6135 export_starting_index
++;
6139 // At least one param should be exported
6142 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
/* Export from the first export register with a constant (0, 0, 0, 1)
 * swizzle, marked EXPORT_DONE. */
6146 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6150 pR700AsmCode
->starting_export_register_number
,
6156 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_0
;
6157 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_0
;
6158 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_0
;
6159 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_1
;
6160 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
/* The last export clause also ends the program. */
6163 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.end_of_program
= 0x1;
/* Release buffers held by the assembler state: pucOutMask and pInstDeps are
 * passed to FREE() directly; subs and callers are passed to FREE() only when
 * they are non-NULL. */
6168 GLboolean
Clean_Up_Assembler(r700_AssemblerBase
*pR700AsmCode
)
6170 FREE(pR700AsmCode
->pucOutMask
);
6171 FREE(pR700AsmCode
->pInstDeps
);
6173 if(NULL
!= pR700AsmCode
->subs
)
6175 FREE(pR700AsmCode
->subs
);
6177 if(NULL
!= pR700AsmCode
->callers
)
6179 FREE(pR700AsmCode
->callers
);