2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
33 #include "main/mtypes.h"
34 #include "main/imports.h"
36 #include "radeon_debug.h"
37 #include "r600_context.h"
39 #include "r700_assembler.h"
41 #define USE_CF_FOR_CONTINUE_BREAK 1
42 #define USE_CF_FOR_POP_AFTER 1
44 BITS
addrmode_PVSDST(PVSDST
* pPVSDST
)
46 return pPVSDST
->addrmode0
| ((BITS
)pPVSDST
->addrmode1
<< 1);
49 void setaddrmode_PVSDST(PVSDST
* pPVSDST
, BITS addrmode
)
51 pPVSDST
->addrmode0
= addrmode
& 1;
52 pPVSDST
->addrmode1
= (addrmode
>> 1) & 1;
55 void nomask_PVSDST(PVSDST
* pPVSDST
)
57 pPVSDST
->writex
= pPVSDST
->writey
= pPVSDST
->writez
= pPVSDST
->writew
= 1;
60 BITS
addrmode_PVSSRC(PVSSRC
* pPVSSRC
)
62 return pPVSSRC
->addrmode0
| ((BITS
)pPVSSRC
->addrmode1
<< 1);
65 void setaddrmode_PVSSRC(PVSSRC
* pPVSSRC
, BITS addrmode
)
67 pPVSSRC
->addrmode0
= addrmode
& 1;
68 pPVSSRC
->addrmode1
= (addrmode
>> 1) & 1;
72 void setswizzle_PVSSRC(PVSSRC
* pPVSSRC
, BITS swz
)
77 pPVSSRC
->swizzlew
= swz
;
80 void noswizzle_PVSSRC(PVSSRC
* pPVSSRC
)
82 pPVSSRC
->swizzlex
= SQ_SEL_X
;
83 pPVSSRC
->swizzley
= SQ_SEL_Y
;
84 pPVSSRC
->swizzlez
= SQ_SEL_Z
;
85 pPVSSRC
->swizzlew
= SQ_SEL_W
;
89 swizzleagain_PVSSRC(PVSSRC
* pPVSSRC
, BITS x
, BITS y
, BITS z
, BITS w
)
93 case SQ_SEL_X
: x
= pPVSSRC
->swizzlex
;
95 case SQ_SEL_Y
: x
= pPVSSRC
->swizzley
;
97 case SQ_SEL_Z
: x
= pPVSSRC
->swizzlez
;
99 case SQ_SEL_W
: x
= pPVSSRC
->swizzlew
;
106 case SQ_SEL_X
: y
= pPVSSRC
->swizzlex
;
108 case SQ_SEL_Y
: y
= pPVSSRC
->swizzley
;
110 case SQ_SEL_Z
: y
= pPVSSRC
->swizzlez
;
112 case SQ_SEL_W
: y
= pPVSSRC
->swizzlew
;
119 case SQ_SEL_X
: z
= pPVSSRC
->swizzlex
;
121 case SQ_SEL_Y
: z
= pPVSSRC
->swizzley
;
123 case SQ_SEL_Z
: z
= pPVSSRC
->swizzlez
;
125 case SQ_SEL_W
: z
= pPVSSRC
->swizzlew
;
132 case SQ_SEL_X
: w
= pPVSSRC
->swizzlex
;
134 case SQ_SEL_Y
: w
= pPVSSRC
->swizzley
;
136 case SQ_SEL_Z
: w
= pPVSSRC
->swizzlez
;
138 case SQ_SEL_W
: w
= pPVSSRC
->swizzlew
;
143 pPVSSRC
->swizzlex
= x
;
144 pPVSSRC
->swizzley
= y
;
145 pPVSSRC
->swizzlez
= z
;
146 pPVSSRC
->swizzlew
= w
;
149 void neg_PVSSRC(PVSSRC
* pPVSSRC
)
157 void noneg_PVSSRC(PVSSRC
* pPVSSRC
)
165 // negate argument (for SUB instead of ADD and alike)
166 void flipneg_PVSSRC(PVSSRC
* pPVSSRC
)
168 pPVSSRC
->negx
= !pPVSSRC
->negx
;
169 pPVSSRC
->negy
= !pPVSSRC
->negy
;
170 pPVSSRC
->negz
= !pPVSSRC
->negz
;
171 pPVSSRC
->negw
= !pPVSSRC
->negw
;
174 void zerocomp_PVSSRC(PVSSRC
* pPVSSRC
, int c
)
178 case 0: pPVSSRC
->swizzlex
= SQ_SEL_0
; pPVSSRC
->negx
= 0; break;
179 case 1: pPVSSRC
->swizzley
= SQ_SEL_0
; pPVSSRC
->negy
= 0; break;
180 case 2: pPVSSRC
->swizzlez
= SQ_SEL_0
; pPVSSRC
->negz
= 0; break;
181 case 3: pPVSSRC
->swizzlew
= SQ_SEL_0
; pPVSSRC
->negw
= 0; break;
186 void onecomp_PVSSRC(PVSSRC
* pPVSSRC
, int c
)
190 case 0: pPVSSRC
->swizzlex
= SQ_SEL_1
; pPVSSRC
->negx
= 0; break;
191 case 1: pPVSSRC
->swizzley
= SQ_SEL_1
; pPVSSRC
->negy
= 0; break;
192 case 2: pPVSSRC
->swizzlez
= SQ_SEL_1
; pPVSSRC
->negz
= 0; break;
193 case 3: pPVSSRC
->swizzlew
= SQ_SEL_1
; pPVSSRC
->negw
= 0; break;
198 BITS
is_misc_component_exported(VAP_OUT_VTX_FMT_0
* pOutVTXFmt0
)
200 return (pOutVTXFmt0
->point_size
|
201 pOutVTXFmt0
->edge_flag
|
202 pOutVTXFmt0
->rta_index
|
203 pOutVTXFmt0
->kill_flag
|
204 pOutVTXFmt0
->viewport_index
);
207 BITS
is_depth_component_exported(OUT_FRAGMENT_FMT_0
* pFPOutFmt
)
209 return (pFPOutFmt
->depth
|
210 pFPOutFmt
->stencil_ref
|
212 pFPOutFmt
->coverage_to_mask
);
215 GLboolean
is_reduction_opcode(PVSDWORD
* dest
)
217 if (dest
->dst
.op3
== 0)
219 if ( (dest
->dst
.opcode
== SQ_OP2_INST_DOT4
|| dest
->dst
.opcode
== SQ_OP2_INST_DOT4_IEEE
|| dest
->dst
.opcode
== SQ_OP2_INST_CUBE
) )
227 GLuint
GetSurfaceFormat(GLenum eType
, GLuint nChannels
, GLuint
* pClient_size
)
229 GLuint format
= FMT_INVALID
;
230 GLuint uiElemSize
= 0;
235 case GL_UNSIGNED_BYTE
:
240 format
= FMT_8
; break;
242 format
= FMT_8_8
; break;
244 format
= FMT_8_8_8
; break;
246 format
= FMT_8_8_8_8
; break;
252 case GL_UNSIGNED_SHORT
:
258 format
= FMT_16
; break;
260 format
= FMT_16_16
; break;
262 format
= FMT_16_16_16
; break;
264 format
= FMT_16_16_16_16
; break;
270 case GL_UNSIGNED_INT
:
276 format
= FMT_32
; break;
278 format
= FMT_32_32
; break;
280 format
= FMT_32_32_32
; break;
282 format
= FMT_32_32_32_32
; break;
293 format
= FMT_32_FLOAT
; break;
295 format
= FMT_32_32_FLOAT
; break;
297 format
= FMT_32_32_32_FLOAT
; break;
299 format
= FMT_32_32_32_32_FLOAT
; break;
309 format
= FMT_32_FLOAT
; break;
311 format
= FMT_32_32_FLOAT
; break;
313 format
= FMT_32_32_32_FLOAT
; break;
315 format
= FMT_32_32_32_32_FLOAT
; break;
322 //GL_ASSERT_NO_CASE();
325 if(NULL
!= pClient_size
)
327 *pClient_size
= uiElemSize
* nChannels
;
333 unsigned int r700GetNumOperands(r700_AssemblerBase
* pAsm
)
340 switch (pAsm
->D
.dst
.opcode
)
342 case SQ_OP2_INST_ADD
:
343 case SQ_OP2_INST_KILLGT
:
344 case SQ_OP2_INST_MUL
:
345 case SQ_OP2_INST_MAX
:
346 case SQ_OP2_INST_MIN
:
347 //case SQ_OP2_INST_MAX_DX10:
348 //case SQ_OP2_INST_MIN_DX10:
349 case SQ_OP2_INST_SETE
:
350 case SQ_OP2_INST_SETNE
:
351 case SQ_OP2_INST_SETGT
:
352 case SQ_OP2_INST_SETGE
:
353 case SQ_OP2_INST_PRED_SETE
:
354 case SQ_OP2_INST_PRED_SETGT
:
355 case SQ_OP2_INST_PRED_SETGE
:
356 case SQ_OP2_INST_PRED_SETNE
:
357 case SQ_OP2_INST_DOT4
:
358 case SQ_OP2_INST_DOT4_IEEE
:
359 case SQ_OP2_INST_CUBE
:
362 case SQ_OP2_INST_MOV
:
363 case SQ_OP2_INST_MOVA_FLOOR
:
364 case SQ_OP2_INST_FRACT
:
365 case SQ_OP2_INST_FLOOR
:
366 case SQ_OP2_INST_EXP_IEEE
:
367 case SQ_OP2_INST_LOG_CLAMPED
:
368 case SQ_OP2_INST_LOG_IEEE
:
369 case SQ_OP2_INST_RECIP_IEEE
:
370 case SQ_OP2_INST_RECIPSQRT_IEEE
:
371 case SQ_OP2_INST_FLT_TO_INT
:
372 case SQ_OP2_INST_SIN
:
373 case SQ_OP2_INST_COS
:
376 default: radeon_error(
377 "Need instruction operand number for %x.\n", pAsm
->D
.dst
.opcode
);
383 int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt
, r700_AssemblerBase
* pAsm
, R700_Shader
* pShader
)
387 Init_R700_Shader(pShader
);
388 pAsm
->pR700Shader
= pShader
;
389 pAsm
->currentShaderType
= spt
;
391 pAsm
->cf_last_export_ptr
= NULL
;
393 pAsm
->cf_current_export_clause_ptr
= NULL
;
394 pAsm
->cf_current_alu_clause_ptr
= NULL
;
395 pAsm
->cf_current_tex_clause_ptr
= NULL
;
396 pAsm
->cf_current_vtx_clause_ptr
= NULL
;
397 pAsm
->cf_current_cf_clause_ptr
= NULL
;
399 // No clause has been created yet
400 pAsm
->cf_current_clause_type
= CF_EMPTY_CLAUSE
;
402 pAsm
->number_of_colorandz_exports
= 0;
403 pAsm
->number_of_exports
= 0;
404 pAsm
->number_of_export_opcodes
= 0;
406 pAsm
->alu_x_opcode
= 0;
415 pAsm
->uLastPosUpdate
= 0;
417 *(BITS
*) &pAsm
->fp_stOutFmt0
= 0;
421 pAsm
->number_used_registers
= 0;
422 pAsm
->uUsedConsts
= 256;
426 pAsm
->uBoolConsts
= 0;
427 pAsm
->uIntConsts
= 0;
432 pAsm
->fc_stack
[0].type
= FC_NONE
;
434 pAsm
->branch_depth
= 0;
435 pAsm
->max_branch_depth
= 0;
440 pAsm
->aArgSubst
[3] = (-1);
444 for (i
=0; i
<NUMBER_OF_OUTPUT_COLORS
; i
++)
446 pAsm
->color_export_register_number
[i
] = (-1);
450 pAsm
->depth_export_register_number
= (-1);
451 pAsm
->stencil_export_register_number
= (-1);
452 pAsm
->coverage_to_mask_export_register_number
= (-1);
453 pAsm
->mask_export_register_number
= (-1);
455 pAsm
->starting_export_register_number
= 0;
456 pAsm
->starting_vfetch_register_number
= 0;
457 pAsm
->starting_temp_register_number
= 0;
458 pAsm
->uFirstHelpReg
= 0;
461 pAsm
->input_position_is_used
= GL_FALSE
;
462 pAsm
->input_normal_is_used
= GL_FALSE
;
465 for (i
=0; i
<NUMBER_OF_INPUT_COLORS
; i
++)
467 pAsm
->input_color_is_used
[ i
] = GL_FALSE
;
470 for (i
=0; i
<NUMBER_OF_TEXTURE_UNITS
; i
++)
472 pAsm
->input_texture_unit_is_used
[ i
] = GL_FALSE
;
475 for (i
=0; i
<VERT_ATTRIB_MAX
; i
++)
477 pAsm
->vfetch_instruction_ptr_array
[ i
] = NULL
;
480 pAsm
->number_of_inputs
= 0;
482 pAsm
->is_tex
= GL_FALSE
;
483 pAsm
->need_tex_barrier
= GL_FALSE
;
486 pAsm
->unSubArraySize
= 0;
487 pAsm
->unSubArrayPointer
= 0;
488 pAsm
->callers
= NULL
;
489 pAsm
->unCallerArraySize
= 0;
490 pAsm
->unCallerArrayPointer
= 0;
493 pAsm
->CALLSTACK
[0].FCSP_BeforeEntry
= 0;
494 pAsm
->CALLSTACK
[0].plstCFInstructions_local
495 = &(pAsm
->pR700Shader
->lstCFInstructions
);
497 pAsm
->CALLSTACK
[0].stackUsage
.bits
= 0;
499 SetActiveCFlist(pAsm
->pR700Shader
, pAsm
->CALLSTACK
[0].plstCFInstructions_local
);
506 GLboolean
IsTex(gl_inst_opcode Opcode
)
508 if( (OPCODE_TEX
==Opcode
) || (OPCODE_TXP
==Opcode
) || (OPCODE_TXB
==Opcode
) )
515 GLboolean
IsAlu(gl_inst_opcode Opcode
)
517 //TODO : more for fc and ex for higher spec.
525 int check_current_clause(r700_AssemblerBase
* pAsm
,
526 CF_CLAUSE_TYPE new_clause_type
)
528 if (pAsm
->cf_current_clause_type
!= new_clause_type
)
529 { //Close last open clause
530 switch (pAsm
->cf_current_clause_type
)
533 if ( pAsm
->cf_current_alu_clause_ptr
!= NULL
)
535 pAsm
->cf_current_alu_clause_ptr
= NULL
;
539 if ( pAsm
->cf_current_vtx_clause_ptr
!= NULL
)
541 pAsm
->cf_current_vtx_clause_ptr
= NULL
;
545 if ( pAsm
->cf_current_tex_clause_ptr
!= NULL
)
547 pAsm
->cf_current_tex_clause_ptr
= NULL
;
550 case CF_EXPORT_CLAUSE
:
551 if ( pAsm
->cf_current_export_clause_ptr
!= NULL
)
553 pAsm
->cf_current_export_clause_ptr
= NULL
;
556 case CF_OTHER_CLAUSE
:
557 if ( pAsm
->cf_current_cf_clause_ptr
!= NULL
)
559 pAsm
->cf_current_cf_clause_ptr
= NULL
;
562 case CF_EMPTY_CLAUSE
:
566 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type
);
570 pAsm
->cf_current_clause_type
= CF_EMPTY_CLAUSE
;
573 switch (new_clause_type
)
576 pAsm
->cf_current_clause_type
= CF_ALU_CLAUSE
;
579 pAsm
->cf_current_clause_type
= CF_VTX_CLAUSE
;
582 pAsm
->cf_current_clause_type
= CF_TEX_CLAUSE
;
584 case CF_EXPORT_CLAUSE
:
586 R700ControlFlowSXClause
* pR700ControlFlowSXClause
587 = (R700ControlFlowSXClause
*) CALLOC_STRUCT(R700ControlFlowSXClause
);
589 // Add new export instruction to control flow program
590 if (pR700ControlFlowSXClause
!= 0)
592 pAsm
->cf_current_export_clause_ptr
= pR700ControlFlowSXClause
;
593 Init_R700ControlFlowSXClause(pR700ControlFlowSXClause
);
594 AddCFInstruction( pAsm
->pR700Shader
,
595 (R700ControlFlowInstruction
*)pR700ControlFlowSXClause
);
600 "Error allocating new EXPORT CF instruction in check_current_clause. \n");
603 pAsm
->cf_current_clause_type
= CF_EXPORT_CLAUSE
;
606 case CF_EMPTY_CLAUSE
:
608 case CF_OTHER_CLAUSE
:
609 pAsm
->cf_current_clause_type
= CF_OTHER_CLAUSE
;
613 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type
);
/*
 * add_cf_instruction: allocate a new R700ControlFlowGenericClause, store it in
 * pAsm->cf_current_cf_clause_ptr and, when the allocation succeeds, initialise
 * it and append it to pAsm->pR700Shader via AddCFInstruction().
 *
 * check_current_clause(pAsm, CF_OTHER_CLAUSE) is called first and its result
 * is compared against GL_FALSE.
 *
 * NOTE: braces, the else keyword and the return statements of this function
 * are not visible in this copy; only the visible statements are described.
 */
621 GLboolean
add_cf_instruction(r700_AssemblerBase
* pAsm
)
623 if(GL_FALSE
== check_current_clause(pAsm
, CF_OTHER_CLAUSE
))
/* CALLOC_STRUCT result is checked for NULL right below before it is used. */
628 pAsm
->cf_current_cf_clause_ptr
=
629 (R700ControlFlowGenericClause
*) CALLOC_STRUCT(R700ControlFlowGenericClause
);
631 if (pAsm
->cf_current_cf_clause_ptr
!= NULL
)
633 Init_R700ControlFlowGenericClause(pAsm
->cf_current_cf_clause_ptr
);
634 AddCFInstruction( pAsm
->pR700Shader
,
635 (R700ControlFlowInstruction
*)pAsm
->cf_current_cf_clause_ptr
);
/*
 * NOTE(review): this message says "VFetch" although the clause allocated here
 * is the generic CF clause (CF_OTHER_CLAUSE); the text is the same one
 * add_vfetch_instruction() reports. Presumably the allocation-failure path --
 * confirm and consider rewording.
 */
639 radeon_error("Could not allocate a new VFetch CF instruction.\n");
646 GLboolean
add_vfetch_instruction(r700_AssemblerBase
* pAsm
,
647 R700VertexInstruction
* vertex_instruction_ptr
)
649 if( GL_FALSE
== check_current_clause(pAsm
, CF_VTX_CLAUSE
) )
654 if( pAsm
->cf_current_vtx_clause_ptr
== NULL
||
655 ( (pAsm
->cf_current_vtx_clause_ptr
!= NULL
) &&
656 (pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.count
>= GetCFMaxInstructions(pAsm
->cf_current_vtx_clause_ptr
->m_ShaderInstType
)-1)
659 // Create new Vfetch control flow instruction for this new clause
660 pAsm
->cf_current_vtx_clause_ptr
= (R700ControlFlowGenericClause
*) CALLOC_STRUCT(R700ControlFlowGenericClause
);
662 if (pAsm
->cf_current_vtx_clause_ptr
!= NULL
)
664 Init_R700ControlFlowGenericClause(pAsm
->cf_current_vtx_clause_ptr
);
665 AddCFInstruction( pAsm
->pR700Shader
,
666 (R700ControlFlowInstruction
*)pAsm
->cf_current_vtx_clause_ptr
);
670 radeon_error("Could not allocate a new VFetch CF instruction.\n");
674 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.pop_count
= 0x0;
675 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
676 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
677 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.count
= 0x0;
678 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
679 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
680 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_VTX
;
681 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
682 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
684 LinkVertexInstruction(pAsm
->cf_current_vtx_clause_ptr
, vertex_instruction_ptr
);
688 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.count
++;
691 AddVTXInstruction(pAsm
->pR700Shader
, vertex_instruction_ptr
);
696 GLboolean
add_tex_instruction(r700_AssemblerBase
* pAsm
,
697 R700TextureInstruction
* tex_instruction_ptr
)
699 if ( GL_FALSE
== check_current_clause(pAsm
, CF_TEX_CLAUSE
) )
704 if ( pAsm
->cf_current_tex_clause_ptr
== NULL
||
705 ( (pAsm
->cf_current_tex_clause_ptr
!= NULL
) &&
706 (pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.count
>= GetCFMaxInstructions(pAsm
->cf_current_tex_clause_ptr
->m_ShaderInstType
)-1)
709 // new tex cf instruction for this new clause
710 pAsm
->cf_current_tex_clause_ptr
= (R700ControlFlowGenericClause
*) CALLOC_STRUCT(R700ControlFlowGenericClause
);
712 if (pAsm
->cf_current_tex_clause_ptr
!= NULL
)
714 Init_R700ControlFlowGenericClause(pAsm
->cf_current_tex_clause_ptr
);
715 AddCFInstruction( pAsm
->pR700Shader
,
716 (R700ControlFlowInstruction
*)pAsm
->cf_current_tex_clause_ptr
);
720 radeon_error("Could not allocate a new TEX CF instruction.\n");
724 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.pop_count
= 0x0;
725 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
726 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
728 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
729 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
730 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_TEX
;
731 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
732 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.barrier
= 0x0; //0x1;
736 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.count
++;
739 // If this clause contains any TEX instruction that is dependent on a previous instruction,
740 // set the barrier bit
741 if( pAsm
->pInstDeps
[pAsm
->uiCurInst
].nDstDep
> (-1) || pAsm
->need_tex_barrier
== GL_TRUE
)
743 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
746 if(NULL
== pAsm
->cf_current_tex_clause_ptr
->m_pLinkedTEXInstruction
)
748 pAsm
->cf_current_tex_clause_ptr
->m_pLinkedTEXInstruction
= tex_instruction_ptr
;
749 tex_instruction_ptr
->m_pLinkedGenericClause
= pAsm
->cf_current_tex_clause_ptr
;
752 AddTEXInstruction(pAsm
->pR700Shader
, tex_instruction_ptr
);
757 GLboolean
assemble_vfetch_instruction(r700_AssemblerBase
* pAsm
,
759 GLuint destination_register
,
760 GLuint number_of_elements
,
761 GLenum dataElementType
,
762 VTX_FETCH_METHOD
* pFetchMethod
)
764 GLuint client_size_inbyte
;
766 GLuint mega_fetch_count
;
767 GLuint is_mega_fetch_flag
;
769 R700VertexGenericFetch
* vfetch_instruction_ptr
;
770 R700VertexGenericFetch
* assembled_vfetch_instruction_ptr
= pAsm
->vfetch_instruction_ptr_array
[ gl_client_id
];
772 if (assembled_vfetch_instruction_ptr
== NULL
)
774 vfetch_instruction_ptr
= (R700VertexGenericFetch
*) CALLOC_STRUCT(R700VertexGenericFetch
);
775 if (vfetch_instruction_ptr
== NULL
)
779 Init_R700VertexGenericFetch(vfetch_instruction_ptr
);
783 vfetch_instruction_ptr
= assembled_vfetch_instruction_ptr
;
786 data_format
= GetSurfaceFormat(dataElementType
, number_of_elements
, &client_size_inbyte
);
788 if(GL_TRUE
== pFetchMethod
->bEnableMini
) //More conditions here
794 mega_fetch_count
= MEGA_FETCH_BYTES
- 1;
795 is_mega_fetch_flag
= 0x1;
796 pFetchMethod
->mega_fetch_remainder
= MEGA_FETCH_BYTES
- client_size_inbyte
;
799 vfetch_instruction_ptr
->m_Word0
.f
.vtx_inst
= SQ_VTX_INST_FETCH
;
800 vfetch_instruction_ptr
->m_Word0
.f
.fetch_type
= SQ_VTX_FETCH_VERTEX_DATA
;
801 vfetch_instruction_ptr
->m_Word0
.f
.fetch_whole_quad
= 0x0;
803 vfetch_instruction_ptr
->m_Word0
.f
.buffer_id
= gl_client_id
;
804 vfetch_instruction_ptr
->m_Word0
.f
.src_gpr
= 0x0;
805 vfetch_instruction_ptr
->m_Word0
.f
.src_rel
= SQ_ABSOLUTE
;
806 vfetch_instruction_ptr
->m_Word0
.f
.src_sel_x
= SQ_SEL_X
;
807 vfetch_instruction_ptr
->m_Word0
.f
.mega_fetch_count
= mega_fetch_count
;
809 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_x
= (number_of_elements
< 1) ? SQ_SEL_0
: SQ_SEL_X
;
810 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_y
= (number_of_elements
< 2) ? SQ_SEL_0
: SQ_SEL_Y
;
811 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_z
= (number_of_elements
< 3) ? SQ_SEL_0
: SQ_SEL_Z
;
812 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_w
= (number_of_elements
< 4) ? SQ_SEL_1
: SQ_SEL_W
;
814 vfetch_instruction_ptr
->m_Word1
.f
.use_const_fields
= 1;
816 // Destination register
817 vfetch_instruction_ptr
->m_Word1_GPR
.f
.dst_gpr
= destination_register
;
818 vfetch_instruction_ptr
->m_Word1_GPR
.f
.dst_rel
= SQ_ABSOLUTE
;
820 vfetch_instruction_ptr
->m_Word2
.f
.offset
= 0;
821 vfetch_instruction_ptr
->m_Word2
.f
.const_buf_no_stride
= 0x0;
823 vfetch_instruction_ptr
->m_Word2
.f
.mega_fetch
= is_mega_fetch_flag
;
825 if (assembled_vfetch_instruction_ptr
== NULL
)
827 if ( GL_FALSE
== add_vfetch_instruction(pAsm
, (R700VertexInstruction
*)vfetch_instruction_ptr
) )
832 if (pAsm
->vfetch_instruction_ptr_array
[ gl_client_id
] != NULL
)
838 pAsm
->vfetch_instruction_ptr_array
[ gl_client_id
] = vfetch_instruction_ptr
;
845 GLboolean
assemble_vfetch_instruction2(r700_AssemblerBase
* pAsm
,
846 GLuint destination_register
,
852 VTX_FETCH_METHOD
* pFetchMethod
)
854 GLuint client_size_inbyte
;
856 GLuint mega_fetch_count
;
857 GLuint is_mega_fetch_flag
;
859 R700VertexGenericFetch
* vfetch_instruction_ptr
;
860 R700VertexGenericFetch
* assembled_vfetch_instruction_ptr
861 = pAsm
->vfetch_instruction_ptr_array
[element
];
863 if (assembled_vfetch_instruction_ptr
== NULL
)
865 vfetch_instruction_ptr
= (R700VertexGenericFetch
*) CALLOC_STRUCT(R700VertexGenericFetch
);
866 if (vfetch_instruction_ptr
== NULL
)
870 Init_R700VertexGenericFetch(vfetch_instruction_ptr
);
874 vfetch_instruction_ptr
= assembled_vfetch_instruction_ptr
;
877 data_format
= GetSurfaceFormat(type
, size
, &client_size_inbyte
);
879 if(GL_TRUE
== pFetchMethod
->bEnableMini
) //More conditions here
885 mega_fetch_count
= MEGA_FETCH_BYTES
- 1;
886 is_mega_fetch_flag
= 0x1;
887 pFetchMethod
->mega_fetch_remainder
= MEGA_FETCH_BYTES
- client_size_inbyte
;
890 vfetch_instruction_ptr
->m_Word0
.f
.vtx_inst
= SQ_VTX_INST_FETCH
;
891 vfetch_instruction_ptr
->m_Word0
.f
.fetch_type
= SQ_VTX_FETCH_VERTEX_DATA
;
892 vfetch_instruction_ptr
->m_Word0
.f
.fetch_whole_quad
= 0x0;
894 vfetch_instruction_ptr
->m_Word0
.f
.buffer_id
= element
;
895 vfetch_instruction_ptr
->m_Word0
.f
.src_gpr
= 0x0;
896 vfetch_instruction_ptr
->m_Word0
.f
.src_rel
= SQ_ABSOLUTE
;
897 vfetch_instruction_ptr
->m_Word0
.f
.src_sel_x
= SQ_SEL_X
;
898 vfetch_instruction_ptr
->m_Word0
.f
.mega_fetch_count
= mega_fetch_count
;
900 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_x
= (size
< 1) ? SQ_SEL_0
: SQ_SEL_X
;
901 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_y
= (size
< 2) ? SQ_SEL_0
: SQ_SEL_Y
;
902 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_z
= (size
< 3) ? SQ_SEL_0
: SQ_SEL_Z
;
903 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_w
= (size
< 4) ? SQ_SEL_1
: SQ_SEL_W
;
905 vfetch_instruction_ptr
->m_Word1
.f
.use_const_fields
= 1;
906 vfetch_instruction_ptr
->m_Word1
.f
.data_format
= data_format
;
907 vfetch_instruction_ptr
->m_Word2
.f
.endian_swap
= SQ_ENDIAN_NONE
;
911 vfetch_instruction_ptr
->m_Word1
.f
.format_comp_all
= SQ_FORMAT_COMP_SIGNED
;
915 vfetch_instruction_ptr
->m_Word1
.f
.format_comp_all
= SQ_FORMAT_COMP_UNSIGNED
;
918 if(GL_TRUE
== normalize
)
920 vfetch_instruction_ptr
->m_Word1
.f
.num_format_all
= SQ_NUM_FORMAT_NORM
;
924 vfetch_instruction_ptr
->m_Word1
.f
.num_format_all
= SQ_NUM_FORMAT_INT
;
927 // Destination register
928 vfetch_instruction_ptr
->m_Word1_GPR
.f
.dst_gpr
= destination_register
;
929 vfetch_instruction_ptr
->m_Word1_GPR
.f
.dst_rel
= SQ_ABSOLUTE
;
931 vfetch_instruction_ptr
->m_Word2
.f
.offset
= 0;
932 vfetch_instruction_ptr
->m_Word2
.f
.const_buf_no_stride
= 0x0;
934 vfetch_instruction_ptr
->m_Word2
.f
.mega_fetch
= is_mega_fetch_flag
;
936 if (assembled_vfetch_instruction_ptr
== NULL
)
938 if ( GL_FALSE
== add_vfetch_instruction(pAsm
, (R700VertexInstruction
*)vfetch_instruction_ptr
) )
943 if (pAsm
->vfetch_instruction_ptr_array
[element
] != NULL
)
949 pAsm
->vfetch_instruction_ptr_array
[element
] = vfetch_instruction_ptr
;
956 GLboolean
cleanup_vfetch_instructions(r700_AssemblerBase
* pAsm
)
959 pAsm
->cf_current_clause_type
= CF_EMPTY_CLAUSE
;
960 pAsm
->cf_current_vtx_clause_ptr
= NULL
;
962 for (i
=0; i
<VERT_ATTRIB_MAX
; i
++)
964 pAsm
->vfetch_instruction_ptr_array
[ i
] = NULL
;
967 cleanup_vfetch_shaderinst(pAsm
->pR700Shader
);
972 GLuint
gethelpr(r700_AssemblerBase
* pAsm
)
974 GLuint r
= pAsm
->uHelpReg
;
976 if (pAsm
->uHelpReg
> pAsm
->number_used_registers
)
978 pAsm
->number_used_registers
= pAsm
->uHelpReg
;
982 void resethelpr(r700_AssemblerBase
* pAsm
)
984 pAsm
->uHelpReg
= pAsm
->uFirstHelpReg
;
987 void checkop_init(r700_AssemblerBase
* pAsm
)
993 pAsm
->aArgSubst
[3] = -1;
996 GLboolean
mov_temp(r700_AssemblerBase
* pAsm
, int src
)
998 GLuint tmp
= gethelpr(pAsm
);
1000 //mov src to temp helper gpr.
1001 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
1003 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1005 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
1006 pAsm
->D
.dst
.reg
= tmp
;
1008 nomask_PVSDST(&(pAsm
->D
.dst
));
1010 if( GL_FALSE
== assemble_src(pAsm
, src
, 0) )
1015 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
1016 noneg_PVSSRC(&(pAsm
->S
[0].src
));
1018 if( GL_FALSE
== next_ins(pAsm
) )
1023 pAsm
->aArgSubst
[1 + src
] = tmp
;
1028 GLboolean
checkop1(r700_AssemblerBase
* pAsm
)
1034 GLboolean
checkop2(r700_AssemblerBase
* pAsm
)
1036 GLboolean bSrcConst
[2];
1037 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1041 if( (pILInst
->SrcReg
[0].File
== PROGRAM_CONSTANT
) ||
1042 (pILInst
->SrcReg
[0].File
== PROGRAM_LOCAL_PARAM
) ||
1043 (pILInst
->SrcReg
[0].File
== PROGRAM_ENV_PARAM
) ||
1044 (pILInst
->SrcReg
[0].File
== PROGRAM_STATE_VAR
) )
1046 bSrcConst
[0] = GL_TRUE
;
1050 bSrcConst
[0] = GL_FALSE
;
1052 if( (pILInst
->SrcReg
[1].File
== PROGRAM_CONSTANT
) ||
1053 (pILInst
->SrcReg
[1].File
== PROGRAM_LOCAL_PARAM
) ||
1054 (pILInst
->SrcReg
[1].File
== PROGRAM_ENV_PARAM
) ||
1055 (pILInst
->SrcReg
[1].File
== PROGRAM_STATE_VAR
) )
1057 bSrcConst
[1] = GL_TRUE
;
1061 bSrcConst
[1] = GL_FALSE
;
1064 if( (bSrcConst
[0] == GL_TRUE
) && (bSrcConst
[1] == GL_TRUE
) )
1066 if(pILInst
->SrcReg
[0].Index
!= pILInst
->SrcReg
[1].Index
)
1068 if( GL_FALSE
== mov_temp(pAsm
, 1) )
1078 GLboolean
checkop3(r700_AssemblerBase
* pAsm
)
1080 GLboolean bSrcConst
[3];
1081 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1085 if( (pILInst
->SrcReg
[0].File
== PROGRAM_CONSTANT
) ||
1086 (pILInst
->SrcReg
[0].File
== PROGRAM_LOCAL_PARAM
) ||
1087 (pILInst
->SrcReg
[0].File
== PROGRAM_ENV_PARAM
) ||
1088 (pILInst
->SrcReg
[0].File
== PROGRAM_STATE_VAR
) )
1090 bSrcConst
[0] = GL_TRUE
;
1094 bSrcConst
[0] = GL_FALSE
;
1096 if( (pILInst
->SrcReg
[1].File
== PROGRAM_CONSTANT
) ||
1097 (pILInst
->SrcReg
[1].File
== PROGRAM_LOCAL_PARAM
) ||
1098 (pILInst
->SrcReg
[1].File
== PROGRAM_ENV_PARAM
) ||
1099 (pILInst
->SrcReg
[1].File
== PROGRAM_STATE_VAR
) )
1101 bSrcConst
[1] = GL_TRUE
;
1105 bSrcConst
[1] = GL_FALSE
;
1107 if( (pILInst
->SrcReg
[2].File
== PROGRAM_CONSTANT
) ||
1108 (pILInst
->SrcReg
[2].File
== PROGRAM_LOCAL_PARAM
) ||
1109 (pILInst
->SrcReg
[2].File
== PROGRAM_ENV_PARAM
) ||
1110 (pILInst
->SrcReg
[2].File
== PROGRAM_STATE_VAR
) )
1112 bSrcConst
[2] = GL_TRUE
;
1116 bSrcConst
[2] = GL_FALSE
;
1119 if( (GL_TRUE
== bSrcConst
[0]) &&
1120 (GL_TRUE
== bSrcConst
[1]) &&
1121 (GL_TRUE
== bSrcConst
[2]) )
1123 if( GL_FALSE
== mov_temp(pAsm
, 1) )
1127 if( GL_FALSE
== mov_temp(pAsm
, 2) )
1134 else if( (GL_TRUE
== bSrcConst
[0]) &&
1135 (GL_TRUE
== bSrcConst
[1]) )
1137 if(pILInst
->SrcReg
[0].Index
!= pILInst
->SrcReg
[1].Index
)
1139 if( GL_FALSE
== mov_temp(pAsm
, 1) )
1147 else if ( (GL_TRUE
== bSrcConst
[0]) &&
1148 (GL_TRUE
== bSrcConst
[2]) )
1150 if(pILInst
->SrcReg
[0].Index
!= pILInst
->SrcReg
[2].Index
)
1152 if( GL_FALSE
== mov_temp(pAsm
, 2) )
1160 else if( (GL_TRUE
== bSrcConst
[1]) &&
1161 (GL_TRUE
== bSrcConst
[2]) )
1163 if(pILInst
->SrcReg
[1].Index
!= pILInst
->SrcReg
[2].Index
)
1165 if( GL_FALSE
== mov_temp(pAsm
, 2) )
1177 GLboolean
assemble_src(r700_AssemblerBase
*pAsm
,
1181 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1188 if(pAsm
->aArgSubst
[1+src
] >= 0)
1190 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1191 pAsm
->S
[fld
].src
.rtype
= SRC_REG_TEMPORARY
;
1192 pAsm
->S
[fld
].src
.reg
= pAsm
->aArgSubst
[1+src
];
1196 switch (pILInst
->SrcReg
[src
].File
)
1198 case PROGRAM_TEMPORARY
:
1199 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1200 pAsm
->S
[fld
].src
.rtype
= SRC_REG_TEMPORARY
;
1201 pAsm
->S
[fld
].src
.reg
= pILInst
->SrcReg
[src
].Index
+ pAsm
->starting_temp_register_number
;
1203 case PROGRAM_CONSTANT
:
1204 case PROGRAM_LOCAL_PARAM
:
1205 case PROGRAM_ENV_PARAM
:
1206 case PROGRAM_STATE_VAR
:
1207 case PROGRAM_UNIFORM
:
1208 if (1 == pILInst
->SrcReg
[src
].RelAddr
)
1210 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_RELATIVE_A0
);
1214 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1217 pAsm
->S
[fld
].src
.rtype
= SRC_REG_CONSTANT
;
1218 pAsm
->S
[fld
].src
.reg
= pILInst
->SrcReg
[src
].Index
;
1221 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1222 pAsm
->S
[fld
].src
.rtype
= SRC_REG_INPUT
;
1223 switch (pAsm
->currentShaderType
)
1226 pAsm
->S
[fld
].src
.reg
= pAsm
->uiFP_AttributeMap
[pILInst
->SrcReg
[src
].Index
];
1229 pAsm
->S
[fld
].src
.reg
= pAsm
->ucVP_AttributeMap
[pILInst
->SrcReg
[src
].Index
];
1234 radeon_error("Invalid source argument type : %d \n", pILInst
->SrcReg
[src
].File
);
1239 pAsm
->S
[fld
].src
.swizzlex
= pILInst
->SrcReg
[src
].Swizzle
& 0x7;
1240 pAsm
->S
[fld
].src
.swizzley
= (pILInst
->SrcReg
[src
].Swizzle
>> 3) & 0x7;
1241 pAsm
->S
[fld
].src
.swizzlez
= (pILInst
->SrcReg
[src
].Swizzle
>> 6) & 0x7;
1242 pAsm
->S
[fld
].src
.swizzlew
= (pILInst
->SrcReg
[src
].Swizzle
>> 9) & 0x7;
1244 pAsm
->S
[fld
].src
.negx
= pILInst
->SrcReg
[src
].Negate
& 0x1;
1245 pAsm
->S
[fld
].src
.negy
= (pILInst
->SrcReg
[src
].Negate
>> 1) & 0x1;
1246 pAsm
->S
[fld
].src
.negz
= (pILInst
->SrcReg
[src
].Negate
>> 2) & 0x1;
1247 pAsm
->S
[fld
].src
.negw
= (pILInst
->SrcReg
[src
].Negate
>> 3) & 0x1;
1252 GLboolean
assemble_dst(r700_AssemblerBase
*pAsm
)
1254 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1255 switch (pILInst
->DstReg
.File
)
1257 case PROGRAM_TEMPORARY
:
1258 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1259 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
1260 pAsm
->D
.dst
.reg
= pILInst
->DstReg
.Index
+ pAsm
->starting_temp_register_number
;
1262 case PROGRAM_ADDRESS
:
1263 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1264 pAsm
->D
.dst
.rtype
= DST_REG_A0
;
1265 pAsm
->D
.dst
.reg
= 0;
1267 case PROGRAM_OUTPUT
:
1268 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1269 pAsm
->D
.dst
.rtype
= DST_REG_OUT
;
1270 switch (pAsm
->currentShaderType
)
1273 pAsm
->D
.dst
.reg
= pAsm
->uiFP_OutputMap
[pILInst
->DstReg
.Index
];
1276 pAsm
->D
.dst
.reg
= pAsm
->ucVP_OutputMap
[pILInst
->DstReg
.Index
];
1281 radeon_error("Invalid destination output argument type\n");
1285 pAsm
->D
.dst
.writex
= pILInst
->DstReg
.WriteMask
& 0x1;
1286 pAsm
->D
.dst
.writey
= (pILInst
->DstReg
.WriteMask
>> 1) & 0x1;
1287 pAsm
->D
.dst
.writez
= (pILInst
->DstReg
.WriteMask
>> 2) & 0x1;
1288 pAsm
->D
.dst
.writew
= (pILInst
->DstReg
.WriteMask
>> 3) & 0x1;
1293 GLboolean
tex_dst(r700_AssemblerBase
*pAsm
)
1295 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1297 if(PROGRAM_TEMPORARY
== pILInst
->DstReg
.File
)
1299 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
1300 pAsm
->D
.dst
.reg
= pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.Index
+ pAsm
->starting_temp_register_number
;
1302 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1304 else if(PROGRAM_OUTPUT
== pILInst
->DstReg
.File
)
1306 pAsm
->D
.dst
.rtype
= DST_REG_OUT
;
1307 switch (pAsm
->currentShaderType
)
1310 pAsm
->D
.dst
.reg
= pAsm
->uiFP_OutputMap
[pILInst
->DstReg
.Index
];
1313 pAsm
->D
.dst
.reg
= pAsm
->ucVP_OutputMap
[pILInst
->DstReg
.Index
];
1317 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1321 radeon_error("Invalid destination output argument type\n");
1325 pAsm
->D
.dst
.writex
= pILInst
->DstReg
.WriteMask
& 0x1;
1326 pAsm
->D
.dst
.writey
= (pILInst
->DstReg
.WriteMask
>> 1) & 0x1;
1327 pAsm
->D
.dst
.writez
= (pILInst
->DstReg
.WriteMask
>> 2) & 0x1;
1328 pAsm
->D
.dst
.writew
= (pILInst
->DstReg
.WriteMask
>> 3) & 0x1;
/*
 * tex_src: fill in pAsm->S[0].src, the texture-coordinate source operand,
 * from SrcReg[0] of the IL instruction at index pAsm->uiCurInst.
 *
 * bValidTexCoord starts as GL_FALSE and is set to GL_TRUE by every visible
 * path that assigns src.reg / src.rtype. When it is not GL_TRUE the function
 * reports "Invalid source texcoord for TEX instruction".
 *
 * NOTE: several structural lines (braces, else keywords, break/return
 * statements and at least one case label) are not visible in this copy of
 * the function; the comments below describe only the visible statements.
 */
1333 GLboolean
tex_src(r700_AssemblerBase
*pAsm
)
1335 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1337 GLboolean bValidTexCoord
= GL_FALSE
;
/* A non-negative aArgSubst[1] names a substitute register: use it as a
 * temporary with absolute addressing. */
1339 if(pAsm
->aArgSubst
[1] >= 0)
1341 bValidTexCoord
= GL_TRUE
;
1342 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
1343 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
1344 pAsm
->S
[0].src
.reg
= pAsm
->aArgSubst
[1];
/* Otherwise (presumably the else branch) classify by the register file of
 * SrcReg[0]. No operand setup is visible for the four constant-like files. */
1348 switch (pILInst
->SrcReg
[0].File
) {
1349 case PROGRAM_CONSTANT
:
1350 case PROGRAM_LOCAL_PARAM
:
1351 case PROGRAM_ENV_PARAM
:
1352 case PROGRAM_STATE_VAR
:
/* Temporaries are offset by starting_temp_register_number. */
1354 case PROGRAM_TEMPORARY
:
1355 bValidTexCoord
= GL_TRUE
;
1356 pAsm
->S
[0].src
.reg
= pILInst
->SrcReg
[0].Index
+
1357 pAsm
->starting_temp_register_number
;
1358 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
/* Per-attribute handling; presumably reached for PROGRAM_INPUT sources (the
 * case label is not visible here). The attributes listed first are mapped
 * through uiFP_AttributeMap; FACE, PNTC and VAR0 only print "unsupported". */
1361 switch (pILInst
->SrcReg
[0].Index
)
1363 case FRAG_ATTRIB_WPOS
:
1364 case FRAG_ATTRIB_COL0
:
1365 case FRAG_ATTRIB_COL1
:
1366 case FRAG_ATTRIB_FOGC
:
1367 case FRAG_ATTRIB_TEX0
:
1368 case FRAG_ATTRIB_TEX1
:
1369 case FRAG_ATTRIB_TEX2
:
1370 case FRAG_ATTRIB_TEX3
:
1371 case FRAG_ATTRIB_TEX4
:
1372 case FRAG_ATTRIB_TEX5
:
1373 case FRAG_ATTRIB_TEX6
:
1374 case FRAG_ATTRIB_TEX7
:
1375 bValidTexCoord
= GL_TRUE
;
1376 pAsm
->S
[0].src
.reg
=
1377 pAsm
->uiFP_AttributeMap
[pILInst
->SrcReg
[0].Index
];
1378 pAsm
->S
[0].src
.rtype
= SRC_REG_INPUT
;
1380 case FRAG_ATTRIB_FACE
:
1381 fprintf(stderr
, "FRAG_ATTRIB_FACE unsupported\n");
1383 case FRAG_ATTRIB_PNTC
:
1384 fprintf(stderr
, "FRAG_ATTRIB_PNTC unsupported\n");
1386 case FRAG_ATTRIB_VAR0
:
1387 fprintf(stderr
, "FRAG_ATTRIB_VAR0 unsupported\n");
/*
 * NOTE(review): the two comparisons below are joined with ||. Provided
 * FRAG_ATTRIB_VAR0 <= FRAG_ATTRIB_MAX (enum values not visible here --
 * confirm), one of them holds for every Index, so this test never rejects
 * anything: the attributes reported as "unsupported" above are still marked
 * valid, and uiFP_AttributeMap is indexed without an effective range check.
 * A range test (&&) was probably intended -- verify before changing.
 */
1391 if( (pILInst
->SrcReg
[0].Index
>= FRAG_ATTRIB_VAR0
) ||
1392 (pILInst
->SrcReg
[0].Index
< FRAG_ATTRIB_MAX
) )
1394 bValidTexCoord
= GL_TRUE
;
1395 pAsm
->S
[0].src
.reg
=
1396 pAsm
->uiFP_AttributeMap
[pILInst
->SrcReg
[0].Index
];
1397 pAsm
->S
[0].src
.rtype
= SRC_REG_INPUT
;
/* A valid coordinate gets absolute addressing; otherwise an error is reported. */
1404 if(GL_TRUE
== bValidTexCoord
)
1406 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
1410 radeon_error("Invalid source texcoord for TEX instruction\n");
/* Swizzle packs one 3-bit selector per component:
 * x = bits 0-2, y = bits 3-5, z = bits 6-8, w = bits 9-11. */
1414 pAsm
->S
[0].src
.swizzlex
= pILInst
->SrcReg
[0].Swizzle
& 0x7;
1415 pAsm
->S
[0].src
.swizzley
= (pILInst
->SrcReg
[0].Swizzle
>> 3) & 0x7;
1416 pAsm
->S
[0].src
.swizzlez
= (pILInst
->SrcReg
[0].Swizzle
>> 6) & 0x7;
1417 pAsm
->S
[0].src
.swizzlew
= (pILInst
->SrcReg
[0].Swizzle
>> 9) & 0x7;
/* Negate packs one bit per component: bit 0 = x, 1 = y, 2 = z, 3 = w. */
1419 pAsm
->S
[0].src
.negx
= pILInst
->SrcReg
[0].Negate
& 0x1;
1420 pAsm
->S
[0].src
.negy
= (pILInst
->SrcReg
[0].Negate
>> 1) & 0x1;
1421 pAsm
->S
[0].src
.negz
= (pILInst
->SrcReg
[0].Negate
>> 2) & 0x1;
1422 pAsm
->S
[0].src
.negw
= (pILInst
->SrcReg
[0].Negate
>> 3) & 0x1;
1427 GLboolean
assemble_tex_instruction(r700_AssemblerBase
*pAsm
, GLboolean normalized
)
1429 PVSSRC
* texture_coordinate_source
;
1430 PVSSRC
* texture_unit_source
;
1432 R700TextureInstruction
* tex_instruction_ptr
= (R700TextureInstruction
*) CALLOC_STRUCT(R700TextureInstruction
);
1433 if (tex_instruction_ptr
== NULL
)
1437 Init_R700TextureInstruction(tex_instruction_ptr
);
1439 texture_coordinate_source
= &(pAsm
->S
[0].src
);
1440 texture_unit_source
= &(pAsm
->S
[1].src
);
1442 tex_instruction_ptr
->m_Word0
.f
.tex_inst
= pAsm
->D
.dst
.opcode
;
1443 tex_instruction_ptr
->m_Word0
.f
.bc_frac_mode
= 0x0;
1444 tex_instruction_ptr
->m_Word0
.f
.fetch_whole_quad
= 0x0;
1446 tex_instruction_ptr
->m_Word0
.f
.resource_id
= texture_unit_source
->reg
;
1448 tex_instruction_ptr
->m_Word1
.f
.lod_bias
= 0x0;
1450 tex_instruction_ptr
->m_Word1
.f
.coord_type_x
= SQ_TEX_NORMALIZED
;
1451 tex_instruction_ptr
->m_Word1
.f
.coord_type_y
= SQ_TEX_NORMALIZED
;
1452 tex_instruction_ptr
->m_Word1
.f
.coord_type_z
= SQ_TEX_NORMALIZED
;
1453 tex_instruction_ptr
->m_Word1
.f
.coord_type_w
= SQ_TEX_NORMALIZED
;
1455 /* XXX: UNNORMALIZED tex coords have limited wrap modes */
1456 tex_instruction_ptr
->m_Word1
.f
.coord_type_x
= SQ_TEX_UNNORMALIZED
;
1457 tex_instruction_ptr
->m_Word1
.f
.coord_type_y
= SQ_TEX_UNNORMALIZED
;
1458 tex_instruction_ptr
->m_Word1
.f
.coord_type_z
= SQ_TEX_UNNORMALIZED
;
1459 tex_instruction_ptr
->m_Word1
.f
.coord_type_w
= SQ_TEX_UNNORMALIZED
;
1462 tex_instruction_ptr
->m_Word2
.f
.offset_x
= 0x0;
1463 tex_instruction_ptr
->m_Word2
.f
.offset_y
= 0x0;
1464 tex_instruction_ptr
->m_Word2
.f
.offset_z
= 0x0;
1466 tex_instruction_ptr
->m_Word2
.f
.sampler_id
= texture_unit_source
->reg
;
1469 if ( (pAsm
->D
.dst
.rtype
== DST_REG_TEMPORARY
) ||
1470 (pAsm
->D
.dst
.rtype
== DST_REG_OUT
) )
1472 tex_instruction_ptr
->m_Word0
.f
.src_gpr
= texture_coordinate_source
->reg
;
1473 tex_instruction_ptr
->m_Word0
.f
.src_rel
= SQ_ABSOLUTE
;
1475 tex_instruction_ptr
->m_Word1
.f
.dst_gpr
= pAsm
->D
.dst
.reg
;
1476 tex_instruction_ptr
->m_Word1
.f
.dst_rel
= SQ_ABSOLUTE
;
1478 tex_instruction_ptr
->m_Word1
.f
.dst_sel_x
= (pAsm
->D
.dst
.writex
? texture_unit_source
->swizzlex
: SQ_SEL_MASK
);
1479 tex_instruction_ptr
->m_Word1
.f
.dst_sel_y
= (pAsm
->D
.dst
.writey
? texture_unit_source
->swizzley
: SQ_SEL_MASK
);
1480 tex_instruction_ptr
->m_Word1
.f
.dst_sel_z
= (pAsm
->D
.dst
.writez
? texture_unit_source
->swizzlez
: SQ_SEL_MASK
);
1481 tex_instruction_ptr
->m_Word1
.f
.dst_sel_w
= (pAsm
->D
.dst
.writew
? texture_unit_source
->swizzlew
: SQ_SEL_MASK
);
1484 tex_instruction_ptr
->m_Word2
.f
.src_sel_x
= texture_coordinate_source
->swizzlex
;
1485 tex_instruction_ptr
->m_Word2
.f
.src_sel_y
= texture_coordinate_source
->swizzley
;
1486 tex_instruction_ptr
->m_Word2
.f
.src_sel_z
= texture_coordinate_source
->swizzlez
;
1487 tex_instruction_ptr
->m_Word2
.f
.src_sel_w
= texture_coordinate_source
->swizzlew
;
1491 radeon_error("Only temp destination registers supported for TEX dest regs.\n");
1495 if( GL_FALSE
== add_tex_instruction(pAsm
, tex_instruction_ptr
) )
1503 void initialize(r700_AssemblerBase
*pAsm
)
1505 GLuint cycle
, component
;
1507 for (cycle
=0; cycle
<NUMBER_OF_CYCLES
; cycle
++)
1509 for (component
=0; component
<NUMBER_OF_COMPONENTS
; component
++)
1511 pAsm
->hw_gpr
[cycle
][component
] = (-1);
1514 for (component
=0; component
<NUMBER_OF_COMPONENTS
; component
++)
1516 pAsm
->hw_cfile_addr
[component
] = (-1);
1517 pAsm
->hw_cfile_chan
[component
] = (-1);
1521 GLboolean
assemble_alu_src(R700ALUInstruction
* alu_instruction_ptr
,
1524 BITS scalar_channel_index
)
1531 //--------------------------------------------------------------------------
1532 // Source for operands src0, src1.
1533 // Values [0,127] correspond to GPR[0..127].
1534 // Values [256,511] correspond to cfile constants c[0..255].
1536 //--------------------------------------------------------------------------
1537 // Other special values are shown in the list below.
1539 // 248 SQ_ALU_SRC_0: special constant 0.0.
1540 // 249 SQ_ALU_SRC_1: special constant 1.0 float.
1542 // 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
1543 // 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
1545 // 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
1546 // 253 SQ_ALU_SRC_LITERAL: literal constant.
1548 // 254 SQ_ALU_SRC_PV: previous vector result.
1549 // 255 SQ_ALU_SRC_PS: previous scalar result.
1550 //--------------------------------------------------------------------------
1552 BITS channel_swizzle
;
1553 switch (scalar_channel_index
)
1555 case 0: channel_swizzle
= pSource
->swizzlex
; break;
1556 case 1: channel_swizzle
= pSource
->swizzley
; break;
1557 case 2: channel_swizzle
= pSource
->swizzlez
; break;
1558 case 3: channel_swizzle
= pSource
->swizzlew
; break;
1559 default: channel_swizzle
= SQ_SEL_MASK
; break;
1562 if(channel_swizzle
== SQ_SEL_0
)
1564 src_sel
= SQ_ALU_SRC_0
;
1566 else if (channel_swizzle
== SQ_SEL_1
)
1568 src_sel
= SQ_ALU_SRC_1
;
1572 if ( (pSource
->rtype
== SRC_REG_TEMPORARY
) ||
1573 (pSource
->rtype
== SRC_REG_INPUT
)
1576 src_sel
= pSource
->reg
;
1578 else if (pSource
->rtype
== SRC_REG_CONSTANT
)
1580 src_sel
= pSource
->reg
+ CFILE_REGISTER_OFFSET
;
1582 else if (pSource
->rtype
== SRC_REC_LITERAL
)
1584 src_sel
= SQ_ALU_SRC_LITERAL
;
1588 radeon_error("Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.\n",
1589 source_index
, pSource
->rtype
);
1594 if( ADDR_ABSOLUTE
== addrmode_PVSSRC(pSource
) )
1596 src_rel
= SQ_ABSOLUTE
;
1600 src_rel
= SQ_RELATIVE
;
1603 switch (channel_swizzle
)
1606 src_chan
= SQ_CHAN_X
;
1609 src_chan
= SQ_CHAN_Y
;
1612 src_chan
= SQ_CHAN_Z
;
1615 src_chan
= SQ_CHAN_W
;
1619 // Does not matter since src_sel controls
1620 src_chan
= SQ_CHAN_X
;
1623 radeon_error("Unknown source select value (%d) in assemble_alu_src().\n", channel_swizzle
);
1628 switch (scalar_channel_index
)
1630 case 0: src_neg
= pSource
->negx
; break;
1631 case 1: src_neg
= pSource
->negy
; break;
1632 case 2: src_neg
= pSource
->negz
; break;
1633 case 3: src_neg
= pSource
->negw
; break;
1634 default: src_neg
= 0; break;
1637 switch (source_index
)
1640 alu_instruction_ptr
->m_Word0
.f
.src0_sel
= src_sel
;
1641 alu_instruction_ptr
->m_Word0
.f
.src0_rel
= src_rel
;
1642 alu_instruction_ptr
->m_Word0
.f
.src0_chan
= src_chan
;
1643 alu_instruction_ptr
->m_Word0
.f
.src0_neg
= src_neg
;
1646 alu_instruction_ptr
->m_Word0
.f
.src1_sel
= src_sel
;
1647 alu_instruction_ptr
->m_Word0
.f
.src1_rel
= src_rel
;
1648 alu_instruction_ptr
->m_Word0
.f
.src1_chan
= src_chan
;
1649 alu_instruction_ptr
->m_Word0
.f
.src1_neg
= src_neg
;
1652 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_sel
= src_sel
;
1653 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_rel
= src_rel
;
1654 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_chan
= src_chan
;
1655 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_neg
= src_neg
;
1658 radeon_error("Only three sources allowed in ALU opcodes.\n");
/*
 * Append an ALU instruction to the shader, opening a new ALU control-flow
 * clause first when one is needed.
 *
 * pAsm                     assembler state (current clause, shader, deps).
 * alu_instruction_ptr      instruction to add; handed to AddALUInstruction().
 * contiguous_slots_needed  slots that must still fit in the current clause;
 *                          subtracted from the clause capacity test below.
 *
 * NOTE(review): GL_TRUE on success / GL_FALSE on failure is assumed for the
 * return value -- confirm against the callers.
 */
1666 GLboolean
add_alu_instruction(r700_AssemblerBase
* pAsm
,
1667 R700ALUInstruction
* alu_instruction_ptr
,
1668 GLuint contiguous_slots_needed
)
/* Ask check_current_clause() for an ALU clause before anything else. */
1670 if( GL_FALSE
== check_current_clause(pAsm
, CF_ALU_CLAUSE
) )
/*
 * A new clause is started when an explicit clause opcode is pending
 * (alu_x_opcode != 0), when there is no current clause, or when the
 * current clause count has reached
 * GetCFMaxInstructions() - contiguous_slots_needed - 1.
 */
1675 if ( pAsm
->alu_x_opcode
!= 0 ||
1676 pAsm
->cf_current_alu_clause_ptr
== NULL
||
1677 ( (pAsm
->cf_current_alu_clause_ptr
!= NULL
) &&
1678 (pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
>= (GetCFMaxInstructions(pAsm
->cf_current_alu_clause_ptr
->m_ShaderInstType
)-contiguous_slots_needed
-1) )
1682 //new cf inst for this clause
1683 pAsm
->cf_current_alu_clause_ptr
= (R700ControlFlowALUClause
*) CALLOC_STRUCT(R700ControlFlowALUClause
);
1685 // link the new cf to cf segment
1686 if(NULL
!= pAsm
->cf_current_alu_clause_ptr
)
1688 Init_R700ControlFlowALUClause(pAsm
->cf_current_alu_clause_ptr
);
1689 AddCFInstruction( pAsm
->pR700Shader
,
1690 (R700ControlFlowInstruction
*)pAsm
->cf_current_alu_clause_ptr
);
1694 radeon_error("Could not allocate a new ALU CF instruction.\n");
/* Fresh clause: kcache fields cleared / set to NOP, instruction count zero. */
1698 pAsm
->cf_current_alu_clause_ptr
->m_Word0
.f
.kcache_bank0
= 0x0;
1699 pAsm
->cf_current_alu_clause_ptr
->m_Word0
.f
.kcache_bank1
= 0x0;
1700 pAsm
->cf_current_alu_clause_ptr
->m_Word0
.f
.kcache_mode0
= SQ_CF_KCACHE_NOP
;
1702 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.kcache_mode1
= SQ_CF_KCACHE_NOP
;
1703 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.kcache_addr0
= 0x0;
1704 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.kcache_addr1
= 0x0;
1706 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
= 0x0;
/* A pending alu_x_opcode becomes the clause opcode and is then cleared. */
1708 if(pAsm
->alu_x_opcode
!= 0)
1710 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.cf_inst
= pAsm
->alu_x_opcode
;
1711 pAsm
->alu_x_opcode
= 0;
1715 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_ALU
;
1718 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
1720 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
1724 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
++;
1727 // If this clause contains any instruction that is forward dependent on a TEX instruction,
1728 // set the whole_quad_mode for this clause
1729 if ( pAsm
->pInstDeps
[pAsm
->uiCurInst
].nDstDep
> (-1) )
1731 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x1;
/* Mark the instruction as last once the count reaches the clause limit - 1. */
1734 if (pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
>= (GetCFMaxInstructions(pAsm
->cf_current_alu_clause_ptr
->m_ShaderInstType
)-1) )
1736 alu_instruction_ptr
->m_Word0
.f
.last
= 1;
/* The first instruction of a clause is linked to it in both directions. */
1739 if(NULL
== pAsm
->cf_current_alu_clause_ptr
->m_pLinkedALUInstruction
)
1741 pAsm
->cf_current_alu_clause_ptr
->m_pLinkedALUInstruction
= alu_instruction_ptr
;
1742 alu_instruction_ptr
->m_pLinkedALUClause
= pAsm
->cf_current_alu_clause_ptr
;
1745 AddALUInstruction(pAsm
->pR700Shader
, alu_instruction_ptr
);
/*
 * Read back the encoding of one source operand of an ALU instruction.
 *
 * Depending on source_index the srcN_sel, srcN_rel, srcN_chan and srcN_neg
 * fields are copied to *psrc_sel, *psrc_rel, *psrc_chan and *psrc_neg.
 * The src0 and src1 fields are read from m_Word0, the src2 fields from
 * m_Word1_OP3.
 */
1750 void get_src_properties(R700ALUInstruction
* alu_instruction_ptr
,
1757 switch (source_index
)
/* Operand 0 fields (m_Word0). */
1760 *psrc_sel
= alu_instruction_ptr
->m_Word0
.f
.src0_sel
;
1761 *psrc_rel
= alu_instruction_ptr
->m_Word0
.f
.src0_rel
;
1762 *psrc_chan
= alu_instruction_ptr
->m_Word0
.f
.src0_chan
;
1763 *psrc_neg
= alu_instruction_ptr
->m_Word0
.f
.src0_neg
;
/* Operand 1 fields (m_Word0). */
1767 *psrc_sel
= alu_instruction_ptr
->m_Word0
.f
.src1_sel
;
1768 *psrc_rel
= alu_instruction_ptr
->m_Word0
.f
.src1_rel
;
1769 *psrc_chan
= alu_instruction_ptr
->m_Word0
.f
.src1_chan
;
1770 *psrc_neg
= alu_instruction_ptr
->m_Word0
.f
.src1_neg
;
/* Operand 2 fields (OP3 layout of word 1). */
1774 *psrc_sel
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_sel
;
1775 *psrc_rel
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_rel
;
1776 *psrc_chan
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_chan
;
1777 *psrc_neg
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_neg
;
1782 int is_cfile(BITS sel
)
1784 if (sel
> 255 && sel
< 512)
1791 int is_const(BITS sel
)
1797 else if(sel
>= SQ_ALU_SRC_0
&& sel
<= SQ_ALU_SRC_LITERAL
)
1804 int is_gpr(BITS sel
)
1806 if (sel
>= 0 && sel
< 128)
1813 const GLuint BANK_SWIZZLE_VEC
[8] = {SQ_ALU_VEC_210
, //000
1814 SQ_ALU_VEC_120
, //001
1815 SQ_ALU_VEC_102
, //010
1817 SQ_ALU_VEC_201
, //011
1818 SQ_ALU_VEC_012
, //100
1819 SQ_ALU_VEC_021
, //101
1821 SQ_ALU_VEC_012
, //110
1822 SQ_ALU_VEC_012
}; //111
1824 const GLuint BANK_SWIZZLE_SCL
[8] = {SQ_ALU_SCL_210
, //000
1825 SQ_ALU_SCL_122
, //001
1826 SQ_ALU_SCL_122
, //010
1828 SQ_ALU_SCL_221
, //011
1829 SQ_ALU_SCL_212
, //100
1830 SQ_ALU_SCL_122
, //101
1832 SQ_ALU_SCL_122
, //110
1833 SQ_ALU_SCL_122
}; //111
1835 GLboolean
reserve_cfile(r700_AssemblerBase
* pAsm
,
1839 int res_match
= (-1);
1840 int res_empty
= (-1);
1844 for (res
=3; res
>=0; res
--)
1846 if(pAsm
->hw_cfile_addr
[ res
] < 0)
1850 else if( (pAsm
->hw_cfile_addr
[res
] == (int)sel
)
1852 (pAsm
->hw_cfile_chan
[ res
] == (int) chan
) )
1860 // Read for this scalar component already reserved, nothing to do here.
1863 else if(res_empty
>= 0)
1865 pAsm
->hw_cfile_addr
[ res_empty
] = sel
;
1866 pAsm
->hw_cfile_chan
[ res_empty
] = chan
;
1870 radeon_error("All cfile read ports are used, cannot reference C$sel, channel $chan.\n");
1876 GLboolean
reserve_gpr(r700_AssemblerBase
* pAsm
, GLuint sel
, GLuint chan
, GLuint cycle
)
1878 if(pAsm
->hw_gpr
[cycle
][chan
] < 0)
1880 pAsm
->hw_gpr
[cycle
][chan
] = sel
;
1882 else if(pAsm
->hw_gpr
[cycle
][chan
] != (int)sel
)
1884 radeon_error("Another scalar operation has already used GPR read port for given channel\n");
1891 GLboolean
cycle_for_scalar_bank_swizzle(const int swiz
, const int sel
, GLuint
* pCycle
)
1895 case SQ_ALU_SCL_210
:
1897 int table
[3] = {2, 1, 0};
1898 *pCycle
= table
[sel
];
1902 case SQ_ALU_SCL_122
:
1904 int table
[3] = {1, 2, 2};
1905 *pCycle
= table
[sel
];
1909 case SQ_ALU_SCL_212
:
1911 int table
[3] = {2, 1, 2};
1912 *pCycle
= table
[sel
];
1916 case SQ_ALU_SCL_221
:
1918 int table
[3] = {2, 2, 1};
1919 *pCycle
= table
[sel
];
1924 radeon_error("Bad Scalar bank swizzle value\n");
1931 GLboolean
cycle_for_vector_bank_swizzle(const int swiz
, const int sel
, GLuint
* pCycle
)
1935 case SQ_ALU_VEC_012
:
1937 int table
[3] = {0, 1, 2};
1938 *pCycle
= table
[sel
];
1941 case SQ_ALU_VEC_021
:
1943 int table
[3] = {0, 2, 1};
1944 *pCycle
= table
[sel
];
1947 case SQ_ALU_VEC_120
:
1949 int table
[3] = {1, 2, 0};
1950 *pCycle
= table
[sel
];
1953 case SQ_ALU_VEC_102
:
1955 int table
[3] = {1, 0, 2};
1956 *pCycle
= table
[sel
];
1959 case SQ_ALU_VEC_201
:
1961 int table
[3] = {2, 0, 1};
1962 *pCycle
= table
[sel
];
1965 case SQ_ALU_VEC_210
:
1967 int table
[3] = {2, 1, 0};
1968 *pCycle
= table
[sel
];
1972 radeon_error("Bad Vec bank swizzle value\n");
/*
 * Choose the bank swizzle of a scalar ALU instruction and reserve the read
 * ports its operands need.
 *
 * The operand encodings are read back with get_src_properties() for as many
 * operands as r700GetNumOperands() reports.  A 3-bit key (4 if src0 is a
 * constant, 2 for src1, 1 for src2) selects the swizzle from
 * BANK_SWIZZLE_SCL.  Constant-file operands are passed to reserve_cfile();
 * GPR reads are placed with cycle_for_scalar_bank_swizzle() and
 * reserve_gpr().
 */
1980 GLboolean
check_scalar(r700_AssemblerBase
* pAsm
,
1981 R700ALUInstruction
* alu_instruction_ptr
)
1984 GLuint bank_swizzle
;
1985 GLuint const_count
= 0;
/* Per-operand encodings, indexed by source number. */
1994 BITS src_sel
[3] = {0,0,0};
1995 BITS src_chan
[3] = {0,0,0};
1996 BITS src_rel
[3] = {0,0,0};
1997 BITS src_neg
[3] = {0,0,0};
2001 GLuint number_of_operands
= r700GetNumOperands(pAsm
);
2003 for (src
=0; src
<number_of_operands
; src
++)
2005 get_src_properties(alu_instruction_ptr
,
/* One key bit per operand that is a constant: src0 -> 4, src1 -> 2, src2 -> 1. */
2014 swizzle_key
= ( (is_const( src_sel
[0] ) ? 4 : 0) +
2015 (is_const( src_sel
[1] ) ? 2 : 0) +
2016 (is_const( src_sel
[2] ) ? 1 : 0) );
2018 alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
= BANK_SWIZZLE_SCL
[ swizzle_key
];
/* First pass over the operands: constants. */
2020 for (src
=0; src
<number_of_operands
; src
++)
2022 sel
= src_sel
[src
];
2023 chan
= src_chan
[src
];
2024 rel
= src_rel
[src
];
2025 neg
= src_neg
[src
];
2027 if (is_const( sel
))
2029 // Any constant, including literal and inline constants
2032 if (is_cfile( sel
))
/* NOTE(review): the result of reserve_cfile() is not checked here. */
2034 reserve_cfile(pAsm
, sel
, chan
);
/* Second pass over the operands: cycle lookup and GPR port reservation. */
2040 for (src
=0; src
<number_of_operands
; src
++)
2042 sel
= src_sel
[src
];
2043 chan
= src_chan
[src
];
2044 rel
= src_rel
[src
];
2045 neg
= src_neg
[src
];
2049 bank_swizzle
= alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
;
2051 if( GL_FALSE
== cycle_for_scalar_bank_swizzle(bank_swizzle
, src
, &cycle
) )
/* The GPR port is only reserved when the cycle is below const_count. */
2056 if(cycle
< const_count
)
2058 if( GL_FALSE
== reserve_gpr(pAsm
, sel
, chan
, cycle
) )
/*
 * Choose the bank swizzle of a vector ALU instruction and reserve the read
 * ports its operands need.
 *
 * The operand encodings are read back with get_src_properties() for as many
 * operands as r700GetNumOperands() reports.  A 3-bit key (4 if src0 is a
 * constant, 2 for src1, 1 for src2) selects the swizzle from
 * BANK_SWIZZLE_VEC.  Each operand then gets its cycle from
 * cycle_for_vector_bank_swizzle(); reserve_gpr() and reserve_cfile() are
 * used to claim the read ports.
 */
2069 GLboolean
check_vector(r700_AssemblerBase
* pAsm
,
2070 R700ALUInstruction
* alu_instruction_ptr
)
2073 GLuint bank_swizzle
;
2074 GLuint const_count
= 0;
/* Per-operand encodings, indexed by source number. */
2083 BITS src_sel
[3] = {0,0,0};
2084 BITS src_chan
[3] = {0,0,0};
2085 BITS src_rel
[3] = {0,0,0};
2086 BITS src_neg
[3] = {0,0,0};
2090 GLuint number_of_operands
= r700GetNumOperands(pAsm
);
2092 for (src
=0; src
<number_of_operands
; src
++)
2094 get_src_properties(alu_instruction_ptr
,
/* One key bit per operand that is a constant: src0 -> 4, src1 -> 2, src2 -> 1. */
2103 swizzle_key
= ( (is_const( src_sel
[0] ) ? 4 : 0) +
2104 (is_const( src_sel
[1] ) ? 2 : 0) +
2105 (is_const( src_sel
[2] ) ? 1 : 0)
2108 alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
= BANK_SWIZZLE_VEC
[swizzle_key
];
/* Walk the operands and reserve the port each one reads through. */
2110 for (src
=0; src
<number_of_operands
; src
++)
2112 sel
= src_sel
[src
];
2113 chan
= src_chan
[src
];
2114 rel
= src_rel
[src
];
2115 neg
= src_neg
[src
];
2118 bank_swizzle
= alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
;
2122 if( GL_FALSE
== cycle_for_vector_bank_swizzle(bank_swizzle
, src
, &cycle
) )
/* Comparison of this operand's register and channel with those of source 0. */
2128 (sel
== src_sel
[0]) &&
2129 (chan
== src_chan
[0]) )
2134 if( GL_FALSE
== reserve_gpr(pAsm
, sel
, chan
, cycle
) )
2140 else if( is_const(sel
) )
2146 if( GL_FALSE
== reserve_cfile(pAsm
, sel
, chan
) )
/*
 * Emit the hardware ALU instruction(s) for the operation currently described
 * by pAsm->D (destination and opcode) and pAsm->S[] (sources).
 *
 * For every scalar operation an R700ALUInstruction is allocated, its sources
 * are encoded with assemble_alu_src(), the destination, clamp and write-mask
 * fields are filled in, and the instruction is handed to
 * add_alu_instruction() and then to check_scalar() or check_vector().
 * Instructions are emitted with index_mode SQ_INDEX_AR_X, predication bits
 * cleared, and clamp taken from the current IL instruction's SaturateMode.
 */
2157 GLboolean
assemble_alu_instruction(r700_AssemblerBase
*pAsm
)
2159 GLuint number_of_scalar_operations
;
2160 GLboolean is_single_scalar_operation
;
2161 GLuint scalar_channel_index
;
2163 PVSSRC
* pcurrent_source
;
2164 int current_source_index
;
2165 GLuint contiguous_slots_needed
;
2167 GLuint uNumSrc
= r700GetNumOperands(pAsm
);
2168 //GLuint channel_swizzle, j;
2169 //GLuint chan_counter[4] = {0, 0, 0, 0};
2170 //PVSSRC * pSource[3];
2171 GLboolean bSplitInst
= GL_FALSE
;
/* D.dst.math == 1 marks a single scalar operation; the other setting is four. */
2173 if (1 == pAsm
->D
.dst
.math
)
2175 is_single_scalar_operation
= GL_TRUE
;
2176 number_of_scalar_operations
= 1;
2180 is_single_scalar_operation
= GL_FALSE
;
2181 number_of_scalar_operations
= 4;
2183 /* current assembler doesn't do more than 1 register per source */
2185 /* check read port, only very preliminary algorithm, not count in
2186 src0/1 same comp case and prev slot repeat case; also not count relative
2187 addressing. TODO: improve performance. */
/*
 * NOTE(review): the read-port counting below uses j, channel_swizzle,
 * chan_counter and pSource, whose declarations above are commented out --
 * presumably this section is compiled out; confirm.
 */
2188 for(j
=0; j
<uNumSrc
; j
++)
2190 pSource
[j
] = &(pAsm
->S
[j
].src
);
2192 for(scalar_channel_index
=0; scalar_channel_index
<4; scalar_channel_index
++)
2194 for(j
=0; j
<uNumSrc
; j
++)
2196 switch (scalar_channel_index
)
2198 case 0: channel_swizzle
= pSource
[j
]->swizzlex
; break;
2199 case 1: channel_swizzle
= pSource
[j
]->swizzley
; break;
2200 case 2: channel_swizzle
= pSource
[j
]->swizzlez
; break;
2201 case 3: channel_swizzle
= pSource
[j
]->swizzlew
; break;
2202 default: channel_swizzle
= SQ_SEL_MASK
; break;
2204 if ( ((pSource
[j
]->rtype
== SRC_REG_TEMPORARY
) ||
2205 (pSource
[j
]->rtype
== SRC_REG_INPUT
))
2206 && (channel_swizzle
<= SQ_SEL_W
) )
2208 chan_counter
[channel_swizzle
]++;
2212 if( (chan_counter
[SQ_SEL_X
] > 3)
2213 || (chan_counter
[SQ_SEL_Y
] > 3)
2214 || (chan_counter
[SQ_SEL_Z
] > 3)
2215 || (chan_counter
[SQ_SEL_W
] > 3) ) /* each chan bank has only 3 ports. */
2217 bSplitInst
= GL_TRUE
;
/* Reduction opcodes ask add_alu_instruction() for four contiguous slots. */
2222 contiguous_slots_needed
= 0;
2224 if(GL_TRUE
== is_reduction_opcode(&(pAsm
->D
)) )
2226 contiguous_slots_needed
= 4;
/* One ALU instruction per scalar operation. */
2231 for (scalar_channel_index
=0;
2232 scalar_channel_index
< number_of_scalar_operations
;
2233 scalar_channel_index
++)
2235 R700ALUInstruction
* alu_instruction_ptr
= (R700ALUInstruction
*) CALLOC_STRUCT(R700ALUInstruction
);
2236 if (alu_instruction_ptr
== NULL
)
2240 Init_R700ALUInstruction(alu_instruction_ptr
);
/* Source 0. */
2243 current_source_index
= 0;
2244 pcurrent_source
= &(pAsm
->S
[0].src
);
2246 if (GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2247 current_source_index
,
2249 scalar_channel_index
) )
/* Source 1. */
2257 current_source_index
= 1;
2258 pcurrent_source
= &(pAsm
->S
[current_source_index
].src
);
2260 if (GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2261 current_source_index
,
2263 scalar_channel_index
) )
2270 alu_instruction_ptr
->m_Word0
.f
.index_mode
= SQ_INDEX_AR_X
;
/* "last" closes an instruction group: always for scalar/split, else on channel 3. */
2272 if( (is_single_scalar_operation
== GL_TRUE
)
2273 || (GL_TRUE
== bSplitInst
) )
2275 alu_instruction_ptr
->m_Word0
.f
.last
= 1;
2279 alu_instruction_ptr
->m_Word0
.f
.last
= (scalar_channel_index
== 3) ? 1 : 0;
2282 alu_instruction_ptr
->m_Word0
.f
.pred_sel
= 0x0;
2283 alu_instruction_ptr
->m_Word1_OP2
.f
.update_pred
= 0x0;
2284 alu_instruction_ptr
->m_Word1_OP2
.f
.update_execute_mask
= 0x0;
/* Destination register: only temporaries and outputs are accepted. */
2287 if( (pAsm
->D
.dst
.rtype
== DST_REG_TEMPORARY
) ||
2288 (pAsm
->D
.dst
.rtype
== DST_REG_OUT
) )
2290 alu_instruction_ptr
->m_Word1
.f
.dst_gpr
= pAsm
->D
.dst
.reg
;
2294 radeon_error("Only temp destination registers supported for ALU dest regs.\n");
2298 alu_instruction_ptr
->m_Word1
.f
.dst_rel
= SQ_ABSOLUTE
; //D.rtype
2300 if ( is_single_scalar_operation
== GL_TRUE
)
2302 // Override scalar_channel_index since only one scalar value will be written
2303 if(pAsm
->D
.dst
.writex
)
2305 scalar_channel_index
= 0;
2307 else if(pAsm
->D
.dst
.writey
)
2309 scalar_channel_index
= 1;
2311 else if(pAsm
->D
.dst
.writez
)
2313 scalar_channel_index
= 2;
2315 else if(pAsm
->D
.dst
.writew
)
2317 scalar_channel_index
= 3;
2321 alu_instruction_ptr
->m_Word1
.f
.dst_chan
= scalar_channel_index
;
2323 alu_instruction_ptr
->m_Word1
.f
.clamp
= pAsm
->pILInst
[pAsm
->uiCurInst
].SaturateMode
;
/* Three-operand opcodes use the OP3 layout of word 1 and a third source. */
2325 if (pAsm
->D
.dst
.op3
)
2329 alu_instruction_ptr
->m_Word1_OP3
.f
.alu_inst
= pAsm
->D
.dst
.opcode
;
2331 //There's 3rd src for op3
2332 current_source_index
= 2;
2333 pcurrent_source
= &(pAsm
->S
[current_source_index
].src
);
2335 if ( GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2336 current_source_index
,
2338 scalar_channel_index
) )
/* Two-operand opcodes: OP2 layout, written through the "f6" field set ... */
2348 alu_instruction_ptr
->m_Word1_OP2
.f6
.alu_inst
= pAsm
->D
.dst
.opcode
;
2350 alu_instruction_ptr
->m_Word1_OP2
.f6
.src0_abs
= 0x0;
2351 alu_instruction_ptr
->m_Word1_OP2
.f6
.src1_abs
= 0x0;
2353 //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
2354 //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0;
2355 switch (scalar_channel_index
)
2358 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writex
;
2361 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writey
;
2364 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writez
;
2367 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writew
;
2370 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= 1; //SQ_SEL_MASK;
2373 alu_instruction_ptr
->m_Word1_OP2
.f6
.omod
= SQ_ALU_OMOD_OFF
;
/* ... or through the "f" field set, with the same values. */
2377 alu_instruction_ptr
->m_Word1_OP2
.f
.alu_inst
= pAsm
->D
.dst
.opcode
;
2379 alu_instruction_ptr
->m_Word1_OP2
.f
.src0_abs
= 0x0;
2380 alu_instruction_ptr
->m_Word1_OP2
.f
.src1_abs
= 0x0;
2382 //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2383 //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2384 switch (scalar_channel_index
)
2387 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writex
;
2390 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writey
;
2393 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writez
;
2396 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writew
;
2399 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= 1; //SQ_SEL_MASK;
2402 alu_instruction_ptr
->m_Word1_OP2
.f
.omod
= SQ_ALU_OMOD_OFF
;
/* Queue the instruction, then validate its read ports. */
2406 if(GL_FALSE
== add_alu_instruction(pAsm
, alu_instruction_ptr
, contiguous_slots_needed
) )
2412 * Judge the type of current instruction, is it vector or scalar
2415 if (is_single_scalar_operation
)
2417 if(GL_FALSE
== check_scalar(pAsm
, alu_instruction_ptr
) )
2424 if(GL_FALSE
== check_vector(pAsm
, alu_instruction_ptr
) )
/* The contiguous-slot request is dropped after the first instruction. */
2430 contiguous_slots_needed
= 0;
/*
 * Variant of assemble_alu_instruction() that honours predication.
 *
 * Visible differences from assemble_alu_instruction(): index_mode is
 * SQ_INDEX_LOOP, pred_sel follows pAsm->D.dst.pred_inv, update_pred and
 * update_execute_mask are set when pAsm->D.dst.predicated is 1, and clamp is
 * taken from pAsm->D2.dst2.SaturateMode.
 */
2436 GLboolean
assemble_alu_instruction2(r700_AssemblerBase
*pAsm
)
2438 GLuint number_of_scalar_operations
;
2439 GLboolean is_single_scalar_operation
;
2440 GLuint scalar_channel_index
;
2442 PVSSRC
* pcurrent_source
;
2443 int current_source_index
;
2444 GLuint contiguous_slots_needed
;
2446 GLuint uNumSrc
= r700GetNumOperands(pAsm
);
2448 GLboolean bSplitInst
= GL_FALSE
;
/* D.dst.math == 1 marks a single scalar operation; the other setting is four. */
2450 if (1 == pAsm
->D
.dst
.math
)
2452 is_single_scalar_operation
= GL_TRUE
;
2453 number_of_scalar_operations
= 1;
2457 is_single_scalar_operation
= GL_FALSE
;
2458 number_of_scalar_operations
= 4;
/* Reduction opcodes ask add_alu_instruction() for four contiguous slots. */
2461 contiguous_slots_needed
= 0;
2463 if(GL_TRUE
== is_reduction_opcode(&(pAsm
->D
)) )
2465 contiguous_slots_needed
= 4;
/* One ALU instruction per scalar operation. */
2470 for (scalar_channel_index
=0;
2471 scalar_channel_index
< number_of_scalar_operations
;
2472 scalar_channel_index
++)
2474 R700ALUInstruction
* alu_instruction_ptr
= (R700ALUInstruction
*) CALLOC_STRUCT(R700ALUInstruction
);
2475 if (alu_instruction_ptr
== NULL
)
2479 Init_R700ALUInstruction(alu_instruction_ptr
);
/* Source 0. */
2482 current_source_index
= 0;
2483 pcurrent_source
= &(pAsm
->S
[0].src
);
2485 if (GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2486 current_source_index
,
2488 scalar_channel_index
) )
/* Source 1. */
2496 current_source_index
= 1;
2497 pcurrent_source
= &(pAsm
->S
[current_source_index
].src
);
2499 if (GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2500 current_source_index
,
2502 scalar_channel_index
) )
2509 alu_instruction_ptr
->m_Word0
.f
.index_mode
= SQ_INDEX_LOOP
;
/* "last" closes an instruction group: always for scalar/split, else on channel 3. */
2511 if( (is_single_scalar_operation
== GL_TRUE
)
2512 || (GL_TRUE
== bSplitInst
) )
2514 alu_instruction_ptr
->m_Word0
.f
.last
= 1;
2518 alu_instruction_ptr
->m_Word0
.f
.last
= (scalar_channel_index
== 3) ? 1 : 0;
/* Predicate select and predicate / execute-mask updates. */
2521 alu_instruction_ptr
->m_Word0
.f
.pred_sel
= (pAsm
->D
.dst
.pred_inv
> 0) ? 1 : 0;
2522 if(1 == pAsm
->D
.dst
.predicated
)
2524 alu_instruction_ptr
->m_Word1_OP2
.f
.update_pred
= 0x1;
2525 alu_instruction_ptr
->m_Word1_OP2
.f
.update_execute_mask
= 0x1;
2529 alu_instruction_ptr
->m_Word1_OP2
.f
.update_pred
= 0x0;
2530 alu_instruction_ptr
->m_Word1_OP2
.f
.update_execute_mask
= 0x0;
/* Destination register: only temporaries and outputs are accepted. */
2534 if( (pAsm
->D
.dst
.rtype
== DST_REG_TEMPORARY
) ||
2535 (pAsm
->D
.dst
.rtype
== DST_REG_OUT
) )
2537 alu_instruction_ptr
->m_Word1
.f
.dst_gpr
= pAsm
->D
.dst
.reg
;
2541 radeon_error("Only temp destination registers supported for ALU dest regs.\n");
2545 alu_instruction_ptr
->m_Word1
.f
.dst_rel
= SQ_ABSOLUTE
; //D.rtype
2547 if ( is_single_scalar_operation
== GL_TRUE
)
2549 // Override scalar_channel_index since only one scalar value will be written
2550 if(pAsm
->D
.dst
.writex
)
2552 scalar_channel_index
= 0;
2554 else if(pAsm
->D
.dst
.writey
)
2556 scalar_channel_index
= 1;
2558 else if(pAsm
->D
.dst
.writez
)
2560 scalar_channel_index
= 2;
2562 else if(pAsm
->D
.dst
.writew
)
2564 scalar_channel_index
= 3;
2568 alu_instruction_ptr
->m_Word1
.f
.dst_chan
= scalar_channel_index
;
2570 alu_instruction_ptr
->m_Word1
.f
.clamp
= pAsm
->D2
.dst2
.SaturateMode
;
/* Three-operand opcodes use the OP3 layout of word 1 and a third source. */
2572 if (pAsm
->D
.dst
.op3
)
2576 alu_instruction_ptr
->m_Word1_OP3
.f
.alu_inst
= pAsm
->D
.dst
.opcode
;
2578 //There's 3rd src for op3
2579 current_source_index
= 2;
2580 pcurrent_source
= &(pAsm
->S
[current_source_index
].src
);
2582 if ( GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2583 current_source_index
,
2585 scalar_channel_index
) )
/* Two-operand opcodes: OP2 layout, written through the "f6" field set ... */
2595 alu_instruction_ptr
->m_Word1_OP2
.f6
.alu_inst
= pAsm
->D
.dst
.opcode
;
2597 alu_instruction_ptr
->m_Word1_OP2
.f6
.src0_abs
= 0x0;
2598 alu_instruction_ptr
->m_Word1_OP2
.f6
.src1_abs
= 0x0;
2600 //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
2601 //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0;
2602 switch (scalar_channel_index
)
2605 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writex
;
2608 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writey
;
2611 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writez
;
2614 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writew
;
2617 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= 1; //SQ_SEL_MASK;
2620 alu_instruction_ptr
->m_Word1_OP2
.f6
.omod
= SQ_ALU_OMOD_OFF
;
/* ... or through the "f" field set, with the same values. */
2624 alu_instruction_ptr
->m_Word1_OP2
.f
.alu_inst
= pAsm
->D
.dst
.opcode
;
2626 alu_instruction_ptr
->m_Word1_OP2
.f
.src0_abs
= 0x0;
2627 alu_instruction_ptr
->m_Word1_OP2
.f
.src1_abs
= 0x0;
2629 //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2630 //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2631 switch (scalar_channel_index
)
2634 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writex
;
2637 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writey
;
2640 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writez
;
2643 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writew
;
2646 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= 1; //SQ_SEL_MASK;
2649 alu_instruction_ptr
->m_Word1_OP2
.f
.omod
= SQ_ALU_OMOD_OFF
;
/* Queue the instruction, then validate its read ports. */
2653 if(GL_FALSE
== add_alu_instruction(pAsm
, alu_instruction_ptr
, contiguous_slots_needed
) )
2659 * Judge the type of current instruction, is it vector or scalar
2662 if (is_single_scalar_operation
)
2664 if(GL_FALSE
== check_scalar(pAsm
, alu_instruction_ptr
) )
2671 if(GL_FALSE
== check_vector(pAsm
, alu_instruction_ptr
) )
/* The contiguous-slot request is dropped after the first instruction. */
2677 contiguous_slots_needed
= 0;
2683 GLboolean
assemble_alu_instruction_literal(r700_AssemblerBase
*pAsm
, GLfloat
* pLiteral
)
2685 R700ALUInstruction
* alu_instruction_ptr
;
2686 R700ALUInstructionHalfLiteral
* alu_instruction_ptr_hl
;
2687 R700ALUInstructionFullLiteral
* alu_instruction_ptr_fl
;
2689 GLuint number_of_scalar_operations
;
2690 GLboolean is_single_scalar_operation
;
2691 GLuint scalar_channel_index
;
2693 GLuint contiguous_slots_needed
;
2694 GLuint lastInstruction
;
2695 GLuint not_masked
[4];
2697 GLuint uNumSrc
= r700GetNumOperands(pAsm
);
2699 GLboolean bSplitInst
= GL_FALSE
;
2701 number_of_scalar_operations
= 0;
2702 contiguous_slots_needed
= 0;
2704 if(1 == pAsm
->D
.dst
.writew
)
2706 lastInstruction
= 3;
2707 number_of_scalar_operations
++;
2714 if(1 == pAsm
->D
.dst
.writez
)
2716 lastInstruction
= 2;
2717 number_of_scalar_operations
++;
2724 if(1 == pAsm
->D
.dst
.writey
)
2726 lastInstruction
= 1;
2727 number_of_scalar_operations
++;
2734 if(1 == pAsm
->D
.dst
.writex
)
2736 lastInstruction
= 0;
2737 number_of_scalar_operations
++;
2745 if(GL_TRUE
== is_reduction_opcode(&(pAsm
->D
)) )
2747 contiguous_slots_needed
= 4;
2751 contiguous_slots_needed
= number_of_scalar_operations
;
2754 if(1 == pAsm
->D2
.dst2
.literal
)
2756 contiguous_slots_needed
+= 1;
2758 else if(2 == pAsm
->D2
.dst2
.literal
)
2760 contiguous_slots_needed
+= 2;
2765 for (scalar_channel_index
=0; scalar_channel_index
< 4; scalar_channel_index
++)
2767 if(0 == not_masked
[scalar_channel_index
])
2772 if(scalar_channel_index
== lastInstruction
)
2774 switch (pAsm
->D2
.dst2
.literal
)
2777 alu_instruction_ptr
= (R700ALUInstruction
*) CALLOC_STRUCT(R700ALUInstruction
);
2778 if (alu_instruction_ptr
== NULL
)
2782 Init_R700ALUInstruction(alu_instruction_ptr
);
2785 alu_instruction_ptr_hl
= (R700ALUInstructionHalfLiteral
*) CALLOC_STRUCT(R700ALUInstructionHalfLiteral
);
2786 if (alu_instruction_ptr_hl
== NULL
)
2790 Init_R700ALUInstructionHalfLiteral(alu_instruction_ptr_hl
, pLiteral
[0], pLiteral
[1]);
2791 alu_instruction_ptr
= (R700ALUInstruction
*)alu_instruction_ptr_hl
;
2794 alu_instruction_ptr_fl
= (R700ALUInstructionFullLiteral
*) CALLOC_STRUCT(R700ALUInstructionFullLiteral
);
2795 if (alu_instruction_ptr_fl
== NULL
)
2799 Init_R700ALUInstructionFullLiteral(alu_instruction_ptr_fl
, pLiteral
[0], pLiteral
[1], pLiteral
[2], pLiteral
[3]);
2800 alu_instruction_ptr
= (R700ALUInstruction
*)alu_instruction_ptr_fl
;
2808 alu_instruction_ptr
= (R700ALUInstruction
*) CALLOC_STRUCT(R700ALUInstruction
);
2809 if (alu_instruction_ptr
== NULL
)
2813 Init_R700ALUInstruction(alu_instruction_ptr
);
2817 if (GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2820 scalar_channel_index
) )
2828 if (GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2831 scalar_channel_index
) )
2838 alu_instruction_ptr
->m_Word0
.f
.index_mode
= SQ_INDEX_LOOP
;
2840 if(scalar_channel_index
== lastInstruction
)
2842 alu_instruction_ptr
->m_Word0
.f
.last
= 1;
2845 alu_instruction_ptr
->m_Word0
.f
.pred_sel
= 0x0;
2846 if(1 == pAsm
->D
.dst
.predicated
)
2848 alu_instruction_ptr
->m_Word1_OP2
.f
.update_pred
= 0x1;
2849 alu_instruction_ptr
->m_Word1_OP2
.f
.update_execute_mask
= 0x1;
2853 alu_instruction_ptr
->m_Word1_OP2
.f
.update_pred
= 0;
2854 alu_instruction_ptr
->m_Word1_OP2
.f
.update_execute_mask
= 0;
2858 if( (pAsm
->D
.dst
.rtype
== DST_REG_TEMPORARY
) ||
2859 (pAsm
->D
.dst
.rtype
== DST_REG_OUT
) )
2861 alu_instruction_ptr
->m_Word1
.f
.dst_gpr
= pAsm
->D
.dst
.reg
;
2865 radeon_error("Only temp destination registers supported for ALU dest regs.\n");
2869 alu_instruction_ptr
->m_Word1
.f
.dst_rel
= SQ_ABSOLUTE
; //D.rtype
2871 alu_instruction_ptr
->m_Word1
.f
.dst_chan
= scalar_channel_index
;
2873 alu_instruction_ptr
->m_Word1
.f
.clamp
= pAsm
->D2
.dst2
.SaturateMode
;
2875 if (pAsm
->D
.dst
.op3
)
2878 alu_instruction_ptr
->m_Word1_OP3
.f
.alu_inst
= pAsm
->D
.dst
.opcode
;
2880 //There's 3rd src for op3
2881 if ( GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2884 scalar_channel_index
) )
2894 alu_instruction_ptr
->m_Word1_OP2
.f6
.alu_inst
= pAsm
->D
.dst
.opcode
;
2895 alu_instruction_ptr
->m_Word1_OP2
.f6
.src0_abs
= 0x0;
2896 alu_instruction_ptr
->m_Word1_OP2
.f6
.src1_abs
= 0x0;
2897 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= 1;
2898 alu_instruction_ptr
->m_Word1_OP2
.f6
.omod
= SQ_ALU_OMOD_OFF
;
2902 alu_instruction_ptr
->m_Word1_OP2
.f
.alu_inst
= pAsm
->D
.dst
.opcode
;
2903 alu_instruction_ptr
->m_Word1_OP2
.f
.src0_abs
= 0x0;
2904 alu_instruction_ptr
->m_Word1_OP2
.f
.src1_abs
= 0x0;
2905 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= 1;
2906 alu_instruction_ptr
->m_Word1_OP2
.f
.omod
= SQ_ALU_OMOD_OFF
;
2910 if(GL_FALSE
== add_alu_instruction(pAsm
, alu_instruction_ptr
, contiguous_slots_needed
) )
2915 if (1 == number_of_scalar_operations
)
2917 if(GL_FALSE
== check_scalar(pAsm
, alu_instruction_ptr
) )
2924 if(GL_FALSE
== check_vector(pAsm
, alu_instruction_ptr
) )
2930 contiguous_slots_needed
-= 2;
/*
 * Emit the instruction currently described by pAsm->D / pAsm->S[] and reset
 * the per-instruction assembler state.
 *
 * pAsm: assembler state; pAsm->pILInst[pAsm->uiCurInst] is the IL instruction
 *       being translated.
 *
 * When pAsm->is_tex is GL_TRUE the instruction is handed to
 * assemble_tex_instruction(); the call made for TEXTURE_RECT_INDEX targets
 * passes GL_FALSE as second argument, the other call passes GL_TRUE.
 * assemble_alu_instruction() is the ALU counterpart.  A GL_FALSE result from
 * any of them is reported through radeon_error().
 * NOTE(review): the else keywords and the statements of the failure branches
 * are not visible here -- presumably the non-tex path is the ALU call and
 * failures return GL_FALSE; confirm.
 */
2936 GLboolean
next_ins(r700_AssemblerBase
*pAsm
)
2938 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
2940 if( GL_TRUE
== pAsm
->is_tex
)
2942 if (pILInst
->TexSrcTarget
== TEXTURE_RECT_INDEX
) {
2943 if( GL_FALSE
== assemble_tex_instruction(pAsm
, GL_FALSE
) )
2945 radeon_error("Error assembling TEX instruction\n");
2949 if( GL_FALSE
== assemble_tex_instruction(pAsm
, GL_TRUE
) )
2951 radeon_error("Error assembling TEX instruction\n");
2958 if( GL_FALSE
== assemble_alu_instruction(pAsm
) )
2960 radeon_error("Error assembling ALU instruction\n");
/* Track which channels of an export (output) register have been written.
 * The mask array is indexed relative to starting_export_register_number. */
2965 if(pAsm
->D
.dst
.rtype
== DST_REG_OUT
)
2969 // There is no mask for OP3 instructions, so all channels are written
2970 pAsm
->pucOutMask
[pAsm
->D
.dst
.reg
- pAsm
->starting_export_register_number
] = 0xF;
/* Otherwise accumulate the write mask of the current IL instruction. */
2974 pAsm
->pucOutMask
[pAsm
->D
.dst
.reg
- pAsm
->starting_export_register_number
]
2975 |= (unsigned char)pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
;
2979 //reset for next inst.
2982 pAsm
->S
[0].bits
= 0;
2983 pAsm
->S
[1].bits
= 0;
2984 pAsm
->S
[2].bits
= 0;
2985 pAsm
->is_tex
= GL_FALSE
;
2986 pAsm
->need_tex_barrier
= GL_FALSE
;
/*
 * Variant of the instruction-emit step that goes through
 * assemble_alu_instruction2() and has no texture path.
 *
 * pAsm: assembler state holding the pending destination (D) and sources (S[]).
 *
 * After the ALU call the export write-mask bookkeeping and the reset of
 * S[0..2].bits, is_tex and need_tex_barrier match what the ALU path of the
 * plain emit routine does.
 * NOTE(review): pILInst is initialised but never read in the visible lines.
 */
2991 GLboolean
next_ins2(r700_AssemblerBase
*pAsm
)
2993 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
2996 if( GL_FALSE
== assemble_alu_instruction2(pAsm
) )
2998 radeon_error("Error assembling ALU instruction\n");
/* Record written channels for export registers (index is relative to
 * starting_export_register_number). */
3002 if(pAsm
->D
.dst
.rtype
== DST_REG_OUT
)
3006 // There is no mask for OP3 instructions, so all channels are written
3007 pAsm
->pucOutMask
[pAsm
->D
.dst
.reg
- pAsm
->starting_export_register_number
] = 0xF;
3011 pAsm
->pucOutMask
[pAsm
->D
.dst
.reg
- pAsm
->starting_export_register_number
]
3012 |= (unsigned char)pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
;
3016 //reset for next inst.
3019 pAsm
->S
[0].bits
= 0;
3020 pAsm
->S
[1].bits
= 0;
3021 pAsm
->S
[2].bits
= 0;
3022 pAsm
->is_tex
= GL_FALSE
;
3023 pAsm
->need_tex_barrier
= GL_FALSE
;
3025 //richard nov.16 glsl
/*
 * Emit the pending ALU instruction together with literal constants.
 *
 * pAsm:     assembler state holding the pending destination and sources.
 * pLiteral: literal values forwarded unchanged to
 *           assemble_alu_instruction_literal().
 *
 * A GL_FALSE result from the assembler call is reported via radeon_error().
 * Unlike the other emit routines, no export write-mask bookkeeping is visible
 * here; only the per-instruction source/tex state is cleared.
 * NOTE(review): pILInst is initialised but never read in the visible lines.
 */
3032 GLboolean
next_ins_literal(r700_AssemblerBase
*pAsm
, GLfloat
* pLiteral
)
3034 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
3037 if( GL_FALSE
== assemble_alu_instruction_literal(pAsm
, pLiteral
) )
3039 radeon_error("Error assembling ALU instruction\n");
3043 //reset for next inst.
3046 pAsm
->S
[0].bits
= 0;
3047 pAsm
->S
[1].bits
= 0;
3048 pAsm
->S
[2].bits
= 0;
3049 pAsm
->is_tex
= GL_FALSE
;
3050 pAsm
->need_tex_barrier
= GL_FALSE
;
/*
 * Shared two-step lowering for single-operand "math" opcodes.
 *
 * pAsm:   assembler state.
 * opcode: hardware opcode stored into pAsm->D.dst.opcode for the first step.
 *
 * Step 1 runs <opcode> with dst.math = 1 on IL source 0, writing only the X
 * channel of a helper register obtained from gethelpr().
 * Step 2 is a MOV from that helper (swizzle SQ_SEL_X, negate cleared) into
 * the real destination prepared by assemble_dst().
 * Each helper call is compared against GL_FALSE.
 */
3054 GLboolean
assemble_math_function(r700_AssemblerBase
* pAsm
, BITS opcode
)
3060 tmp
= gethelpr(pAsm
);
3062 // opcode tmp.x, a.x
3065 pAsm
->D
.dst
.opcode
= opcode
;
3066 pAsm
->D
.dst
.math
= 1;
3068 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3069 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3070 pAsm
->D
.dst
.reg
= tmp
;
3071 pAsm
->D
.dst
.writex
= 1;
3073 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3078 if ( GL_FALSE
== next_ins(pAsm
) )
3083 // Now replicate result to all necessary channels in destination
3084 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3086 if( GL_FALSE
== assemble_dst(pAsm
) )
/* Source of the MOV is the helper register written above.
 * NOTE(review): a DST_REG_* constant is stored into a source rtype here,
 * while other routines in this file use SRC_REG_TEMPORARY -- presumably the
 * two have the same value; confirm. */
3091 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3092 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3093 pAsm
->S
[0].src
.reg
= tmp
;
3095 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3096 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3098 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * Lower ABS as a MAX of the operand with its own negation.
 *
 * pAsm: assembler state.
 *
 * The destination comes from assemble_dst() and source slot 0 from IL source
 * 0.  Slot 1 is a bit-copy of slot 0 with flipneg_PVSSRC() applied, so the
 * emitted instruction is MAX dst, a, -a.
 */
3106 GLboolean
assemble_ABS(r700_AssemblerBase
*pAsm
)
3110 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
3112 if( GL_FALSE
== assemble_dst(pAsm
) )
3116 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
/* Second operand: same register/swizzle as the first, negate toggled. */
3121 pAsm
->S
[1].bits
= pAsm
->S
[0].bits
;
3122 flipneg_PVSSRC(&(pAsm
->S
[1].src
));
3124 if ( GL_FALSE
== next_ins(pAsm
) )
/*
 * Lower ADD -- and SUB -- to the hardware ADD instruction.
 *
 * pAsm: assembler state.
 *
 * After checkop2(), the destination and both sources are assembled.  When
 * the current IL opcode is OPCODE_SUB the negate flag of the second source
 * is flipped, so the same ADD computes a - b.
 */
3132 GLboolean
assemble_ADD(r700_AssemblerBase
*pAsm
)
3134 if( GL_FALSE
== checkop2(pAsm
) )
3139 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_ADD
;
3141 if( GL_FALSE
== assemble_dst(pAsm
) )
3146 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3151 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
/* SUB shares this routine: negate the second operand. */
3156 if(pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_SUB
)
3158 flipneg_PVSSRC(&(pAsm
->S
[1].src
));
3161 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * Lower ARL (address register load) to MOVA_FLOOR.
 *
 * pAsm: assembler state.
 *
 * The destination is filled in as temporary register 0 with every channel
 * write bit cleared, i.e. no channel write is enabled for this instruction.
 * IL source 0 is the operand.
 */
3169 GLboolean
assemble_ARL(r700_AssemblerBase
*pAsm
)
3170 { /* TODO: ar values don't persist between clauses */
3171 if( GL_FALSE
== checkop1(pAsm
) )
3176 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOVA_FLOOR
;
3177 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3178 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3179 pAsm
->D
.dst
.reg
= 0;
3180 pAsm
->D
.dst
.writex
= 0;
3181 pAsm
->D
.dst
.writey
= 0;
3182 pAsm
->D
.dst
.writez
= 0;
3183 pAsm
->D
.dst
.writew
= 0;
3185 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3190 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * Handler for opcodes that have no translation yet.
 *
 * opcode_str: opcode name, inserted into the radeon_error() message.
 *
 * NOTE(review): the returned value is not visible here -- presumably
 * GL_FALSE; confirm.
 */
3198 GLboolean
assemble_BAD(char *opcode_str
)
3200 radeon_error("Not yet implemented instruction (%s)\n", opcode_str
);
/*
 * Lower CMP to the three-operand CNDGE instruction.
 *
 * pAsm: assembler state.
 *
 * OP3 instructions cannot mask channels, so when the IL write mask is not
 * 0xF the CNDGE writes all four channels of a helper register and a
 * following MOV copies it into the real destination (where assemble_dst()
 * sets up the destination fields).
 *
 * Source mapping: assemble_src(pAsm, 0, -1), (pAsm, 2, 1), (pAsm, 1, 2).
 * NOTE(review): presumably the third argument selects the pAsm->S[] slot
 * (-1 = same as the IL source index), which would swap IL sources 1 and 2 --
 * confirm against assemble_src().
 */
3204 GLboolean
assemble_CMP(r700_AssemblerBase
*pAsm
)
3208 if( GL_FALSE
== checkop3(pAsm
) )
3213 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_CNDGE
;
3214 pAsm
->D
.dst
.op3
= 1;
3218 if(0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
3220 //OP3 has no support for write mask
3221 tmp
= gethelpr(pAsm
);
3223 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3224 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3225 pAsm
->D
.dst
.reg
= tmp
;
3227 nomask_PVSDST(&(pAsm
->D
.dst
));
/* Full write mask: CNDGE can target the real destination directly. */
3231 if( GL_FALSE
== assemble_dst(pAsm
) )
3237 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3242 if( GL_FALSE
== assemble_src(pAsm
, 2, 1) )
3247 if( GL_FALSE
== assemble_src(pAsm
, 1, 2) )
3252 if ( GL_FALSE
== next_ins(pAsm
) )
/* Partial write mask: copy the helper into the real destination with a
 * plain MOV (unswizzled, not negated). */
3257 if (0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
3259 if( GL_FALSE
== assemble_dst(pAsm
) )
3264 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3267 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3268 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3269 pAsm
->S
[0].src
.reg
= tmp
;
3271 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3272 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3274 if( GL_FALSE
== next_ins(pAsm
) )
/* Lower COS: delegates to assemble_math_function() with SQ_OP2_INST_COS and
 * returns its result. */
3283 GLboolean
assemble_COS(r700_AssemblerBase
*pAsm
)
3285 return assemble_math_function(pAsm
, SQ_OP2_INST_COS
);
/*
 * Lower the dot-product opcodes to the hardware DOT4 instruction.
 *
 * pAsm: assembler state.
 *
 * Both sources are assembled normally and then adjusted per IL opcode:
 *   OPCODE_DP3 - zerocomp_PVSSRC() on component 3 of both sources;
 *   OPCODE_DPH - onecomp_PVSSRC() on component 3 of the first source.
 * Any other opcode reaching this routine uses the sources unchanged.
 */
3288 GLboolean
assemble_DOT(r700_AssemblerBase
*pAsm
)
3290 if( GL_FALSE
== checkop2(pAsm
) )
3295 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_DOT4
;
3297 if( GL_FALSE
== assemble_dst(pAsm
) )
3302 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3307 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3312 if(OPCODE_DP3
== pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
)
3314 zerocomp_PVSSRC(&(pAsm
->S
[0].src
), 3);
3315 zerocomp_PVSSRC(&(pAsm
->S
[1].src
), 3);
3317 else if(pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_DPH
)
3319 onecomp_PVSSRC(&(pAsm
->S
[0].src
), 3);
3322 if ( GL_FALSE
== next_ins(pAsm
) )
/*
 * Lower DST (distance vector) to a single MUL.
 *
 * pAsm: assembler state.
 *
 * After both sources are assembled, onecomp_PVSSRC() is applied to
 * components 0 and 3 of the first source and components 0 and 2 of the
 * second before the MUL is emitted.
 * NOTE(review): presumably onecomp_PVSSRC() makes the component read as the
 * constant 1.0 -- confirm against its definition.
 */
3330 GLboolean
assemble_DST(r700_AssemblerBase
*pAsm
)
3332 if( GL_FALSE
== checkop2(pAsm
) )
3337 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
3339 if( GL_FALSE
== assemble_dst(pAsm
) )
3344 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3349 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3354 onecomp_PVSSRC(&(pAsm
->S
[0].src
), 0);
3355 onecomp_PVSSRC(&(pAsm
->S
[0].src
), 3);
3357 onecomp_PVSSRC(&(pAsm
->S
[1].src
), 0);
3358 onecomp_PVSSRC(&(pAsm
->S
[1].src
), 2);
3360 if ( GL_FALSE
== next_ins(pAsm
) )
/* Lower EX2: delegates to assemble_math_function() with
 * SQ_OP2_INST_EXP_IEEE and returns its result. */
3368 GLboolean
assemble_EX2(r700_AssemblerBase
*pAsm
)
3370 return assemble_math_function(pAsm
, SQ_OP2_INST_EXP_IEEE
);
/*
 * Lower EXP, emitting one instruction group per enabled destination channel.
 *
 * pAsm: assembler state.
 *
 *   bit 0 (x): FLOOR helper.x <- src0, then EXP_IEEE dst.x <- helper.x
 *   bit 1 (y): FRACT dst <- src0 with the x/z/w write bits cleared
 *   bit 2 (z): EXP_IEEE dst <- src0 with the x/y/w write bits cleared
 *   bit 3 (w): MOV dst <- SQ_SEL_1 swizzle with the x/y/z write bits cleared
 *
 * NOTE(review): every mask test reads pAsm->pILInst->DstReg.WriteMask, i.e.
 * the first element of the IL array, whereas other routines in this file
 * index it with pAsm->uiCurInst.  This looks unintended -- confirm.
 */
3373 GLboolean
assemble_EXP(r700_AssemblerBase
*pAsm
)
3379 tmp
= gethelpr(pAsm
);
/* x channel: floor into the helper, then exponentiate the helper. */
3384 if (pAsm
->pILInst
->DstReg
.WriteMask
& 0x1) {
3385 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FLOOR
;
3387 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3388 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3389 pAsm
->D
.dst
.reg
= tmp
;
3390 pAsm
->D
.dst
.writex
= 1;
3392 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3397 if( GL_FALSE
== next_ins(pAsm
) )
3402 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
3403 pAsm
->D
.dst
.math
= 1;
3405 if( GL_FALSE
== assemble_dst(pAsm
) )
/* Restrict the write to x; the x bit itself is left as assemble_dst() set it. */
3410 pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writez
= pAsm
->D
.dst
.writew
= 0;
3412 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3413 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3414 pAsm
->S
[0].src
.reg
= tmp
;
3416 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3417 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3419 if( GL_FALSE
== next_ins(pAsm
) )
/* y channel: fractional part of the source. */
3427 if ((pAsm
->pILInst
->DstReg
.WriteMask
>> 1) & 0x1) {
3428 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FRACT
;
3430 if( GL_FALSE
== assemble_dst(pAsm
) )
3435 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3440 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writez
= pAsm
->D
.dst
.writew
= 0;
3442 if( GL_FALSE
== next_ins(pAsm
) )
/* z channel: exponential of the source itself. */
3450 if ((pAsm
->pILInst
->DstReg
.WriteMask
>> 2) & 0x1) {
3451 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
3452 pAsm
->D
.dst
.math
= 1;
3454 if( GL_FALSE
== assemble_dst(pAsm
) )
3459 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3464 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writew
= 0;
3466 if( GL_FALSE
== next_ins(pAsm
) )
/* w channel: MOV with the SQ_SEL_1 swizzle; the helper register is only
 * named as the source register. */
3474 if ((pAsm
->pILInst
->DstReg
.WriteMask
>> 3) & 0x1) {
3475 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3477 if( GL_FALSE
== assemble_dst(pAsm
) )
3482 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writez
= 0;
3484 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3485 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3486 pAsm
->S
[0].src
.reg
= tmp
;
3488 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_1
);
3489 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3491 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * Lower FLR to the hardware FLOOR instruction: destination from
 * assemble_dst(), single operand from IL source 0.
 *
 * pAsm: assembler state.
 */
3500 GLboolean
assemble_FLR(r700_AssemblerBase
*pAsm
)
3504 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FLOOR
;
3506 if ( GL_FALSE
== assemble_dst(pAsm
) )
3511 if ( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3516 if ( GL_FALSE
== next_ins(pAsm
) )
/* Float-to-integer conversion: delegates to assemble_math_function() with
 * SQ_OP2_INST_FLT_TO_INT and returns its result. */
3524 GLboolean
assemble_FLR_INT(r700_AssemblerBase
*pAsm
)
3526 return assemble_math_function(pAsm
, SQ_OP2_INST_FLT_TO_INT
);
/*
 * Lower FRC to the hardware FRACT instruction: destination from
 * assemble_dst(), single operand from IL source 0.
 *
 * pAsm: assembler state.
 */
3529 GLboolean
assemble_FRC(r700_AssemblerBase
*pAsm
)
3533 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FRACT
;
3535 if ( GL_FALSE
== assemble_dst(pAsm
) )
3540 if ( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3545 if ( GL_FALSE
== next_ins(pAsm
) )
/*
 * Lower KIL to the KILLGT instruction and flag the shader as using kill.
 *
 * pAsm: assembler state.
 *
 * The destination is temporary register 0 with every write bit cleared.
 * The first operand is set up by hand with the SQ_SEL_0 swizzle; the IL
 * source is then assembled with assemble_src(pAsm, 0, 1).
 * NOTE(review): presumably that call places IL source 0 in operand slot 1,
 * making the test "0 > src" -- confirm against assemble_src().
 * On the path that reaches the end, pR700Shader->killIsUsed is set to GL_TRUE.
 */
3553 GLboolean
assemble_KIL(r700_AssemblerBase
*pAsm
)
3555 /* TODO: doc says KILL has to be last(end) ALU clause */
3559 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_KILLGT
;
3561 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3562 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3563 pAsm
->D
.dst
.reg
= 0;
3564 pAsm
->D
.dst
.writex
= 0;
3565 pAsm
->D
.dst
.writey
= 0;
3566 pAsm
->D
.dst
.writez
= 0;
3567 pAsm
->D
.dst
.writew
= 0;
/* First operand: SQ_SEL_0 swizzle on a dummy temporary. */
3569 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3570 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3571 pAsm
->S
[0].src
.reg
= 0;
3573 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_0
);
3574 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3576 if ( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
3581 if ( GL_FALSE
== next_ins(pAsm
) )
3586 pAsm
->pR700Shader
->killIsUsed
= GL_TRUE
;
/* Lower LG2: delegates to assemble_math_function() with
 * SQ_OP2_INST_LOG_IEEE and returns its result. */
3591 GLboolean
assemble_LG2(r700_AssemblerBase
*pAsm
)
3593 return assemble_math_function(pAsm
, SQ_OP2_INST_LOG_IEEE
);
/*
 * Lower LRP (linear interpolation) to three instructions through a helper
 * register:
 *
 *   ADD    tmp, <assemble_src(1,0)>, -<assemble_src(2,1)>
 *   MULADD tmp, tmp, <assemble_src(0,1)>, <assemble_src(2,-1)>
 *   MOV    dst, tmp
 *
 * pAsm: assembler state.
 *
 * NOTE(review): presumably assemble_src(pAsm, i, j) loads IL source i into
 * operand slot j (-1 = slot i), giving (src1 - src2) * src0 + src2 --
 * confirm against assemble_src().
 * Both intermediate results write all four channels (nomask_PVSDST); the
 * final MOV goes to the destination prepared by assemble_dst().
 */
3596 GLboolean
assemble_LRP(r700_AssemblerBase
*pAsm
)
3600 if( GL_FALSE
== checkop3(pAsm
) )
3605 tmp
= gethelpr(pAsm
);
/* Step 1: difference of the two interpolated values into tmp. */
3607 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_ADD
;
3609 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3610 pAsm
->D
.dst
.reg
= tmp
;
3611 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3612 nomask_PVSDST(&(pAsm
->D
.dst
));
3615 if( GL_FALSE
== assemble_src(pAsm
, 1, 0) )
3620 if ( GL_FALSE
== assemble_src(pAsm
, 2, 1) )
3625 neg_PVSSRC(&(pAsm
->S
[1].src
));
3627 if( GL_FALSE
== next_ins(pAsm
) )
/* Step 2: multiply-add, reading tmp back as the first operand. */
3632 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
3633 pAsm
->D
.dst
.op3
= 1;
3635 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3636 pAsm
->D
.dst
.reg
= tmp
;
3637 nomask_PVSDST(&(pAsm
->D
.dst
));
3638 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3640 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3641 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3642 pAsm
->S
[0].src
.reg
= tmp
;
3643 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3646 if( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
3650 if( GL_FALSE
== assemble_src(pAsm
, 2, -1) )
3655 if( GL_FALSE
== next_ins(pAsm
) )
/* Step 3: copy tmp into the real destination. */
3660 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3662 if( GL_FALSE
== assemble_dst(pAsm
) )
3667 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3668 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3669 pAsm
->S
[0].src
.reg
= tmp
;
3670 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3672 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * Lower LOG using three helper registers (tmp1, tmp2, tmp3).
 *
 * pAsm: assembler state.
 *
 * Emitted sequence, in order:
 *   MAX   tmp1.x, a, -a          (absolute value of the operand)
 *   LG2   tmp2.x, tmp1.x
 *   FLOOR tmp3.x, tmp2.x
 *   MOV   dst.x,  tmp3.x
 *   ADD   tmp3.x, tmp2.x, -tmp3.x
 *   EX2   dst.y,  tmp3.x
 *   MOV   dst.z,  tmp2.x
 *   MOV   dst.w,  <SQ_SEL_1 swizzle>
 * For each write to dst, assemble_dst() sets up the destination and the
 * write bits of the other three channels are then cleared.
 * NOTE(review): no test of the IL write mask is visible around the
 * individual steps -- confirm whether all of them are unconditional.
 */
3680 GLboolean
assemble_LOG(r700_AssemblerBase
*pAsm
)
3682 BITS tmp1
, tmp2
, tmp3
;
3686 tmp1
= gethelpr(pAsm
);
3687 tmp2
= gethelpr(pAsm
);
3688 tmp3
= gethelpr(pAsm
);
3690 // FIXME: The hardware can do fabs() directly on input
3691 // elements, but the compiler doesn't have the
3692 // capability to use that.
3694 // MAX tmp1.x, a.x, -a.x (fabs(a.x))
3696 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
3698 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3699 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3700 pAsm
->D
.dst
.reg
= tmp1
;
3701 pAsm
->D
.dst
.writex
= 1;
3703 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3708 pAsm
->S
[1].bits
= pAsm
->S
[0].bits
;
3709 flipneg_PVSSRC(&(pAsm
->S
[1].src
));
3711 if ( GL_FALSE
== next_ins(pAsm
) )
3718 // LG2 tmp2.x, tmp1.x
3719 // FLOOR tmp3.x, tmp2.x
3720 // MOV dst.x, tmp3.x
3721 // ADD tmp3.x, tmp2.x, -tmp3.x
3722 // EX2 dst.y, tmp3.x
3723 // MOV dst.z, tmp2.x
3726 // LG2 tmp2.x, tmp1.x
3727 // FLOOR tmp3.x, tmp2.x
3729 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_LOG_IEEE
;
3730 pAsm
->D
.dst
.math
= 1;
3732 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3733 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3734 pAsm
->D
.dst
.reg
= tmp2
;
3735 pAsm
->D
.dst
.writex
= 1;
3737 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3738 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3739 pAsm
->S
[0].src
.reg
= tmp1
;
3741 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3742 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3744 if( GL_FALSE
== next_ins(pAsm
) )
3749 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FLOOR
;
3751 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3752 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3753 pAsm
->D
.dst
.reg
= tmp3
;
3754 pAsm
->D
.dst
.writex
= 1;
3756 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3757 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3758 pAsm
->S
[0].src
.reg
= tmp2
;
3760 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3761 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3763 if( GL_FALSE
== next_ins(pAsm
) )
3768 // MOV dst.x, tmp3.x
3770 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3772 if( GL_FALSE
== assemble_dst(pAsm
) )
3777 pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writez
= pAsm
->D
.dst
.writew
= 0;
3779 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3780 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3781 pAsm
->S
[0].src
.reg
= tmp3
;
3783 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3784 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3786 if( GL_FALSE
== next_ins(pAsm
) )
3791 // ADD tmp3.x, tmp2.x, -tmp3.x
3792 // EX2 dst.y, tmp3.x
3794 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_ADD
;
3796 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3797 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3798 pAsm
->D
.dst
.reg
= tmp3
;
3799 pAsm
->D
.dst
.writex
= 1;
3801 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3802 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3803 pAsm
->S
[0].src
.reg
= tmp2
;
3805 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3806 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3808 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
3809 pAsm
->S
[1].src
.rtype
= DST_REG_TEMPORARY
;
3810 pAsm
->S
[1].src
.reg
= tmp3
;
3812 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_X
);
3813 neg_PVSSRC(&(pAsm
->S
[1].src
));
3815 if( GL_FALSE
== next_ins(pAsm
) )
3820 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
3821 pAsm
->D
.dst
.math
= 1;
3823 if( GL_FALSE
== assemble_dst(pAsm
) )
3828 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writez
= pAsm
->D
.dst
.writew
= 0;
3830 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3831 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3832 pAsm
->S
[0].src
.reg
= tmp3
;
3834 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3835 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3837 if( GL_FALSE
== next_ins(pAsm
) )
3842 // MOV dst.z, tmp2.x
3844 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3846 if( GL_FALSE
== assemble_dst(pAsm
) )
3851 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writew
= 0;
3853 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3854 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3855 pAsm
->S
[0].src
.reg
= tmp2
;
3857 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3858 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3860 if( GL_FALSE
== next_ins(pAsm
) )
/* Last step: write only dst.w, using the SQ_SEL_1 swizzle; tmp1 is only
 * named as the source register. */
3867 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3869 if( GL_FALSE
== assemble_dst(pAsm
) )
3874 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writez
= 0;
3876 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3877 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3878 pAsm
->S
[0].src
.reg
= tmp1
;
3880 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_1
);
3881 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3883 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * Lower MAD to the three-operand MULADD instruction.
 *
 * pAsm: assembler state.
 *
 * The result is routed through a helper register (bReplaceDst) when either
 *   - the IL destination is a PROGRAM_TEMPORARY whose Index equals that of
 *     any PROGRAM_TEMPORARY source among the three, or
 *   - the IL write mask is not 0xF (OP3 cannot mask channels).
 * In that case MULADD writes all four helper channels and a trailing MOV
 * copies the helper into the destination prepared by assemble_dst().
 */
3891 GLboolean
assemble_MAD(struct r700_AssemblerBase
*pAsm
)
3894 GLboolean bReplaceDst
= GL_FALSE
;
3895 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
3897 if( GL_FALSE
== checkop3(pAsm
) )
3902 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
3903 pAsm
->D
.dst
.op3
= 1;
3907 if(PROGRAM_TEMPORARY
== pILInst
->DstReg
.File
)
3908 { /* TODO : more investigation on MAD src and dst using same register */
3909 for(ii
=0; ii
<3; ii
++)
3911 if( (PROGRAM_TEMPORARY
== pILInst
->SrcReg
[ii
].File
)
3912 && (pILInst
->DstReg
.Index
== pILInst
->SrcReg
[ii
].Index
) )
3914 bReplaceDst
= GL_TRUE
;
3919 if(0xF != pILInst
->DstReg
.WriteMask
)
3920 { /* OP3 has no support for write mask */
3921 bReplaceDst
= GL_TRUE
;
3924 if(GL_TRUE
== bReplaceDst
)
3926 tmp
= gethelpr(pAsm
);
3928 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3929 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3930 pAsm
->D
.dst
.reg
= tmp
;
3932 nomask_PVSDST(&(pAsm
->D
.dst
));
/* No replacement needed: MULADD targets the real destination. */
3936 if( GL_FALSE
== assemble_dst(pAsm
) )
3942 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3947 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3952 if( GL_FALSE
== assemble_src(pAsm
, 2, -1) )
3957 if ( GL_FALSE
== next_ins(pAsm
) )
/* Copy the helper into the real destination (unswizzled, not negated). */
3962 if (GL_TRUE
== bReplaceDst
)
3964 if( GL_FALSE
== assemble_dst(pAsm
) )
3969 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3972 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3973 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3974 pAsm
->S
[0].src
.reg
= tmp
;
3976 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3977 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3979 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * Lower LIT (lighting coefficients) to five instructions.
 *
 * pAsm: assembler state.
 *
 * The destination and source register number/type are captured once from
 * assemble_dst()/assemble_src() and re-applied before every step, because
 * each emit clears the source state.  Steps, in order:
 *   1. MOV         dst.xw <- SQ_SEL_1 swizzle
 *   2. MAX         dst.y  <- src.x against an SQ_SEL_0 swizzle
 *   3. LOG_CLAMPED dst.z  <- src.y
 *   4. MUL_LIT     tmp.x  <- (src.w, dst.z, src.x)
 *   5. EXP_IEEE    dst.z  <- tmp.x
 * The channel write bits are set explicitly for each step; no reference to
 * the IL write mask is visible in this routine.
 * NOTE(review): step 4 reads dstReg back as SRC_REG_TEMPORARY regardless of
 * dstType -- check the case where the destination is not a temporary.
 */
3989 GLboolean
assemble_LIT(r700_AssemblerBase
*pAsm
)
3991 unsigned int dstReg
;
3992 unsigned int dstType
;
3993 unsigned int srcReg
;
3994 unsigned int srcType
;
3996 int tmp
= gethelpr(pAsm
);
3998 if( GL_FALSE
== assemble_dst(pAsm
) )
4002 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
/* Remember the resolved registers for the later steps. */
4006 dstReg
= pAsm
->D
.dst
.reg
;
4007 dstType
= pAsm
->D
.dst
.rtype
;
4008 srcReg
= pAsm
->S
[0].src
.reg
;
4009 srcType
= pAsm
->S
[0].src
.rtype
;
4011 /* dst.xw, <- 1.0 */
4012 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
4013 pAsm
->D
.dst
.rtype
= dstType
;
4014 pAsm
->D
.dst
.reg
= dstReg
;
4015 pAsm
->D
.dst
.writex
= 1;
4016 pAsm
->D
.dst
.writey
= 0;
4017 pAsm
->D
.dst
.writez
= 0;
4018 pAsm
->D
.dst
.writew
= 1;
4019 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4020 pAsm
->S
[0].src
.reg
= tmp
;
4021 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4022 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4023 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_1
;
4024 pAsm
->S
[0].src
.swizzley
= SQ_SEL_1
;
4025 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_1
;
4026 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_1
;
4027 if( GL_FALSE
== next_ins(pAsm
) )
4032 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4037 /* dst.y = max(src.x, 0.0) */
4038 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
4039 pAsm
->D
.dst
.rtype
= dstType
;
4040 pAsm
->D
.dst
.reg
= dstReg
;
4041 pAsm
->D
.dst
.writex
= 0;
4042 pAsm
->D
.dst
.writey
= 1;
4043 pAsm
->D
.dst
.writez
= 0;
4044 pAsm
->D
.dst
.writew
= 0;
4045 pAsm
->S
[0].src
.rtype
= srcType
;
4046 pAsm
->S
[0].src
.reg
= srcReg
;
4047 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4048 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
);
4049 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
4050 pAsm
->S
[1].src
.reg
= tmp
;
4051 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
4052 noneg_PVSSRC(&(pAsm
->S
[1].src
));
4053 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_0
;
4054 pAsm
->S
[1].src
.swizzley
= SQ_SEL_0
;
4055 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_0
;
4056 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_0
;
4057 if( GL_FALSE
== next_ins(pAsm
) )
4062 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4067 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Y
, SQ_SEL_Y
, SQ_SEL_Y
, SQ_SEL_Y
);
4069 /* dst.z = log(src.y) */
4070 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_LOG_CLAMPED
;
4071 pAsm
->D
.dst
.math
= 1;
4072 pAsm
->D
.dst
.rtype
= dstType
;
4073 pAsm
->D
.dst
.reg
= dstReg
;
4074 pAsm
->D
.dst
.writex
= 0;
4075 pAsm
->D
.dst
.writey
= 0;
4076 pAsm
->D
.dst
.writez
= 1;
4077 pAsm
->D
.dst
.writew
= 0;
4078 pAsm
->S
[0].src
.rtype
= srcType
;
4079 pAsm
->S
[0].src
.reg
= srcReg
;
4080 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4081 if( GL_FALSE
== next_ins(pAsm
) )
/* Load the IL source twice for the three-operand MUL_LIT.
 * NOTE(review): presumably the second call fills operand slot 2 --
 * confirm against assemble_src(). */
4086 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4091 if( GL_FALSE
== assemble_src(pAsm
, 0, 2) )
4096 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
);
4098 swizzleagain_PVSSRC(&(pAsm
->S
[2].src
), SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
);
4100 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
4101 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MUL_LIT
;
4102 pAsm
->D
.dst
.math
= 1;
4103 pAsm
->D
.dst
.op3
= 1;
4104 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4105 pAsm
->D
.dst
.reg
= tmp
;
4106 pAsm
->D
.dst
.writex
= 1;
4107 pAsm
->D
.dst
.writey
= 0;
4108 pAsm
->D
.dst
.writez
= 0;
4109 pAsm
->D
.dst
.writew
= 0;
4111 pAsm
->S
[0].src
.rtype
= srcType
;
4112 pAsm
->S
[0].src
.reg
= srcReg
;
4113 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
/* Second operand is the Z channel of the destination register written by
 * the LOG_CLAMPED step above. */
4115 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
4116 pAsm
->S
[1].src
.reg
= dstReg
;
4117 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
4118 noneg_PVSSRC(&(pAsm
->S
[1].src
));
4119 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_Z
;
4120 pAsm
->S
[1].src
.swizzley
= SQ_SEL_Z
;
4121 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_Z
;
4122 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_Z
;
4124 pAsm
->S
[2].src
.rtype
= srcType
;
4125 pAsm
->S
[2].src
.reg
= srcReg
;
4126 setaddrmode_PVSSRC(&(pAsm
->S
[2].src
), ADDR_ABSOLUTE
);
4128 if( GL_FALSE
== next_ins(pAsm
) )
4133 /* dst.z = exp(tmp.x) */
4134 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
4135 pAsm
->D
.dst
.math
= 1;
4136 pAsm
->D
.dst
.rtype
= dstType
;
4137 pAsm
->D
.dst
.reg
= dstReg
;
4138 pAsm
->D
.dst
.writex
= 0;
4139 pAsm
->D
.dst
.writey
= 0;
4140 pAsm
->D
.dst
.writez
= 1;
4141 pAsm
->D
.dst
.writew
= 0;
4143 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4144 pAsm
->S
[0].src
.reg
= tmp
;
4145 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4146 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4147 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_X
;
4148 pAsm
->S
[0].src
.swizzley
= SQ_SEL_X
;
4149 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_X
;
4150 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_X
;
4152 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * Lower MAX to the hardware MAX instruction: checkop2(), destination from
 * assemble_dst(), operands from IL sources 0 and 1.
 *
 * pAsm: assembler state.
 */
4160 GLboolean
assemble_MAX(r700_AssemblerBase
*pAsm
)
4162 if( GL_FALSE
== checkop2(pAsm
) )
4167 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
4169 if( GL_FALSE
== assemble_dst(pAsm
) )
4174 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4179 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4184 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * Lower MIN to the hardware MIN instruction: checkop2(), destination from
 * assemble_dst(), operands from IL sources 0 and 1.
 *
 * pAsm: assembler state.
 */
4192 GLboolean
assemble_MIN(r700_AssemblerBase
*pAsm
)
4194 if( GL_FALSE
== checkop2(pAsm
) )
4199 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MIN
;
4201 if( GL_FALSE
== assemble_dst(pAsm
) )
4206 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4211 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4216 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * Lower MOV to the hardware MOV instruction: destination from
 * assemble_dst(), single operand from IL source 0.
 *
 * pAsm: assembler state.
 */
4224 GLboolean
assemble_MOV(r700_AssemblerBase
*pAsm
)
4228 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
4230 if (GL_FALSE
== assemble_dst(pAsm
))
4235 if (GL_FALSE
== assemble_src(pAsm
, 0, -1))
4240 if ( GL_FALSE
== next_ins(pAsm
) )
/*
 * Lower MUL to the hardware MUL instruction: checkop2(), destination from
 * assemble_dst(), operands from IL sources 0 and 1.
 *
 * pAsm: assembler state.
 */
4248 GLboolean
assemble_MUL(r700_AssemblerBase
*pAsm
)
4250 if( GL_FALSE
== checkop2(pAsm
) )
4255 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
4257 if( GL_FALSE
== assemble_dst(pAsm
) )
4262 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4267 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4272 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * Lower POW through a helper register, as exp2(log2(a) * b):
 *
 *   LOG_IEEE tmp <- IL source 0
 *   MUL      tmp <- tmp (SQ_SEL_X swizzle), IL source 1
 *   EXP_IEEE tmp <- tmp (SQ_SEL_X swizzle)
 *   MOV      dst <- tmp (SQ_SEL_X swizzle)
 *
 * pAsm: assembler state.
 *
 * NOTE(review): the step comments below say "tmp.x", but each intermediate
 * destination is set up with nomask_PVSDST(), which enables all four
 * channel writes.
 */
4280 GLboolean
assemble_POW(r700_AssemblerBase
*pAsm
)
4286 tmp
= gethelpr(pAsm
);
4288 // LG2 tmp.x, a.swizzle
4289 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_LOG_IEEE
;
4290 pAsm
->D
.dst
.math
= 1;
4292 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4293 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4294 pAsm
->D
.dst
.reg
= tmp
;
4295 nomask_PVSDST(&(pAsm
->D
.dst
));
4297 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4302 if( GL_FALSE
== next_ins(pAsm
) )
4307 // MUL tmp.x, tmp.x, b.swizzle
4308 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
4310 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4311 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4312 pAsm
->D
.dst
.reg
= tmp
;
4313 nomask_PVSDST(&(pAsm
->D
.dst
));
4315 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4316 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4317 pAsm
->S
[0].src
.reg
= tmp
;
4318 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4319 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4321 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4326 if( GL_FALSE
== next_ins(pAsm
) )
4331 // EX2 dst.mask, tmp.x
4333 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
4334 pAsm
->D
.dst
.math
= 1;
4336 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4337 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4338 pAsm
->D
.dst
.reg
= tmp
;
4339 nomask_PVSDST(&(pAsm
->D
.dst
));
4341 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4342 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4343 pAsm
->S
[0].src
.reg
= tmp
;
4344 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4345 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4347 if( GL_FALSE
== next_ins(pAsm
) )
4352 // Now replicate result to all necessary channels in destination
4353 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
4355 if( GL_FALSE
== assemble_dst(pAsm
) )
4360 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4361 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
4362 pAsm
->S
[0].src
.reg
= tmp
;
4364 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4365 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4367 if( GL_FALSE
== next_ins(pAsm
) )
/* Lower RCP: delegates to assemble_math_function() with
 * SQ_OP2_INST_RECIP_IEEE and returns its result. */
4375 GLboolean
assemble_RCP(r700_AssemblerBase
*pAsm
)
4377 return assemble_math_function(pAsm
, SQ_OP2_INST_RECIP_IEEE
);
/* Lower RSQ: delegates to assemble_math_function() with
 * SQ_OP2_INST_RECIPSQRT_IEEE and returns its result. */
4380 GLboolean
assemble_RSQ(r700_AssemblerBase
*pAsm
)
4382 return assemble_math_function(pAsm
, SQ_OP2_INST_RECIPSQRT_IEEE
);
/* Lower SIN: delegates to assemble_math_function() with SQ_OP2_INST_SIN and
 * returns its result. */
4385 GLboolean
assemble_SIN(r700_AssemblerBase
*pAsm
)
4387 return assemble_math_function(pAsm
, SQ_OP2_INST_SIN
);
/*
 * Lower SCS (sine and cosine together) through a helper register:
 *
 *   COS tmp.x <- IL source 0
 *   SIN tmp.y <- IL source 0
 *   MOV dst   <- tmp, with the z and w source swizzles set to SQ_SEL_0
 *
 * pAsm: assembler state.
 *
 * The final MOV targets the destination prepared by assemble_dst().
 */
4390 GLboolean
assemble_SCS(r700_AssemblerBase
*pAsm
)
4396 tmp
= gethelpr(pAsm
);
/* Cosine into the X channel of the helper. */
4399 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_COS
;
4400 pAsm
->D
.dst
.math
= 1;
4402 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4403 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4404 pAsm
->D
.dst
.reg
= tmp
;
4405 pAsm
->D
.dst
.writex
= 1;
4407 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4412 if ( GL_FALSE
== next_ins(pAsm
) )
/* Sine into the Y channel of the helper. */
4418 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_SIN
;
4419 pAsm
->D
.dst
.math
= 1;
4421 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4422 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4423 pAsm
->D
.dst
.reg
= tmp
;
4424 pAsm
->D
.dst
.writey
= 1;
4426 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4431 if( GL_FALSE
== next_ins(pAsm
) )
4436 // MOV dst.mask, tmp
4437 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
4439 if( GL_FALSE
== assemble_dst(pAsm
) )
4444 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4445 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
4446 pAsm
->S
[0].src
.reg
= tmp
;
/* x and y come from the helper unswizzled; z and w use SQ_SEL_0. */
4448 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
4449 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_0
;
4450 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_0
;
4452 if ( GL_FALSE
== next_ins(pAsm
) )
/*
 * Generic two-operand lowering for a caller-chosen opcode.
 *
 * pAsm:   assembler state.
 * opcode: hardware opcode stored into pAsm->D.dst.opcode.
 *
 * Sets dst.math = 1, assembles the destination and IL sources 0 and 1, and
 * emits the instruction through next_ins().
 */
4460 GLboolean
assemble_LOGIC(r700_AssemblerBase
*pAsm
, BITS opcode
)
4462 if( GL_FALSE
== checkop2(pAsm
) )
4467 pAsm
->D
.dst
.opcode
= opcode
;
4468 pAsm
->D
.dst
.math
= 1;
4470 if( GL_FALSE
== assemble_dst(pAsm
) )
4475 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4480 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4485 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * Predicated variant of the generic two-operand lowering.
 *
 * pAsm:   assembler state.
 * opcode: hardware opcode stored into pAsm->D.dst.opcode.
 *
 * In addition to dst.math it sets dst.predicated = 1 and copies the
 * SaturateMode of the current IL instruction into pAsm->D2.dst2.  The
 * instruction is emitted through next_ins2() rather than next_ins().
 */
4493 GLboolean
assemble_LOGIC_PRED(r700_AssemblerBase
*pAsm
, BITS opcode
)
4495 if( GL_FALSE
== checkop2(pAsm
) )
4500 pAsm
->D
.dst
.opcode
= opcode
;
4501 pAsm
->D
.dst
.math
= 1;
4502 pAsm
->D
.dst
.predicated
= 1;
4503 pAsm
->D2
.dst2
.SaturateMode
= pAsm
->pILInst
[pAsm
->uiCurInst
].SaturateMode
;
4505 if( GL_FALSE
== assemble_dst(pAsm
) )
4510 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4515 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4520 if( GL_FALSE
== next_ins2(pAsm
) )
/*
 * Lower SGE (set on greater-or-equal) to the hardware SETGE instruction:
 * checkop2(), destination from assemble_dst(), operands from IL sources 0
 * and 1 in order.
 *
 * pAsm: assembler state.
 */
4528 GLboolean
assemble_SGE(r700_AssemblerBase
*pAsm
)
4530 if( GL_FALSE
== checkop2(pAsm
) )
4535 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_SETGE
;
4537 if( GL_FALSE
== assemble_dst(pAsm
) )
4542 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4547 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4552 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * Lower SLT (set on less-than) using the hardware SETGT instruction.
 *
 * pAsm: assembler state.
 *
 * The sources are assembled with assemble_src(pAsm, 0, 1) and
 * assemble_src(pAsm, 1, 0).
 * NOTE(review): presumably this swaps the two operands so that a < b is
 * evaluated as b > a -- confirm against assemble_src().
 */
4560 GLboolean
assemble_SLT(r700_AssemblerBase
*pAsm
)
4562 if( GL_FALSE
== checkop2(pAsm
) )
4567 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_SETGT
;
4569 if( GL_FALSE
== assemble_dst(pAsm
) )
4574 if( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
4579 if( GL_FALSE
== assemble_src(pAsm
, 1, 0) )
4584 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_STP: per-opcode assembler entry taking only the assembler state.
 * NOTE(review): no statements of the body are visible here; presumably a
 * placeholder - confirm before relying on it.
 */
4592 GLboolean
assemble_STP(r700_AssemblerBase
*pAsm
)
/*
 * assemble_TEX: builds the instruction sequence for a texture fetch.
 *
 * Steps visible in this function:
 *  - src_const is GL_TRUE when SrcReg[0].File is PROGRAM_CONSTANT,
 *    PROGRAM_LOCAL_PARAM, PROGRAM_ENV_PARAM or PROGRAM_STATE_VAR; in that
 *    case mov_temp(pAsm, 0) is called and need_barrier is raised.
 *  - The opcode switch reports an error for TXB and for non-texture opcodes.
 *  - OPCODE_TXP: RECIP_IEEE of source 0 swizzled to .wwww is written to
 *    helper tmp.w, then MUL of source 0 by tmp.w is written to tmp.xyz;
 *    aArgSubst[1] is set to tmp and need_barrier is raised.
 *  - TEXTURE_CUBE_INDEX: a CUBE instruction into tmp1, then MAX / RECIP_IEEE /
 *    MULADD / ADD / MOV set-ups using tmp1 and tmp2 as described by the inline
 *    comments; aArgSubst[1] is set to tmp1 and need_barrier is raised.
 *  - Finally SQ_TEX_INST_SAMPLE is set up: is_tex is set, need_tex_barrier is
 *    set when need_barrier is GL_TRUE, S[1].src.reg carries TexSrcUnit with an
 *    identity swizzle, tex_dst()/tex_src() are called, the source swizzle is
 *    reset for TXP and forced to .yxwy for cube maps, and next_ins() commits.
 *
 * Helper results are compared against GL_FALSE throughout.
 */
4597 GLboolean
assemble_TEX(r700_AssemblerBase
*pAsm
)
4599 GLboolean src_const
;
4600 GLboolean need_barrier
= GL_FALSE
;
4604 switch (pAsm
->pILInst
[pAsm
->uiCurInst
].SrcReg
[0].File
)
4606 case PROGRAM_CONSTANT
:
4607 case PROGRAM_LOCAL_PARAM
:
4608 case PROGRAM_ENV_PARAM
:
4609 case PROGRAM_STATE_VAR
:
4610 src_const
= GL_TRUE
;
4612 case PROGRAM_TEMPORARY
:
4615 src_const
= GL_FALSE
;
4619 if (GL_TRUE
== src_const
)
4621 if ( GL_FALSE
== mov_temp(pAsm
, 0) )
4623 need_barrier
= GL_TRUE
;
4626 switch (pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
)
4631 radeon_error("do not support TXB yet\n");
4637 radeon_error("Internal error: bad texture op (not TEX)\n");
4642 if (pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_TXP
)
4644 GLuint tmp
= gethelpr(pAsm
);
4645 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_RECIP_IEEE
;
4646 pAsm
->D
.dst
.math
= 1;
4647 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4648 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4649 pAsm
->D
.dst
.reg
= tmp
;
4650 pAsm
->D
.dst
.writew
= 1;
4652 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4656 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
);
4657 if( GL_FALSE
== next_ins(pAsm
) )
4662 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
4663 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4664 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4665 pAsm
->D
.dst
.reg
= tmp
;
4666 pAsm
->D
.dst
.writex
= 1;
4667 pAsm
->D
.dst
.writey
= 1;
4668 pAsm
->D
.dst
.writez
= 1;
4669 pAsm
->D
.dst
.writew
= 0;
4671 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4675 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
4676 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
4677 pAsm
->S
[1].src
.reg
= tmp
;
4678 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_W
);
4680 if( GL_FALSE
== next_ins(pAsm
) )
4685 pAsm
->aArgSubst
[1] = tmp
;
4686 need_barrier
= GL_TRUE
;
4689 if (pAsm
->pILInst
[pAsm
->uiCurInst
].TexSrcTarget
== TEXTURE_CUBE_INDEX
)
4691 GLuint tmp1
= gethelpr(pAsm
);
4692 GLuint tmp2
= gethelpr(pAsm
);
4694 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
4695 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_CUBE
;
4696 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4697 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4698 pAsm
->D
.dst
.reg
= tmp1
;
4699 nomask_PVSDST(&(pAsm
->D
.dst
));
/* Both CUBE operands are assembled from IL source 0; they differ only in the
 * last assemble_src() argument and in the swizzle applied afterwards. */
4701 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4706 if( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
4711 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Z
, SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_Y
);
4712 swizzleagain_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Y
, SQ_SEL_X
, SQ_SEL_Z
, SQ_SEL_Z
);
4714 if( GL_FALSE
== next_ins(pAsm
) )
4719 /* tmp1.z = ABS(tmp1.z) dont have abs support in assembler currently
4720 * have to do explicit instruction
4722 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
4723 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4724 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4725 pAsm
->D
.dst
.reg
= tmp1
;
4726 pAsm
->D
.dst
.writez
= 1;
4728 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4729 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4730 pAsm
->S
[0].src
.reg
= tmp1
;
4731 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
4732 pAsm
->S
[1].bits
= pAsm
->S
[0].bits
;
4733 flipneg_PVSSRC(&(pAsm
->S
[1].src
));
4737 /* tmp1.z = RCP_e(|tmp1.z|) */
4738 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_RECIP_IEEE
;
4739 pAsm
->D
.dst
.math
= 1;
4740 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4741 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4742 pAsm
->D
.dst
.reg
= tmp1
;
4743 pAsm
->D
.dst
.writez
= 1;
4745 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4746 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4747 pAsm
->S
[0].src
.reg
= tmp1
;
4748 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_Z
;
4752 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
4753 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
4754 * muladd has no writemask, have to use another temp
4755 * also no support for imm constants, so add 1 here
4757 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
4758 pAsm
->D
.dst
.op3
= 1;
4759 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4760 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4761 pAsm
->D
.dst
.reg
= tmp2
;
4763 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4764 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4765 pAsm
->S
[0].src
.reg
= tmp1
;
4766 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
4767 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
4768 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
4769 pAsm
->S
[1].src
.reg
= tmp1
;
4770 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Z
);
4771 setaddrmode_PVSSRC(&(pAsm
->S
[2].src
), ADDR_ABSOLUTE
);
4772 pAsm
->S
[2].src
.rtype
= SRC_REG_TEMPORARY
;
4773 pAsm
->S
[2].src
.reg
= tmp1
;
4774 setswizzle_PVSSRC(&(pAsm
->S
[2].src
), SQ_SEL_1
);
4778 /* ADD the remaining .5 */
4779 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_ADD
;
4780 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4781 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4782 pAsm
->D
.dst
.reg
= tmp2
;
4783 pAsm
->D
.dst
.writex
= 1;
4784 pAsm
->D
.dst
.writey
= 1;
4785 pAsm
->D
.dst
.writez
= 0;
4786 pAsm
->D
.dst
.writew
= 0;
4788 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4789 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4790 pAsm
->S
[0].src
.reg
= tmp2
;
4791 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
4792 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
4793 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
/* NOTE(review): 252 is a bare register index (the trailing comment names it
 * SQ_ALU_SRC_0_5); a named constant would be clearer. */
4794 pAsm
->S
[1].src
.reg
= 252; // SQ_ALU_SRC_0_5
4795 noswizzle_PVSSRC(&(pAsm
->S
[1].src
));
4799 /* tmp1.xy = temp2.xy */
4800 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
4801 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4802 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4803 pAsm
->D
.dst
.reg
= tmp1
;
4804 pAsm
->D
.dst
.writex
= 1;
4805 pAsm
->D
.dst
.writey
= 1;
4806 pAsm
->D
.dst
.writez
= 0;
4807 pAsm
->D
.dst
.writew
= 0;
4809 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4810 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4811 pAsm
->S
[0].src
.reg
= tmp2
;
4812 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
4815 pAsm
->aArgSubst
[1] = tmp1
;
4816 need_barrier
= GL_TRUE
;
/* Set-up of the actual texture SAMPLE instruction starts here. */
4820 pAsm
->D
.dst
.opcode
= SQ_TEX_INST_SAMPLE
;
4821 pAsm
->is_tex
= GL_TRUE
;
4822 if ( GL_TRUE
== need_barrier
)
4824 pAsm
->need_tex_barrier
= GL_TRUE
;
4826 // Set src1 to tex unit id
4827 pAsm
->S
[1].src
.reg
= pAsm
->pILInst
[pAsm
->uiCurInst
].TexSrcUnit
;
4828 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
4830 //No sw info from mesa compiler, so hard code here.
4831 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_X
;
4832 pAsm
->S
[1].src
.swizzley
= SQ_SEL_Y
;
4833 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_Z
;
4834 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_W
;
4836 if( GL_FALSE
== tex_dst(pAsm
) )
4841 if( GL_FALSE
== tex_src(pAsm
) )
4846 if(pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_TXP
)
4848 /* hopefully did swizzles before */
4849 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
4852 if(pAsm
->pILInst
[pAsm
->uiCurInst
].TexSrcTarget
== TEXTURE_CUBE_INDEX
)
4854 /* SAMPLE dst, tmp.yxwy, CUBE */
4855 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_Y
;
4856 pAsm
->S
[0].src
.swizzley
= SQ_SEL_X
;
4857 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_W
;
4858 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_Y
;
4861 if ( GL_FALSE
== next_ins(pAsm
) )
4869 GLboolean
assemble_XPD(r700_AssemblerBase
*pAsm
)
4873 if( GL_FALSE
== checkop2(pAsm
) )
4878 tmp
= gethelpr(pAsm
);
4880 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
4882 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4883 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4884 pAsm
->D
.dst
.reg
= tmp
;
4885 nomask_PVSDST(&(pAsm
->D
.dst
));
4887 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4892 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4897 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_Y
, SQ_SEL_0
);
4898 swizzleagain_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Y
, SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_0
);
4900 if( GL_FALSE
== next_ins(pAsm
) )
4905 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
4906 pAsm
->D
.dst
.op3
= 1;
4908 if(0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
4910 tmp
= gethelpr(pAsm
);
4912 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4913 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4914 pAsm
->D
.dst
.reg
= tmp
;
4916 nomask_PVSDST(&(pAsm
->D
.dst
));
4920 if( GL_FALSE
== assemble_dst(pAsm
) )
4926 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4931 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4936 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Y
, SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_0
);
4937 swizzleagain_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_Y
, SQ_SEL_0
);
4939 // result1 + (neg) result0
4940 setaddrmode_PVSSRC(&(pAsm
->S
[2].src
),ADDR_ABSOLUTE
);
4941 pAsm
->S
[2].src
.rtype
= SRC_REG_TEMPORARY
;
4942 pAsm
->S
[2].src
.reg
= tmp
;
4944 neg_PVSSRC(&(pAsm
->S
[2].src
));
4945 noswizzle_PVSSRC(&(pAsm
->S
[2].src
));
4947 if( GL_FALSE
== next_ins(pAsm
) )
4953 if(0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
4955 if( GL_FALSE
== assemble_dst(pAsm
) )
4960 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
4962 // Use tmp as source
4963 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4964 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4965 pAsm
->S
[0].src
.reg
= tmp
;
4967 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4968 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
4970 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_EXPORT: per-opcode assembler entry taking only the assembler state.
 * NOTE(review): no statements of the body are visible here; presumably a
 * placeholder - confirm before relying on it.
 */
4979 GLboolean
assemble_EXPORT(r700_AssemblerBase
*pAsm
)
/*
 * checkStackDepth: takes the assembler state and a reason code (uReason).
 * NOTE(review): the body is not visible here; presumably it tracks the
 * control-flow stack usage for the given push/pop reason - confirm.
 */
4984 inline void checkStackDepth(r700_AssemblerBase
*pAsm
, GLuint uReason
)
4999 GLboolean
jumpToOffest(r700_AssemblerBase
*pAsm
, GLuint pops
, GLint offset
)
5001 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5006 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= pops
;
5007 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5008 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5010 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5011 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5012 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_JUMP
;
5013 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5015 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
5017 pAsm
->cf_current_cf_clause_ptr
->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
+ offset
;
5022 GLboolean
pops(r700_AssemblerBase
*pAsm
, GLuint pops
)
5024 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5029 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= pops
;
5030 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5031 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5033 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5034 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5035 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_POP
;
5037 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5039 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
5040 pAsm
->cf_current_cf_clause_ptr
->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
+ 1;
/*
 * assemble_IF: opens an IF construct.
 *
 * Adds a CF instruction and fills it in as SQ_CF_INST_JUMP (cond ACTIVE,
 * barrier set). pop_count is 1 when bHasElse is not GL_TRUE and 0 otherwise.
 * The flow-control stack entry at fc_stack[FCSP] is then recorded as FC_IF
 * with bpush 0, no mid list, and `first` pointing at the new JUMP clause, so
 * that assemble_ELSE / assemble_ENDIF can patch its address later.
 * When USE_CF_FOR_POP_AFTER is not defined, an IF without ELSE switches
 * alu_x_opcode to SQ_CF_INST_ALU_POP_AFTER.
 * Finally branch_depth is incremented and max_branch_depth updated.
 *
 * NOTE(review): unNumMid is not reset here although assemble_BGNLOOP resets
 * it for its entry - confirm that this is intentional.
 */
5045 GLboolean
assemble_IF(r700_AssemblerBase
*pAsm
, GLboolean bHasElse
)
5047 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5052 if(GL_TRUE
!= bHasElse
)
5054 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
5058 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 0;
5060 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5061 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5063 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5064 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5065 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_JUMP
;
5066 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5068 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Record the IF on the flow-control stack; the jump address is patched later. */
5071 pAsm
->fc_stack
[pAsm
->FCSP
].type
= FC_IF
;
5072 pAsm
->fc_stack
[pAsm
->FCSP
].bpush
= 0;
5073 pAsm
->fc_stack
[pAsm
->FCSP
].mid
= NULL
;
5074 pAsm
->fc_stack
[pAsm
->FCSP
].midLen
= 0;
5075 pAsm
->fc_stack
[pAsm
->FCSP
].first
= pAsm
->cf_current_cf_clause_ptr
;
5077 #ifndef USE_CF_FOR_POP_AFTER
5078 if(GL_TRUE
!= bHasElse
)
5080 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU_POP_AFTER
;
5082 #endif /* USE_CF_FOR_POP_AFTER */
5084 pAsm
->branch_depth
++;
5086 if(pAsm
->branch_depth
> pAsm
->max_branch_depth
)
5088 pAsm
->max_branch_depth
= pAsm
->branch_depth
;
/*
 * assemble_ELSE: emits the ELSE of the IF currently on top of the
 * flow-control stack.
 *
 * Adds a CF instruction filled in as SQ_CF_INST_ELSE with pop_count 1, stores
 * that clause as mid[0] of fc_stack[FCSP] (the one-element array is obtained
 * through _mesa_realloc), and patches the IF's JUMP (`first`) to
 * uNumOfNode - 1 of the active CF list.
 * When USE_CF_FOR_POP_AFTER is not defined, alu_x_opcode is switched to
 * SQ_CF_INST_ALU_POP_AFTER.
 */
5093 GLboolean
assemble_ELSE(r700_AssemblerBase
*pAsm
)
5095 #ifdef USE_CF_FOR_POP_AFTER
5097 #endif /* USE_CF_FOR_POP_AFTER */
5099 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5104 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1; ///
5105 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5106 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5108 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5109 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5110 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_ELSE
;
5111 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5113 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* NOTE(review): the _mesa_realloc result is written through on the very next
 * statement without a NULL check, and assigning it straight back to .mid
 * would lose the old pointer on failure. assemble_BGNSUB / assemble_CAL do
 * test their realloc results. */
5115 pAsm
->fc_stack
[pAsm
->FCSP
].mid
= (R700ControlFlowGenericClause
**)_mesa_realloc( (void *)pAsm
->fc_stack
[pAsm
->FCSP
].mid
,
5117 sizeof(R700ControlFlowGenericClause
*) );
5118 pAsm
->fc_stack
[pAsm
->FCSP
].mid
[0] = pAsm
->cf_current_cf_clause_ptr
;
5119 //pAsm->fc_stack[pAsm->FCSP].unNumMid = 1;
5121 #ifndef USE_CF_FOR_POP_AFTER
5122 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU_POP_AFTER
;
5123 #endif /* USE_CF_FOR_POP_AFTER */
/* Point the IF's JUMP at uNumOfNode - 1 of the active CF list. */
5125 pAsm
->fc_stack
[pAsm
->FCSP
].first
->m_Word0
.f
.addr
= pAsm
->pR700Shader
->plstCFInstructions_active
->uNumOfNode
- 1;
/*
 * assemble_ENDIF: closes the IF on top of the flow-control stack.
 *
 * Resets alu_x_opcode to SQ_CF_INST_ALU, then patches a pending jump address
 * to the current uNumOfNode of the active CF list: the IF's JUMP (`first`)
 * when no ELSE was recorded (mid == NULL), and mid[0] otherwise (presumably
 * the alternative branch of the same test - the branching lines are not
 * visible here). The mid array is freed if present, the entry type is checked
 * against FC_IF, and branch_depth is decremented.
 *
 * NOTE(review): the FC_IF type check only happens after the entry has
 * already been patched and its mid array freed; validating first would avoid
 * touching a mismatched entry.
 */
5130 GLboolean
assemble_ENDIF(r700_AssemblerBase
*pAsm
)
5132 #ifdef USE_CF_FOR_POP_AFTER
5134 #endif /* USE_CF_FOR_POP_AFTER */
5136 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
5138 if(NULL
== pAsm
->fc_stack
[pAsm
->FCSP
].mid
)
5140 /* no else in between */
5141 pAsm
->fc_stack
[pAsm
->FCSP
].first
->m_Word0
.f
.addr
= pAsm
->pR700Shader
->plstCFInstructions_active
->uNumOfNode
;
5145 pAsm
->fc_stack
[pAsm
->FCSP
].mid
[0]->m_Word0
.f
.addr
= pAsm
->pR700Shader
->plstCFInstructions_active
->uNumOfNode
;
5148 if(NULL
!= pAsm
->fc_stack
[pAsm
->FCSP
].mid
)
5150 FREE(pAsm
->fc_stack
[pAsm
->FCSP
].mid
);
5153 if(pAsm
->fc_stack
[pAsm
->FCSP
].type
!= FC_IF
)
5155 radeon_error("if/endif in shader code are not paired. \n");
5158 pAsm
->branch_depth
--;
/*
 * assemble_BGNLOOP: opens a loop.
 *
 * Adds a CF instruction filled in as SQ_CF_INST_LOOP_START_NO_AL (pop_count 0,
 * cond ACTIVE, barrier set) and records an FC_LOOP entry at fc_stack[FCSP]
 * with bpush 1, an empty mid list (mid NULL, unNumMid 0, midLen 0) and `first`
 * pointing at the LOOP_START clause so assemble_ENDLOOP can patch it.
 * branch_depth is incremented and max_branch_depth updated.
 */
5164 GLboolean
assemble_BGNLOOP(r700_AssemblerBase
*pAsm
)
5166 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5172 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 0;
5173 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5174 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5176 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5177 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5178 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_LOOP_START_NO_AL
;
5179 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5181 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Record the loop on the flow-control stack with an empty break/continue list. */
5184 pAsm
->fc_stack
[pAsm
->FCSP
].type
= FC_LOOP
;
5185 pAsm
->fc_stack
[pAsm
->FCSP
].bpush
= 1;
5186 pAsm
->fc_stack
[pAsm
->FCSP
].mid
= NULL
;
5187 pAsm
->fc_stack
[pAsm
->FCSP
].unNumMid
= 0;
5188 pAsm
->fc_stack
[pAsm
->FCSP
].midLen
= 0;
5189 pAsm
->fc_stack
[pAsm
->FCSP
].first
= pAsm
->cf_current_cf_clause_ptr
;
5191 pAsm
->branch_depth
++;
5193 if(pAsm
->branch_depth
> pAsm
->max_branch_depth
)
5195 pAsm
->max_branch_depth
= pAsm
->branch_depth
;
/*
 * assemble_BRK: emits a loop break (only when USE_CF_FOR_CONTINUE_BREAK is
 * defined).
 *
 * Walks fc_stack downwards from FCSP looking at each entry's type for FC_LOOP;
 * an error is reported when the break is not inside a loop. It then adds a CF
 * SQ_CF_INST_LOOP_BREAK (pop_count 1), appends that clause to the mid list of
 * fc_stack[unFCSP] (grown by one slot) so assemble_ENDLOOP can patch its
 * address, and finally adds a CF SQ_CF_INST_POP (pop_count 1) whose address is
 * its own m_uIndex + 1.
 */
5200 GLboolean
assemble_BRK(r700_AssemblerBase
*pAsm
)
5202 #ifdef USE_CF_FOR_CONTINUE_BREAK
5203 unsigned int unFCSP
;
5204 for(unFCSP
=pAsm
->FCSP
; unFCSP
>0; unFCSP
--)
5206 if(FC_LOOP
== pAsm
->fc_stack
[unFCSP
].type
)
5213 radeon_error("Break is not inside loop/endloop pair.\n");
5217 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5223 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
5224 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5225 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5227 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5228 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5229 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_LOOP_BREAK
;
5231 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5233 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* NOTE(review): the _mesa_realloc result is indexed on the next statement
 * without a NULL check, and assigning it straight back to .mid would lose the
 * old array on failure. assemble_BGNSUB / assemble_CAL do test theirs. */
5235 pAsm
->fc_stack
[unFCSP
].mid
= (R700ControlFlowGenericClause
**)_mesa_realloc(
5236 (void *)pAsm
->fc_stack
[unFCSP
].mid
,
5237 sizeof(R700ControlFlowGenericClause
*) * pAsm
->fc_stack
[unFCSP
].unNumMid
,
5238 sizeof(R700ControlFlowGenericClause
*) * (pAsm
->fc_stack
[unFCSP
].unNumMid
+ 1) );
5239 pAsm
->fc_stack
[unFCSP
].mid
[pAsm
->fc_stack
[unFCSP
].unNumMid
] = pAsm
->cf_current_cf_clause_ptr
;
5240 pAsm
->fc_stack
[unFCSP
].unNumMid
++;
/* Second CF instruction: POP, continuing at the next CF slot. */
5242 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5247 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
5248 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5249 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5251 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5252 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5253 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_POP
;
5255 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5257 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
5258 pAsm
->cf_current_cf_clause_ptr
->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
+ 1;
5260 #endif //USE_CF_FOR_CONTINUE_BREAK
/*
 * assemble_CONT: emits a loop continue (only when USE_CF_FOR_CONTINUE_BREAK
 * is defined).
 *
 * Mirrors assemble_BRK: walks fc_stack downwards from FCSP looking for an
 * FC_LOOP entry, reports an error when the continue is not inside a loop,
 * adds a CF SQ_CF_INST_LOOP_CONTINUE (pop_count 1), appends that clause to the
 * mid list of fc_stack[unFCSP] so assemble_ENDLOOP can patch its address, and
 * then adds a CF SQ_CF_INST_POP (pop_count 1) whose address is its own
 * m_uIndex + 1.
 */
5264 GLboolean
assemble_CONT(r700_AssemblerBase
*pAsm
)
5266 #ifdef USE_CF_FOR_CONTINUE_BREAK
5267 unsigned int unFCSP
;
5268 for(unFCSP
=pAsm
->FCSP
; unFCSP
>0; unFCSP
--)
5270 if(FC_LOOP
== pAsm
->fc_stack
[unFCSP
].type
)
5277 radeon_error("Continue is not inside loop/endloop pair.\n");
5281 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5287 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
5288 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5289 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5291 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5292 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5293 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_LOOP_CONTINUE
;
5295 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5297 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* NOTE(review): the _mesa_realloc result is indexed on the next statement
 * without a NULL check, and assigning it straight back to .mid would lose the
 * old array on failure. assemble_BGNSUB / assemble_CAL do test theirs. */
5299 pAsm
->fc_stack
[unFCSP
].mid
= (R700ControlFlowGenericClause
**)_mesa_realloc(
5300 (void *)pAsm
->fc_stack
[unFCSP
].mid
,
5301 sizeof(R700ControlFlowGenericClause
*) * pAsm
->fc_stack
[unFCSP
].unNumMid
,
5302 sizeof(R700ControlFlowGenericClause
*) * (pAsm
->fc_stack
[unFCSP
].unNumMid
+ 1) );
5303 pAsm
->fc_stack
[unFCSP
].mid
[pAsm
->fc_stack
[unFCSP
].unNumMid
] = pAsm
->cf_current_cf_clause_ptr
;
5304 pAsm
->fc_stack
[unFCSP
].unNumMid
++;
/* Second CF instruction: POP, continuing at the next CF slot. */
5306 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5311 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
5312 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5313 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5315 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5316 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5317 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_POP
;
5319 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5321 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
5322 pAsm
->cf_current_cf_clause_ptr
->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
+ 1;
5324 #endif /* USE_CF_FOR_CONTINUE_BREAK */
/*
 * assemble_ENDLOOP: closes the loop on top of the flow-control stack.
 *
 * Adds a CF SQ_CF_INST_LOOP_END whose address is the LOOP_START's index + 1,
 * and patches the LOOP_START (`first`) to the LOOP_END's index + 1.
 * With USE_CF_FOR_CONTINUE_BREAK, every clause recorded in the entry's mid
 * list (breaks / continues) is pointed at the LOOP_END index and the list is
 * freed. The entry type is then checked against FC_LOOP.
 * When HAS_CURRENT_LOOPRET is set in unCFflags, the stack below the current
 * entry is searched, down to the current call's FCSP_BeforeEntry, for another
 * FC_LOOP; the flag is cleared under the `unFCSP <= FCSP_BeforeEntry` test
 * (presumably: no enclosing loop remains inside the subroutine - confirm).
 * branch_depth is decremented, and breakLoopOnFlag() is invoked with the
 * index found.
 * NOTE(review): the exact conditions guarding the last two steps are not
 * visible here.
 */
5329 GLboolean
assemble_ENDLOOP(r700_AssemblerBase
*pAsm
)
5333 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5339 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 0;
5340 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5341 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5343 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5344 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5345 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_LOOP_END
;
5346 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5348 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Cross-link LOOP_END and LOOP_START: each targets the slot after the other. */
5350 pAsm
->cf_current_cf_clause_ptr
->m_Word0
.f
.addr
= pAsm
->fc_stack
[pAsm
->FCSP
].first
->m_uIndex
+ 1;
5351 pAsm
->fc_stack
[pAsm
->FCSP
].first
->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
+ 1;
5353 #ifdef USE_CF_FOR_CONTINUE_BREAK
5354 for(i
=0; i
<pAsm
->fc_stack
[pAsm
->FCSP
].unNumMid
; i
++)
5356 pAsm
->fc_stack
[pAsm
->FCSP
].mid
[i
]->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
;
5358 if(NULL
!= pAsm
->fc_stack
[pAsm
->FCSP
].mid
)
5360 FREE(pAsm
->fc_stack
[pAsm
->FCSP
].mid
);
5364 if(pAsm
->fc_stack
[pAsm
->FCSP
].type
!= FC_LOOP
)
5366 radeon_error("loop/endloop in shader code are not paired. \n");
5370 unsigned int unFCSP
= 0;
5371 if((pAsm
->unCFflags
& HAS_CURRENT_LOOPRET
) > 0)
5373 for(unFCSP
=(pAsm
->FCSP
-1); unFCSP
>pAsm
->CALLSTACK
[pAsm
->CALLSP
].FCSP_BeforeEntry
; unFCSP
--)
5375 if(FC_LOOP
== pAsm
->fc_stack
[unFCSP
].type
)
5380 if(unFCSP
<= pAsm
->CALLSTACK
[pAsm
->CALLSP
].FCSP_BeforeEntry
)
5385 pAsm
->unCFflags
&= ~HAS_CURRENT_LOOPRET
;
5389 pAsm
->branch_depth
--;
5394 breakLoopOnFlag(pAsm
, unFCSP
);
5400 void add_return_inst(r700_AssemblerBase
*pAsm
)
5402 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5406 //pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
5407 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 0;
5408 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5409 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5411 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5412 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5413 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_RETURN
;
5414 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5416 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/*
 * assemble_BGNSUB: starts assembling a subroutine whose IL begins at nILindex.
 *
 * Grows pAsm->subs by 10 entries when it is full (the realloc result is
 * tested against NULL), records the IL offset and an empty local CF list in
 * the next subs slot, stores the current FCSP, the local CF list and a zeroed
 * stackUsage in CALLSTACK[CALLSP], makes that local list the active CF list
 * via SetActiveCFlist(), advances unSubArrayPointer and resets alu_x_opcode
 * to SQ_CF_INST_ALU.
 *
 * NOTE(review): the realloc result is assigned straight back to pAsm->subs,
 * so on failure the previous array is no longer reachable.
 */
5419 GLboolean
assemble_BGNSUB(r700_AssemblerBase
*pAsm
, GLint nILindex
)
5422 if( (pAsm
->unSubArrayPointer
+ 1) > pAsm
->unSubArraySize
)
5424 pAsm
->subs
= (SUB_OFFSET
*)_mesa_realloc( (void *)pAsm
->subs
,
5425 sizeof(SUB_OFFSET
) * pAsm
->unSubArraySize
,
5426 sizeof(SUB_OFFSET
) * (pAsm
->unSubArraySize
+ 10) );
5427 if(NULL
== pAsm
->subs
)
5431 pAsm
->unSubArraySize
+= 10;
/* Describe the new subroutine: IL start offset and an empty CF list. */
5434 pAsm
->subs
[pAsm
->unSubArrayPointer
].subIL_Offset
= nILindex
;
5435 pAsm
->subs
[pAsm
->unSubArrayPointer
].lstCFInstructions_local
.pHead
=NULL
;
5436 pAsm
->subs
[pAsm
->unSubArrayPointer
].lstCFInstructions_local
.pTail
=NULL
;
5437 pAsm
->subs
[pAsm
->unSubArrayPointer
].lstCFInstructions_local
.uNumOfNode
=0;
5440 pAsm
->CALLSTACK
[pAsm
->CALLSP
].FCSP_BeforeEntry
= pAsm
->FCSP
;
5441 pAsm
->CALLSTACK
[pAsm
->CALLSP
].plstCFInstructions_local
5442 = &(pAsm
->subs
[pAsm
->unSubArrayPointer
].lstCFInstructions_local
);
5443 pAsm
->CALLSTACK
[pAsm
->CALLSP
].stackUsage
.bits
= 0;
5444 SetActiveCFlist(pAsm
->pR700Shader
,
5445 pAsm
->CALLSTACK
[pAsm
->CALLSP
].plstCFInstructions_local
);
5447 pAsm
->unSubArrayPointer
++;
5450 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
/*
 * assemble_ENDSUB: finishes a subroutine.
 *
 * Makes the CF list stored in CALLSTACK[CALLSP] the active CF list via
 * SetActiveCFlist() and resets alu_x_opcode to SQ_CF_INST_ALU.
 * NOTE(review): any adjustment of CALLSP around this call is not visible
 * here - confirm which call-stack level is being restored.
 */
5455 GLboolean
assemble_ENDSUB(r700_AssemblerBase
*pAsm
)
5458 SetActiveCFlist(pAsm
->pR700Shader
,
5459 pAsm
->CALLSTACK
[pAsm
->CALLSP
].plstCFInstructions_local
);
5461 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
/*
 * assemble_RET: emits a return.
 *
 * When inside a subroutine (CALLSP > 0), the flow-control stack is scanned
 * from FCSP down to the current call's FCSP_BeforeEntry; on finding an
 * FC_LOOP entry the return-in-loop flag is set with SQ_SEL_1, a flag-
 * conditional break is emitted for that loop through breakLoopOnFlag(), and
 * LOOPRET_FLAGS is OR-ed into unCFflags.
 * add_return_inst() emits the CF RETURN itself.
 * NOTE(review): whether the loop-scan path returns before reaching
 * add_return_inst() is not visible here.
 */
5466 GLboolean
assemble_RET(r700_AssemblerBase
*pAsm
)
5468 if(pAsm
->CALLSP
> 0)
5470 unsigned int unFCSP
;
5471 for(unFCSP
=pAsm
->FCSP
; unFCSP
>pAsm
->CALLSTACK
[pAsm
->CALLSP
].FCSP_BeforeEntry
; unFCSP
--)
5473 if(FC_LOOP
== pAsm
->fc_stack
[unFCSP
].type
)
5475 setRetInLoopFlag(pAsm
, SQ_SEL_1
);
5476 breakLoopOnFlag(pAsm
, unFCSP
);
5477 pAsm
->unCFflags
|= LOOPRET_FLAGS
;
5484 add_return_inst(pAsm
);
/*
 * assemble_CAL: emits a subroutine call and, if needed, assembles the callee.
 *
 * Resets alu_x_opcode to SQ_CF_INST_ALU, adds a CF SQ_CF_INST_CALL
 * (call_count 1, pop_count 0, barrier set), and records the call in
 * pAsm->callers (grown by 10 entries when full, with a NULL check) together
 * with the callee's IL offset nILindex and the CALL clause pointer.
 * The subs table is then searched for nILindex: a match stores that index as
 * the caller's subDescIndex ("compiled before"); otherwise subDescIndex is set
 * to unSubArrayPointer and AssembleInstr() is invoked starting at nILindex.
 *
 * NOTE(review): nILindex is used below but its parameter declaration is not
 * visible in the signature shown here.
 * NOTE(review): the realloc result is assigned straight back to
 * pAsm->callers, so on failure the previous array is no longer reachable.
 */
5489 GLboolean
assemble_CAL(r700_AssemblerBase
*pAsm
,
5491 GLuint uiNumberInsts
,
5492 struct prog_instruction
*pILInst
)
5494 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
5496 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5501 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.call_count
= 1;
5502 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 0;
5503 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5504 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5506 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5507 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5508 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_CALL
;
5509 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5511 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Record this call site so its target can be resolved later. */
5514 if( (pAsm
->unCallerArrayPointer
+ 1) > pAsm
->unCallerArraySize
)
5516 pAsm
->callers
= (CALLER_POINTER
*)_mesa_realloc( (void *)pAsm
->callers
,
5517 sizeof(CALLER_POINTER
) * pAsm
->unCallerArraySize
,
5518 sizeof(CALLER_POINTER
) * (pAsm
->unCallerArraySize
+ 10) );
5519 if(NULL
== pAsm
->callers
)
5523 pAsm
->unCallerArraySize
+= 10;
5526 pAsm
->callers
[pAsm
->unCallerArrayPointer
].subIL_Offset
= nILindex
;
5527 pAsm
->callers
[pAsm
->unCallerArrayPointer
].cf_ptr
= pAsm
->cf_current_cf_clause_ptr
;
5529 pAsm
->unCallerArrayPointer
++;
/* Look for a subroutine already registered at this IL offset. */
5532 for(j
=0; j
<pAsm
->unSubArrayPointer
; j
++)
5534 if(nILindex
== pAsm
->subs
[j
].subIL_Offset
)
5535 { /* compiled before */
5536 pAsm
->callers
[pAsm
->unCallerArrayPointer
- 1].subDescIndex
= j
;
5541 pAsm
->callers
[pAsm
->unCallerArrayPointer
- 1].subDescIndex
= pAsm
->unSubArrayPointer
;
5543 return AssembleInstr(nILindex
, uiNumberInsts
, pILInst
, pAsm
);
/*
 * setRetInLoopFlag: writes the "return inside loop" flag register.
 *
 * Sets up a non-predicated SQ_OP2_INST_MOV (saturate off, D2.dst2.literal 1)
 * into the .x channel of temporary register pAsm->flag_reg_index.
 * Two source set-ups are visible: one reading a literal (SRC_REC_LITERAL,
 * committed with next_ins_literal() and fLiteral), and one reading register 0
 * with all four swizzles set to flagValue (committed with next_ins2()).
 * Callers in this file pass SQ_SEL_0 or SQ_SEL_1 as flagValue.
 *
 * NOTE(review): it is not visible here whether the two set-ups are
 * alternatives (e.g. under a preprocessor conditional) or both executed -
 * confirm before editing.
 * NOTE(review): the second set-up stores DST_REG_TEMPORARY into a *source*
 * rtype field where other code uses SRC_REG_TEMPORARY - presumably the same
 * value; confirm.
 */
5546 GLboolean
setRetInLoopFlag(r700_AssemblerBase
*pAsm
, GLuint flagValue
)
5548 GLfloat fLiteral
[2] = {0.1, 0.0};
5550 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
5551 pAsm
->D
.dst
.op3
= 0;
5552 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
5553 pAsm
->D
.dst
.reg
= pAsm
->flag_reg_index
;
5554 pAsm
->D
.dst
.writex
= 1;
5555 pAsm
->D
.dst
.writey
= 0;
5556 pAsm
->D
.dst
.writez
= 0;
5557 pAsm
->D
.dst
.writew
= 0;
5558 pAsm
->D2
.dst2
.literal
= 1;
5559 pAsm
->D2
.dst2
.SaturateMode
= SATURATE_OFF
;
5560 pAsm
->D
.dst
.predicated
= 0;
5562 pAsm
->S
[0].src
.rtype
= SRC_REC_LITERAL
;
5563 //pAsm->S[0].src.reg = 0;
5564 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
5565 noneg_PVSSRC(&(pAsm
->S
[0].src
));
5566 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_X
;
5567 pAsm
->S
[0].src
.swizzley
= SQ_SEL_Y
;
5568 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_Z
;
5569 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_W
;
5571 if( GL_FALSE
== next_ins_literal(pAsm
, &(fLiteral
[0])) )
5576 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
5577 pAsm
->S
[0].src
.reg
= 0;
5578 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
5579 noneg_PVSSRC(&(pAsm
->S
[0].src
));
5580 pAsm
->S
[0].src
.swizzlex
= flagValue
;
5581 pAsm
->S
[0].src
.swizzley
= flagValue
;
5582 pAsm
->S
[0].src
.swizzlez
= flagValue
;
5583 pAsm
->S
[0].src
.swizzlew
= flagValue
;
5585 if( GL_FALSE
== next_ins2(pAsm
) )
/*
 * testFlag: emits a predicate test of the "return inside loop" flag register.
 *
 * Switches alu_x_opcode to SQ_CF_INST_ALU_PUSH_BEFORE and sets up a
 * predicated SQ_OP2_INST_PRED_SETE (math flag set, saturate off,
 * D2.dst2.literal 1) writing .x of a helper register. Source 0 is the flag
 * register (pAsm->flag_reg_index) with identity swizzle. For source 1 two
 * set-ups are visible: a literal (SRC_REC_LITERAL, committed with
 * next_ins_literal() and fLiteral) and register 0 with all swizzles set to
 * SQ_SEL_1 (committed with next_ins2()).
 *
 * NOTE(review): it is not visible here whether the two source-1 set-ups are
 * alternatives (e.g. under a preprocessor conditional) or both executed -
 * confirm before editing.
 * NOTE(review): DST_REG_TEMPORARY is stored into *source* rtype fields where
 * other code uses SRC_REG_TEMPORARY - presumably the same value; confirm.
 */
5594 GLboolean
testFlag(r700_AssemblerBase
*pAsm
)
5596 GLfloat fLiteral
[2] = {0.1, 0.0};
5599 GLuint tmp
= gethelpr(pAsm
);
5600 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
5602 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_PRED_SETE
;
5603 pAsm
->D
.dst
.math
= 1;
5604 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
5605 pAsm
->D
.dst
.reg
= tmp
;
5606 pAsm
->D
.dst
.writex
= 1;
5607 pAsm
->D
.dst
.writey
= 0;
5608 pAsm
->D
.dst
.writez
= 0;
5609 pAsm
->D
.dst
.writew
= 0;
5610 pAsm
->D2
.dst2
.literal
= 1;
5611 pAsm
->D2
.dst2
.SaturateMode
= SATURATE_OFF
;
5612 pAsm
->D
.dst
.predicated
= 1;
/* Source 0: the flag register, unswizzled. */
5614 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
5615 pAsm
->S
[0].src
.reg
= pAsm
->flag_reg_index
;
5616 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
5617 noneg_PVSSRC(&(pAsm
->S
[0].src
));
5618 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_X
;
5619 pAsm
->S
[0].src
.swizzley
= SQ_SEL_Y
;
5620 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_Z
;
5621 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_W
;
5623 pAsm
->S
[1].src
.rtype
= SRC_REC_LITERAL
;
5624 //pAsm->S[1].src.reg = 0;
5625 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
5626 noneg_PVSSRC(&(pAsm
->S
[1].src
));
5627 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_X
;
5628 pAsm
->S
[1].src
.swizzley
= SQ_SEL_Y
;
5629 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_Z
;
5630 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_W
;
5632 if( GL_FALSE
== next_ins_literal(pAsm
, &(fLiteral
[0])) )
5637 pAsm
->S
[1].src
.rtype
= DST_REG_TEMPORARY
;
5638 pAsm
->S
[1].src
.reg
= 0;
5639 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
5640 noneg_PVSSRC(&(pAsm
->S
[1].src
));
5641 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_1
;
5642 pAsm
->S
[1].src
.swizzley
= SQ_SEL_1
;
5643 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_1
;
5644 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_1
;
5646 if( GL_FALSE
== next_ins2(pAsm
) )
/*
 * returnOnFlag: emits a flag-conditional return sequence.
 *
 * Visible steps: a relative jump of +4 CF slots with pop count 1
 * (jumpToOffest(pAsm, 1, 4)), clearing the return-in-loop flag with
 * SQ_SEL_0, and a CF RETURN via add_return_inst().
 * NOTE(review): the hard-coded offset 4 depends on how many CF instructions
 * the following steps emit; other steps of this function may not be visible
 * here - re-check the offset whenever the sequence changes.
 * NOTE(review): the results of jumpToOffest() and setRetInLoopFlag() are not
 * checked.
 */
5655 GLboolean
returnOnFlag(r700_AssemblerBase
*pAsm
)
5658 jumpToOffest(pAsm
, 1, 4);
5659 setRetInLoopFlag(pAsm
, SQ_SEL_0
);
5661 add_return_inst(pAsm
);
/*
 * breakLoopOnFlag: emits a loop break belonging to the loop stored at
 * fc_stack[unFCSP].
 *
 * Adds a CF SQ_CF_INST_LOOP_BREAK (pop_count 1, cond ACTIVE, barrier set) and
 * appends that clause to the loop's mid list (grown by one slot) so that
 * assemble_ENDLOOP can patch its target address.
 * NOTE(review): the name suggests the break is conditional on the return
 * flag; the flag test itself is not visible here - confirm.
 */
5666 GLboolean
breakLoopOnFlag(r700_AssemblerBase
*pAsm
, GLuint unFCSP
)
5671 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5676 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
5677 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5678 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5680 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5681 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5682 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_LOOP_BREAK
;
5683 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5685 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* NOTE(review): the _mesa_realloc result is indexed on the next statement
 * without a NULL check, and assigning it straight back to .mid would lose the
 * old array on failure. assemble_BGNSUB / assemble_CAL do test theirs. */
5687 pAsm
->fc_stack
[unFCSP
].mid
= (R700ControlFlowGenericClause
**)_mesa_realloc(
5688 (void *)pAsm
->fc_stack
[unFCSP
].mid
,
5689 sizeof(R700ControlFlowGenericClause
*) * pAsm
->fc_stack
[unFCSP
].unNumMid
,
5690 sizeof(R700ControlFlowGenericClause
*) * (pAsm
->fc_stack
[unFCSP
].unNumMid
+ 1) );
5691 pAsm
->fc_stack
[unFCSP
].mid
[pAsm
->fc_stack
[unFCSP
].unNumMid
] = pAsm
->cf_current_cf_clause_ptr
;
5692 pAsm
->fc_stack
[unFCSP
].unNumMid
++;
/*
 * AssembleInstr: stores pILInst on the assembler state, then walks the
 * instructions with index i running from uiFirstInst while i < uiNumberInsts
 * (uiNumberInsts is used as the exclusive upper bound of i, not as a count
 * relative to uiFirstInst).  For every instruction the current index is
 * recorded in uiCurInst and the Opcode is dispatched to an assemble_* helper.
 *
 * NOTE(review): this listing does not contain every original line (the
 * embedded line numbers skip), so braces, most case labels and the return
 * statements are not visible here; comments below describe only what is shown.
 * NOTE(review): several branches read pILInst[i+1].Opcode without a visible
 * bounds check against uiNumberInsts - confirm callers always supply a
 * trailing instruction.
 */
5699 GLboolean
AssembleInstr(GLuint uiFirstInst
,
5700 GLuint uiNumberInsts
,
5701 struct prog_instruction
*pILInst
,
5702 r700_AssemblerBase
*pR700AsmCode
)
5706 pR700AsmCode
->pILInst
= pILInst
;
5707 for(i
=uiFirstInst
; i
<uiNumberInsts
; i
++)
5709 pR700AsmCode
->uiCurInst
= i
;
/* Only compiled when USE_CF_FOR_CONTINUE_BREAK is NOT defined: if the next
 * instruction is OPCODE_BRK, the current instruction's Opcode is replaced by
 * another compare opcode (the case labels selecting each replacement are not
 * visible in this listing). */
5711 #ifndef USE_CF_FOR_CONTINUE_BREAK
5712 if(OPCODE_BRK
== pILInst
[i
+1].Opcode
)
5714 switch(pILInst
[i
].Opcode
)
5717 pILInst
[i
].Opcode
= OPCODE_SGT
;
5720 pILInst
[i
].Opcode
= OPCODE_SGE
;
5723 pILInst
[i
].Opcode
= OPCODE_SLT
;
5726 pILInst
[i
].Opcode
= OPCODE_SLE
;
5729 pILInst
[i
].Opcode
= OPCODE_SNE
;
5732 pILInst
[i
].Opcode
= OPCODE_SEQ
;
/* Main dispatch on the opcode of the current instruction. */
5740 switch (pILInst
[i
].Opcode
)
5743 if ( GL_FALSE
== assemble_ABS(pR700AsmCode
) )
5748 if ( GL_FALSE
== assemble_ADD(pR700AsmCode
) )
5753 if ( GL_FALSE
== assemble_ARL(pR700AsmCode
) )
5757 radeon_error("Not yet implemented instruction OPCODE_ARR \n");
5758 //if ( GL_FALSE == assemble_BAD("ARR") )
5763 if ( GL_FALSE
== assemble_CMP(pR700AsmCode
) )
5767 if ( GL_FALSE
== assemble_COS(pR700AsmCode
) )
5774 if ( GL_FALSE
== assemble_DOT(pR700AsmCode
) )
5779 if ( GL_FALSE
== assemble_DST(pR700AsmCode
) )
5784 if ( GL_FALSE
== assemble_EX2(pR700AsmCode
) )
5788 if ( GL_FALSE
== assemble_EXP(pR700AsmCode
) )
5793 if ( GL_FALSE
== assemble_FLR(pR700AsmCode
) )
5797 // if ( GL_FALSE == assemble_FLR_INT() )
5802 if ( GL_FALSE
== assemble_FRC(pR700AsmCode
) )
5807 if ( GL_FALSE
== assemble_KIL(pR700AsmCode
) )
5811 if ( GL_FALSE
== assemble_LG2(pR700AsmCode
) )
5815 if ( GL_FALSE
== assemble_LIT(pR700AsmCode
) )
5819 if ( GL_FALSE
== assemble_LRP(pR700AsmCode
) )
5823 if ( GL_FALSE
== assemble_LOG(pR700AsmCode
) )
5828 if ( GL_FALSE
== assemble_MAD(pR700AsmCode
) )
5832 if ( GL_FALSE
== assemble_MAX(pR700AsmCode
) )
5836 if ( GL_FALSE
== assemble_MIN(pR700AsmCode
) )
5841 if ( GL_FALSE
== assemble_MOV(pR700AsmCode
) )
5845 if ( GL_FALSE
== assemble_MUL(pR700AsmCode
) )
5850 if ( GL_FALSE
== assemble_POW(pR700AsmCode
) )
5854 if ( GL_FALSE
== assemble_RCP(pR700AsmCode
) )
5858 if ( GL_FALSE
== assemble_RSQ(pR700AsmCode
) )
5862 if ( GL_FALSE
== assemble_SIN(pR700AsmCode
) )
5866 if ( GL_FALSE
== assemble_SCS(pR700AsmCode
) )
/* Compare group using SETE: when the next instruction is IF, BRK or CONT the
 * compare is emitted as a predicate-setting op (assemble_LOGIC_PRED with
 * SQ_OP2_INST_PRED_SETE) after choosing alu_x_opcode; otherwise a plain
 * assemble_LOGIC with SQ_OP2_INST_SETE is used.  The same pattern repeats for
 * the SETGT, SETGE and SETNE groups below. */
5871 if(OPCODE_IF
== pILInst
[i
+1].Opcode
)
5873 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
5874 if ( GL_FALSE
== assemble_LOGIC_PRED(pR700AsmCode
, SQ_OP2_INST_PRED_SETE
) )
5879 else if(OPCODE_BRK
== pILInst
[i
+1].Opcode
)
5881 #ifdef USE_CF_FOR_CONTINUE_BREAK
5882 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
5884 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_BREAK
;
5886 if ( GL_FALSE
== assemble_LOGIC_PRED(pR700AsmCode
, SQ_OP2_INST_PRED_SETE
) )
5891 else if(OPCODE_CONT
== pILInst
[i
+1].Opcode
)
5893 #ifdef USE_CF_FOR_CONTINUE_BREAK
5894 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
5896 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_CONTINUE
;
5898 if ( GL_FALSE
== assemble_LOGIC_PRED(pR700AsmCode
, SQ_OP2_INST_PRED_SETE
) )
5905 if ( GL_FALSE
== assemble_LOGIC(pR700AsmCode
, SQ_OP2_INST_SETE
) )
/* Compare group using SETGT (same IF / BRK / CONT lookahead as above). */
5913 if(OPCODE_IF
== pILInst
[i
+1].Opcode
)
5915 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
5916 if ( GL_FALSE
== assemble_LOGIC_PRED(pR700AsmCode
, SQ_OP2_INST_PRED_SETGT
) )
5921 else if(OPCODE_BRK
== pILInst
[i
+1].Opcode
)
5923 #ifdef USE_CF_FOR_CONTINUE_BREAK
5924 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
5926 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_BREAK
;
5928 if ( GL_FALSE
== assemble_LOGIC_PRED(pR700AsmCode
, SQ_OP2_INST_PRED_SETGT
) )
5933 else if(OPCODE_CONT
== pILInst
[i
+1].Opcode
)
5935 #ifdef USE_CF_FOR_CONTINUE_BREAK
5936 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
5938 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_CONTINUE
;
5941 if ( GL_FALSE
== assemble_LOGIC_PRED(pR700AsmCode
, SQ_OP2_INST_PRED_SETGT
) )
5948 if ( GL_FALSE
== assemble_LOGIC(pR700AsmCode
, SQ_OP2_INST_SETGT
) )
/* Compare group using SETGE; the non-predicated fallback calls assemble_SGE. */
5956 if(OPCODE_IF
== pILInst
[i
+1].Opcode
)
5958 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
5959 if ( GL_FALSE
== assemble_LOGIC_PRED(pR700AsmCode
, SQ_OP2_INST_PRED_SETGE
) )
5964 else if(OPCODE_BRK
== pILInst
[i
+1].Opcode
)
5966 #ifdef USE_CF_FOR_CONTINUE_BREAK
5967 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
5969 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_BREAK
;
5971 if ( GL_FALSE
== assemble_LOGIC_PRED(pR700AsmCode
, SQ_OP2_INST_PRED_SETGE
) )
5976 else if(OPCODE_CONT
== pILInst
[i
+1].Opcode
)
5978 #ifdef USE_CF_FOR_CONTINUE_BREAK
5979 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
5981 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_CONTINUE
;
5984 if ( GL_FALSE
== assemble_LOGIC_PRED(pR700AsmCode
, SQ_OP2_INST_PRED_SETGE
) )
5991 if ( GL_FALSE
== assemble_SGE(pR700AsmCode
) )
5998 /* No LT/LE hardware ops. TODO: use GE => LE, GT => LT: reversing the two source operands would be simplest. Or use SQ_CF_COND_FALSE for SQ_CF_COND_ACTIVE. */
/* The two source registers of the current instruction are saved and swapped,
 * the compare is assembled with SETGT on the swapped operands, and the
 * original source order is written back on every visible path afterwards. */
6001 struct prog_src_register SrcRegSave
[2];
6002 SrcRegSave
[0] = pILInst
[i
].SrcReg
[0];
6003 SrcRegSave
[1] = pILInst
[i
].SrcReg
[1];
6004 pILInst
[i
].SrcReg
[0] = SrcRegSave
[1];
6005 pILInst
[i
].SrcReg
[1] = SrcRegSave
[0];
6006 if(OPCODE_IF
== pILInst
[i
+1].Opcode
)
6008 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
6009 if ( GL_FALSE
== assemble_LOGIC_PRED(pR700AsmCode
, SQ_OP2_INST_PRED_SETGT
) )
6011 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
6012 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
6016 else if(OPCODE_BRK
== pILInst
[i
+1].Opcode
)
6018 #ifdef USE_CF_FOR_CONTINUE_BREAK
6019 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
6021 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_BREAK
;
6023 if ( GL_FALSE
== assemble_LOGIC_PRED(pR700AsmCode
, SQ_OP2_INST_PRED_SETGT
) )
6025 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
6026 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
6030 else if(OPCODE_CONT
== pILInst
[i
+1].Opcode
)
6032 #ifdef USE_CF_FOR_CONTINUE_BREAK
6033 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
6035 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_CONTINUE
;
6038 if ( GL_FALSE
== assemble_LOGIC_PRED(pR700AsmCode
, SQ_OP2_INST_PRED_SETGT
) )
6040 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
6041 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
6047 if ( GL_FALSE
== assemble_LOGIC(pR700AsmCode
, SQ_OP2_INST_SETGT
) )
6049 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
6050 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
6054 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
6055 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
/* Same swap / assemble / restore sequence as above, using SETGE. */
6061 struct prog_src_register SrcRegSave
[2];
6062 SrcRegSave
[0] = pILInst
[i
].SrcReg
[0];
6063 SrcRegSave
[1] = pILInst
[i
].SrcReg
[1];
6064 pILInst
[i
].SrcReg
[0] = SrcRegSave
[1];
6065 pILInst
[i
].SrcReg
[1] = SrcRegSave
[0];
6066 if(OPCODE_IF
== pILInst
[i
+1].Opcode
)
6068 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
6069 if ( GL_FALSE
== assemble_LOGIC_PRED(pR700AsmCode
, SQ_OP2_INST_PRED_SETGE
) )
6071 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
6072 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
6076 else if(OPCODE_BRK
== pILInst
[i
+1].Opcode
)
6078 #ifdef USE_CF_FOR_CONTINUE_BREAK
6079 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
6081 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_BREAK
;
6083 if ( GL_FALSE
== assemble_LOGIC_PRED(pR700AsmCode
, SQ_OP2_INST_PRED_SETGE
) )
6085 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
6086 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
6090 else if(OPCODE_CONT
== pILInst
[i
+1].Opcode
)
6092 #ifdef USE_CF_FOR_CONTINUE_BREAK
6093 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
6095 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_CONTINUE
;
6098 if ( GL_FALSE
== assemble_LOGIC_PRED(pR700AsmCode
, SQ_OP2_INST_PRED_SETGE
) )
6100 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
6101 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
6107 if ( GL_FALSE
== assemble_LOGIC(pR700AsmCode
, SQ_OP2_INST_SETGE
) )
6109 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
6110 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
6114 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
6115 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
/* Compare group using SETNE (same IF / BRK / CONT lookahead as above). */
6120 if(OPCODE_IF
== pILInst
[i
+1].Opcode
)
6122 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
6123 if ( GL_FALSE
== assemble_LOGIC_PRED(pR700AsmCode
, SQ_OP2_INST_PRED_SETNE
) )
6128 else if(OPCODE_BRK
== pILInst
[i
+1].Opcode
)
6130 #ifdef USE_CF_FOR_CONTINUE_BREAK
6131 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
6133 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_BREAK
;
6135 if ( GL_FALSE
== assemble_LOGIC_PRED(pR700AsmCode
, SQ_OP2_INST_PRED_SETNE
) )
6140 else if(OPCODE_CONT
== pILInst
[i
+1].Opcode
)
6142 #ifdef USE_CF_FOR_CONTINUE_BREAK
6143 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
6145 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_CONTINUE
;
6147 if ( GL_FALSE
== assemble_LOGIC_PRED(pR700AsmCode
, SQ_OP2_INST_PRED_SETNE
) )
6154 if ( GL_FALSE
== assemble_LOGIC(pR700AsmCode
, SQ_OP2_INST_SETNE
) )
6162 // if ( GL_FALSE == assemble_STP(pR700AsmCode) )
6167 if ( GL_FALSE
== assemble_MOV(pR700AsmCode
) )
/* Visible checks before the texture assemble: a following instruction exists
 * ((i+1) < uiNumberInsts), it is not OPCODE_END, and IsTex() reports it as a
 * texture instruction; in that case its nDstDep entry is set to i+1. */
6173 if( (i
+1)<uiNumberInsts
)
6175 if(OPCODE_END
!= pILInst
[i
+1].Opcode
)
6177 if( GL_TRUE
== IsTex(pILInst
[i
+1].Opcode
) )
6179 pR700AsmCode
->pInstDeps
[i
+1].nDstDep
= i
+1; //=1?
6189 if ( GL_FALSE
== assemble_TEX(pR700AsmCode
) )
6194 if ( GL_FALSE
== assemble_XPD(pR700AsmCode
) )
/* The instruction just before this instruction's BranchTarget is tested for
 * OPCODE_ELSE and bHasElse is passed on to assemble_IF.
 * NOTE(review): the statement that updates bHasElse is not visible here. */
6200 GLboolean bHasElse
= GL_FALSE
;
6202 if(pILInst
[pILInst
[i
].BranchTarget
- 1].Opcode
== OPCODE_ELSE
)
6207 if ( GL_FALSE
== assemble_IF(pR700AsmCode
, bHasElse
) )
6215 if ( GL_FALSE
== assemble_ELSE(pR700AsmCode
) )
6220 if ( GL_FALSE
== assemble_ENDIF(pR700AsmCode
) )
6224 case OPCODE_BGNLOOP
:
6225 if( GL_FALSE
== assemble_BGNLOOP(pR700AsmCode
) )
6232 if( GL_FALSE
== assemble_BRK(pR700AsmCode
) )
6239 if( GL_FALSE
== assemble_CONT(pR700AsmCode
) )
6245 case OPCODE_ENDLOOP
:
6246 if( GL_FALSE
== assemble_ENDLOOP(pR700AsmCode
) )
6253 if( GL_FALSE
== assemble_BGNSUB(pR700AsmCode
, i
) )
6260 if( GL_FALSE
== assemble_RET(pR700AsmCode
) )
6267 if( GL_FALSE
== assemble_CAL(pR700AsmCode
,
6268 pILInst
[i
].BranchTarget
,
6276 //case OPCODE_EXPORT:
6277 // if ( GL_FALSE == assemble_EXPORT() )
6282 return assemble_ENDSUB(pR700AsmCode
);
6285 //pR700AsmCode->uiCurInst = i;
6286 //This is a reminder that if a later export includes a depth/stencil
6287 //export, we need a mov to re-arrange the DST channel; as this needs a
6288 //pseudo inst, we will use this end inst to do it.
6292 radeon_error("internal: unknown instruction\n");
/*
 * InitShaderProgram: calls setRetInLoopFlag(pAsm, SQ_SEL_0) on the assembler
 * state.
 * NOTE(review): the embedded line numbers skip after this call, so any
 * further statements and the return value are not visible in this listing.
 */
6300 GLboolean
InitShaderProgram(r700_AssemblerBase
* pAsm
)
6302 setRetInLoopFlag(pAsm
, SQ_SEL_0
);
/*
 * RelocProgram: relocates subroutine control-flow (CF) instructions and links
 * them behind the main CF list.
 *
 * Visible steps:
 *  - the main list is CALLSTACK[0].plstCFInstructions_local and the running
 *    offset unCFoffset starts at its uNumOfNode;
 *  - for each of the unSubArrayPointer entries in pAsm->subs the offset is
 *    stored in subs[i].unCFoffset, added to the addr field of the listed
 *    branch-type CF instructions and to every instruction's m_uIndex, and the
 *    sub's list is appended to the main list;
 *  - every caller's CF addr is set to the unCFoffset of the sub it refers to;
 *  - when HAS_LOOPRET is not set in unCFflags, the main list is scanned for a
 *    SIT_CF_ALU clause whose flag-init instruction is removed.
 *
 * NOTE(review): braces, loop headers and return statements are not all
 * visible in this listing, so exact nesting should be confirmed.
 */
6306 GLboolean
RelocProgram(r700_AssemblerBase
* pAsm
)
6310 TypedShaderList
* plstCFmain
;
6311 TypedShaderList
* plstCFsub
;
6313 R700ShaderInstruction
* pInst
;
6314 R700ControlFlowGenericClause
* pCFInst
;
/* NOTE(review): presumably an early exit when there are no subroutines; the
 * body of this check is not visible here. */
6316 if(0 == pAsm
->unSubArrayPointer
)
6321 plstCFmain
= pAsm
->CALLSTACK
[0].plstCFInstructions_local
;
6322 unCFoffset
= plstCFmain
->uNumOfNode
;
6325 for(i
=0; i
<pAsm
->unSubArrayPointer
; i
++)
6327 pAsm
->subs
[i
].unCFoffset
= unCFoffset
;
6328 plstCFsub
= &(pAsm
->subs
[i
].lstCFInstructions_local
);
6330 pInst
= plstCFsub
->pHead
;
6332 /* reloc instructions */
6335 if(SIT_CF_GENERIC
== pInst
->m_ShaderInstType
)
6337 pCFInst
= (R700ControlFlowGenericClause
*)pInst
;
/* Only these CF instruction kinds have their addr field shifted. */
6339 switch (pCFInst
->m_Word1
.f
.cf_inst
)
6341 case SQ_CF_INST_POP
:
6342 case SQ_CF_INST_JUMP
:
6343 case SQ_CF_INST_ELSE
:
6344 case SQ_CF_INST_LOOP_END
:
6345 case SQ_CF_INST_LOOP_START
:
6346 case SQ_CF_INST_LOOP_START_NO_AL
:
6347 case SQ_CF_INST_LOOP_CONTINUE
:
6348 case SQ_CF_INST_LOOP_BREAK
:
6349 pCFInst
->m_Word0
.f
.addr
+= unCFoffset
;
6356 pInst
->m_uIndex
+= unCFoffset
;
6358 pInst
= pInst
->pNextInst
;
6361 /* Put sub into main */
6362 plstCFmain
->pTail
->pNextInst
= plstCFsub
->pHead
;
6363 plstCFmain
->pTail
= plstCFsub
->pTail
;
6364 plstCFmain
->uNumOfNode
+= plstCFsub
->uNumOfNode
;
6366 unCFoffset
+= plstCFsub
->uNumOfNode
;
/* Patch each recorded call site with the final CF offset of its target sub. */
6370 for(i
=0; i
<pAsm
->unCallerArrayPointer
; i
++)
6372 pAsm
->callers
[i
].cf_ptr
->m_Word0
.f
.addr
6373 = pAsm
->subs
[pAsm
->callers
[i
].subDescIndex
].unCFoffset
;
6376 /* remove flags init if they are not used */
6377 if((pAsm
->unCFflags
& HAS_LOOPRET
) == 0)
6379 R700ControlFlowALUClause
* pCF_ALU
;
6380 pInst
= plstCFmain
->pHead
;
6383 if(SIT_CF_ALU
== pInst
->m_ShaderInstType
)
6385 pCF_ALU
= (R700ControlFlowALUClause
*)pInst
;
/* A clause with count == 1 is turned into SQ_CF_INST_NOP; the statements that
 * follow detach the first linked ALU instruction, link the clause to the next
 * one and decrement count (presumably the else branch - the `else` line is
 * not visible in this listing). */
6386 if(1 == pCF_ALU
->m_Word1
.f
.count
)
6388 pCF_ALU
->m_Word1
.f
.cf_inst
= SQ_CF_INST_NOP
;
6392 R700ALUInstruction
* pALU
= pCF_ALU
->m_pLinkedALUInstruction
;
6394 pALU
->m_pLinkedALUClause
= NULL
;
6395 pALU
= (R700ALUInstruction
*)(pALU
->pNextInst
);
6396 pALU
->m_pLinkedALUClause
= pCF_ALU
;
6397 pCF_ALU
->m_pLinkedALUInstruction
= pALU
;
6399 pCF_ALU
->m_Word1
.f
.count
--;
6403 pInst
= pInst
->pNextInst
;
/*
 * Process_Export: fills in one export CF clause on the assembler state.
 *
 * Visible behaviour: a new export clause is requested through
 * check_current_clause(); its type, array_base (chosen per export type and
 * offset by export_starting_index, or SQ_CF_PIXEL_Z for depth), rw_gpr
 * (starting_register_number), burst_count (export_count - 1), cf_inst
 * (SQ_CF_INST_EXPORT) and barrier are set; the output swizzle is either
 * derived per channel from pucOutMask (export_count == 1) or set to X/Y/Z/W;
 * finally the clause is remembered in cf_last_export_ptr.
 *
 * NOTE(review): `type` is used below but its declaration is not visible in
 * this listing - presumably a parameter on a line the listing omits.
 */
6410 GLboolean
Process_Export(r700_AssemblerBase
* pAsm
,
6412 GLuint export_starting_index
,
6413 GLuint export_count
,
6414 GLuint starting_register_number
,
6415 GLboolean is_depth_export
)
6417 unsigned char ucWriteMask
;
6419 check_current_clause(pAsm
, CF_EMPTY_CLAUSE
);
6420 check_current_clause(pAsm
, CF_EXPORT_CLAUSE
); //alloc the cf_current_export_clause_ptr
6422 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.type
= type
;
/* array_base selection per export type. */
6426 case SQ_EXPORT_PIXEL
:
6427 if(GL_TRUE
== is_depth_export
)
6429 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= SQ_CF_PIXEL_Z
;
6433 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= SQ_CF_PIXEL_MRT0
+ export_starting_index
;
6438 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= SQ_CF_POS_0
+ export_starting_index
;
6441 case SQ_EXPORT_PARAM
:
6442 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= 0x0 + export_starting_index
;
6446 radeon_error("Unknown export type: %d\n", type
);
6451 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.rw_gpr
= starting_register_number
;
6453 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.rw_rel
= SQ_ABSOLUTE
;
6454 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.index_gpr
= 0x0;
6455 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.elem_size
= 0x3;
6457 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.burst_count
= (export_count
- 1);
6458 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
6459 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
6460 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT
; // _DONE
6461 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
6462 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Single-register export: the write mask is looked up in pucOutMask at index
 * (starting_register_number - starting_export_register_number) and each of its
 * low four bits picks either the matching channel or SQ_SEL_MASK.
 * NOTE(review): no visible check that starting_register_number is not below
 * starting_export_register_number - confirm against callers. */
6464 if (export_count
== 1)
6466 ucWriteMask
= pAsm
->pucOutMask
[starting_register_number
- pAsm
->starting_export_register_number
];
6467 /* exports Z as a float into Red channel */
/* NOTE(review): the body of this depth-export check is not visible here. */
6468 if (GL_TRUE
== is_depth_export
)
6471 if( (ucWriteMask
& 0x1) != 0)
6473 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_X
;
6477 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_MASK
;
6479 if( ((ucWriteMask
>>1) & 0x1) != 0)
6481 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_Y
;
6485 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_MASK
;
6487 if( ((ucWriteMask
>>2) & 0x1) != 0)
6489 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_Z
;
6493 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_MASK
;
6495 if( ((ucWriteMask
>>3) & 0x1) != 0)
6497 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_W
;
6501 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_MASK
;
6506 // This should only be used if all components for all registers have been written
6507 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_X
;
6508 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_Y
;
6509 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_Z
;
6510 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_W
;
/* Remember this clause so callers can later mark it EXPORT_DONE / end of program. */
6513 pAsm
->cf_last_export_ptr
= pAsm
->cf_current_export_clause_ptr
;
/*
 * Move_Depth_Exports_To_Correct_Channels: emits one MOV that copies the
 * channel chosen by depth_channel_select of the depth export register into
 * that same register's x channel.
 *
 * The Opcode of the current IL instruction (pILInst[uiCurInst]) is saved,
 * temporarily replaced by OPCODE_MOV while the instruction is built and
 * issued through next_ins(), and restored afterwards.
 * NOTE(review): the failure branch after next_ins() and the return statements
 * are not visible in this listing.
 */
6518 GLboolean
Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase
*pAsm
, BITS depth_channel_select
)
6520 gl_inst_opcode Opcode_save
= pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
; //Should be OPCODE_END
6521 pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
= OPCODE_MOV
;
6523 // MOV depth_export_register.hw_depth_channel, depth_export_register.depth_channel_select
6525 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
/* Destination: temporary register depth_export_register_number, absolute addressing. */
6527 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
6528 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
6529 pAsm
->D
.dst
.reg
= pAsm
->depth_export_register_number
;
6531 pAsm
->D
.dst
.writex
= 1; // depth goes in R channel for HW
/* Source: the same register, swizzled by depth_channel_select, with negation cleared. */
6533 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
6534 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
6535 pAsm
->S
[0].src
.reg
= pAsm
->depth_export_register_number
;
6537 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), depth_channel_select
);
6539 noneg_PVSSRC(&(pAsm
->S
[0].src
));
6541 if( GL_FALSE
== next_ins(pAsm
) )
/* Put back the opcode that was replaced above. */
6546 pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
= Opcode_save
;
/*
 * Process_Fragment_Exports: emits the export clauses of a fragment program.
 *
 * Visible behaviour: when depth_export_register_number is >= 0 the depth value
 * is first moved into place (Move_Depth_Exports_To_Correct_Channels with
 * SQ_SEL_Z); FRAG_RESULT_COLOR and FRAG_RESULT_DEPTH are exported through
 * Process_Export when their bits are set in OutputsWritten, using
 * uiFP_OutputMap for the register; if export_count is 0 a fallback pixel
 * export is issued; finally the last export clause, if any, is marked
 * SQ_CF_INST_EXPORT_DONE with end_of_program set.
 *
 * NOTE(review): some Process_Export arguments, the updates of export_count and
 * the return statements are not visible in this listing.
 */
6551 GLboolean
Process_Fragment_Exports(r700_AssemblerBase
*pR700AsmCode
,
6552 GLbitfield OutputsWritten
)
6555 GLuint export_count
= 0;
6557 if(pR700AsmCode
->depth_export_register_number
>= 0)
6559 if( GL_FALSE
== Move_Depth_Exports_To_Correct_Channels(pR700AsmCode
, SQ_SEL_Z
) ) // depth
/* Colour output. */
6565 unBit
= 1 << FRAG_RESULT_COLOR
;
6566 if(OutputsWritten
& unBit
)
6568 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6572 pR700AsmCode
->uiFP_OutputMap
[FRAG_RESULT_COLOR
],
/* Depth output. */
6579 unBit
= 1 << FRAG_RESULT_DEPTH
;
6580 if(OutputsWritten
& unBit
)
6582 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6586 pR700AsmCode
->uiFP_OutputMap
[FRAG_RESULT_DEPTH
],
6593 /* Need to export something, otherwise we'll hang
6594 * results are undefined anyway */
/* NOTE(review): the fallback passes 0 as starting_register_number and ignores
 * the result of Process_Export - confirm this is intended. */
6595 if(export_count
== 0)
6597 Process_Export(pR700AsmCode
, SQ_EXPORT_PIXEL
, 0, 1, 0, GL_FALSE
);
6600 if(pR700AsmCode
->cf_last_export_ptr
!= NULL
)
6602 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
6603 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.end_of_program
= 0x1;
/*
 * Process_Vertex_Exports: emits the export clauses of a vertex program.
 *
 * Visible behaviour: VERT_RESULT_HPOS is exported first and its clause marked
 * SQ_CF_INST_EXPORT_DONE; number_of_exports is then rewritten from
 * export_count; COL0, COL1, FOGC, the TEX0 + i outputs and the outputs from
 * VERT_RESULT_VAR0 up to VERT_RESULT_MAX are exported when their bits are set
 * in OutputsWritten, each using ucVP_OutputMap for the register and advancing
 * export_starting_index; the last export is marked SQ_CF_INST_EXPORT_DONE; a
 * further export using starting_export_register_number gets the constant
 * swizzle (0, 0, 0, 1); finally end_of_program is set on the last export.
 *
 * NOTE(review): several Process_Export arguments, the updates of
 * export_count, the conditions selecting the final two paths and the return
 * statements are not visible in this listing.
 */
6609 GLboolean
Process_Vertex_Exports(r700_AssemblerBase
*pR700AsmCode
,
6610 GLbitfield OutputsWritten
)
6615 GLuint export_starting_index
= 0;
6616 GLuint export_count
= pR700AsmCode
->number_of_exports
;
/* Position output. */
6618 unBit
= 1 << VERT_RESULT_HPOS
;
6619 if(OutputsWritten
& unBit
)
6621 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6623 export_starting_index
,
6625 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_HPOS
],
6633 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
6636 pR700AsmCode
->number_of_exports
= export_count
;
/* Primary colour output. */
6638 unBit
= 1 << VERT_RESULT_COL0
;
6639 if(OutputsWritten
& unBit
)
6641 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6643 export_starting_index
,
6645 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_COL0
],
6651 export_starting_index
++;
/* Secondary colour output. */
6654 unBit
= 1 << VERT_RESULT_COL1
;
6655 if(OutputsWritten
& unBit
)
6657 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6659 export_starting_index
,
6661 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_COL1
],
6667 export_starting_index
++;
/* Fog coordinate output. */
6670 unBit
= 1 << VERT_RESULT_FOGC
;
6671 if(OutputsWritten
& unBit
)
6673 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6675 export_starting_index
,
6677 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_FOGC
],
6683 export_starting_index
++;
/* Texture coordinate outputs (the loop header for i is not visible here). */
6688 unBit
= 1 << (VERT_RESULT_TEX0
+ i
);
6689 if(OutputsWritten
& unBit
)
6691 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6693 export_starting_index
,
6695 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_TEX0
+ i
],
6701 export_starting_index
++;
/* Outputs from VERT_RESULT_VAR0 up to VERT_RESULT_MAX.
 * NOTE(review): the assignment of unBit for this loop is not visible here. */
6705 for(i
=VERT_RESULT_VAR0
; i
<VERT_RESULT_MAX
; i
++)
6708 if(OutputsWritten
& unBit
)
6710 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6712 export_starting_index
,
6714 pR700AsmCode
->ucVP_OutputMap
[i
],
6720 export_starting_index
++;
6724 // At least one param should be exported
6727 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
6731 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6735 pR700AsmCode
->starting_export_register_number
,
/* This export's swizzle is forced to the constants (0, 0, 0, 1). */
6741 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_0
;
6742 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_0
;
6743 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_0
;
6744 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_1
;
6745 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
6748 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.end_of_program
= 0x1;
6753 GLboolean
Clean_Up_Assembler(r700_AssemblerBase
*pR700AsmCode
)
6755 FREE(pR700AsmCode
->pucOutMask
);
6756 FREE(pR700AsmCode
->pInstDeps
);
6758 if(NULL
!= pR700AsmCode
->subs
)
6760 FREE(pR700AsmCode
->subs
);
6762 if(NULL
!= pR700AsmCode
->callers
)
6764 FREE(pR700AsmCode
->callers
);