/*
 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
 */
33 #include "main/mtypes.h"
34 #include "main/imports.h"
36 #include "radeon_debug.h"
37 #include "r600_context.h"
39 #include "r700_assembler.h"
41 BITS
addrmode_PVSDST(PVSDST
* pPVSDST
)
43 return pPVSDST
->addrmode0
| ((BITS
)pPVSDST
->addrmode1
<< 1);
46 void setaddrmode_PVSDST(PVSDST
* pPVSDST
, BITS addrmode
)
48 pPVSDST
->addrmode0
= addrmode
& 1;
49 pPVSDST
->addrmode1
= (addrmode
>> 1) & 1;
52 void nomask_PVSDST(PVSDST
* pPVSDST
)
54 pPVSDST
->writex
= pPVSDST
->writey
= pPVSDST
->writez
= pPVSDST
->writew
= 1;
57 BITS
addrmode_PVSSRC(PVSSRC
* pPVSSRC
)
59 return pPVSSRC
->addrmode0
| ((BITS
)pPVSSRC
->addrmode1
<< 1);
62 void setaddrmode_PVSSRC(PVSSRC
* pPVSSRC
, BITS addrmode
)
64 pPVSSRC
->addrmode0
= addrmode
& 1;
65 pPVSSRC
->addrmode1
= (addrmode
>> 1) & 1;
/*
 * setswizzle_PVSSRC: the visible statement stores swz into
 * pPVSSRC->swizzlew.
 *
 * NOTE(review): the embedded numbering jumps from 69 to 74, so whatever
 * the intervening lines do (presumably the other swizzle fields) is not
 * visible here -- confirm against an intact copy of the file.
 */
69 void setswizzle_PVSSRC(PVSSRC
* pPVSSRC
, BITS swz
)
74 pPVSSRC
->swizzlew
= swz
;
77 void noswizzle_PVSSRC(PVSSRC
* pPVSSRC
)
79 pPVSSRC
->swizzlex
= SQ_SEL_X
;
80 pPVSSRC
->swizzley
= SQ_SEL_Y
;
81 pPVSSRC
->swizzlez
= SQ_SEL_Z
;
82 pPVSSRC
->swizzlew
= SQ_SEL_W
;
86 swizzleagain_PVSSRC(PVSSRC
* pPVSSRC
, BITS x
, BITS y
, BITS z
, BITS w
)
90 case SQ_SEL_X
: x
= pPVSSRC
->swizzlex
;
92 case SQ_SEL_Y
: x
= pPVSSRC
->swizzley
;
94 case SQ_SEL_Z
: x
= pPVSSRC
->swizzlez
;
96 case SQ_SEL_W
: x
= pPVSSRC
->swizzlew
;
103 case SQ_SEL_X
: y
= pPVSSRC
->swizzlex
;
105 case SQ_SEL_Y
: y
= pPVSSRC
->swizzley
;
107 case SQ_SEL_Z
: y
= pPVSSRC
->swizzlez
;
109 case SQ_SEL_W
: y
= pPVSSRC
->swizzlew
;
116 case SQ_SEL_X
: z
= pPVSSRC
->swizzlex
;
118 case SQ_SEL_Y
: z
= pPVSSRC
->swizzley
;
120 case SQ_SEL_Z
: z
= pPVSSRC
->swizzlez
;
122 case SQ_SEL_W
: z
= pPVSSRC
->swizzlew
;
129 case SQ_SEL_X
: w
= pPVSSRC
->swizzlex
;
131 case SQ_SEL_Y
: w
= pPVSSRC
->swizzley
;
133 case SQ_SEL_Z
: w
= pPVSSRC
->swizzlez
;
135 case SQ_SEL_W
: w
= pPVSSRC
->swizzlew
;
140 pPVSSRC
->swizzlex
= x
;
141 pPVSSRC
->swizzley
= y
;
142 pPVSSRC
->swizzlez
= z
;
143 pPVSSRC
->swizzlew
= w
;
146 void neg_PVSSRC(PVSSRC
* pPVSSRC
)
154 void noneg_PVSSRC(PVSSRC
* pPVSSRC
)
162 // negate argument (for SUB instead of ADD and alike)
163 void flipneg_PVSSRC(PVSSRC
* pPVSSRC
)
165 pPVSSRC
->negx
= !pPVSSRC
->negx
;
166 pPVSSRC
->negy
= !pPVSSRC
->negy
;
167 pPVSSRC
->negz
= !pPVSSRC
->negz
;
168 pPVSSRC
->negw
= !pPVSSRC
->negw
;
171 void zerocomp_PVSSRC(PVSSRC
* pPVSSRC
, int c
)
175 case 0: pPVSSRC
->swizzlex
= SQ_SEL_0
; pPVSSRC
->negx
= 0; break;
176 case 1: pPVSSRC
->swizzley
= SQ_SEL_0
; pPVSSRC
->negy
= 0; break;
177 case 2: pPVSSRC
->swizzlez
= SQ_SEL_0
; pPVSSRC
->negz
= 0; break;
178 case 3: pPVSSRC
->swizzlew
= SQ_SEL_0
; pPVSSRC
->negw
= 0; break;
183 void onecomp_PVSSRC(PVSSRC
* pPVSSRC
, int c
)
187 case 0: pPVSSRC
->swizzlex
= SQ_SEL_1
; pPVSSRC
->negx
= 0; break;
188 case 1: pPVSSRC
->swizzley
= SQ_SEL_1
; pPVSSRC
->negy
= 0; break;
189 case 2: pPVSSRC
->swizzlez
= SQ_SEL_1
; pPVSSRC
->negz
= 0; break;
190 case 3: pPVSSRC
->swizzlew
= SQ_SEL_1
; pPVSSRC
->negw
= 0; break;
195 BITS
is_misc_component_exported(VAP_OUT_VTX_FMT_0
* pOutVTXFmt0
)
197 return (pOutVTXFmt0
->point_size
|
198 pOutVTXFmt0
->edge_flag
|
199 pOutVTXFmt0
->rta_index
|
200 pOutVTXFmt0
->kill_flag
|
201 pOutVTXFmt0
->viewport_index
);
/*
 * is_depth_component_exported: the visible expression ORs together the
 * depth, stencil_ref and coverage_to_mask fields of *pFPOutFmt and
 * returns the result.
 *
 * NOTE(review): the embedded numbering skips 208, so one operand of the
 * OR chain may not be visible here -- confirm against an intact copy of
 * the file.
 */
204 BITS
is_depth_component_exported(OUT_FRAGMENT_FMT_0
* pFPOutFmt
)
206 return (pFPOutFmt
->depth
|
207 pFPOutFmt
->stencil_ref
|
209 pFPOutFmt
->coverage_to_mask
);
212 GLboolean
is_reduction_opcode(PVSDWORD
* dest
)
214 if (dest
->dst
.op3
== 0)
216 if ( (dest
->dst
.opcode
== SQ_OP2_INST_DOT4
|| dest
->dst
.opcode
== SQ_OP2_INST_DOT4_IEEE
) )
224 GLuint
GetSurfaceFormat(GLenum eType
, GLuint nChannels
, GLuint
* pClient_size
)
226 GLuint format
= FMT_INVALID
;
227 GLuint uiElemSize
= 0;
232 case GL_UNSIGNED_BYTE
:
237 format
= FMT_8
; break;
239 format
= FMT_8_8
; break;
241 format
= FMT_8_8_8
; break;
243 format
= FMT_8_8_8_8
; break;
249 case GL_UNSIGNED_SHORT
:
255 format
= FMT_16
; break;
257 format
= FMT_16_16
; break;
259 format
= FMT_16_16_16
; break;
261 format
= FMT_16_16_16_16
; break;
267 case GL_UNSIGNED_INT
:
273 format
= FMT_32
; break;
275 format
= FMT_32_32
; break;
277 format
= FMT_32_32_32
; break;
279 format
= FMT_32_32_32_32
; break;
290 format
= FMT_32_FLOAT
; break;
292 format
= FMT_32_32_FLOAT
; break;
294 format
= FMT_32_32_32_FLOAT
; break;
296 format
= FMT_32_32_32_32_FLOAT
; break;
306 format
= FMT_32_FLOAT
; break;
308 format
= FMT_32_32_FLOAT
; break;
310 format
= FMT_32_32_32_FLOAT
; break;
312 format
= FMT_32_32_32_32_FLOAT
; break;
319 //GL_ASSERT_NO_CASE();
322 if(NULL
!= pClient_size
)
324 *pClient_size
= uiElemSize
* nChannels
;
330 unsigned int r700GetNumOperands(r700_AssemblerBase
* pAsm
)
337 switch (pAsm
->D
.dst
.opcode
)
339 case SQ_OP2_INST_ADD
:
340 case SQ_OP2_INST_MUL
:
341 case SQ_OP2_INST_MAX
:
342 case SQ_OP2_INST_MIN
:
343 //case SQ_OP2_INST_MAX_DX10:
344 //case SQ_OP2_INST_MIN_DX10:
345 case SQ_OP2_INST_SETGT
:
346 case SQ_OP2_INST_SETGE
:
347 case SQ_OP2_INST_PRED_SETE
:
348 case SQ_OP2_INST_PRED_SETGT
:
349 case SQ_OP2_INST_PRED_SETGE
:
350 case SQ_OP2_INST_PRED_SETNE
:
351 case SQ_OP2_INST_DOT4
:
352 case SQ_OP2_INST_DOT4_IEEE
:
355 case SQ_OP2_INST_MOV
:
356 case SQ_OP2_INST_FRACT
:
357 case SQ_OP2_INST_FLOOR
:
358 case SQ_OP2_INST_KILLGT
:
359 case SQ_OP2_INST_EXP_IEEE
:
360 case SQ_OP2_INST_LOG_CLAMPED
:
361 case SQ_OP2_INST_LOG_IEEE
:
362 case SQ_OP2_INST_RECIP_IEEE
:
363 case SQ_OP2_INST_RECIPSQRT_IEEE
:
364 case SQ_OP2_INST_FLT_TO_INT
:
365 case SQ_OP2_INST_SIN
:
366 case SQ_OP2_INST_COS
:
369 default: radeon_error(
370 "Need instruction operand number for %x.\n", pAsm
->D
.dst
.opcode
);
376 int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt
, r700_AssemblerBase
* pAsm
, R700_Shader
* pShader
)
380 Init_R700_Shader(pShader
);
381 pAsm
->pR700Shader
= pShader
;
382 pAsm
->currentShaderType
= spt
;
384 pAsm
->cf_last_export_ptr
= NULL
;
386 pAsm
->cf_current_export_clause_ptr
= NULL
;
387 pAsm
->cf_current_alu_clause_ptr
= NULL
;
388 pAsm
->cf_current_tex_clause_ptr
= NULL
;
389 pAsm
->cf_current_vtx_clause_ptr
= NULL
;
390 pAsm
->cf_current_cf_clause_ptr
= NULL
;
392 // No clause has been created yet
393 pAsm
->cf_current_clause_type
= CF_EMPTY_CLAUSE
;
395 pAsm
->number_of_colorandz_exports
= 0;
396 pAsm
->number_of_exports
= 0;
397 pAsm
->number_of_export_opcodes
= 0;
405 pAsm
->uLastPosUpdate
= 0;
407 *(BITS
*) &pAsm
->fp_stOutFmt0
= 0;
411 pAsm
->number_used_registers
= 0;
412 pAsm
->uUsedConsts
= 256;
416 pAsm
->uBoolConsts
= 0;
417 pAsm
->uIntConsts
= 0;
422 pAsm
->fc_stack
[0].type
= FC_NONE
;
424 pAsm
->branch_depth
= 0;
425 pAsm
->max_branch_depth
= 0;
430 pAsm
->aArgSubst
[3] = (-1);
434 for (i
=0; i
<NUMBER_OF_OUTPUT_COLORS
; i
++)
436 pAsm
->color_export_register_number
[i
] = (-1);
440 pAsm
->depth_export_register_number
= (-1);
441 pAsm
->stencil_export_register_number
= (-1);
442 pAsm
->coverage_to_mask_export_register_number
= (-1);
443 pAsm
->mask_export_register_number
= (-1);
445 pAsm
->starting_export_register_number
= 0;
446 pAsm
->starting_vfetch_register_number
= 0;
447 pAsm
->starting_temp_register_number
= 0;
448 pAsm
->uFirstHelpReg
= 0;
451 pAsm
->input_position_is_used
= GL_FALSE
;
452 pAsm
->input_normal_is_used
= GL_FALSE
;
455 for (i
=0; i
<NUMBER_OF_INPUT_COLORS
; i
++)
457 pAsm
->input_color_is_used
[ i
] = GL_FALSE
;
460 for (i
=0; i
<NUMBER_OF_TEXTURE_UNITS
; i
++)
462 pAsm
->input_texture_unit_is_used
[ i
] = GL_FALSE
;
465 for (i
=0; i
<VERT_ATTRIB_MAX
; i
++)
467 pAsm
->vfetch_instruction_ptr_array
[ i
] = NULL
;
470 pAsm
->number_of_inputs
= 0;
475 GLboolean
IsTex(gl_inst_opcode Opcode
)
477 if( (OPCODE_TEX
==Opcode
) || (OPCODE_TXP
==Opcode
) || (OPCODE_TXB
==Opcode
) )
484 GLboolean
IsAlu(gl_inst_opcode Opcode
)
486 //TODO : more for fc and ex for higher spec.
494 int check_current_clause(r700_AssemblerBase
* pAsm
,
495 CF_CLAUSE_TYPE new_clause_type
)
497 if (pAsm
->cf_current_clause_type
!= new_clause_type
)
498 { //Close last open clause
499 switch (pAsm
->cf_current_clause_type
)
502 if ( pAsm
->cf_current_alu_clause_ptr
!= NULL
)
504 pAsm
->cf_current_alu_clause_ptr
= NULL
;
508 if ( pAsm
->cf_current_vtx_clause_ptr
!= NULL
)
510 pAsm
->cf_current_vtx_clause_ptr
= NULL
;
514 if ( pAsm
->cf_current_tex_clause_ptr
!= NULL
)
516 pAsm
->cf_current_tex_clause_ptr
= NULL
;
519 case CF_EXPORT_CLAUSE
:
520 if ( pAsm
->cf_current_export_clause_ptr
!= NULL
)
522 pAsm
->cf_current_export_clause_ptr
= NULL
;
525 case CF_OTHER_CLAUSE
:
526 if ( pAsm
->cf_current_cf_clause_ptr
!= NULL
)
528 pAsm
->cf_current_cf_clause_ptr
= NULL
;
531 case CF_EMPTY_CLAUSE
:
535 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type
);
539 pAsm
->cf_current_clause_type
= CF_EMPTY_CLAUSE
;
542 switch (new_clause_type
)
545 pAsm
->cf_current_clause_type
= CF_ALU_CLAUSE
;
548 pAsm
->cf_current_clause_type
= CF_VTX_CLAUSE
;
551 pAsm
->cf_current_clause_type
= CF_TEX_CLAUSE
;
553 case CF_EXPORT_CLAUSE
:
555 R700ControlFlowSXClause
* pR700ControlFlowSXClause
556 = (R700ControlFlowSXClause
*) CALLOC_STRUCT(R700ControlFlowSXClause
);
558 // Add new export instruction to control flow program
559 if (pR700ControlFlowSXClause
!= 0)
561 pAsm
->cf_current_export_clause_ptr
= pR700ControlFlowSXClause
;
562 Init_R700ControlFlowSXClause(pR700ControlFlowSXClause
);
563 AddCFInstruction( pAsm
->pR700Shader
,
564 (R700ControlFlowInstruction
*)pR700ControlFlowSXClause
);
569 "Error allocating new EXPORT CF instruction in check_current_clause. \n");
572 pAsm
->cf_current_clause_type
= CF_EXPORT_CLAUSE
;
575 case CF_EMPTY_CLAUSE
:
577 case CF_OTHER_CLAUSE
:
578 pAsm
->cf_current_clause_type
= CF_OTHER_CLAUSE
;
582 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type
);
590 GLboolean
add_vfetch_instruction(r700_AssemblerBase
* pAsm
,
591 R700VertexInstruction
* vertex_instruction_ptr
)
593 if( GL_FALSE
== check_current_clause(pAsm
, CF_VTX_CLAUSE
) )
598 if( pAsm
->cf_current_vtx_clause_ptr
== NULL
||
599 ( (pAsm
->cf_current_vtx_clause_ptr
!= NULL
) &&
600 (pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.count
>= GetCFMaxInstructions(pAsm
->cf_current_vtx_clause_ptr
->m_ShaderInstType
)-1)
603 // Create new Vfetch control flow instruction for this new clause
604 pAsm
->cf_current_vtx_clause_ptr
= (R700ControlFlowGenericClause
*) CALLOC_STRUCT(R700ControlFlowGenericClause
);
606 if (pAsm
->cf_current_vtx_clause_ptr
!= NULL
)
608 Init_R700ControlFlowGenericClause(pAsm
->cf_current_vtx_clause_ptr
);
609 AddCFInstruction( pAsm
->pR700Shader
,
610 (R700ControlFlowInstruction
*)pAsm
->cf_current_vtx_clause_ptr
);
614 radeon_error("Could not allocate a new VFetch CF instruction.\n");
618 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.pop_count
= 0x0;
619 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
620 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
621 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.count
= 0x0;
622 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
623 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
624 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_VTX
;
625 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
626 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
628 LinkVertexInstruction(pAsm
->cf_current_vtx_clause_ptr
, vertex_instruction_ptr
);
632 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.count
++;
635 AddVTXInstruction(pAsm
->pR700Shader
, vertex_instruction_ptr
);
640 GLboolean
add_tex_instruction(r700_AssemblerBase
* pAsm
,
641 R700TextureInstruction
* tex_instruction_ptr
)
643 if ( GL_FALSE
== check_current_clause(pAsm
, CF_TEX_CLAUSE
) )
648 if ( pAsm
->cf_current_tex_clause_ptr
== NULL
||
649 ( (pAsm
->cf_current_tex_clause_ptr
!= NULL
) &&
650 (pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.count
>= GetCFMaxInstructions(pAsm
->cf_current_tex_clause_ptr
->m_ShaderInstType
)-1)
653 // new tex cf instruction for this new clause
654 pAsm
->cf_current_tex_clause_ptr
= (R700ControlFlowGenericClause
*) CALLOC_STRUCT(R700ControlFlowGenericClause
);
656 if (pAsm
->cf_current_tex_clause_ptr
!= NULL
)
658 Init_R700ControlFlowGenericClause(pAsm
->cf_current_tex_clause_ptr
);
659 AddCFInstruction( pAsm
->pR700Shader
,
660 (R700ControlFlowInstruction
*)pAsm
->cf_current_tex_clause_ptr
);
664 radeon_error("Could not allocate a new TEX CF instruction.\n");
668 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.pop_count
= 0x0;
669 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
670 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
672 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
673 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
674 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_TEX
;
675 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
676 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.barrier
= 0x0; //0x1;
680 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.count
++;
683 // If this clause constains any TEX instruction that is dependent on a previous instruction,
684 // set the barrier bit
685 if( pAsm
->pInstDeps
[pAsm
->uiCurInst
].nDstDep
> (-1) )
687 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
690 if(NULL
== pAsm
->cf_current_tex_clause_ptr
->m_pLinkedTEXInstruction
)
692 pAsm
->cf_current_tex_clause_ptr
->m_pLinkedTEXInstruction
= tex_instruction_ptr
;
693 tex_instruction_ptr
->m_pLinkedGenericClause
= pAsm
->cf_current_tex_clause_ptr
;
696 AddTEXInstruction(pAsm
->pR700Shader
, tex_instruction_ptr
);
701 GLboolean
assemble_vfetch_instruction(r700_AssemblerBase
* pAsm
,
703 GLuint destination_register
,
704 GLuint number_of_elements
,
705 GLenum dataElementType
,
706 VTX_FETCH_METHOD
* pFetchMethod
)
708 GLuint client_size_inbyte
;
710 GLuint mega_fetch_count
;
711 GLuint is_mega_fetch_flag
;
713 R700VertexGenericFetch
* vfetch_instruction_ptr
;
714 R700VertexGenericFetch
* assembled_vfetch_instruction_ptr
= pAsm
->vfetch_instruction_ptr_array
[ gl_client_id
];
716 if (assembled_vfetch_instruction_ptr
== NULL
)
718 vfetch_instruction_ptr
= (R700VertexGenericFetch
*) CALLOC_STRUCT(R700VertexGenericFetch
);
719 if (vfetch_instruction_ptr
== NULL
)
723 Init_R700VertexGenericFetch(vfetch_instruction_ptr
);
727 vfetch_instruction_ptr
= assembled_vfetch_instruction_ptr
;
730 data_format
= GetSurfaceFormat(dataElementType
, number_of_elements
, &client_size_inbyte
);
732 if(GL_TRUE
== pFetchMethod
->bEnableMini
) //More conditions here
738 mega_fetch_count
= MEGA_FETCH_BYTES
- 1;
739 is_mega_fetch_flag
= 0x1;
740 pFetchMethod
->mega_fetch_remainder
= MEGA_FETCH_BYTES
- client_size_inbyte
;
743 vfetch_instruction_ptr
->m_Word0
.f
.vtx_inst
= SQ_VTX_INST_FETCH
;
744 vfetch_instruction_ptr
->m_Word0
.f
.fetch_type
= SQ_VTX_FETCH_VERTEX_DATA
;
745 vfetch_instruction_ptr
->m_Word0
.f
.fetch_whole_quad
= 0x0;
747 vfetch_instruction_ptr
->m_Word0
.f
.buffer_id
= gl_client_id
;
748 vfetch_instruction_ptr
->m_Word0
.f
.src_gpr
= 0x0;
749 vfetch_instruction_ptr
->m_Word0
.f
.src_rel
= SQ_ABSOLUTE
;
750 vfetch_instruction_ptr
->m_Word0
.f
.src_sel_x
= SQ_SEL_X
;
751 vfetch_instruction_ptr
->m_Word0
.f
.mega_fetch_count
= mega_fetch_count
;
753 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_x
= (number_of_elements
< 1) ? SQ_SEL_0
: SQ_SEL_X
;
754 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_y
= (number_of_elements
< 2) ? SQ_SEL_0
: SQ_SEL_Y
;
755 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_z
= (number_of_elements
< 3) ? SQ_SEL_0
: SQ_SEL_Z
;
756 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_w
= (number_of_elements
< 4) ? SQ_SEL_1
: SQ_SEL_W
;
758 vfetch_instruction_ptr
->m_Word1
.f
.use_const_fields
= 1;
760 // Destination register
761 vfetch_instruction_ptr
->m_Word1_GPR
.f
.dst_gpr
= destination_register
;
762 vfetch_instruction_ptr
->m_Word1_GPR
.f
.dst_rel
= SQ_ABSOLUTE
;
764 vfetch_instruction_ptr
->m_Word2
.f
.offset
= 0;
765 vfetch_instruction_ptr
->m_Word2
.f
.const_buf_no_stride
= 0x0;
767 vfetch_instruction_ptr
->m_Word2
.f
.mega_fetch
= is_mega_fetch_flag
;
769 if (assembled_vfetch_instruction_ptr
== NULL
)
771 if ( GL_FALSE
== add_vfetch_instruction(pAsm
, (R700VertexInstruction
*)vfetch_instruction_ptr
) )
776 if (pAsm
->vfetch_instruction_ptr_array
[ gl_client_id
] != NULL
)
782 pAsm
->vfetch_instruction_ptr_array
[ gl_client_id
] = vfetch_instruction_ptr
;
/*
 * gethelpr: the visible statements copy pAsm->uHelpReg into the local r
 * and, when uHelpReg exceeds pAsm->number_used_registers, raise
 * number_used_registers to uHelpReg.
 *
 * NOTE(review): the embedded numbering skips 792 and 796-798, so any
 * update of uHelpReg and the return statement are not visible here --
 * confirm against an intact copy of the file.
 */
789 GLuint
gethelpr(r700_AssemblerBase
* pAsm
)
791 GLuint r
= pAsm
->uHelpReg
;
793 if (pAsm
->uHelpReg
> pAsm
->number_used_registers
)
795 pAsm
->number_used_registers
= pAsm
->uHelpReg
;
799 void resethelpr(r700_AssemblerBase
* pAsm
)
801 pAsm
->uHelpReg
= pAsm
->uFirstHelpReg
;
804 void checkop_init(r700_AssemblerBase
* pAsm
)
810 pAsm
->aArgSubst
[3] = -1;
813 GLboolean
mov_temp(r700_AssemblerBase
* pAsm
, int src
)
815 GLuint tmp
= gethelpr(pAsm
);
817 //mov src to temp helper gpr.
818 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
820 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
822 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
823 pAsm
->D
.dst
.reg
= tmp
;
825 nomask_PVSDST(&(pAsm
->D
.dst
));
827 if( GL_FALSE
== assemble_src(pAsm
, src
, 0) )
832 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
833 noneg_PVSSRC(&(pAsm
->S
[0].src
));
835 if( GL_FALSE
== next_ins(pAsm
) )
840 pAsm
->aArgSubst
[1 + src
] = tmp
;
845 GLboolean
checkop1(r700_AssemblerBase
* pAsm
)
851 GLboolean
checkop2(r700_AssemblerBase
* pAsm
)
853 GLboolean bSrcConst
[2];
854 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
858 if( (pILInst
->SrcReg
[0].File
== PROGRAM_CONSTANT
) ||
859 (pILInst
->SrcReg
[0].File
== PROGRAM_LOCAL_PARAM
) ||
860 (pILInst
->SrcReg
[0].File
== PROGRAM_ENV_PARAM
) ||
861 (pILInst
->SrcReg
[0].File
== PROGRAM_STATE_VAR
) )
863 bSrcConst
[0] = GL_TRUE
;
867 bSrcConst
[0] = GL_FALSE
;
869 if( (pILInst
->SrcReg
[1].File
== PROGRAM_CONSTANT
) ||
870 (pILInst
->SrcReg
[1].File
== PROGRAM_LOCAL_PARAM
) ||
871 (pILInst
->SrcReg
[1].File
== PROGRAM_ENV_PARAM
) ||
872 (pILInst
->SrcReg
[1].File
== PROGRAM_STATE_VAR
) )
874 bSrcConst
[1] = GL_TRUE
;
878 bSrcConst
[1] = GL_FALSE
;
881 if( (bSrcConst
[0] == GL_TRUE
) && (bSrcConst
[1] == GL_TRUE
) )
883 if(pILInst
->SrcReg
[0].Index
!= pILInst
->SrcReg
[1].Index
)
885 if( GL_FALSE
== mov_temp(pAsm
, 1) )
895 GLboolean
checkop3(r700_AssemblerBase
* pAsm
)
897 GLboolean bSrcConst
[3];
898 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
902 if( (pILInst
->SrcReg
[0].File
== PROGRAM_CONSTANT
) ||
903 (pILInst
->SrcReg
[0].File
== PROGRAM_LOCAL_PARAM
) ||
904 (pILInst
->SrcReg
[0].File
== PROGRAM_ENV_PARAM
) ||
905 (pILInst
->SrcReg
[0].File
== PROGRAM_STATE_VAR
) )
907 bSrcConst
[0] = GL_TRUE
;
911 bSrcConst
[0] = GL_FALSE
;
913 if( (pILInst
->SrcReg
[1].File
== PROGRAM_CONSTANT
) ||
914 (pILInst
->SrcReg
[1].File
== PROGRAM_LOCAL_PARAM
) ||
915 (pILInst
->SrcReg
[1].File
== PROGRAM_ENV_PARAM
) ||
916 (pILInst
->SrcReg
[1].File
== PROGRAM_STATE_VAR
) )
918 bSrcConst
[1] = GL_TRUE
;
922 bSrcConst
[1] = GL_FALSE
;
924 if( (pILInst
->SrcReg
[2].File
== PROGRAM_CONSTANT
) ||
925 (pILInst
->SrcReg
[2].File
== PROGRAM_LOCAL_PARAM
) ||
926 (pILInst
->SrcReg
[2].File
== PROGRAM_ENV_PARAM
) ||
927 (pILInst
->SrcReg
[2].File
== PROGRAM_STATE_VAR
) )
929 bSrcConst
[2] = GL_TRUE
;
933 bSrcConst
[2] = GL_FALSE
;
936 if( (GL_TRUE
== bSrcConst
[0]) &&
937 (GL_TRUE
== bSrcConst
[1]) &&
938 (GL_TRUE
== bSrcConst
[2]) )
940 if( GL_FALSE
== mov_temp(pAsm
, 1) )
944 if( GL_FALSE
== mov_temp(pAsm
, 2) )
951 else if( (GL_TRUE
== bSrcConst
[0]) &&
952 (GL_TRUE
== bSrcConst
[1]) )
954 if(pILInst
->SrcReg
[0].Index
!= pILInst
->SrcReg
[1].Index
)
956 if( GL_FALSE
== mov_temp(pAsm
, 1) )
964 else if ( (GL_TRUE
== bSrcConst
[0]) &&
965 (GL_TRUE
== bSrcConst
[2]) )
967 if(pILInst
->SrcReg
[0].Index
!= pILInst
->SrcReg
[2].Index
)
969 if( GL_FALSE
== mov_temp(pAsm
, 2) )
977 else if( (GL_TRUE
== bSrcConst
[1]) &&
978 (GL_TRUE
== bSrcConst
[2]) )
980 if(pILInst
->SrcReg
[1].Index
!= pILInst
->SrcReg
[2].Index
)
982 if( GL_FALSE
== mov_temp(pAsm
, 2) )
994 GLboolean
assemble_src(r700_AssemblerBase
*pAsm
,
998 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1005 if(pAsm
->aArgSubst
[1+src
] >= 0)
1007 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1008 pAsm
->S
[fld
].src
.rtype
= SRC_REG_TEMPORARY
;
1009 pAsm
->S
[fld
].src
.reg
= pAsm
->aArgSubst
[1+src
];
1013 switch (pILInst
->SrcReg
[src
].File
)
1015 case PROGRAM_TEMPORARY
:
1016 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1017 pAsm
->S
[fld
].src
.rtype
= SRC_REG_TEMPORARY
;
1018 pAsm
->S
[fld
].src
.reg
= pILInst
->SrcReg
[src
].Index
+ pAsm
->starting_temp_register_number
;
1020 case PROGRAM_CONSTANT
:
1021 case PROGRAM_LOCAL_PARAM
:
1022 case PROGRAM_ENV_PARAM
:
1023 case PROGRAM_STATE_VAR
:
1024 if (1 == pILInst
->SrcReg
[src
].RelAddr
)
1026 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_RELATIVE_A0
);
1030 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1033 pAsm
->S
[fld
].src
.rtype
= SRC_REG_CONSTANT
;
1034 pAsm
->S
[fld
].src
.reg
= pILInst
->SrcReg
[src
].Index
;
1037 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1038 pAsm
->S
[fld
].src
.rtype
= SRC_REG_INPUT
;
1039 switch (pAsm
->currentShaderType
)
1042 pAsm
->S
[fld
].src
.reg
= pAsm
->uiFP_AttributeMap
[pILInst
->SrcReg
[src
].Index
];
1045 pAsm
->S
[fld
].src
.reg
= pAsm
->ucVP_AttributeMap
[pILInst
->SrcReg
[src
].Index
];
1050 radeon_error("Invalid source argument type\n");
1055 pAsm
->S
[fld
].src
.swizzlex
= pILInst
->SrcReg
[src
].Swizzle
& 0x7;
1056 pAsm
->S
[fld
].src
.swizzley
= (pILInst
->SrcReg
[src
].Swizzle
>> 3) & 0x7;
1057 pAsm
->S
[fld
].src
.swizzlez
= (pILInst
->SrcReg
[src
].Swizzle
>> 6) & 0x7;
1058 pAsm
->S
[fld
].src
.swizzlew
= (pILInst
->SrcReg
[src
].Swizzle
>> 9) & 0x7;
1060 pAsm
->S
[fld
].src
.negx
= pILInst
->SrcReg
[src
].Negate
& 0x1;
1061 pAsm
->S
[fld
].src
.negy
= (pILInst
->SrcReg
[src
].Negate
>> 1) & 0x1;
1062 pAsm
->S
[fld
].src
.negz
= (pILInst
->SrcReg
[src
].Negate
>> 2) & 0x1;
1063 pAsm
->S
[fld
].src
.negw
= (pILInst
->SrcReg
[src
].Negate
>> 3) & 0x1;
1068 GLboolean
assemble_dst(r700_AssemblerBase
*pAsm
)
1070 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1071 switch (pILInst
->DstReg
.File
)
1073 case PROGRAM_TEMPORARY
:
1074 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1075 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
1076 pAsm
->D
.dst
.reg
= pILInst
->DstReg
.Index
+ pAsm
->starting_temp_register_number
;
1078 case PROGRAM_ADDRESS
:
1079 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1080 pAsm
->D
.dst
.rtype
= DST_REG_A0
;
1081 pAsm
->D
.dst
.reg
= 0;
1083 case PROGRAM_OUTPUT
:
1084 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1085 pAsm
->D
.dst
.rtype
= DST_REG_OUT
;
1086 switch (pAsm
->currentShaderType
)
1089 pAsm
->D
.dst
.reg
= pAsm
->uiFP_OutputMap
[pILInst
->DstReg
.Index
];
1092 pAsm
->D
.dst
.reg
= pAsm
->ucVP_OutputMap
[pILInst
->DstReg
.Index
];
1097 radeon_error("Invalid destination output argument type\n");
1101 pAsm
->D
.dst
.writex
= pILInst
->DstReg
.WriteMask
& 0x1;
1102 pAsm
->D
.dst
.writey
= (pILInst
->DstReg
.WriteMask
>> 1) & 0x1;
1103 pAsm
->D
.dst
.writez
= (pILInst
->DstReg
.WriteMask
>> 2) & 0x1;
1104 pAsm
->D
.dst
.writew
= (pILInst
->DstReg
.WriteMask
>> 3) & 0x1;
1109 GLboolean
tex_dst(r700_AssemblerBase
*pAsm
)
1111 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1113 if(PROGRAM_TEMPORARY
== pILInst
->DstReg
.File
)
1115 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
1116 pAsm
->D
.dst
.reg
= pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.Index
+ pAsm
->starting_temp_register_number
;
1118 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1120 else if(PROGRAM_OUTPUT
== pILInst
->DstReg
.File
)
1122 pAsm
->D
.dst
.rtype
= DST_REG_OUT
;
1123 switch (pAsm
->currentShaderType
)
1126 pAsm
->D
.dst
.reg
= pAsm
->uiFP_OutputMap
[pILInst
->DstReg
.Index
];
1129 pAsm
->D
.dst
.reg
= pAsm
->ucVP_OutputMap
[pILInst
->DstReg
.Index
];
1133 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1137 radeon_error("Invalid destination output argument type\n");
1141 pAsm
->D
.dst
.writex
= pILInst
->DstReg
.WriteMask
& 0x1;
1142 pAsm
->D
.dst
.writey
= (pILInst
->DstReg
.WriteMask
>> 1) & 0x1;
1143 pAsm
->D
.dst
.writez
= (pILInst
->DstReg
.WriteMask
>> 2) & 0x1;
1144 pAsm
->D
.dst
.writew
= (pILInst
->DstReg
.WriteMask
>> 3) & 0x1;
1149 GLboolean
tex_src(r700_AssemblerBase
*pAsm
)
1151 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1153 GLboolean bValidTexCoord
= GL_FALSE
;
1155 switch (pILInst
->SrcReg
[0].File
) {
1156 case PROGRAM_CONSTANT
:
1157 case PROGRAM_LOCAL_PARAM
:
1158 case PROGRAM_ENV_PARAM
:
1159 case PROGRAM_STATE_VAR
:
1160 bValidTexCoord
= GL_TRUE
;
1161 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
1162 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
1163 pAsm
->S
[0].src
.reg
= pAsm
->aArgSubst
[1];
1165 case PROGRAM_TEMPORARY
:
1166 bValidTexCoord
= GL_TRUE
;
1167 pAsm
->S
[0].src
.reg
= pILInst
->SrcReg
[0].Index
+
1168 pAsm
->starting_temp_register_number
;
1169 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
1172 switch (pILInst
->SrcReg
[0].Index
)
1174 case FRAG_ATTRIB_COL0
:
1175 case FRAG_ATTRIB_COL1
:
1176 case FRAG_ATTRIB_TEX0
:
1177 case FRAG_ATTRIB_TEX1
:
1178 case FRAG_ATTRIB_TEX2
:
1179 case FRAG_ATTRIB_TEX3
:
1180 case FRAG_ATTRIB_TEX4
:
1181 case FRAG_ATTRIB_TEX5
:
1182 case FRAG_ATTRIB_TEX6
:
1183 case FRAG_ATTRIB_TEX7
:
1184 bValidTexCoord
= GL_TRUE
;
1185 pAsm
->S
[0].src
.reg
=
1186 pAsm
->uiFP_AttributeMap
[pILInst
->SrcReg
[0].Index
];
1187 pAsm
->S
[0].src
.rtype
= SRC_REG_INPUT
;
1193 if(GL_TRUE
== bValidTexCoord
)
1195 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
1199 radeon_error("Invalid source texcoord for TEX instruction\n");
1203 pAsm
->S
[0].src
.swizzlex
= pILInst
->SrcReg
[0].Swizzle
& 0x7;
1204 pAsm
->S
[0].src
.swizzley
= (pILInst
->SrcReg
[0].Swizzle
>> 3) & 0x7;
1205 pAsm
->S
[0].src
.swizzlez
= (pILInst
->SrcReg
[0].Swizzle
>> 6) & 0x7;
1206 pAsm
->S
[0].src
.swizzlew
= (pILInst
->SrcReg
[0].Swizzle
>> 9) & 0x7;
1208 pAsm
->S
[0].src
.negx
= pILInst
->SrcReg
[0].Negate
& 0x1;
1209 pAsm
->S
[0].src
.negy
= (pILInst
->SrcReg
[0].Negate
>> 1) & 0x1;
1210 pAsm
->S
[0].src
.negz
= (pILInst
->SrcReg
[0].Negate
>> 2) & 0x1;
1211 pAsm
->S
[0].src
.negw
= (pILInst
->SrcReg
[0].Negate
>> 3) & 0x1;
1216 GLboolean
assemble_tex_instruction(r700_AssemblerBase
*pAsm
, GLboolean normalized
)
1218 PVSSRC
* texture_coordinate_source
;
1219 PVSSRC
* texture_unit_source
;
1221 R700TextureInstruction
* tex_instruction_ptr
= (R700TextureInstruction
*) CALLOC_STRUCT(R700TextureInstruction
);
1222 if (tex_instruction_ptr
== NULL
)
1226 Init_R700TextureInstruction(tex_instruction_ptr
);
1228 texture_coordinate_source
= &(pAsm
->S
[0].src
);
1229 texture_unit_source
= &(pAsm
->S
[1].src
);
1231 tex_instruction_ptr
->m_Word0
.f
.tex_inst
= pAsm
->D
.dst
.opcode
;
1232 tex_instruction_ptr
->m_Word0
.f
.bc_frac_mode
= 0x0;
1233 tex_instruction_ptr
->m_Word0
.f
.fetch_whole_quad
= 0x0;
1235 tex_instruction_ptr
->m_Word0
.f
.resource_id
= texture_unit_source
->reg
;
1237 tex_instruction_ptr
->m_Word1
.f
.lod_bias
= 0x0;
1239 tex_instruction_ptr
->m_Word1
.f
.coord_type_x
= SQ_TEX_NORMALIZED
;
1240 tex_instruction_ptr
->m_Word1
.f
.coord_type_y
= SQ_TEX_NORMALIZED
;
1241 tex_instruction_ptr
->m_Word1
.f
.coord_type_z
= SQ_TEX_NORMALIZED
;
1242 tex_instruction_ptr
->m_Word1
.f
.coord_type_w
= SQ_TEX_NORMALIZED
;
1244 /* XXX: UNNORMALIZED tex coords have limited wrap modes */
1245 tex_instruction_ptr
->m_Word1
.f
.coord_type_x
= SQ_TEX_UNNORMALIZED
;
1246 tex_instruction_ptr
->m_Word1
.f
.coord_type_y
= SQ_TEX_UNNORMALIZED
;
1247 tex_instruction_ptr
->m_Word1
.f
.coord_type_z
= SQ_TEX_UNNORMALIZED
;
1248 tex_instruction_ptr
->m_Word1
.f
.coord_type_w
= SQ_TEX_UNNORMALIZED
;
1251 tex_instruction_ptr
->m_Word2
.f
.offset_x
= 0x0;
1252 tex_instruction_ptr
->m_Word2
.f
.offset_y
= 0x0;
1253 tex_instruction_ptr
->m_Word2
.f
.offset_z
= 0x0;
1255 tex_instruction_ptr
->m_Word2
.f
.sampler_id
= texture_unit_source
->reg
;
1258 if ( (pAsm
->D
.dst
.rtype
== DST_REG_TEMPORARY
) ||
1259 (pAsm
->D
.dst
.rtype
== DST_REG_OUT
) )
1261 tex_instruction_ptr
->m_Word0
.f
.src_gpr
= texture_coordinate_source
->reg
;
1262 tex_instruction_ptr
->m_Word0
.f
.src_rel
= SQ_ABSOLUTE
;
1264 tex_instruction_ptr
->m_Word1
.f
.dst_gpr
= pAsm
->D
.dst
.reg
;
1265 tex_instruction_ptr
->m_Word1
.f
.dst_rel
= SQ_ABSOLUTE
;
1267 tex_instruction_ptr
->m_Word1
.f
.dst_sel_x
= (pAsm
->D
.dst
.writex
? texture_unit_source
->swizzlex
: SQ_SEL_MASK
);
1268 tex_instruction_ptr
->m_Word1
.f
.dst_sel_y
= (pAsm
->D
.dst
.writey
? texture_unit_source
->swizzley
: SQ_SEL_MASK
);
1269 tex_instruction_ptr
->m_Word1
.f
.dst_sel_z
= (pAsm
->D
.dst
.writez
? texture_unit_source
->swizzlez
: SQ_SEL_MASK
);
1270 tex_instruction_ptr
->m_Word1
.f
.dst_sel_w
= (pAsm
->D
.dst
.writew
? texture_unit_source
->swizzlew
: SQ_SEL_MASK
);
1273 tex_instruction_ptr
->m_Word2
.f
.src_sel_x
= texture_coordinate_source
->swizzlex
;
1274 tex_instruction_ptr
->m_Word2
.f
.src_sel_y
= texture_coordinate_source
->swizzley
;
1275 tex_instruction_ptr
->m_Word2
.f
.src_sel_z
= texture_coordinate_source
->swizzlez
;
1276 tex_instruction_ptr
->m_Word2
.f
.src_sel_w
= texture_coordinate_source
->swizzlew
;
1280 radeon_error("Only temp destination registers supported for TEX dest regs.\n");
1284 if( GL_FALSE
== add_tex_instruction(pAsm
, tex_instruction_ptr
) )
1292 void initialize(r700_AssemblerBase
*pAsm
)
1294 GLuint cycle
, component
;
1296 for (cycle
=0; cycle
<NUMBER_OF_CYCLES
; cycle
++)
1298 for (component
=0; component
<NUMBER_OF_COMPONENTS
; component
++)
1300 pAsm
->hw_gpr
[cycle
][component
] = (-1);
1303 for (component
=0; component
<NUMBER_OF_COMPONENTS
; component
++)
1305 pAsm
->hw_cfile_addr
[component
] = (-1);
1306 pAsm
->hw_cfile_chan
[component
] = (-1);
/*
 * Encode one source operand of an ALU instruction for a single scalar channel.
 *
 * alu_instruction_ptr:  instruction whose src0/src1 (m_Word0) or src2
 *                       (m_Word1_OP3) fields are written.
 * scalar_channel_index: 0..3 selects which per-channel swizzle and negate
 *                       bit of the source is used; other values fall into
 *                       the default arms below.
 *
 * The body also uses source_index (0, 1 or 2: which operand slot to fill)
 * and pSource (the operand description); their declarations are not visible
 * in this view.
 *
 * The operand is lowered into four values: src_sel, src_rel, src_chan and
 * src_neg, which are then stored into the slot chosen by source_index.
 * Error paths report through radeon_error(); the returned value on each
 * path is not visible here.
 */
1310 GLboolean
assemble_alu_src(R700ALUInstruction
* alu_instruction_ptr
,
1313 BITS scalar_channel_index
)
1320 //--------------------------------------------------------------------------
1321 // Source for operands src0, src1.
1322 // Values [0,127] correspond to GPR[0..127].
1323 // Values [256,511] correspond to cfile constants c[0..255].
1325 //--------------------------------------------------------------------------
1326 // Other special values are shown in the list below.
1328 // 248 SQ_ALU_SRC_0: special constant 0.0.
1329 // 249 SQ_ALU_SRC_1: special constant 1.0 float.
1331 // 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
1332 // 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
1334 // 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
1335 // 253 SQ_ALU_SRC_LITERAL: literal constant.
1337 // 254 SQ_ALU_SRC_PV: previous vector result.
1338 // 255 SQ_ALU_SRC_PS: previous scalar result.
1339 //--------------------------------------------------------------------------
/* Step 1: fetch the swizzle selector that applies to this output channel. */
1341 BITS channel_swizzle
;
1342 switch (scalar_channel_index
)
1344 case 0: channel_swizzle
= pSource
->swizzlex
; break;
1345 case 1: channel_swizzle
= pSource
->swizzley
; break;
1346 case 2: channel_swizzle
= pSource
->swizzlez
; break;
1347 case 3: channel_swizzle
= pSource
->swizzlew
; break;
1348 default: channel_swizzle
= SQ_SEL_MASK
; break;
/* Step 2: src_sel.  Swizzles of 0 / 1 become the inline constants; otherwise
 * the register file decides (TEMP/INPUT use the register number directly,
 * CONSTANT is offset by CFILE_REGISTER_OFFSET). */
1351 if(channel_swizzle
== SQ_SEL_0
)
1353 src_sel
= SQ_ALU_SRC_0
;
1355 else if (channel_swizzle
== SQ_SEL_1
)
1357 src_sel
= SQ_ALU_SRC_1
;
1361 if ( (pSource
->rtype
== SRC_REG_TEMPORARY
) ||
1362 (pSource
->rtype
== SRC_REG_INPUT
)
1365 src_sel
= pSource
->reg
;
1367 else if (pSource
->rtype
== SRC_REG_CONSTANT
)
1369 src_sel
= pSource
->reg
+ CFILE_REGISTER_OFFSET
;
1373 radeon_error("Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.\n",
1374 source_index
, pSource
->rtype
);
/* Step 3: src_rel mirrors the operand's addressing mode. */
1379 if( ADDR_ABSOLUTE
== addrmode_PVSSRC(pSource
) )
1381 src_rel
= SQ_ABSOLUTE
;
1385 src_rel
= SQ_RELATIVE
;
/* Step 4: src_chan is derived from the swizzle selector (the case labels of
 * this switch are not visible in this view). */
1388 switch (channel_swizzle
)
1391 src_chan
= SQ_CHAN_X
;
1394 src_chan
= SQ_CHAN_Y
;
1397 src_chan
= SQ_CHAN_Z
;
1400 src_chan
= SQ_CHAN_W
;
1404 // Does not matter since src_sel controls
1405 src_chan
= SQ_CHAN_X
;
1408 radeon_error("Unknown source select value (%d) in assemble_alu_src().\n", channel_swizzle
);
/* Step 5: src_neg is the per-channel negate bit. */
1413 switch (scalar_channel_index
)
1415 case 0: src_neg
= pSource
->negx
; break;
1416 case 1: src_neg
= pSource
->negy
; break;
1417 case 2: src_neg
= pSource
->negz
; break;
1418 case 3: src_neg
= pSource
->negw
; break;
1419 default: src_neg
= 0; break;
/* Step 6: store the four values into the operand slot named by source_index.
 * src0 and src1 live in m_Word0, src2 lives in m_Word1_OP3. */
1422 switch (source_index
)
1425 alu_instruction_ptr
->m_Word0
.f
.src0_sel
= src_sel
;
1426 alu_instruction_ptr
->m_Word0
.f
.src0_rel
= src_rel
;
1427 alu_instruction_ptr
->m_Word0
.f
.src0_chan
= src_chan
;
1428 alu_instruction_ptr
->m_Word0
.f
.src0_neg
= src_neg
;
1431 alu_instruction_ptr
->m_Word0
.f
.src1_sel
= src_sel
;
1432 alu_instruction_ptr
->m_Word0
.f
.src1_rel
= src_rel
;
1433 alu_instruction_ptr
->m_Word0
.f
.src1_chan
= src_chan
;
1434 alu_instruction_ptr
->m_Word0
.f
.src1_neg
= src_neg
;
1437 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_sel
= src_sel
;
1438 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_rel
= src_rel
;
1439 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_chan
= src_chan
;
1440 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_neg
= src_neg
;
1443 radeon_error("Only three sources allowed in ALU opcodes.\n");
/*
 * Attach an ALU instruction to the shader, opening a new ALU control-flow
 * clause when needed.
 *
 * pAsm:                    assembler state; cf_current_alu_clause_ptr tracks
 *                          the clause currently being filled.
 * alu_instruction_ptr:     instruction to add.
 * contiguous_slots_needed: slots that must still fit in the current clause;
 *                          subtracted from the clause capacity when deciding
 *                          whether to start a new clause.
 *
 * A new clause is started when no clause is open or when the current count
 * has reached GetCFMaxInstructions(...) - contiguous_slots_needed - 1.
 * Allocation failure is reported through radeon_error(); the returned value
 * on each path is not visible in this view.
 */
1451 GLboolean
add_alu_instruction(r700_AssemblerBase
* pAsm
,
1452 R700ALUInstruction
* alu_instruction_ptr
,
1453 GLuint contiguous_slots_needed
)
1455 if( GL_FALSE
== check_current_clause(pAsm
, CF_ALU_CLAUSE
) )
/* NOTE(review): the "!= NULL" test in the second operand is redundant, the
 * first operand of || already handled the NULL case. */
1460 if ( pAsm
->cf_current_alu_clause_ptr
== NULL
||
1461 ( (pAsm
->cf_current_alu_clause_ptr
!= NULL
) &&
1462 (pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
>= (GetCFMaxInstructions(pAsm
->cf_current_alu_clause_ptr
->m_ShaderInstType
)-contiguous_slots_needed
-1) )
1466 //new cf inst for this clause
1467 pAsm
->cf_current_alu_clause_ptr
= (R700ControlFlowALUClause
*) CALLOC_STRUCT(R700ControlFlowALUClause
);
1469 // link the new cf to cf segment
1470 if(NULL
!= pAsm
->cf_current_alu_clause_ptr
)
1472 Init_R700ControlFlowALUClause(pAsm
->cf_current_alu_clause_ptr
);
1473 AddCFInstruction( pAsm
->pR700Shader
,
1474 (R700ControlFlowInstruction
*)pAsm
->cf_current_alu_clause_ptr
);
1478 radeon_error("Could not allocate a new ALU CF instruction.\n");
/* Defaults for a freshly created clause: kcache disabled, empty count,
 * plain ALU clause opcode, barrier set. */
1482 pAsm
->cf_current_alu_clause_ptr
->m_Word0
.f
.kcache_bank0
= 0x0;
1483 pAsm
->cf_current_alu_clause_ptr
->m_Word0
.f
.kcache_bank1
= 0x0;
1484 pAsm
->cf_current_alu_clause_ptr
->m_Word0
.f
.kcache_mode0
= SQ_CF_KCACHE_NOP
;
1486 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.kcache_mode1
= SQ_CF_KCACHE_NOP
;
1487 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.kcache_addr0
= 0x0;
1488 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.kcache_addr1
= 0x0;
1490 //cf_current_alu_clause_ptr->m_Word1.f.count = number_of_scalar_operations - 1;
1491 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
= 0x0;
1492 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_ALU
;
1494 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
1496 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* NOTE(review): presumably this increment belongs to the "reuse existing
 * clause" branch only; the branch structure is not visible here, confirm. */
1500 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
++;
1503 // If this clause contains any instruction that is forward dependent on a TEX instruction,
1504 // set the whole_quad_mode for this clause
1505 if ( pAsm
->pInstDeps
[pAsm
->uiCurInst
].nDstDep
> (-1) )
1507 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x1;
/* When the clause has reached its capacity, force the "last" bit on this instruction. */
1510 if (pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
>= (GetCFMaxInstructions(pAsm
->cf_current_alu_clause_ptr
->m_ShaderInstType
)-1) )
1512 alu_instruction_ptr
->m_Word0
.f
.last
= 1;
/* The first instruction added to a clause is cross-linked with that clause. */
1515 if(NULL
== pAsm
->cf_current_alu_clause_ptr
->m_pLinkedALUInstruction
)
1517 pAsm
->cf_current_alu_clause_ptr
->m_pLinkedALUInstruction
= alu_instruction_ptr
;
1518 alu_instruction_ptr
->m_pLinkedALUClause
= pAsm
->cf_current_alu_clause_ptr
;
1521 AddALUInstruction(pAsm
->pR700Shader
, alu_instruction_ptr
);
/*
 * Read back the encoded select / relative / channel / negate fields of one
 * source operand of an ALU instruction.
 *
 * alu_instruction_ptr: instruction to inspect.
 *
 * The body switches on source_index and writes through psrc_sel, psrc_rel,
 * psrc_chan and psrc_neg; those parameter declarations are not visible in
 * this view.  Operands 0 and 1 are read from m_Word0, operand 2 from
 * m_Word1_OP3, mirroring where assemble_alu_src() stores them.
 */
1526 void get_src_properties(R700ALUInstruction
* alu_instruction_ptr
,
1533 switch (source_index
)
1536 *psrc_sel
= alu_instruction_ptr
->m_Word0
.f
.src0_sel
;
1537 *psrc_rel
= alu_instruction_ptr
->m_Word0
.f
.src0_rel
;
1538 *psrc_chan
= alu_instruction_ptr
->m_Word0
.f
.src0_chan
;
1539 *psrc_neg
= alu_instruction_ptr
->m_Word0
.f
.src0_neg
;
1543 *psrc_sel
= alu_instruction_ptr
->m_Word0
.f
.src1_sel
;
1544 *psrc_rel
= alu_instruction_ptr
->m_Word0
.f
.src1_rel
;
1545 *psrc_chan
= alu_instruction_ptr
->m_Word0
.f
.src1_chan
;
1546 *psrc_neg
= alu_instruction_ptr
->m_Word0
.f
.src1_neg
;
1550 *psrc_sel
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_sel
;
1551 *psrc_rel
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_rel
;
1552 *psrc_chan
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_chan
;
1553 *psrc_neg
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_neg
;
/*
 * Test whether an encoded source select lies strictly between 255 and 512,
 * i.e. in the range the comment block in assemble_alu_src() lists for
 * constant-file registers (256..511).
 *
 * sel: encoded source select value.
 */
1558 int is_cfile(BITS sel
)
1560 if (sel
> 255 && sel
< 512)
/*
 * Test whether an encoded source select names a constant.
 *
 * The visible condition accepts the inline-constant range
 * SQ_ALU_SRC_0 .. SQ_ALU_SRC_LITERAL.  It is an "else if", so an earlier
 * condition exists that is not visible in this view (presumably the
 * constant-file check; confirm).
 *
 * sel: encoded source select value.
 */
1567 int is_const(BITS sel
)
1573 else if(sel
>= SQ_ALU_SRC_0
&& sel
<= SQ_ALU_SRC_LITERAL
)
/*
 * Test whether an encoded source select lies in 0..127, the range the
 * comment block in assemble_alu_src() lists for general purpose registers.
 *
 * sel: encoded source select value.
 *
 * NOTE(review): if BITS is an unsigned type, "sel >= 0" is always true and
 * only the upper bound is effective; confirm against the BITS typedef.
 */
1580 int is_gpr(BITS sel
)
1582 if (sel
>= 0 && sel
< 128)
/*
 * Bank swizzle chosen by check_vector(), indexed by a 3-bit key.
 * The key is built there as 4*is_const(src0) + 2*is_const(src1) +
 * 1*is_const(src2); the trailing comments on each entry spell the key
 * in binary.
 */
1589 const GLuint BANK_SWIZZLE_VEC
[8] = {SQ_ALU_VEC_210
, //000
1590 SQ_ALU_VEC_120
, //001
1591 SQ_ALU_VEC_102
, //010
1593 SQ_ALU_VEC_201
, //011
1594 SQ_ALU_VEC_012
, //100
1595 SQ_ALU_VEC_021
, //101
1597 SQ_ALU_VEC_012
, //110
1598 SQ_ALU_VEC_012
}; //111
/*
 * Bank swizzle chosen by check_scalar(), indexed by the same 3-bit
 * "which sources are constants" key as BANK_SWIZZLE_VEC
 * (src0 -> bit 2, src1 -> bit 1, src2 -> bit 0).
 */
1600 const GLuint BANK_SWIZZLE_SCL
[8] = {SQ_ALU_SCL_210
, //000
1601 SQ_ALU_SCL_122
, //001
1602 SQ_ALU_SCL_122
, //010
1604 SQ_ALU_SCL_221
, //011
1605 SQ_ALU_SCL_212
, //100
1606 SQ_ALU_SCL_122
, //101
1608 SQ_ALU_SCL_122
, //110
1609 SQ_ALU_SCL_122
}; //111
/*
 * Reserve a constant-file read port for the pair (sel, chan).
 *
 * pAsm: assembler state holding hw_cfile_addr[] / hw_cfile_chan[].
 * The body also uses sel and chan (constant address and channel to read);
 * their declarations are not visible in this view.
 *
 * Ports 3 down to 0 are scanned.  A port whose address is negative is free;
 * a port whose address and channel both equal the request is a match.
 * A match needs no work, otherwise a free port is claimed, otherwise an
 * error is reported.  The returned value on each path is not visible here.
 *
 * NOTE(review): only indices 0..3 are scanned, while initialize() clears
 * NUMBER_OF_COMPONENTS entries; confirm these agree.
 */
1611 GLboolean
reserve_cfile(r700_AssemblerBase
* pAsm
,
1615 int res_match
= (-1);
1616 int res_empty
= (-1);
1620 for (res
=3; res
>=0; res
--)
1622 if(pAsm
->hw_cfile_addr
[ res
] < 0)
1626 else if( (pAsm
->hw_cfile_addr
[res
] == (int)sel
)
1628 (pAsm
->hw_cfile_chan
[ res
] == (int) chan
) )
1636 // Read for this scalar component already reserved, nothing to do here.
1639 else if(res_empty
>= 0)
1641 pAsm
->hw_cfile_addr
[ res_empty
] = sel
;
1642 pAsm
->hw_cfile_chan
[ res_empty
] = chan
;
/* NOTE(review): "$sel" and "$chan" are printed literally; the string has no
 * printf conversions and no arguments are passed, so the offending constant
 * is never identified in the log. */
1646 radeon_error("All cfile read ports are used, cannot reference C$sel, channel $chan.\n");
/*
 * Reserve the GPR read port for a given cycle and channel.
 *
 * pAsm:  assembler state holding hw_gpr[cycle][chan].
 * sel:   GPR index that wants to use the port.
 * chan:  channel of the port.
 * cycle: read cycle of the port.
 *
 * A free port (negative entry) is claimed for sel.  A port already held by
 * the same sel is accepted silently; a port held by a different sel is
 * reported as an error.  cycle and chan are used as array indices without a
 * visible range check.  The returned value on each path is not visible in
 * this view.
 */
1652 GLboolean
reserve_gpr(r700_AssemblerBase
* pAsm
, GLuint sel
, GLuint chan
, GLuint cycle
)
1654 if(pAsm
->hw_gpr
[cycle
][chan
] < 0)
1656 pAsm
->hw_gpr
[cycle
][chan
] = sel
;
1658 else if(pAsm
->hw_gpr
[cycle
][chan
] != (int)sel
)
1660 radeon_error("Another scalar operation has already used GPR read port for given channel\n");
/*
 * Look up the read cycle used by a source operand under a scalar bank swizzle.
 *
 * swiz:   one of the SQ_ALU_SCL_* bank swizzle values.
 * sel:    index into the per-swizzle 3-entry table; check_scalar() passes
 *         the source operand index here.
 * pCycle: receives the cycle from the table.
 *
 * An unrecognised swizzle is reported through radeon_error().
 * sel is used as an array index without a visible bounds check, so callers
 * must keep it in 0..2.
 */
1667 GLboolean
cycle_for_scalar_bank_swizzle(const int swiz
, const int sel
, GLuint
* pCycle
)
1671 case SQ_ALU_SCL_210
:
1673 int table
[3] = {2, 1, 0};
1674 *pCycle
= table
[sel
];
1678 case SQ_ALU_SCL_122
:
1680 int table
[3] = {1, 2, 2};
1681 *pCycle
= table
[sel
];
1685 case SQ_ALU_SCL_212
:
1687 int table
[3] = {2, 1, 2};
1688 *pCycle
= table
[sel
];
1692 case SQ_ALU_SCL_221
:
1694 int table
[3] = {2, 2, 1};
1695 *pCycle
= table
[sel
];
1700 radeon_error("Bad Scalar bank swizzle value\n");
/*
 * Look up the read cycle used by a source operand under a vector bank swizzle.
 *
 * swiz:   one of the SQ_ALU_VEC_* bank swizzle values.
 * sel:    index into the per-swizzle 3-entry table; check_vector() passes
 *         the source operand index here.
 * pCycle: receives the cycle from the table.
 *
 * Each table is the digit sequence in the swizzle's name (for example
 * SQ_ALU_VEC_120 maps operands 0,1,2 to cycles 1,2,0).  An unrecognised
 * swizzle is reported through radeon_error().  sel is used as an array
 * index without a visible bounds check, so callers must keep it in 0..2.
 */
1707 GLboolean
cycle_for_vector_bank_swizzle(const int swiz
, const int sel
, GLuint
* pCycle
)
1711 case SQ_ALU_VEC_012
:
1713 int table
[3] = {0, 1, 2};
1714 *pCycle
= table
[sel
];
1717 case SQ_ALU_VEC_021
:
1719 int table
[3] = {0, 2, 1};
1720 *pCycle
= table
[sel
];
1723 case SQ_ALU_VEC_120
:
1725 int table
[3] = {1, 2, 0};
1726 *pCycle
= table
[sel
];
1729 case SQ_ALU_VEC_102
:
1731 int table
[3] = {1, 0, 2};
1732 *pCycle
= table
[sel
];
1735 case SQ_ALU_VEC_201
:
1737 int table
[3] = {2, 0, 1};
1738 *pCycle
= table
[sel
];
1741 case SQ_ALU_VEC_210
:
1743 int table
[3] = {2, 1, 0};
1744 *pCycle
= table
[sel
];
1748 radeon_error("Bad Vec bank swizzle value\n");
/*
 * Choose the bank swizzle for a scalar ALU instruction and reserve the read
 * ports its sources need.
 *
 * pAsm:                assembler state (read-port tables, current opcode).
 * alu_instruction_ptr: instruction whose bank_swizzle field is written and
 *                      whose sources are inspected.
 *
 * Outline of the visible code:
 *   1. Collect sel/chan/rel/neg of each operand via get_src_properties().
 *   2. Build a 3-bit key from which operands are constants and store
 *      BANK_SWIZZLE_SCL[key] into the instruction.
 *   3. First pass over the operands: constants; constant-file operands
 *      reserve a cfile port.
 *   4. Second pass: look up the cycle for the operand and reserve a GPR
 *      port when cycle < const_count.
 *
 * NOTE(review): const_count is initialised to 0 and no increment is visible
 * in this view; if it is never incremented the "cycle < const_count" test
 * can never be true for an unsigned cycle.  Confirm against the full file.
 */
1756 GLboolean
check_scalar(r700_AssemblerBase
* pAsm
,
1757 R700ALUInstruction
* alu_instruction_ptr
)
1760 GLuint bank_swizzle
;
1761 GLuint const_count
= 0;
1770 BITS src_sel
[3] = {0,0,0};
1771 BITS src_chan
[3] = {0,0,0};
1772 BITS src_rel
[3] = {0,0,0};
1773 BITS src_neg
[3] = {0,0,0};
1777 GLuint number_of_operands
= r700GetNumOperands(pAsm
);
1779 for (src
=0; src
<number_of_operands
; src
++)
1781 get_src_properties(alu_instruction_ptr
,
/* Key bits: src0 -> 4, src1 -> 2, src2 -> 1 (set when that source is a constant). */
1790 swizzle_key
= ( (is_const( src_sel
[0] ) ? 4 : 0) +
1791 (is_const( src_sel
[1] ) ? 2 : 0) +
1792 (is_const( src_sel
[2] ) ? 1 : 0) );
1794 alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
= BANK_SWIZZLE_SCL
[ swizzle_key
];
/* Pass 1: constant operands. */
1796 for (src
=0; src
<number_of_operands
; src
++)
1798 sel
= src_sel
[src
];
1799 chan
= src_chan
[src
];
1800 rel
= src_rel
[src
];
1801 neg
= src_neg
[src
];
1803 if (is_const( sel
))
1805 // Any constant, including literal and inline constants
1808 if (is_cfile( sel
))
/* NOTE(review): the result of reserve_cfile() is not checked here, unlike in check_vector(). */
1810 reserve_cfile(pAsm
, sel
, chan
);
/* Pass 2: register operands. */
1816 for (src
=0; src
<number_of_operands
; src
++)
1818 sel
= src_sel
[src
];
1819 chan
= src_chan
[src
];
1820 rel
= src_rel
[src
];
1821 neg
= src_neg
[src
];
1825 bank_swizzle
= alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
;
1827 if( GL_FALSE
== cycle_for_scalar_bank_swizzle(bank_swizzle
, src
, &cycle
) )
1832 if(cycle
< const_count
)
1834 if( GL_FALSE
== reserve_gpr(pAsm
, sel
, chan
, cycle
) )
/*
 * Choose the bank swizzle for a vector ALU instruction and reserve the read
 * ports its sources need.
 *
 * pAsm:                assembler state (read-port tables, current opcode).
 * alu_instruction_ptr: instruction whose bank_swizzle field is written and
 *                      whose sources are inspected.
 *
 * Outline of the visible code:
 *   1. Collect sel/chan/rel/neg of each operand via get_src_properties().
 *   2. Build a 3-bit key from which operands are constants and store
 *      BANK_SWIZZLE_VEC[key] into the instruction.
 *   3. For each operand: look up its cycle and reserve a GPR port, or, for
 *      constants, reserve a cfile port.  A comparison against operand 0's
 *      sel/chan is also made; the rest of that condition and what it guards
 *      are not visible in this view.
 *
 * const_count is declared but no use of it is visible here.
 */
1845 GLboolean
check_vector(r700_AssemblerBase
* pAsm
,
1846 R700ALUInstruction
* alu_instruction_ptr
)
1849 GLuint bank_swizzle
;
1850 GLuint const_count
= 0;
1859 BITS src_sel
[3] = {0,0,0};
1860 BITS src_chan
[3] = {0,0,0};
1861 BITS src_rel
[3] = {0,0,0};
1862 BITS src_neg
[3] = {0,0,0};
1866 GLuint number_of_operands
= r700GetNumOperands(pAsm
);
1868 for (src
=0; src
<number_of_operands
; src
++)
1870 get_src_properties(alu_instruction_ptr
,
/* Key bits: src0 -> 4, src1 -> 2, src2 -> 1 (set when that source is a constant). */
1879 swizzle_key
= ( (is_const( src_sel
[0] ) ? 4 : 0) +
1880 (is_const( src_sel
[1] ) ? 2 : 0) +
1881 (is_const( src_sel
[2] ) ? 1 : 0)
1884 alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
= BANK_SWIZZLE_VEC
[swizzle_key
];
1886 for (src
=0; src
<number_of_operands
; src
++)
1888 sel
= src_sel
[src
];
1889 chan
= src_chan
[src
];
1890 rel
= src_rel
[src
];
1891 neg
= src_neg
[src
];
1894 bank_swizzle
= alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
;
1898 if( GL_FALSE
== cycle_for_vector_bank_swizzle(bank_swizzle
, src
, &cycle
) )
1904 (sel
== src_sel
[0]) &&
1905 (chan
== src_chan
[0]) )
1910 if( GL_FALSE
== reserve_gpr(pAsm
, sel
, chan
, cycle
) )
1916 else if( is_const(sel
) )
1922 if( GL_FALSE
== reserve_cfile(pAsm
, sel
, chan
) )
/*
 * Emit the hardware ALU instruction(s) for the operation currently described
 * by pAsm->D (destination / opcode) and pAsm->S[] (sources).
 *
 * pAsm: assembler state.
 *
 * Outline of the visible code:
 *   - A "math" destination (D.dst.math == 1) yields one scalar instruction,
 *     anything else yields four (one per channel).
 *   - A preliminary read-port check counts, per channel bank, how many
 *     TEMP/INPUT source components are read; more than 3 reads of one bank
 *     sets bSplitInst.
 *   - Reduction opcodes ask add_alu_instruction() for 4 contiguous slots
 *     (reset to 0 at L3857-3858).
 *   - Per emitted instruction: allocate and init it, encode src0 (and src1
 *     when D.dst.math == 0), set index mode, "last" bit, destination GPR and
 *     channel, clamp, then fill either the OP3 word (plus src2) or one of
 *     the two OP2 layouts (.f6 / .f), add it to the clause, and run
 *     check_scalar() or check_vector().
 *
 * NOTE(review): the instruction is heap-allocated with CALLOC_STRUCT before
 * several fallible calls; what happens to it on those error paths is not
 * visible in this view - check for a leak.
 */
1933 GLboolean
assemble_alu_instruction(r700_AssemblerBase
*pAsm
)
1935 GLuint number_of_scalar_operations
;
1936 GLboolean is_single_scalar_operation
;
1937 GLuint scalar_channel_index
;
1939 PVSSRC
* pcurrent_source
;
1940 int current_source_index
;
1941 GLuint contiguous_slots_needed
;
1943 GLuint uNumSrc
= r700GetNumOperands(pAsm
);
1944 GLuint channel_swizzle
, j
;
1945 GLuint chan_counter
[4] = {0, 0, 0, 0};
1946 PVSSRC
* pSource
[3];
1947 GLboolean bSplitInst
= GL_FALSE
;
1949 if (1 == pAsm
->D
.dst
.math
)
1951 is_single_scalar_operation
= GL_TRUE
;
1952 number_of_scalar_operations
= 1;
1956 is_single_scalar_operation
= GL_FALSE
;
1957 number_of_scalar_operations
= 4;
1959 /* check read port, only very preliminary algorithm, not count in
1960 src0/1 same comp case and prev slot repeat case; also not count relative
1961 addressing. TODO: improve performance. */
1962 for(j
=0; j
<uNumSrc
; j
++)
1964 pSource
[j
] = &(pAsm
->S
[j
].src
);
1966 for(scalar_channel_index
=0; scalar_channel_index
<4; scalar_channel_index
++)
1968 for(j
=0; j
<uNumSrc
; j
++)
1970 switch (scalar_channel_index
)
1972 case 0: channel_swizzle
= pSource
[j
]->swizzlex
; break;
1973 case 1: channel_swizzle
= pSource
[j
]->swizzley
; break;
1974 case 2: channel_swizzle
= pSource
[j
]->swizzlez
; break;
1975 case 3: channel_swizzle
= pSource
[j
]->swizzlew
; break;
1976 default: channel_swizzle
= SQ_SEL_MASK
; break;
/* Only register reads (TEMP/INPUT) with a real X..W swizzle consume a bank port. */
1978 if ( ((pSource
[j
]->rtype
== SRC_REG_TEMPORARY
) ||
1979 (pSource
[j
]->rtype
== SRC_REG_INPUT
))
1980 && (channel_swizzle
<= SQ_SEL_W
) )
1982 chan_counter
[channel_swizzle
]++;
1986 if( (chan_counter
[SQ_SEL_X
] > 3)
1987 || (chan_counter
[SQ_SEL_Y
] > 3)
1988 || (chan_counter
[SQ_SEL_Z
] > 3)
1989 || (chan_counter
[SQ_SEL_W
] > 3) ) /* each chan bank has only 3 ports. */
1991 bSplitInst
= GL_TRUE
;
1995 contiguous_slots_needed
= 0;
1997 if(GL_TRUE
== is_reduction_opcode(&(pAsm
->D
)) )
1999 contiguous_slots_needed
= 4;
/* Emit one hardware instruction per scalar operation. */
2004 for (scalar_channel_index
=0;
2005 scalar_channel_index
< number_of_scalar_operations
;
2006 scalar_channel_index
++)
2008 R700ALUInstruction
* alu_instruction_ptr
= (R700ALUInstruction
*) CALLOC_STRUCT(R700ALUInstruction
);
2009 if (alu_instruction_ptr
== NULL
)
2013 Init_R700ALUInstruction(alu_instruction_ptr
);
2016 current_source_index
= 0;
2017 pcurrent_source
= &(pAsm
->S
[0].src
);
2019 if (GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2020 current_source_index
,
2022 scalar_channel_index
) )
2027 if (pAsm
->D
.dst
.math
== 0)
2030 current_source_index
= 1;
2031 pcurrent_source
= &(pAsm
->S
[current_source_index
].src
);
2033 if (GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2034 current_source_index
,
2036 scalar_channel_index
) )
2043 alu_instruction_ptr
->m_Word0
.f
.index_mode
= SQ_INDEX_LOOP
;
/* Scalar or split instructions each end their own group; otherwise only
 * the fourth (index 3) instruction carries the "last" bit. */
2045 if( (is_single_scalar_operation
== GL_TRUE
)
2046 || (GL_TRUE
== bSplitInst
) )
2048 alu_instruction_ptr
->m_Word0
.f
.last
= 1;
2052 alu_instruction_ptr
->m_Word0
.f
.last
= (scalar_channel_index
== 3) ? 1 : 0;
2055 alu_instruction_ptr
->m_Word0
.f
.pred_sel
= 0x0;
2056 alu_instruction_ptr
->m_Word1_OP2
.f
.update_pred
= 0x0;
2057 alu_instruction_ptr
->m_Word1_OP2
.f
.update_execute_mask
= 0x0;
2060 if( (pAsm
->D
.dst
.rtype
== DST_REG_TEMPORARY
) ||
2061 (pAsm
->D
.dst
.rtype
== DST_REG_OUT
) )
2063 alu_instruction_ptr
->m_Word1
.f
.dst_gpr
= pAsm
->D
.dst
.reg
;
2067 radeon_error("Only temp destination registers supported for ALU dest regs.\n");
2071 alu_instruction_ptr
->m_Word1
.f
.dst_rel
= SQ_ABSOLUTE
; //D.rtype
2073 if ( is_single_scalar_operation
== GL_TRUE
)
2075 // Override scalar_channel_index since only one scalar value will be written
2076 if(pAsm
->D
.dst
.writex
)
2078 scalar_channel_index
= 0;
2080 else if(pAsm
->D
.dst
.writey
)
2082 scalar_channel_index
= 1;
2084 else if(pAsm
->D
.dst
.writez
)
2086 scalar_channel_index
= 2;
2088 else if(pAsm
->D
.dst
.writew
)
2090 scalar_channel_index
= 3;
2094 alu_instruction_ptr
->m_Word1
.f
.dst_chan
= scalar_channel_index
;
2096 alu_instruction_ptr
->m_Word1
.f
.clamp
= pAsm
->pILInst
[pAsm
->uiCurInst
].SaturateMode
;
2098 if (pAsm
->D
.dst
.op3
)
2102 alu_instruction_ptr
->m_Word1_OP3
.f
.alu_inst
= pAsm
->D
.dst
.opcode
;
2104 //There's 3rd src for op3
2105 current_source_index
= 2;
2106 pcurrent_source
= &(pAsm
->S
[current_source_index
].src
);
2108 if ( GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2109 current_source_index
,
2111 scalar_channel_index
) )
/* OP2 encoding, ".f6" layout (the condition selecting it is not visible in this view). */
2121 alu_instruction_ptr
->m_Word1_OP2
.f6
.alu_inst
= pAsm
->D
.dst
.opcode
;
2123 alu_instruction_ptr
->m_Word1_OP2
.f6
.src0_abs
= 0x0;
2124 alu_instruction_ptr
->m_Word1_OP2
.f6
.src1_abs
= 0x0;
2126 //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
2127 //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0;
2128 switch (scalar_channel_index
)
2131 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writex
;
2134 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writey
;
2137 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writez
;
2140 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writew
;
2143 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= 1; //SQ_SEL_MASK;
2146 alu_instruction_ptr
->m_Word1_OP2
.f6
.omod
= SQ_ALU_OMOD_OFF
;
/* OP2 encoding, ".f" layout; same fields as the ".f6" variant above. */
2150 alu_instruction_ptr
->m_Word1_OP2
.f
.alu_inst
= pAsm
->D
.dst
.opcode
;
2152 alu_instruction_ptr
->m_Word1_OP2
.f
.src0_abs
= 0x0;
2153 alu_instruction_ptr
->m_Word1_OP2
.f
.src1_abs
= 0x0;
2155 //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2156 //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2157 switch (scalar_channel_index
)
2160 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writex
;
2163 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writey
;
2166 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writez
;
2169 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writew
;
2172 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= 1; //SQ_SEL_MASK;
2175 alu_instruction_ptr
->m_Word1_OP2
.f
.omod
= SQ_ALU_OMOD_OFF
;
2179 if(GL_FALSE
== add_alu_instruction(pAsm
, alu_instruction_ptr
, contiguous_slots_needed
) )
2185 * Judge the type of current instruction, is it vector or scalar
2188 if (is_single_scalar_operation
)
2190 if(GL_FALSE
== check_scalar(pAsm
, alu_instruction_ptr
) )
2197 if(GL_FALSE
== check_vector(pAsm
, alu_instruction_ptr
) )
2203 contiguous_slots_needed
= 0;
/*
 * Emit the instruction currently staged in pAsm->D / pAsm->S[] and reset the
 * staging area for the next one.
 *
 * pAsm: assembler state; pILInst[uiCurInst] is the IL instruction being
 *       translated.
 *
 * A texture IL opcode whose staged hardware opcode is not SQ_OP2_INST_MOV is
 * assembled as a TEX instruction (the second argument is GL_FALSE for
 * TEXTURE_RECT_INDEX targets, GL_TRUE otherwise); everything else goes
 * through assemble_alu_instruction().  For DST_REG_OUT destinations the
 * export write mask pucOutMask[] is updated: 0xF in one branch (the comment
 * says OP3 has no mask), OR-ed with the IL write mask in the other.
 * Finally S[0..2].bits are cleared.  Failures are reported through
 * radeon_error(); the returned value on each path is not visible here.
 */
2209 GLboolean
next_ins(r700_AssemblerBase
*pAsm
)
2211 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
2213 if( GL_TRUE
== IsTex(pILInst
->Opcode
) &&
2214 /* handle const moves to temp register */
2215 !(pAsm
->D
.dst
.opcode
== SQ_OP2_INST_MOV
) )
2217 if (pILInst
->TexSrcTarget
== TEXTURE_RECT_INDEX
) {
2218 if( GL_FALSE
== assemble_tex_instruction(pAsm
, GL_FALSE
) )
2220 radeon_error("Error assembling TEX instruction\n");
2224 if( GL_FALSE
== assemble_tex_instruction(pAsm
, GL_TRUE
) )
2226 radeon_error("Error assembling TEX instruction\n");
2233 if( GL_FALSE
== assemble_alu_instruction(pAsm
) )
2235 radeon_error("Error assembling ALU instruction\n");
/* Track which channels of each export register have been written. */
2240 if(pAsm
->D
.dst
.rtype
== DST_REG_OUT
)
2244 // There is no mask for OP3 instructions, so all channels are written
2245 pAsm
->pucOutMask
[pAsm
->D
.dst
.reg
- pAsm
->starting_export_register_number
] = 0xF;
2249 pAsm
->pucOutMask
[pAsm
->D
.dst
.reg
- pAsm
->starting_export_register_number
]
2250 |= (unsigned char)pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
;
2254 //reset for next inst.
2256 pAsm
->S
[0].bits
= 0;
2257 pAsm
->S
[1].bits
= 0;
2258 pAsm
->S
[2].bits
= 0;
/*
 * Emit a single-scalar math opcode followed by a MOV that replicates the
 * result into the real destination.
 *
 * pAsm:   assembler state.
 * opcode: hardware opcode to emit for the math step.
 *
 * Step 1 computes "opcode tmp.x, a.x" into a helper register obtained from
 * gethelpr(), with D.dst.math set so only one scalar instruction is emitted.
 * Step 2 assembles the real destination and issues SQ_OP2_INST_MOV from the
 * helper register with every swizzle set to X.
 */
2263 GLboolean
assemble_math_function(r700_AssemblerBase
* pAsm
, BITS opcode
)
2269 tmp
= gethelpr(pAsm
);
2271 // opcode tmp.x, a.x
2274 pAsm
->D
.dst
.opcode
= opcode
;
2275 pAsm
->D
.dst
.math
= 1;
2277 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2278 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2279 pAsm
->D
.dst
.reg
= tmp
;
2280 pAsm
->D
.dst
.writex
= 1;
2282 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2287 if ( GL_FALSE
== next_ins(pAsm
) )
2292 // Now replicate result to all necessary channels in destination
2293 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
2295 if( GL_FALSE
== assemble_dst(pAsm
) )
2300 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
/* NOTE(review): a source rtype is assigned DST_REG_TEMPORARY here, while
 * sibling functions (e.g. assemble_CMP, assemble_MAD) use SRC_REG_TEMPORARY
 * for the same purpose.  Presumably both enumerators have the same value;
 * confirm and prefer the SRC_ name. */
2301 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
2302 pAsm
->S
[0].src
.reg
= tmp
;
2304 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
2305 noneg_PVSSRC(&(pAsm
->S
[0].src
));
2307 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * Assemble an absolute-value operation as MAX(src, -src).
 *
 * pAsm: assembler state.
 *
 * Source 1 is a bit-copy of source 0 with its negate bits flipped, and the
 * opcode is SQ_OP2_INST_MAX.
 */
2315 GLboolean
assemble_ABS(r700_AssemblerBase
*pAsm
)
2319 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
2321 if( GL_FALSE
== assemble_dst(pAsm
) )
2325 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2330 pAsm
->S
[1].bits
= pAsm
->S
[0].bits
;
2331 flipneg_PVSSRC(&(pAsm
->S
[1].src
));
2333 if ( GL_FALSE
== next_ins(pAsm
) )
/*
 * Assemble an addition; also handles subtraction.
 *
 * pAsm: assembler state.
 *
 * Emits SQ_OP2_INST_ADD on sources 0 and 1.  When the IL opcode is
 * OPCODE_SUB the negate bits of source 1 are flipped first, turning the
 * add into a subtract.
 */
2341 GLboolean
assemble_ADD(r700_AssemblerBase
*pAsm
)
2343 if( GL_FALSE
== checkop2(pAsm
) )
2348 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_ADD
;
2350 if( GL_FALSE
== assemble_dst(pAsm
) )
2355 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2360 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
2365 if(pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_SUB
)
2367 flipneg_PVSSRC(&(pAsm
->S
[1].src
));
2370 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * Report an IL opcode that this assembler does not implement.
 *
 * opcode_str: name of the opcode, printed in the error message.
 */
2378 GLboolean
assemble_BAD(char *opcode_str
)
2380 radeon_error("Not yet implemented instruction (%s)\n", opcode_str
);
/*
 * Assemble a compare/select using the three-operand SQ_OP3_INST_CNDGE opcode.
 *
 * pAsm: assembler state.
 *
 * When the IL write mask is not 0xF the result is first written, unmasked,
 * to a helper register (per the comment, OP3 has no write mask) and then
 * copied to the real destination with SQ_OP2_INST_MOV.
 * IL sources are loaded as: IL src 0 -> slot 0, IL src 2 -> slot 1,
 * IL src 1 -> slot 2 (the second and third are swapped).
 */
2384 GLboolean
assemble_CMP(r700_AssemblerBase
*pAsm
)
2388 if( GL_FALSE
== checkop3(pAsm
) )
2393 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_CNDGE
;
2394 pAsm
->D
.dst
.op3
= 1;
2398 if(0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
2400 //OP3 has no support for write mask
2401 tmp
= gethelpr(pAsm
);
2403 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2404 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2405 pAsm
->D
.dst
.reg
= tmp
;
2407 nomask_PVSDST(&(pAsm
->D
.dst
));
2411 if( GL_FALSE
== assemble_dst(pAsm
) )
2417 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2422 if( GL_FALSE
== assemble_src(pAsm
, 2, 1) )
2427 if( GL_FALSE
== assemble_src(pAsm
, 1, 2) )
2432 if ( GL_FALSE
== next_ins(pAsm
) )
/* Masked destination: copy the helper register into the real destination. */
2437 if (0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
2439 if( GL_FALSE
== assemble_dst(pAsm
) )
2444 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
2447 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2448 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
2449 pAsm
->S
[0].src
.reg
= tmp
;
2451 noneg_PVSSRC(&(pAsm
->S
[0].src
));
2452 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
2454 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * Assemble a cosine: delegates to assemble_math_function() with
 * SQ_OP2_INST_COS and returns its result.
 *
 * pAsm: assembler state.
 */
2463 GLboolean
assemble_COS(r700_AssemblerBase
*pAsm
)
2465 return assemble_math_function(pAsm
, SQ_OP2_INST_COS
);
/*
 * Assemble the dot-product family using SQ_OP2_INST_DOT4.
 *
 * pAsm: assembler state.
 *
 * The IL opcode selects a fix-up of the fourth component (index 3):
 *   OPCODE_DP3: component 3 of both sources is forced to zero.
 *   OPCODE_DPH: component 3 of source 1 is forced to one.
 * Any other opcode reaching here uses all four components unchanged.
 */
2468 GLboolean
assemble_DOT(r700_AssemblerBase
*pAsm
)
2470 if( GL_FALSE
== checkop2(pAsm
) )
2475 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_DOT4
;
2477 if( GL_FALSE
== assemble_dst(pAsm
) )
2482 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2487 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
2492 if(OPCODE_DP3
== pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
)
2494 zerocomp_PVSSRC(&(pAsm
->S
[0].src
), 3);
2495 zerocomp_PVSSRC(&(pAsm
->S
[1].src
), 3);
2497 else if(pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_DPH
)
2499 onecomp_PVSSRC(&(pAsm
->S
[1].src
), 3);
2502 if ( GL_FALSE
== next_ins(pAsm
) )
/*
 * Assemble the DST (distance vector) operation as a component-wise MUL with
 * selected components forced to one.
 *
 * pAsm: assembler state.
 *
 * Source 0 has components 0 and 3 forced to one; source 1 has components 0
 * and 2 forced to one.  The product is then emitted with SQ_OP2_INST_MUL.
 */
2510 GLboolean
assemble_DST(r700_AssemblerBase
*pAsm
)
2512 if( GL_FALSE
== checkop2(pAsm
) )
2517 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
2519 if( GL_FALSE
== assemble_dst(pAsm
) )
2524 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2529 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
2534 onecomp_PVSSRC(&(pAsm
->S
[0].src
), 0);
2535 onecomp_PVSSRC(&(pAsm
->S
[0].src
), 3);
2537 onecomp_PVSSRC(&(pAsm
->S
[1].src
), 0);
2538 onecomp_PVSSRC(&(pAsm
->S
[1].src
), 2);
2540 if ( GL_FALSE
== next_ins(pAsm
) )
/*
 * Assemble EX2: delegates to assemble_math_function() with
 * SQ_OP2_INST_EXP_IEEE and returns its result.
 *
 * pAsm: assembler state.
 */
2548 GLboolean
assemble_EX2(r700_AssemblerBase
*pAsm
)
2550 return assemble_math_function(pAsm
, SQ_OP2_INST_EXP_IEEE
);
/*
 * Assemble a floor operation: SQ_OP2_INST_FLOOR applied to source 0.
 *
 * pAsm: assembler state.
 */
2553 GLboolean
assemble_FLR(r700_AssemblerBase
*pAsm
)
2557 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FLOOR
;
2559 if ( GL_FALSE
== assemble_dst(pAsm
) )
2564 if ( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2569 if ( GL_FALSE
== next_ins(pAsm
) )
/*
 * Assemble a float-to-integer conversion: delegates to
 * assemble_math_function() with SQ_OP2_INST_FLT_TO_INT and returns its result.
 *
 * pAsm: assembler state.
 */
2577 GLboolean
assemble_FLR_INT(r700_AssemblerBase
*pAsm
)
2579 return assemble_math_function(pAsm
, SQ_OP2_INST_FLT_TO_INT
);
/*
 * Assemble a fractional-part operation: SQ_OP2_INST_FRACT applied to source 0.
 *
 * pAsm: assembler state.
 */
2582 GLboolean
assemble_FRC(r700_AssemblerBase
*pAsm
)
2586 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FRACT
;
2588 if ( GL_FALSE
== assemble_dst(pAsm
) )
2593 if ( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2598 if ( GL_FALSE
== next_ins(pAsm
) )
/*
 * Assemble a fragment kill using SQ_OP2_INST_KILLGT.
 *
 * pAsm: assembler state.
 *
 * All destination write-enable bits are cleared, so no register channel is
 * written.  Source 0 is the constant 0 (every swizzle set to SQ_SEL_0);
 * source 1 is the register named by the IL instruction's DstReg: a
 * temporary (Index + starting_temp_register_number) when DstReg.File is
 * PROGRAM_TEMPORARY, otherwise looked up through uiFP_OutputMap[].
 * After the instruction has been emitted the shader's killIsUsed flag is set.
 */
2606 GLboolean
assemble_KIL(r700_AssemblerBase
*pAsm
)
2610 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_KILLGT
;
2612 if ( GL_FALSE
== assemble_dst(pAsm
) )
2617 pAsm
->D
.dst
.writex
= 0;
2618 pAsm
->D
.dst
.writey
= 0;
2619 pAsm
->D
.dst
.writez
= 0;
2620 pAsm
->D
.dst
.writew
= 0;
/* Source 0: constant zero. */
2622 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2623 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
2624 pAsm
->S
[0].src
.reg
= 0;
2626 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_0
);
2627 noneg_PVSSRC(&(pAsm
->S
[0].src
));
/* Source 1: the register the IL instruction names as its destination. */
2629 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
2631 if(PROGRAM_TEMPORARY
== pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.File
)
2633 pAsm
->S
[1].src
.reg
= pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.Index
+ pAsm
->starting_temp_register_number
;
2637 pAsm
->S
[1].src
.reg
= pAsm
->uiFP_OutputMap
[pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.Index
];
2640 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
2641 noswizzle_PVSSRC(&(pAsm
->S
[1].src
));
2643 if ( GL_FALSE
== next_ins(pAsm
) )
2648 pAsm
->pR700Shader
->killIsUsed
= GL_TRUE
;
/*
 * Assemble LG2: delegates to assemble_math_function() with
 * SQ_OP2_INST_LOG_IEEE and returns its result.
 *
 * pAsm: assembler state.
 */
2653 GLboolean
assemble_LG2(r700_AssemblerBase
*pAsm
)
2655 return assemble_math_function(pAsm
, SQ_OP2_INST_LOG_IEEE
);
/*
 * Assemble a linear interpolation as three hardware instructions that share
 * one helper register (tmp).
 *
 * pAsm: assembler state.
 *
 * Visible sequence:
 *   1. ADD    tmp = IL src1 + (-IL src2)      (slot 1 is negated)
 *   2. MULADD tmp = tmp * IL src0 + IL src2   (slot 0 = tmp, slot 1 = IL
 *      src0, and IL src2 loaded with fix index -1)
 *   3. MOV    dst = tmp
 * which evaluates src0 * (src1 - src2) + src2.
 */
2658 GLboolean
assemble_LRP(r700_AssemblerBase
*pAsm
)
2662 if( GL_FALSE
== checkop3(pAsm
) )
2667 tmp
= gethelpr(pAsm
);
/* Step 1: tmp = src1 - src2. */
2669 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_ADD
;
2671 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2672 pAsm
->D
.dst
.reg
= tmp
;
2673 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2674 nomask_PVSDST(&(pAsm
->D
.dst
));
2677 if( GL_FALSE
== assemble_src(pAsm
, 1, 0) )
2682 if ( GL_FALSE
== assemble_src(pAsm
, 2, 1) )
2687 neg_PVSSRC(&(pAsm
->S
[1].src
));
2689 if( GL_FALSE
== next_ins(pAsm
) )
/* Step 2: tmp = tmp * src0 + src2. */
2694 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
2695 pAsm
->D
.dst
.op3
= 1;
2697 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2698 pAsm
->D
.dst
.reg
= tmp
;
2699 nomask_PVSDST(&(pAsm
->D
.dst
));
2700 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2702 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2703 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
2704 pAsm
->S
[0].src
.reg
= tmp
;
2705 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
2708 if( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
2712 if( GL_FALSE
== assemble_src(pAsm
, 2, -1) )
2717 if( GL_FALSE
== next_ins(pAsm
) )
/* Step 3: copy tmp into the real destination. */
2722 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
2724 if( GL_FALSE
== assemble_dst(pAsm
) )
2729 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2730 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
2731 pAsm
->S
[0].src
.reg
= tmp
;
2732 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
2734 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * Assemble a multiply-add using the three-operand SQ_OP3_INST_MULADD opcode.
 *
 * pAsm: assembler state.
 *
 * The result is routed through a helper register (bReplaceDst) in two cases:
 *   - the destination is a PROGRAM_TEMPORARY whose index equals that of a
 *     PROGRAM_TEMPORARY source (source/destination overlap), or
 *   - the IL write mask is not 0xF (per the comment, OP3 has no write mask).
 * In that case a trailing SQ_OP2_INST_MOV copies the helper register into
 * the real destination.
 */
2742 GLboolean
assemble_MAD(struct r700_AssemblerBase
*pAsm
)
2745 GLboolean bReplaceDst
= GL_FALSE
;
2746 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
2748 if( GL_FALSE
== checkop3(pAsm
) )
2753 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
2754 pAsm
->D
.dst
.op3
= 1;
2758 if(PROGRAM_TEMPORARY
== pILInst
->DstReg
.File
)
2759 { /* TODO : more investigation on MAD src and dst using same register */
2760 for(ii
=0; ii
<3; ii
++)
2762 if( (PROGRAM_TEMPORARY
== pILInst
->SrcReg
[ii
].File
)
2763 && (pILInst
->DstReg
.Index
== pILInst
->SrcReg
[ii
].Index
) )
2765 bReplaceDst
= GL_TRUE
;
2770 if(0xF != pILInst
->DstReg
.WriteMask
)
2771 { /* OP3 has no support for write mask */
2772 bReplaceDst
= GL_TRUE
;
2775 if(GL_TRUE
== bReplaceDst
)
2777 tmp
= gethelpr(pAsm
);
2779 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2780 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2781 pAsm
->D
.dst
.reg
= tmp
;
2783 nomask_PVSDST(&(pAsm
->D
.dst
));
2787 if( GL_FALSE
== assemble_dst(pAsm
) )
2793 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2798 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
2803 if( GL_FALSE
== assemble_src(pAsm
, 2, -1) )
2808 if ( GL_FALSE
== next_ins(pAsm
) )
/* Helper register was used: copy it into the real destination. */
2813 if (GL_TRUE
== bReplaceDst
)
2815 if( GL_FALSE
== assemble_dst(pAsm
) )
2820 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
2823 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2824 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
2825 pAsm
->S
[0].src
.reg
= tmp
;
2827 noneg_PVSSRC(&(pAsm
->S
[0].src
));
2828 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
2830 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * Assemble the LIT (lighting coefficients) operation as a fixed sequence of
 * five hardware instructions.
 *
 * pAsm: assembler state.
 *
 * The real destination and source 0 are assembled once and their register
 * number / type are saved in dstReg, dstType, srcReg, srcType; every step
 * then fills pAsm->D and pAsm->S[] by hand and calls next_ins().
 *
 * Visible sequence:
 *   1. MOV         dst.xw = 1.0   (all swizzles SQ_SEL_1)
 *   2. MAX         dst.y  = max(src.x, 0.0)
 *   3. LOG_CLAMPED dst.x  = log(src.y)   (math op; written to .x because
 *                                         .w already holds 1)
 *   4. MUL_LIT     tmp.x  = MUL_LIT(src.w, dst.x, src.x)   (op3)
 *   5. EXP_IEEE    dst.z  = exp(tmp.x)   (math op)
 *
 * Negation is cleared on every source in the sequence.
 *
 * NOTE(review): D.dst.math and D.dst.op3 are set in steps 3 to 5 with no
 * visible reset in this function; whether next_ins() or its callees clear
 * them cannot be told from this view - confirm.
 * NOTE(review): step 4 reads the destination register back with rtype
 * SRC_REG_TEMPORARY regardless of dstType - confirm that is intended for
 * output registers.
 */
2840 GLboolean
assemble_LIT(r700_AssemblerBase
*pAsm
)
2842 unsigned int dstReg
;
2843 unsigned int dstType
;
2844 unsigned int srcReg
;
2845 unsigned int srcType
;
2847 int tmp
= gethelpr(pAsm
);
2849 if( GL_FALSE
== assemble_dst(pAsm
) )
2853 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2857 dstReg
= pAsm
->D
.dst
.reg
;
2858 dstType
= pAsm
->D
.dst
.rtype
;
2859 srcReg
= pAsm
->S
[0].src
.reg
;
2860 srcType
= pAsm
->S
[0].src
.rtype
;
2862 /* dst.xw, <- 1.0 */
2863 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
2864 pAsm
->D
.dst
.rtype
= dstType
;
2865 pAsm
->D
.dst
.reg
= dstReg
;
2866 pAsm
->D
.dst
.writex
= 1;
2867 pAsm
->D
.dst
.writey
= 0;
2868 pAsm
->D
.dst
.writez
= 0;
2869 pAsm
->D
.dst
.writew
= 1;
2870 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
2871 pAsm
->S
[0].src
.reg
= tmp
;
2872 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2873 noneg_PVSSRC(&(pAsm
->S
[0].src
));
2874 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_1
;
2875 pAsm
->S
[0].src
.swizzley
= SQ_SEL_1
;
2876 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_1
;
2877 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_1
;
2878 if( GL_FALSE
== next_ins(pAsm
) )
2883 /* dst.y = max(src.x, 0.0) */
2884 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
2885 pAsm
->D
.dst
.rtype
= dstType
;
2886 pAsm
->D
.dst
.reg
= dstReg
;
2887 pAsm
->D
.dst
.writex
= 0;
2888 pAsm
->D
.dst
.writey
= 1;
2889 pAsm
->D
.dst
.writez
= 0;
2890 pAsm
->D
.dst
.writew
= 0;
2891 pAsm
->S
[0].src
.rtype
= srcType
;
2892 pAsm
->S
[0].src
.reg
= srcReg
;
2893 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2894 noneg_PVSSRC(&(pAsm
->S
[0].src
));
2895 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_X
;
2896 pAsm
->S
[0].src
.swizzley
= SQ_SEL_X
;
2897 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_X
;
2898 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_X
;
2899 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
2900 pAsm
->S
[1].src
.reg
= tmp
;
2901 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
2902 noneg_PVSSRC(&(pAsm
->S
[1].src
));
2903 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_0
;
2904 pAsm
->S
[1].src
.swizzley
= SQ_SEL_0
;
2905 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_0
;
2906 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_0
;
2907 if( GL_FALSE
== next_ins(pAsm
) )
2912 /* before: dst.w = log(src.y)
2913 * after : dst.x = log(src.y)
2914 * why change dest register is that dst.w has been initialized as 1 before
2916 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_LOG_CLAMPED
;
2917 pAsm
->D
.dst
.math
= 1;
2918 pAsm
->D
.dst
.rtype
= dstType
;
2919 pAsm
->D
.dst
.reg
= dstReg
;
2920 pAsm
->D
.dst
.writex
= 1;
2921 pAsm
->D
.dst
.writey
= 0;
2922 pAsm
->D
.dst
.writez
= 0;
2923 pAsm
->D
.dst
.writew
= 0;
2924 pAsm
->S
[0].src
.rtype
= srcType
;
2925 pAsm
->S
[0].src
.reg
= srcReg
;
2926 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2927 noneg_PVSSRC(&(pAsm
->S
[0].src
));
2928 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_Y
;
2929 pAsm
->S
[0].src
.swizzley
= SQ_SEL_Y
;
2930 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_Y
;
2931 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_Y
;
2932 if( GL_FALSE
== next_ins(pAsm
) )
2937 /* before: tmp.x = amd MUL_LIT(src.w, dst.w, src.x ) */
2938 /* after : tmp.x = amd MUL_LIT(src.w, dst.x, src.x ) */
2939 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MUL_LIT
;
2940 pAsm
->D
.dst
.op3
= 1;
2941 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2942 pAsm
->D
.dst
.reg
= tmp
;
2943 pAsm
->D
.dst
.writex
= 1;
2944 pAsm
->D
.dst
.writey
= 0;
2945 pAsm
->D
.dst
.writez
= 0;
2946 pAsm
->D
.dst
.writew
= 0;
2948 pAsm
->S
[0].src
.rtype
= srcType
;
2949 pAsm
->S
[0].src
.reg
= srcReg
;
2950 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2951 noneg_PVSSRC(&(pAsm
->S
[0].src
));
2952 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_W
;
2953 pAsm
->S
[0].src
.swizzley
= SQ_SEL_W
;
2954 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_W
;
2955 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_W
;
2957 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
2958 pAsm
->S
[1].src
.reg
= dstReg
;
2959 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
2960 noneg_PVSSRC(&(pAsm
->S
[1].src
));
2961 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_X
;
2962 pAsm
->S
[1].src
.swizzley
= SQ_SEL_X
;
2963 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_X
;
2964 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_X
;
2966 pAsm
->S
[2].src
.rtype
= srcType
;
2967 pAsm
->S
[2].src
.reg
= srcReg
;
2968 setaddrmode_PVSSRC(&(pAsm
->S
[2].src
), ADDR_ABSOLUTE
);
2969 noneg_PVSSRC(&(pAsm
->S
[2].src
));
2970 pAsm
->S
[2].src
.swizzlex
= SQ_SEL_X
;
2971 pAsm
->S
[2].src
.swizzley
= SQ_SEL_X
;
2972 pAsm
->S
[2].src
.swizzlez
= SQ_SEL_X
;
2973 pAsm
->S
[2].src
.swizzlew
= SQ_SEL_X
;
2975 if( GL_FALSE
== next_ins(pAsm
) )
2980 /* dst.z = exp(tmp.x) */
2981 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
2982 pAsm
->D
.dst
.math
= 1;
2983 pAsm
->D
.dst
.rtype
= dstType
;
2984 pAsm
->D
.dst
.reg
= dstReg
;
2985 pAsm
->D
.dst
.writex
= 0;
2986 pAsm
->D
.dst
.writey
= 0;
2987 pAsm
->D
.dst
.writez
= 1;
2988 pAsm
->D
.dst
.writew
= 0;
2990 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
2991 pAsm
->S
[0].src
.reg
= tmp
;
2992 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2993 noneg_PVSSRC(&(pAsm
->S
[0].src
));
2994 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_X
;
2995 pAsm
->S
[0].src
.swizzley
= SQ_SEL_X
;
2996 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_X
;
2997 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_X
;
2999 if( GL_FALSE
== next_ins(pAsm
) )
/* Assemble a two-operand MAX instruction.
 *
 * Runs checkop2(), selects SQ_OP2_INST_MAX as the destination opcode,
 * assembles the destination and source operands 0 and 1, then calls
 * next_ins(). Every step is compared against GL_FALSE; the failure branches
 * and the final return are not visible in this listing.
 */
3007 GLboolean
assemble_MAX(r700_AssemblerBase
*pAsm
)
3009 if( GL_FALSE
== checkop2(pAsm
) )
3014 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
3016 if( GL_FALSE
== assemble_dst(pAsm
) )
3021 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3026 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3031 if( GL_FALSE
== next_ins(pAsm
) )
/* Assemble a two-operand MIN instruction.
 *
 * Same sequence as assemble_MAX, with SQ_OP2_INST_MIN as the opcode:
 * checkop2(), destination, source operands 0 and 1, next_ins(). Failure
 * branches are not visible in this listing.
 */
3039 GLboolean
assemble_MIN(r700_AssemblerBase
*pAsm
)
3041 if( GL_FALSE
== checkop2(pAsm
) )
3046 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MIN
;
3048 if( GL_FALSE
== assemble_dst(pAsm
) )
3053 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3058 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3063 if( GL_FALSE
== next_ins(pAsm
) )
/* Assemble a MOV instruction.
 *
 * Selects SQ_OP2_INST_MOV, assembles the destination and source operand 0,
 * then calls next_ins(). Failure branches are not visible in this listing.
 */
3071 GLboolean
assemble_MOV(r700_AssemblerBase
*pAsm
)
3075 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3077 if (GL_FALSE
== assemble_dst(pAsm
))
3082 if (GL_FALSE
== assemble_src(pAsm
, 0, -1))
3087 if ( GL_FALSE
== next_ins(pAsm
) )
/* Assemble a two-operand MUL instruction.
 *
 * Runs checkop2(), selects SQ_OP2_INST_MUL, assembles the destination and
 * source operands 0 and 1, then calls next_ins(). Failure branches are not
 * visible in this listing.
 */
3095 GLboolean
assemble_MUL(r700_AssemblerBase
*pAsm
)
3097 if( GL_FALSE
== checkop2(pAsm
) )
3102 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
3104 if( GL_FALSE
== assemble_dst(pAsm
) )
3109 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3114 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3119 if( GL_FALSE
== next_ins(pAsm
) )
/* Assemble POW as a four-instruction sequence through a helper register:
 *
 *   1. LOG_IEEE  tmp <- source operand 0
 *   2. MUL       tmp <- tmp.x * source operand 1
 *   3. EXP_IEEE  tmp <- tmp.x
 *   4. MOV       dst <- tmp.x   (destination comes from assemble_dst)
 *
 * i.e. a^b is computed as exp2(b * log2(a)). The helper register is obtained
 * from gethelpr(). Each next_ins()/assemble_* result is compared against
 * GL_FALSE; the failure branches are not visible in this listing.
 */
3127 GLboolean
assemble_POW(r700_AssemblerBase
*pAsm
)
3133 tmp
= gethelpr(pAsm
);
3135 // LG2 tmp.x, a.swizzle
3136 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_LOG_IEEE
;
3137 pAsm
->D
.dst
.math
= 1;
3139 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3140 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3141 pAsm
->D
.dst
.reg
= tmp
;
3142 nomask_PVSDST(&(pAsm
->D
.dst
));
3144 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3149 if( GL_FALSE
== next_ins(pAsm
) )
3154 // MUL tmp.x, tmp.x, b.swizzle
3155 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
3157 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3158 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3159 pAsm
->D
.dst
.reg
= tmp
;
3160 nomask_PVSDST(&(pAsm
->D
.dst
));
3162 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3163 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3164 pAsm
->S
[0].src
.reg
= tmp
;
3165 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3166 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3168 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3173 if( GL_FALSE
== next_ins(pAsm
) )
3178 // EX2 tmp, tmp.x  (the result stays in the helper register; the MOV below copies it to dst)
3180 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
3181 pAsm
->D
.dst
.math
= 1;
3183 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3184 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3185 pAsm
->D
.dst
.reg
= tmp
;
3186 nomask_PVSDST(&(pAsm
->D
.dst
));
3188 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3189 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3190 pAsm
->S
[0].src
.reg
= tmp
;
3191 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3192 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3194 if( GL_FALSE
== next_ins(pAsm
) )
3199 // Now replicate result to all necessary channels in destination
3200 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3202 if( GL_FALSE
== assemble_dst(pAsm
) )
3207 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
/* NOTE(review): a DST_* constant is assigned to a source rtype here, while the
 * earlier source setups in this function use SRC_REG_TEMPORARY -- confirm the
 * two constants have the same value. */
3208 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3209 pAsm
->S
[0].src
.reg
= tmp
;
3211 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3212 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3214 if( GL_FALSE
== next_ins(pAsm
) )
/* Assemble RCP by delegating to assemble_math_function() with
 * SQ_OP2_INST_RECIP_IEEE; returns that call's result. */
3222 GLboolean
assemble_RCP(r700_AssemblerBase
*pAsm
)
3224 return assemble_math_function(pAsm
, SQ_OP2_INST_RECIP_IEEE
);
/* Assemble RSQ by delegating to assemble_math_function() with
 * SQ_OP2_INST_RECIPSQRT_IEEE; returns that call's result. */
3227 GLboolean
assemble_RSQ(r700_AssemblerBase
*pAsm
)
3229 return assemble_math_function(pAsm
, SQ_OP2_INST_RECIPSQRT_IEEE
);
/* Assemble SIN by delegating to assemble_math_function() with
 * SQ_OP2_INST_SIN; returns that call's result. */
3232 GLboolean
assemble_SIN(r700_AssemblerBase
*pAsm
)
3234 return assemble_math_function(pAsm
, SQ_OP2_INST_SIN
);
/* Assemble SCS (sine/cosine) as three instructions through a helper register:
 *
 *   1. COS  tmp (writex set) <- source operand 0
 *   2. SIN  tmp (writey set) <- source operand 0
 *   3. MOV  dst              <- tmp, with the z and w source selects forced to 0
 *
 * The helper register comes from gethelpr(). Each next_ins()/assemble_*
 * result is compared against GL_FALSE; the failure branches are not visible
 * in this listing.
 */
3237 GLboolean
assemble_SCS(r700_AssemblerBase
*pAsm
)
3243 tmp
= gethelpr(pAsm
);
/* COS into the x channel of tmp */
3246 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_COS
;
3247 pAsm
->D
.dst
.math
= 1;
3249 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3250 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3251 pAsm
->D
.dst
.reg
= tmp
;
3252 pAsm
->D
.dst
.writex
= 1;
3254 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3259 if ( GL_FALSE
== next_ins(pAsm
) )
/* SIN into the y channel of tmp */
3265 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_SIN
;
3266 pAsm
->D
.dst
.math
= 1;
3268 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3269 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3270 pAsm
->D
.dst
.reg
= tmp
;
3271 pAsm
->D
.dst
.writey
= 1;
3273 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3278 if( GL_FALSE
== next_ins(pAsm
) )
3283 // MOV dst.mask, tmp
3284 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3286 if( GL_FALSE
== assemble_dst(pAsm
) )
3291 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
/* NOTE(review): a DST_* constant is assigned to a source rtype here --
 * confirm it has the same value as SRC_REG_TEMPORARY. */
3292 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3293 pAsm
->S
[0].src
.reg
= tmp
;
3295 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
/* only x (cos) and y (sin) were computed, so z and w read constant 0 */
3296 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_0
;
3297 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_0
;
3299 if ( GL_FALSE
== next_ins(pAsm
) )
/* Assemble SGE (set on greater-or-equal).
 *
 * Runs checkop2(), selects SQ_OP2_INST_SETGE, assembles the destination and
 * source operands 0 and 1, then calls next_ins(). Failure branches are not
 * visible in this listing.
 */
3307 GLboolean
assemble_SGE(r700_AssemblerBase
*pAsm
)
3309 if( GL_FALSE
== checkop2(pAsm
) )
3314 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_SETGE
;
3316 if( GL_FALSE
== assemble_dst(pAsm
) )
3321 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3326 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3331 if( GL_FALSE
== next_ins(pAsm
) )
/* Assemble SLT (set on less-than) using the SETGT opcode.
 *
 * Runs checkop2(), selects SQ_OP2_INST_SETGT, assembles the destination and
 * both sources, then calls next_ins(). Unlike the other two-operand handlers
 * here, the third argument to assemble_src() is 1 for operand 0 and 0 for
 * operand 1 instead of -1.
 * NOTE(review): presumably that argument picks the hardware source slot, so
 * the operands are swapped and "a < b" is emitted as "b > a" -- confirm
 * against assemble_src(). Failure branches are not visible in this listing.
 */
3339 GLboolean
assemble_SLT(r700_AssemblerBase
*pAsm
)
3341 if( GL_FALSE
== checkop2(pAsm
) )
3346 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_SETGT
;
3348 if( GL_FALSE
== assemble_dst(pAsm
) )
3353 if( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
3358 if( GL_FALSE
== assemble_src(pAsm
, 1, 0) )
3363 if( GL_FALSE
== next_ins(pAsm
) )
/* Handler for the STP instruction. No body statements are visible in this
 * listing, and the only call site in AssembleInstr is commented out. */
3371 GLboolean
assemble_STP(r700_AssemblerBase
*pAsm
)
/* Assemble a texture sampling instruction.
 *
 * Steps visible in this listing:
 *   - Classify the register file of source operand 0. The constant/parameter/
 *     state files set src_const to GL_TRUE, and in that case mov_temp(pAsm, 0)
 *     is called before sampling.
 *   - Switch on the IL opcode. The visible arms select SQ_TEX_INST_SAMPLE or
 *     report an error (TXB unsupported, or not a texture op). The case labels
 *     themselves are not visible here.
 *   - Load source slot 1 with the texture unit id (TexSrcUnit) and an identity
 *     X/Y/Z/W swizzle.
 *   - Call tex_dst(), tex_src() and next_ins(), each compared against
 *     GL_FALSE; the failure branches are not visible here.
 */
3376 GLboolean
assemble_TEX(r700_AssemblerBase
*pAsm
)
3378 GLboolean src_const
;
3380 switch (pAsm
->pILInst
[pAsm
->uiCurInst
].SrcReg
[0].File
)
3382 case PROGRAM_CONSTANT
:
3383 case PROGRAM_LOCAL_PARAM
:
3384 case PROGRAM_ENV_PARAM
:
3385 case PROGRAM_STATE_VAR
:
3386 src_const
= GL_TRUE
;
3388 case PROGRAM_TEMPORARY
:
3391 src_const
= GL_FALSE
;
3395 if (GL_TRUE
== src_const
)
3397 if ( GL_FALSE
== mov_temp(pAsm
, 0) )
3401 switch (pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
)
3404 pAsm
->D
.dst
.opcode
= SQ_TEX_INST_SAMPLE
;
3407 radeon_error("do not support TXB yet\n");
3411 /* TODO : tex proj version : divide first 3 components by 4th */
3412 pAsm
->D
.dst
.opcode
= SQ_TEX_INST_SAMPLE
;
3415 radeon_error("Internal error: bad texture op (not TEX)\n");
3420 // Set src1 to tex unit id
3421 pAsm
->S
[1].src
.reg
= pAsm
->pILInst
[pAsm
->uiCurInst
].TexSrcUnit
;
3422 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
3424 //No sw info from mesa compiler, so hard code here.
3425 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_X
;
3426 pAsm
->S
[1].src
.swizzley
= SQ_SEL_Y
;
3427 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_Z
;
3428 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_W
;
3430 if( GL_FALSE
== tex_dst(pAsm
) )
3435 if( GL_FALSE
== tex_src(pAsm
) )
3440 if ( GL_FALSE
== next_ins(pAsm
) )
/* Assemble XPD (cross product) as MUL + MULADD, plus an optional MOV.
 *
 *   1. MUL    tmp <- src0.zxy0 * src1.yzx0
 *   2. MULADD out <- src0.yzx0 * src1.zxy0 + (-tmp)
 *      "out" is a helper register when the IL write mask is not 0xF,
 *      otherwise the real destination from assemble_dst().
 *   3. When the write mask is not 0xF: MOV dst <- tmp, applying the mask.
 *
 * NOTE(review): when the write mask is not 0xF, `tmp` is overwritten by a
 * second gethelpr() call (line 3489) before it is used as the third MULADD
 * source (line 3521, commented "result0"). If gethelpr() returns a different
 * register on each call, the MULADD no longer reads the product from step 1
 * but reads its own destination. Confirm gethelpr() semantics; a separate
 * variable for the second helper may be needed.
 *
 * Each next_ins()/assemble_* result is compared against GL_FALSE; the failure
 * branches and the else keyword are not visible in this listing.
 */
3448 GLboolean
assemble_XPD(r700_AssemblerBase
*pAsm
)
3452 if( GL_FALSE
== checkop2(pAsm
) )
3457 tmp
= gethelpr(pAsm
);
/* first product, written to all channels of tmp */
3459 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
3461 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3462 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3463 pAsm
->D
.dst
.reg
= tmp
;
3464 nomask_PVSDST(&(pAsm
->D
.dst
));
3466 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3471 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3476 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_Y
, SQ_SEL_0
);
3477 swizzleagain_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Y
, SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_0
);
3479 if( GL_FALSE
== next_ins(pAsm
) )
/* second product minus the first, as a three-operand MULADD */
3484 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
3485 pAsm
->D
.dst
.op3
= 1;
/* partial write mask: compute into a helper register, then MOV at the end */
3487 if(0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
3489 tmp
= gethelpr(pAsm
);
3491 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3492 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3493 pAsm
->D
.dst
.reg
= tmp
;
3495 nomask_PVSDST(&(pAsm
->D
.dst
));
3499 if( GL_FALSE
== assemble_dst(pAsm
) )
3505 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3510 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3515 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Y
, SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_0
);
3516 swizzleagain_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_Y
, SQ_SEL_0
);
3518 // result1 + (neg) result0
3519 setaddrmode_PVSSRC(&(pAsm
->S
[2].src
),ADDR_ABSOLUTE
);
3520 pAsm
->S
[2].src
.rtype
= SRC_REG_TEMPORARY
;
3521 pAsm
->S
[2].src
.reg
= tmp
;
3523 neg_PVSSRC(&(pAsm
->S
[2].src
));
3524 noswizzle_PVSSRC(&(pAsm
->S
[2].src
));
3526 if( GL_FALSE
== next_ins(pAsm
) )
/* partial write mask: copy the helper register to the real destination */
3532 if(0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
3534 if( GL_FALSE
== assemble_dst(pAsm
) )
3539 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3541 // Use tmp as source
3542 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3543 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3544 pAsm
->S
[0].src
.reg
= tmp
;
3546 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3547 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3549 if( GL_FALSE
== next_ins(pAsm
) )
/* Handler for an EXPORT pseudo-instruction. No body statements are visible in
 * this listing, and the only call site in AssembleInstr is commented out. */
3558 GLboolean
assemble_EXPORT(r700_AssemblerBase
*pAsm
)
/* Handler for the IF instruction, called from AssembleInstr. No body
 * statements are visible in this listing. */
3563 GLboolean
assemble_IF(r700_AssemblerBase
*pAsm
)
/* Handler for the ENDIF instruction, called from AssembleInstr. No body
 * statements are visible in this listing. */
3568 GLboolean
assemble_ENDIF(r700_AssemblerBase
*pAsm
)
/* Translate a Mesa IL instruction stream into R700 assembler state.
 *
 * Stores pILInst in pR700AsmCode, then loops i over [0, uiNumberInsts). Each
 * iteration records i in uiCurInst and switches on pILInst[i].Opcode to call
 * the matching assemble_* handler. Opcodes without a handler (ARL, ARR, EXP,
 * LOG, ELSE) report "Not yet implemented" through radeon_error(), and an
 * unrecognised opcode reports "internal: unknown instruction".
 *
 * The case labels, the failure branches after each handler and the return
 * statements are not visible in this listing.
 */
3573 GLboolean
AssembleInstr(GLuint uiNumberInsts
,
3574 struct prog_instruction
*pILInst
,
3575 r700_AssemblerBase
*pR700AsmCode
)
3579 pR700AsmCode
->pILInst
= pILInst
;
3580 for(i
=0; i
<uiNumberInsts
; i
++)
3582 pR700AsmCode
->uiCurInst
= i
;
3584 switch (pILInst
[i
].Opcode
)
3587 if ( GL_FALSE
== assemble_ABS(pR700AsmCode
) )
3592 if ( GL_FALSE
== assemble_ADD(pR700AsmCode
) )
3597 radeon_error("Not yet implemented instruction OPCODE_ARL \n");
3598 //if ( GL_FALSE == assemble_BAD("ARL") )
3602 radeon_error("Not yet implemented instruction OPCODE_ARR \n");
3603 //if ( GL_FALSE == assemble_BAD("ARR") )
3608 if ( GL_FALSE
== assemble_CMP(pR700AsmCode
) )
3612 if ( GL_FALSE
== assemble_COS(pR700AsmCode
) )
3619 if ( GL_FALSE
== assemble_DOT(pR700AsmCode
) )
3624 if ( GL_FALSE
== assemble_DST(pR700AsmCode
) )
3629 if ( GL_FALSE
== assemble_EX2(pR700AsmCode
) )
3633 radeon_error("Not yet implemented instruction OPCODE_EXP \n");
3634 //if ( GL_FALSE == assemble_BAD("EXP") )
3636 break; // approx of EX2
3639 if ( GL_FALSE
== assemble_FLR(pR700AsmCode
) )
3643 // if ( GL_FALSE == assemble_FLR_INT() )
3648 if ( GL_FALSE
== assemble_FRC(pR700AsmCode
) )
3653 if ( GL_FALSE
== assemble_KIL(pR700AsmCode
) )
3657 if ( GL_FALSE
== assemble_LG2(pR700AsmCode
) )
3661 if ( GL_FALSE
== assemble_LIT(pR700AsmCode
) )
3665 if ( GL_FALSE
== assemble_LRP(pR700AsmCode
) )
3669 radeon_error("Not yet implemented instruction OPCODE_LOG \n");
3670 //if ( GL_FALSE == assemble_BAD("LOG") )
3672 break; // approx of LG2
3675 if ( GL_FALSE
== assemble_MAD(pR700AsmCode
) )
3679 if ( GL_FALSE
== assemble_MAX(pR700AsmCode
) )
3683 if ( GL_FALSE
== assemble_MIN(pR700AsmCode
) )
3688 if ( GL_FALSE
== assemble_MOV(pR700AsmCode
) )
3692 if ( GL_FALSE
== assemble_MUL(pR700AsmCode
) )
3697 if ( GL_FALSE
== assemble_POW(pR700AsmCode
) )
3701 if ( GL_FALSE
== assemble_RCP(pR700AsmCode
) )
3705 if ( GL_FALSE
== assemble_RSQ(pR700AsmCode
) )
3709 if ( GL_FALSE
== assemble_SIN(pR700AsmCode
) )
3713 if ( GL_FALSE
== assemble_SCS(pR700AsmCode
) )
3718 if ( GL_FALSE
== assemble_SGE(pR700AsmCode
) )
3722 if ( GL_FALSE
== assemble_SLT(pR700AsmCode
) )
3727 // if ( GL_FALSE == assemble_STP(pR700AsmCode) )
/* This arm reuses assemble_MOV and then looks ahead: if instruction i+1
 * exists, is not OPCODE_END and is a texture op (IsTex), its nDstDep entry
 * in pInstDeps is set to i+1. The opcode this arm belongs to is not visible
 * in this listing. */
3732 if ( GL_FALSE
== assemble_MOV(pR700AsmCode
) )
3738 if( (i
+1)<uiNumberInsts
)
3740 if(OPCODE_END
!= pILInst
[i
+1].Opcode
)
3742 if( GL_TRUE
== IsTex(pILInst
[i
+1].Opcode
) )
3744 pR700AsmCode
->pInstDeps
[i
+1].nDstDep
= i
+1; //=1?
3754 if ( GL_FALSE
== assemble_TEX(pR700AsmCode
) )
3759 if ( GL_FALSE
== assemble_XPD(pR700AsmCode
) )
3764 if ( GL_FALSE
== assemble_IF(pR700AsmCode
) )
3768 radeon_error("Not yet implemented instruction OPCODE_ELSE \n");
3769 //if ( GL_FALSE == assemble_BAD("ELSE") )
3773 if ( GL_FALSE
== assemble_ENDIF(pR700AsmCode
) )
3777 //case OPCODE_EXPORT:
3778 // if ( GL_FALSE == assemble_EXPORT() )
3783 //pR700AsmCode->uiCurInst = i;
3784 //This is a reminder that if a later export includes depth/stencil
3785 //export, we need a mov to re-arrange DST channel, where using a
3786 //pseudo inst, we will use this end inst to do it.
3790 radeon_error("internal: unknown instruction\n");
/* Fill in one CF export clause.
 *
 * Makes an export clause current through check_current_clause(), then
 * populates pAsm->cf_current_export_clause_ptr:
 *   - Word0: export type; array_base chosen from the export type
 *     (SQ_CF_PIXEL_Z for a pixel depth export, otherwise SQ_CF_PIXEL_MRT0,
 *     SQ_CF_POS_0 or 0 plus export_starting_index); rw_gpr =
 *     starting_register_number; absolute addressing; elem_size = 0x3.
 *   - Word1: burst_count = export_count - 1, cf_inst = SQ_CF_INST_EXPORT,
 *     barrier = 1; end_of_program, valid_pixel_mode and whole_quad_mode
 *     cleared.
 *   - Word1_SWIZ: for a single-register export the per-channel selects follow
 *     the register's bits in pAsm->pucOutMask, with unwritten channels getting
 *     SQ_SEL_MASK; otherwise the identity X/Y/Z/W selects.
 * Finally records the clause in pAsm->cf_last_export_ptr.
 *
 * An unknown export type is reported via radeon_error(). The `type` parameter
 * declaration, the switch header, the else keywords and the return statements
 * are not visible in this listing.
 */
3798 GLboolean
Process_Export(r700_AssemblerBase
* pAsm
,
3800 GLuint export_starting_index
,
3801 GLuint export_count
,
3802 GLuint starting_register_number
,
3803 GLboolean is_depth_export
)
3805 unsigned char ucWriteMask
;
3807 check_current_clause(pAsm
, CF_EMPTY_CLAUSE
);
3808 check_current_clause(pAsm
, CF_EXPORT_CLAUSE
); //alloc the cf_current_export_clause_ptr
3810 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.type
= type
;
3814 case SQ_EXPORT_PIXEL
:
3815 if(GL_TRUE
== is_depth_export
)
3817 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= SQ_CF_PIXEL_Z
;
3821 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= SQ_CF_PIXEL_MRT0
+ export_starting_index
;
3826 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= SQ_CF_POS_0
+ export_starting_index
;
3829 case SQ_EXPORT_PARAM
:
3830 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= 0x0 + export_starting_index
;
3834 radeon_error("Unknown export type: %d\n", type
);
3839 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.rw_gpr
= starting_register_number
;
3841 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.rw_rel
= SQ_ABSOLUTE
;
3842 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.index_gpr
= 0x0;
3843 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.elem_size
= 0x3;
/* the field stores the count minus one */
3845 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.burst_count
= (export_count
- 1);
3846 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
3847 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
3848 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT
; // _DONE
3849 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
3850 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* single register: honour that register's recorded write mask per channel */
3852 if (export_count
== 1)
3854 ucWriteMask
= pAsm
->pucOutMask
[starting_register_number
- pAsm
->starting_export_register_number
];
3855 /* exports Z as a float into Red channel */
3856 if (GL_TRUE
== is_depth_export
)
3859 if( (ucWriteMask
& 0x1) != 0)
3861 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_X
;
3865 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_MASK
;
3867 if( ((ucWriteMask
>>1) & 0x1) != 0)
3869 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_Y
;
3873 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_MASK
;
3875 if( ((ucWriteMask
>>2) & 0x1) != 0)
3877 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_Z
;
3881 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_MASK
;
3883 if( ((ucWriteMask
>>3) & 0x1) != 0)
3885 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_W
;
3889 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_MASK
;
3894 // This should only be used if all components for all registers have been written
3895 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_X
;
3896 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_Y
;
3897 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_Z
;
3898 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_W
;
/* remember this clause so callers can later mark it EXPORT_DONE / end of program */
3901 pAsm
->cf_last_export_ptr
= pAsm
->cf_current_export_clause_ptr
;
/* Emit a MOV that copies the depth value into the x channel of the depth
 * export register.
 *
 * The opcode of the current IL instruction is saved and temporarily replaced
 * with OPCODE_MOV. A MOV is then built whose destination and source are both
 * pAsm->depth_export_register_number, with writex set on the destination and
 * the source swizzle set to depth_channel_select. After next_ins() the saved
 * opcode is restored. The failure branch after next_ins() and the return are
 * not visible in this listing.
 */
3906 GLboolean
Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase
*pAsm
, BITS depth_channel_select
)
3908 gl_inst_opcode Opcode_save
= pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
; //Should be OPCODE_END
3909 pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
= OPCODE_MOV
;
3911 // MOV depth_export_register.hw_depth_channel, depth_export_register.depth_channel_select
3913 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3915 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3916 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3917 pAsm
->D
.dst
.reg
= pAsm
->depth_export_register_number
;
3919 pAsm
->D
.dst
.writex
= 1; // depth goes in R channel for HW
3921 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
/* NOTE(review): a DST_* constant is assigned to a source rtype here --
 * confirm it has the same value as SRC_REG_TEMPORARY. */
3922 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3923 pAsm
->S
[0].src
.reg
= pAsm
->depth_export_register_number
;
3925 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), depth_channel_select
);
3927 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3929 if( GL_FALSE
== next_ins(pAsm
) )
/* put back the opcode that was temporarily replaced with OPCODE_MOV */
3934 pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
= Opcode_save
;
/* Emit the export clauses for a fragment program.
 *
 * If depth_export_register_number is non-negative, the depth value is first
 * moved into place with Move_Depth_Exports_To_Correct_Channels(..., SQ_SEL_Z).
 * Process_Export() is then called for FRAG_RESULT_COLOR and for
 * FRAG_RESULT_DEPTH when the matching bit is set in OutputsWritten, using the
 * register from uiFP_OutputMap. If any export was recorded
 * (cf_last_export_ptr != NULL), the last one is turned into
 * SQ_CF_INST_EXPORT_DONE and flagged as end of program.
 *
 * Several Process_Export() arguments, the failure branches and the return are
 * not visible in this listing.
 */
3939 GLboolean
Process_Fragment_Exports(r700_AssemblerBase
*pR700AsmCode
,
3940 GLbitfield OutputsWritten
)
3944 if(pR700AsmCode
->depth_export_register_number
>= 0)
3946 if( GL_FALSE
== Move_Depth_Exports_To_Correct_Channels(pR700AsmCode
, SQ_SEL_Z
) ) // depth
/* colour output */
3952 unBit
= 1 << FRAG_RESULT_COLOR
;
3953 if(OutputsWritten
& unBit
)
3955 if( GL_FALSE
== Process_Export(pR700AsmCode
,
3959 pR700AsmCode
->uiFP_OutputMap
[FRAG_RESULT_COLOR
],
/* depth output */
3965 unBit
= 1 << FRAG_RESULT_DEPTH
;
3966 if(OutputsWritten
& unBit
)
3968 if( GL_FALSE
== Process_Export(pR700AsmCode
,
3972 pR700AsmCode
->uiFP_OutputMap
[FRAG_RESULT_DEPTH
],
/* close the export sequence on the last clause that was emitted */
3979 if(pR700AsmCode
->cf_last_export_ptr
!= NULL
)
3981 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
3982 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.end_of_program
= 0x1;
/* Emit the export clauses for a vertex program.
 *
 * Visible sequence:
 *   - VERT_RESULT_HPOS: if written, exported through Process_Export() and the
 *     clause is marked SQ_CF_INST_EXPORT_DONE.
 *   - VERT_RESULT_COL0, COL1, FOGC and each VERT_RESULT_TEX0 + i: if written,
 *     exported at the current export_starting_index, which is then
 *     incremented.
 *   - The last export is marked SQ_CF_INST_EXPORT_DONE.
 *   - A further Process_Export() call on starting_export_register_number gets
 *     constant selects (0, 0, 0, 1) and EXPORT_DONE. Per the "At least one
 *     param should be exported" comment this looks like a fallback for when
 *     no parameter was written, but the guarding condition is not visible
 *     here.
 *   - The final export clause is flagged as end of program.
 *
 * NOTE(review): cf_last_export_ptr is dereferenced at line 4108 without a
 * visible NULL check, unlike Process_Fragment_Exports -- confirm a preceding
 * export is always emitted. Several Process_Export() arguments, the TEX loop
 * header, the failure branches and the return are not visible in this
 * listing.
 */
3988 GLboolean
Process_Vertex_Exports(r700_AssemblerBase
*pR700AsmCode
,
3989 GLbitfield OutputsWritten
)
3994 GLuint export_starting_index
= 0;
3995 GLuint export_count
= pR700AsmCode
->number_of_exports
;
/* position */
3997 unBit
= 1 << VERT_RESULT_HPOS
;
3998 if(OutputsWritten
& unBit
)
4000 if( GL_FALSE
== Process_Export(pR700AsmCode
,
4002 export_starting_index
,
4004 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_HPOS
],
4012 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
4015 pR700AsmCode
->number_of_exports
= export_count
;
/* primary colour */
4017 unBit
= 1 << VERT_RESULT_COL0
;
4018 if(OutputsWritten
& unBit
)
4020 if( GL_FALSE
== Process_Export(pR700AsmCode
,
4022 export_starting_index
,
4024 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_COL0
],
4030 export_starting_index
++;
/* secondary colour */
4033 unBit
= 1 << VERT_RESULT_COL1
;
4034 if(OutputsWritten
& unBit
)
4036 if( GL_FALSE
== Process_Export(pR700AsmCode
,
4038 export_starting_index
,
4040 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_COL1
],
4046 export_starting_index
++;
/* fog coordinate */
4049 unBit
= 1 << VERT_RESULT_FOGC
;
4050 if(OutputsWritten
& unBit
)
4052 if( GL_FALSE
== Process_Export(pR700AsmCode
,
4054 export_starting_index
,
4056 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_FOGC
],
4062 export_starting_index
++;
/* texture coordinate set i (the loop header is not visible in this listing) */
4067 unBit
= 1 << (VERT_RESULT_TEX0
+ i
);
4068 if(OutputsWritten
& unBit
)
4070 if( GL_FALSE
== Process_Export(pR700AsmCode
,
4072 export_starting_index
,
4074 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_TEX0
+ i
],
4080 export_starting_index
++;
4084 // At least one param should be exported
4087 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
4091 if( GL_FALSE
== Process_Export(pR700AsmCode
,
4095 pR700AsmCode
->starting_export_register_number
,
/* this export carries the constant vector (0, 0, 0, 1) */
4101 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_0
;
4102 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_0
;
4103 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_0
;
4104 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_1
;
4105 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
4108 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.end_of_program
= 0x1;
/* Release the assembler's heap buffers: the output write-mask table
 * (pucOutMask) and the per-instruction dependency table (pInstDeps) are
 * passed to FREE(). Any statements after these two calls are not visible in
 * this listing. */
4113 GLboolean
Clean_Up_Assembler(r700_AssemblerBase
*pR700AsmCode
)
4115 FREE(pR700AsmCode
->pucOutMask
);
4116 FREE(pR700AsmCode
->pInstDeps
);