2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
33 #include "main/mtypes.h"
34 #include "main/imports.h"
36 #include "radeon_debug.h"
37 #include "r600_context.h"
39 #include "r700_assembler.h"
41 BITS
addrmode_PVSDST(PVSDST
* pPVSDST
)
43 return pPVSDST
->addrmode0
| ((BITS
)pPVSDST
->addrmode1
<< 1);
46 void setaddrmode_PVSDST(PVSDST
* pPVSDST
, BITS addrmode
)
48 pPVSDST
->addrmode0
= addrmode
& 1;
49 pPVSDST
->addrmode1
= (addrmode
>> 1) & 1;
52 void nomask_PVSDST(PVSDST
* pPVSDST
)
54 pPVSDST
->writex
= pPVSDST
->writey
= pPVSDST
->writez
= pPVSDST
->writew
= 1;
57 BITS
addrmode_PVSSRC(PVSSRC
* pPVSSRC
)
59 return pPVSSRC
->addrmode0
| ((BITS
)pPVSSRC
->addrmode1
<< 1);
62 void setaddrmode_PVSSRC(PVSSRC
* pPVSSRC
, BITS addrmode
)
64 pPVSSRC
->addrmode0
= addrmode
& 1;
65 pPVSSRC
->addrmode1
= (addrmode
>> 1) & 1;
69 void setswizzle_PVSSRC(PVSSRC
* pPVSSRC
, BITS swz
)
74 pPVSSRC
->swizzlew
= swz
;
77 void noswizzle_PVSSRC(PVSSRC
* pPVSSRC
)
79 pPVSSRC
->swizzlex
= SQ_SEL_X
;
80 pPVSSRC
->swizzley
= SQ_SEL_Y
;
81 pPVSSRC
->swizzlez
= SQ_SEL_Z
;
82 pPVSSRC
->swizzlew
= SQ_SEL_W
;
86 swizzleagain_PVSSRC(PVSSRC
* pPVSSRC
, BITS x
, BITS y
, BITS z
, BITS w
)
90 case SQ_SEL_X
: x
= pPVSSRC
->swizzlex
;
92 case SQ_SEL_Y
: x
= pPVSSRC
->swizzley
;
94 case SQ_SEL_Z
: x
= pPVSSRC
->swizzlez
;
96 case SQ_SEL_W
: x
= pPVSSRC
->swizzlew
;
103 case SQ_SEL_X
: y
= pPVSSRC
->swizzlex
;
105 case SQ_SEL_Y
: y
= pPVSSRC
->swizzley
;
107 case SQ_SEL_Z
: y
= pPVSSRC
->swizzlez
;
109 case SQ_SEL_W
: y
= pPVSSRC
->swizzlew
;
116 case SQ_SEL_X
: z
= pPVSSRC
->swizzlex
;
118 case SQ_SEL_Y
: z
= pPVSSRC
->swizzley
;
120 case SQ_SEL_Z
: z
= pPVSSRC
->swizzlez
;
122 case SQ_SEL_W
: z
= pPVSSRC
->swizzlew
;
129 case SQ_SEL_X
: w
= pPVSSRC
->swizzlex
;
131 case SQ_SEL_Y
: w
= pPVSSRC
->swizzley
;
133 case SQ_SEL_Z
: w
= pPVSSRC
->swizzlez
;
135 case SQ_SEL_W
: w
= pPVSSRC
->swizzlew
;
140 pPVSSRC
->swizzlex
= x
;
141 pPVSSRC
->swizzley
= y
;
142 pPVSSRC
->swizzlez
= z
;
143 pPVSSRC
->swizzlew
= w
;
146 void neg_PVSSRC(PVSSRC
* pPVSSRC
)
154 void noneg_PVSSRC(PVSSRC
* pPVSSRC
)
162 // negate argument (for SUB instead of ADD and alike)
163 void flipneg_PVSSRC(PVSSRC
* pPVSSRC
)
165 pPVSSRC
->negx
= !pPVSSRC
->negx
;
166 pPVSSRC
->negy
= !pPVSSRC
->negy
;
167 pPVSSRC
->negz
= !pPVSSRC
->negz
;
168 pPVSSRC
->negw
= !pPVSSRC
->negw
;
171 void zerocomp_PVSSRC(PVSSRC
* pPVSSRC
, int c
)
175 case 0: pPVSSRC
->swizzlex
= SQ_SEL_0
; pPVSSRC
->negx
= 0; break;
176 case 1: pPVSSRC
->swizzley
= SQ_SEL_0
; pPVSSRC
->negy
= 0; break;
177 case 2: pPVSSRC
->swizzlez
= SQ_SEL_0
; pPVSSRC
->negz
= 0; break;
178 case 3: pPVSSRC
->swizzlew
= SQ_SEL_0
; pPVSSRC
->negw
= 0; break;
183 void onecomp_PVSSRC(PVSSRC
* pPVSSRC
, int c
)
187 case 0: pPVSSRC
->swizzlex
= SQ_SEL_1
; pPVSSRC
->negx
= 0; break;
188 case 1: pPVSSRC
->swizzley
= SQ_SEL_1
; pPVSSRC
->negy
= 0; break;
189 case 2: pPVSSRC
->swizzlez
= SQ_SEL_1
; pPVSSRC
->negz
= 0; break;
190 case 3: pPVSSRC
->swizzlew
= SQ_SEL_1
; pPVSSRC
->negw
= 0; break;
195 BITS
is_misc_component_exported(VAP_OUT_VTX_FMT_0
* pOutVTXFmt0
)
197 return (pOutVTXFmt0
->point_size
|
198 pOutVTXFmt0
->edge_flag
|
199 pOutVTXFmt0
->rta_index
|
200 pOutVTXFmt0
->kill_flag
|
201 pOutVTXFmt0
->viewport_index
);
204 BITS
is_depth_component_exported(OUT_FRAGMENT_FMT_0
* pFPOutFmt
)
206 return (pFPOutFmt
->depth
|
207 pFPOutFmt
->stencil_ref
|
209 pFPOutFmt
->coverage_to_mask
);
212 GLboolean
is_reduction_opcode(PVSDWORD
* dest
)
214 if (dest
->dst
.op3
== 0)
216 if ( (dest
->dst
.opcode
== SQ_OP2_INST_DOT4
|| dest
->dst
.opcode
== SQ_OP2_INST_DOT4_IEEE
|| dest
->dst
.opcode
== SQ_OP2_INST_CUBE
) )
224 GLuint
GetSurfaceFormat(GLenum eType
, GLuint nChannels
, GLuint
* pClient_size
)
226 GLuint format
= FMT_INVALID
;
227 GLuint uiElemSize
= 0;
232 case GL_UNSIGNED_BYTE
:
237 format
= FMT_8
; break;
239 format
= FMT_8_8
; break;
241 format
= FMT_8_8_8
; break;
243 format
= FMT_8_8_8_8
; break;
249 case GL_UNSIGNED_SHORT
:
255 format
= FMT_16
; break;
257 format
= FMT_16_16
; break;
259 format
= FMT_16_16_16
; break;
261 format
= FMT_16_16_16_16
; break;
267 case GL_UNSIGNED_INT
:
273 format
= FMT_32
; break;
275 format
= FMT_32_32
; break;
277 format
= FMT_32_32_32
; break;
279 format
= FMT_32_32_32_32
; break;
290 format
= FMT_32_FLOAT
; break;
292 format
= FMT_32_32_FLOAT
; break;
294 format
= FMT_32_32_32_FLOAT
; break;
296 format
= FMT_32_32_32_32_FLOAT
; break;
306 format
= FMT_32_FLOAT
; break;
308 format
= FMT_32_32_FLOAT
; break;
310 format
= FMT_32_32_32_FLOAT
; break;
312 format
= FMT_32_32_32_32_FLOAT
; break;
319 //GL_ASSERT_NO_CASE();
322 if(NULL
!= pClient_size
)
324 *pClient_size
= uiElemSize
* nChannels
;
330 unsigned int r700GetNumOperands(r700_AssemblerBase
* pAsm
)
337 switch (pAsm
->D
.dst
.opcode
)
339 case SQ_OP2_INST_ADD
:
340 case SQ_OP2_INST_KILLGT
:
341 case SQ_OP2_INST_MUL
:
342 case SQ_OP2_INST_MAX
:
343 case SQ_OP2_INST_MIN
:
344 //case SQ_OP2_INST_MAX_DX10:
345 //case SQ_OP2_INST_MIN_DX10:
346 case SQ_OP2_INST_SETGT
:
347 case SQ_OP2_INST_SETGE
:
348 case SQ_OP2_INST_PRED_SETE
:
349 case SQ_OP2_INST_PRED_SETGT
:
350 case SQ_OP2_INST_PRED_SETGE
:
351 case SQ_OP2_INST_PRED_SETNE
:
352 case SQ_OP2_INST_DOT4
:
353 case SQ_OP2_INST_DOT4_IEEE
:
354 case SQ_OP2_INST_CUBE
:
357 case SQ_OP2_INST_MOV
:
358 case SQ_OP2_INST_MOVA_FLOOR
:
359 case SQ_OP2_INST_FRACT
:
360 case SQ_OP2_INST_FLOOR
:
361 case SQ_OP2_INST_EXP_IEEE
:
362 case SQ_OP2_INST_LOG_CLAMPED
:
363 case SQ_OP2_INST_LOG_IEEE
:
364 case SQ_OP2_INST_RECIP_IEEE
:
365 case SQ_OP2_INST_RECIPSQRT_IEEE
:
366 case SQ_OP2_INST_FLT_TO_INT
:
367 case SQ_OP2_INST_SIN
:
368 case SQ_OP2_INST_COS
:
371 default: radeon_error(
372 "Need instruction operand number for %x.\n", pAsm
->D
.dst
.opcode
);
378 int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt
, r700_AssemblerBase
* pAsm
, R700_Shader
* pShader
)
382 Init_R700_Shader(pShader
);
383 pAsm
->pR700Shader
= pShader
;
384 pAsm
->currentShaderType
= spt
;
386 pAsm
->cf_last_export_ptr
= NULL
;
388 pAsm
->cf_current_export_clause_ptr
= NULL
;
389 pAsm
->cf_current_alu_clause_ptr
= NULL
;
390 pAsm
->cf_current_tex_clause_ptr
= NULL
;
391 pAsm
->cf_current_vtx_clause_ptr
= NULL
;
392 pAsm
->cf_current_cf_clause_ptr
= NULL
;
394 // No clause has been created yet
395 pAsm
->cf_current_clause_type
= CF_EMPTY_CLAUSE
;
397 pAsm
->number_of_colorandz_exports
= 0;
398 pAsm
->number_of_exports
= 0;
399 pAsm
->number_of_export_opcodes
= 0;
407 pAsm
->uLastPosUpdate
= 0;
409 *(BITS
*) &pAsm
->fp_stOutFmt0
= 0;
413 pAsm
->number_used_registers
= 0;
414 pAsm
->uUsedConsts
= 256;
418 pAsm
->uBoolConsts
= 0;
419 pAsm
->uIntConsts
= 0;
424 pAsm
->fc_stack
[0].type
= FC_NONE
;
426 pAsm
->branch_depth
= 0;
427 pAsm
->max_branch_depth
= 0;
432 pAsm
->aArgSubst
[3] = (-1);
436 for (i
=0; i
<NUMBER_OF_OUTPUT_COLORS
; i
++)
438 pAsm
->color_export_register_number
[i
] = (-1);
442 pAsm
->depth_export_register_number
= (-1);
443 pAsm
->stencil_export_register_number
= (-1);
444 pAsm
->coverage_to_mask_export_register_number
= (-1);
445 pAsm
->mask_export_register_number
= (-1);
447 pAsm
->starting_export_register_number
= 0;
448 pAsm
->starting_vfetch_register_number
= 0;
449 pAsm
->starting_temp_register_number
= 0;
450 pAsm
->uFirstHelpReg
= 0;
453 pAsm
->input_position_is_used
= GL_FALSE
;
454 pAsm
->input_normal_is_used
= GL_FALSE
;
457 for (i
=0; i
<NUMBER_OF_INPUT_COLORS
; i
++)
459 pAsm
->input_color_is_used
[ i
] = GL_FALSE
;
462 for (i
=0; i
<NUMBER_OF_TEXTURE_UNITS
; i
++)
464 pAsm
->input_texture_unit_is_used
[ i
] = GL_FALSE
;
467 for (i
=0; i
<VERT_ATTRIB_MAX
; i
++)
469 pAsm
->vfetch_instruction_ptr_array
[ i
] = NULL
;
472 pAsm
->number_of_inputs
= 0;
474 pAsm
->is_tex
= GL_FALSE
;
475 pAsm
->need_tex_barrier
= GL_FALSE
;
480 GLboolean
IsTex(gl_inst_opcode Opcode
)
482 if( (OPCODE_TEX
==Opcode
) || (OPCODE_TXP
==Opcode
) || (OPCODE_TXB
==Opcode
) )
489 GLboolean
IsAlu(gl_inst_opcode Opcode
)
491 //TODO : more for fc and ex for higher spec.
499 int check_current_clause(r700_AssemblerBase
* pAsm
,
500 CF_CLAUSE_TYPE new_clause_type
)
502 if (pAsm
->cf_current_clause_type
!= new_clause_type
)
503 { //Close last open clause
504 switch (pAsm
->cf_current_clause_type
)
507 if ( pAsm
->cf_current_alu_clause_ptr
!= NULL
)
509 pAsm
->cf_current_alu_clause_ptr
= NULL
;
513 if ( pAsm
->cf_current_vtx_clause_ptr
!= NULL
)
515 pAsm
->cf_current_vtx_clause_ptr
= NULL
;
519 if ( pAsm
->cf_current_tex_clause_ptr
!= NULL
)
521 pAsm
->cf_current_tex_clause_ptr
= NULL
;
524 case CF_EXPORT_CLAUSE
:
525 if ( pAsm
->cf_current_export_clause_ptr
!= NULL
)
527 pAsm
->cf_current_export_clause_ptr
= NULL
;
530 case CF_OTHER_CLAUSE
:
531 if ( pAsm
->cf_current_cf_clause_ptr
!= NULL
)
533 pAsm
->cf_current_cf_clause_ptr
= NULL
;
536 case CF_EMPTY_CLAUSE
:
540 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type
);
544 pAsm
->cf_current_clause_type
= CF_EMPTY_CLAUSE
;
547 switch (new_clause_type
)
550 pAsm
->cf_current_clause_type
= CF_ALU_CLAUSE
;
553 pAsm
->cf_current_clause_type
= CF_VTX_CLAUSE
;
556 pAsm
->cf_current_clause_type
= CF_TEX_CLAUSE
;
558 case CF_EXPORT_CLAUSE
:
560 R700ControlFlowSXClause
* pR700ControlFlowSXClause
561 = (R700ControlFlowSXClause
*) CALLOC_STRUCT(R700ControlFlowSXClause
);
563 // Add new export instruction to control flow program
564 if (pR700ControlFlowSXClause
!= 0)
566 pAsm
->cf_current_export_clause_ptr
= pR700ControlFlowSXClause
;
567 Init_R700ControlFlowSXClause(pR700ControlFlowSXClause
);
568 AddCFInstruction( pAsm
->pR700Shader
,
569 (R700ControlFlowInstruction
*)pR700ControlFlowSXClause
);
574 "Error allocating new EXPORT CF instruction in check_current_clause. \n");
577 pAsm
->cf_current_clause_type
= CF_EXPORT_CLAUSE
;
580 case CF_EMPTY_CLAUSE
:
582 case CF_OTHER_CLAUSE
:
583 pAsm
->cf_current_clause_type
= CF_OTHER_CLAUSE
;
587 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type
);
595 GLboolean
add_vfetch_instruction(r700_AssemblerBase
* pAsm
,
596 R700VertexInstruction
* vertex_instruction_ptr
)
598 if( GL_FALSE
== check_current_clause(pAsm
, CF_VTX_CLAUSE
) )
603 if( pAsm
->cf_current_vtx_clause_ptr
== NULL
||
604 ( (pAsm
->cf_current_vtx_clause_ptr
!= NULL
) &&
605 (pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.count
>= GetCFMaxInstructions(pAsm
->cf_current_vtx_clause_ptr
->m_ShaderInstType
)-1)
608 // Create new Vfetch control flow instruction for this new clause
609 pAsm
->cf_current_vtx_clause_ptr
= (R700ControlFlowGenericClause
*) CALLOC_STRUCT(R700ControlFlowGenericClause
);
611 if (pAsm
->cf_current_vtx_clause_ptr
!= NULL
)
613 Init_R700ControlFlowGenericClause(pAsm
->cf_current_vtx_clause_ptr
);
614 AddCFInstruction( pAsm
->pR700Shader
,
615 (R700ControlFlowInstruction
*)pAsm
->cf_current_vtx_clause_ptr
);
619 radeon_error("Could not allocate a new VFetch CF instruction.\n");
623 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.pop_count
= 0x0;
624 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
625 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
626 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.count
= 0x0;
627 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
628 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
629 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_VTX
;
630 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
631 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
633 LinkVertexInstruction(pAsm
->cf_current_vtx_clause_ptr
, vertex_instruction_ptr
);
637 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.count
++;
640 AddVTXInstruction(pAsm
->pR700Shader
, vertex_instruction_ptr
);
645 GLboolean
add_tex_instruction(r700_AssemblerBase
* pAsm
,
646 R700TextureInstruction
* tex_instruction_ptr
)
648 if ( GL_FALSE
== check_current_clause(pAsm
, CF_TEX_CLAUSE
) )
653 if ( pAsm
->cf_current_tex_clause_ptr
== NULL
||
654 ( (pAsm
->cf_current_tex_clause_ptr
!= NULL
) &&
655 (pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.count
>= GetCFMaxInstructions(pAsm
->cf_current_tex_clause_ptr
->m_ShaderInstType
)-1)
658 // new tex cf instruction for this new clause
659 pAsm
->cf_current_tex_clause_ptr
= (R700ControlFlowGenericClause
*) CALLOC_STRUCT(R700ControlFlowGenericClause
);
661 if (pAsm
->cf_current_tex_clause_ptr
!= NULL
)
663 Init_R700ControlFlowGenericClause(pAsm
->cf_current_tex_clause_ptr
);
664 AddCFInstruction( pAsm
->pR700Shader
,
665 (R700ControlFlowInstruction
*)pAsm
->cf_current_tex_clause_ptr
);
669 radeon_error("Could not allocate a new TEX CF instruction.\n");
673 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.pop_count
= 0x0;
674 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
675 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
677 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
678 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
679 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_TEX
;
680 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
681 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.barrier
= 0x0; //0x1;
685 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.count
++;
688 // If this clause constains any TEX instruction that is dependent on a previous instruction,
689 // set the barrier bit
690 if( pAsm
->pInstDeps
[pAsm
->uiCurInst
].nDstDep
> (-1) || pAsm
->need_tex_barrier
== GL_TRUE
)
692 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
695 if(NULL
== pAsm
->cf_current_tex_clause_ptr
->m_pLinkedTEXInstruction
)
697 pAsm
->cf_current_tex_clause_ptr
->m_pLinkedTEXInstruction
= tex_instruction_ptr
;
698 tex_instruction_ptr
->m_pLinkedGenericClause
= pAsm
->cf_current_tex_clause_ptr
;
701 AddTEXInstruction(pAsm
->pR700Shader
, tex_instruction_ptr
);
706 GLboolean
assemble_vfetch_instruction(r700_AssemblerBase
* pAsm
,
708 GLuint destination_register
,
709 GLuint number_of_elements
,
710 GLenum dataElementType
,
711 VTX_FETCH_METHOD
* pFetchMethod
)
713 GLuint client_size_inbyte
;
715 GLuint mega_fetch_count
;
716 GLuint is_mega_fetch_flag
;
718 R700VertexGenericFetch
* vfetch_instruction_ptr
;
719 R700VertexGenericFetch
* assembled_vfetch_instruction_ptr
= pAsm
->vfetch_instruction_ptr_array
[ gl_client_id
];
721 if (assembled_vfetch_instruction_ptr
== NULL
)
723 vfetch_instruction_ptr
= (R700VertexGenericFetch
*) CALLOC_STRUCT(R700VertexGenericFetch
);
724 if (vfetch_instruction_ptr
== NULL
)
728 Init_R700VertexGenericFetch(vfetch_instruction_ptr
);
732 vfetch_instruction_ptr
= assembled_vfetch_instruction_ptr
;
735 data_format
= GetSurfaceFormat(dataElementType
, number_of_elements
, &client_size_inbyte
);
737 if(GL_TRUE
== pFetchMethod
->bEnableMini
) //More conditions here
743 mega_fetch_count
= MEGA_FETCH_BYTES
- 1;
744 is_mega_fetch_flag
= 0x1;
745 pFetchMethod
->mega_fetch_remainder
= MEGA_FETCH_BYTES
- client_size_inbyte
;
748 vfetch_instruction_ptr
->m_Word0
.f
.vtx_inst
= SQ_VTX_INST_FETCH
;
749 vfetch_instruction_ptr
->m_Word0
.f
.fetch_type
= SQ_VTX_FETCH_VERTEX_DATA
;
750 vfetch_instruction_ptr
->m_Word0
.f
.fetch_whole_quad
= 0x0;
752 vfetch_instruction_ptr
->m_Word0
.f
.buffer_id
= gl_client_id
;
753 vfetch_instruction_ptr
->m_Word0
.f
.src_gpr
= 0x0;
754 vfetch_instruction_ptr
->m_Word0
.f
.src_rel
= SQ_ABSOLUTE
;
755 vfetch_instruction_ptr
->m_Word0
.f
.src_sel_x
= SQ_SEL_X
;
756 vfetch_instruction_ptr
->m_Word0
.f
.mega_fetch_count
= mega_fetch_count
;
758 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_x
= (number_of_elements
< 1) ? SQ_SEL_0
: SQ_SEL_X
;
759 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_y
= (number_of_elements
< 2) ? SQ_SEL_0
: SQ_SEL_Y
;
760 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_z
= (number_of_elements
< 3) ? SQ_SEL_0
: SQ_SEL_Z
;
761 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_w
= (number_of_elements
< 4) ? SQ_SEL_1
: SQ_SEL_W
;
763 vfetch_instruction_ptr
->m_Word1
.f
.use_const_fields
= 1;
765 // Destination register
766 vfetch_instruction_ptr
->m_Word1_GPR
.f
.dst_gpr
= destination_register
;
767 vfetch_instruction_ptr
->m_Word1_GPR
.f
.dst_rel
= SQ_ABSOLUTE
;
769 vfetch_instruction_ptr
->m_Word2
.f
.offset
= 0;
770 vfetch_instruction_ptr
->m_Word2
.f
.const_buf_no_stride
= 0x0;
772 vfetch_instruction_ptr
->m_Word2
.f
.mega_fetch
= is_mega_fetch_flag
;
774 if (assembled_vfetch_instruction_ptr
== NULL
)
776 if ( GL_FALSE
== add_vfetch_instruction(pAsm
, (R700VertexInstruction
*)vfetch_instruction_ptr
) )
781 if (pAsm
->vfetch_instruction_ptr_array
[ gl_client_id
] != NULL
)
787 pAsm
->vfetch_instruction_ptr_array
[ gl_client_id
] = vfetch_instruction_ptr
;
794 GLboolean
assemble_vfetch_instruction2(r700_AssemblerBase
* pAsm
,
795 GLuint destination_register
,
801 VTX_FETCH_METHOD
* pFetchMethod
)
803 GLuint client_size_inbyte
;
805 GLuint mega_fetch_count
;
806 GLuint is_mega_fetch_flag
;
808 R700VertexGenericFetch
* vfetch_instruction_ptr
;
809 R700VertexGenericFetch
* assembled_vfetch_instruction_ptr
810 = pAsm
->vfetch_instruction_ptr_array
[element
];
812 if (assembled_vfetch_instruction_ptr
== NULL
)
814 vfetch_instruction_ptr
= (R700VertexGenericFetch
*) CALLOC_STRUCT(R700VertexGenericFetch
);
815 if (vfetch_instruction_ptr
== NULL
)
819 Init_R700VertexGenericFetch(vfetch_instruction_ptr
);
823 vfetch_instruction_ptr
= assembled_vfetch_instruction_ptr
;
826 data_format
= GetSurfaceFormat(type
, size
, &client_size_inbyte
);
828 if(GL_TRUE
== pFetchMethod
->bEnableMini
) //More conditions here
834 mega_fetch_count
= MEGA_FETCH_BYTES
- 1;
835 is_mega_fetch_flag
= 0x1;
836 pFetchMethod
->mega_fetch_remainder
= MEGA_FETCH_BYTES
- client_size_inbyte
;
839 vfetch_instruction_ptr
->m_Word0
.f
.vtx_inst
= SQ_VTX_INST_FETCH
;
840 vfetch_instruction_ptr
->m_Word0
.f
.fetch_type
= SQ_VTX_FETCH_VERTEX_DATA
;
841 vfetch_instruction_ptr
->m_Word0
.f
.fetch_whole_quad
= 0x0;
843 vfetch_instruction_ptr
->m_Word0
.f
.buffer_id
= element
;
844 vfetch_instruction_ptr
->m_Word0
.f
.src_gpr
= 0x0;
845 vfetch_instruction_ptr
->m_Word0
.f
.src_rel
= SQ_ABSOLUTE
;
846 vfetch_instruction_ptr
->m_Word0
.f
.src_sel_x
= SQ_SEL_X
;
847 vfetch_instruction_ptr
->m_Word0
.f
.mega_fetch_count
= mega_fetch_count
;
849 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_x
= (size
< 1) ? SQ_SEL_0
: SQ_SEL_X
;
850 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_y
= (size
< 2) ? SQ_SEL_0
: SQ_SEL_Y
;
851 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_z
= (size
< 3) ? SQ_SEL_0
: SQ_SEL_Z
;
852 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_w
= (size
< 4) ? SQ_SEL_1
: SQ_SEL_W
;
854 vfetch_instruction_ptr
->m_Word1
.f
.use_const_fields
= 1;
855 vfetch_instruction_ptr
->m_Word1
.f
.data_format
= data_format
;
856 vfetch_instruction_ptr
->m_Word2
.f
.endian_swap
= SQ_ENDIAN_NONE
;
860 vfetch_instruction_ptr
->m_Word1
.f
.format_comp_all
= SQ_FORMAT_COMP_SIGNED
;
864 vfetch_instruction_ptr
->m_Word1
.f
.format_comp_all
= SQ_FORMAT_COMP_UNSIGNED
;
867 if(GL_TRUE
== normalize
)
869 vfetch_instruction_ptr
->m_Word1
.f
.num_format_all
= SQ_NUM_FORMAT_NORM
;
873 vfetch_instruction_ptr
->m_Word1
.f
.num_format_all
= SQ_NUM_FORMAT_INT
;
876 // Destination register
877 vfetch_instruction_ptr
->m_Word1_GPR
.f
.dst_gpr
= destination_register
;
878 vfetch_instruction_ptr
->m_Word1_GPR
.f
.dst_rel
= SQ_ABSOLUTE
;
880 vfetch_instruction_ptr
->m_Word2
.f
.offset
= 0;
881 vfetch_instruction_ptr
->m_Word2
.f
.const_buf_no_stride
= 0x0;
883 vfetch_instruction_ptr
->m_Word2
.f
.mega_fetch
= is_mega_fetch_flag
;
885 if (assembled_vfetch_instruction_ptr
== NULL
)
887 if ( GL_FALSE
== add_vfetch_instruction(pAsm
, (R700VertexInstruction
*)vfetch_instruction_ptr
) )
892 if (pAsm
->vfetch_instruction_ptr_array
[element
] != NULL
)
898 pAsm
->vfetch_instruction_ptr_array
[element
] = vfetch_instruction_ptr
;
905 GLboolean
cleanup_vfetch_instructions(r700_AssemblerBase
* pAsm
)
908 pAsm
->cf_current_clause_type
= CF_EMPTY_CLAUSE
;
909 pAsm
->cf_current_vtx_clause_ptr
= NULL
;
911 for (i
=0; i
<VERT_ATTRIB_MAX
; i
++)
913 pAsm
->vfetch_instruction_ptr_array
[ i
] = NULL
;
916 cleanup_vfetch_shaderinst(pAsm
->pR700Shader
);
921 GLuint
gethelpr(r700_AssemblerBase
* pAsm
)
923 GLuint r
= pAsm
->uHelpReg
;
925 if (pAsm
->uHelpReg
> pAsm
->number_used_registers
)
927 pAsm
->number_used_registers
= pAsm
->uHelpReg
;
931 void resethelpr(r700_AssemblerBase
* pAsm
)
933 pAsm
->uHelpReg
= pAsm
->uFirstHelpReg
;
936 void checkop_init(r700_AssemblerBase
* pAsm
)
942 pAsm
->aArgSubst
[3] = -1;
945 GLboolean
mov_temp(r700_AssemblerBase
* pAsm
, int src
)
947 GLuint tmp
= gethelpr(pAsm
);
949 //mov src to temp helper gpr.
950 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
952 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
954 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
955 pAsm
->D
.dst
.reg
= tmp
;
957 nomask_PVSDST(&(pAsm
->D
.dst
));
959 if( GL_FALSE
== assemble_src(pAsm
, src
, 0) )
964 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
965 noneg_PVSSRC(&(pAsm
->S
[0].src
));
967 if( GL_FALSE
== next_ins(pAsm
) )
972 pAsm
->aArgSubst
[1 + src
] = tmp
;
977 GLboolean
checkop1(r700_AssemblerBase
* pAsm
)
983 GLboolean
checkop2(r700_AssemblerBase
* pAsm
)
985 GLboolean bSrcConst
[2];
986 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
990 if( (pILInst
->SrcReg
[0].File
== PROGRAM_CONSTANT
) ||
991 (pILInst
->SrcReg
[0].File
== PROGRAM_LOCAL_PARAM
) ||
992 (pILInst
->SrcReg
[0].File
== PROGRAM_ENV_PARAM
) ||
993 (pILInst
->SrcReg
[0].File
== PROGRAM_STATE_VAR
) )
995 bSrcConst
[0] = GL_TRUE
;
999 bSrcConst
[0] = GL_FALSE
;
1001 if( (pILInst
->SrcReg
[1].File
== PROGRAM_CONSTANT
) ||
1002 (pILInst
->SrcReg
[1].File
== PROGRAM_LOCAL_PARAM
) ||
1003 (pILInst
->SrcReg
[1].File
== PROGRAM_ENV_PARAM
) ||
1004 (pILInst
->SrcReg
[1].File
== PROGRAM_STATE_VAR
) )
1006 bSrcConst
[1] = GL_TRUE
;
1010 bSrcConst
[1] = GL_FALSE
;
1013 if( (bSrcConst
[0] == GL_TRUE
) && (bSrcConst
[1] == GL_TRUE
) )
1015 if(pILInst
->SrcReg
[0].Index
!= pILInst
->SrcReg
[1].Index
)
1017 if( GL_FALSE
== mov_temp(pAsm
, 1) )
1027 GLboolean
checkop3(r700_AssemblerBase
* pAsm
)
1029 GLboolean bSrcConst
[3];
1030 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1034 if( (pILInst
->SrcReg
[0].File
== PROGRAM_CONSTANT
) ||
1035 (pILInst
->SrcReg
[0].File
== PROGRAM_LOCAL_PARAM
) ||
1036 (pILInst
->SrcReg
[0].File
== PROGRAM_ENV_PARAM
) ||
1037 (pILInst
->SrcReg
[0].File
== PROGRAM_STATE_VAR
) )
1039 bSrcConst
[0] = GL_TRUE
;
1043 bSrcConst
[0] = GL_FALSE
;
1045 if( (pILInst
->SrcReg
[1].File
== PROGRAM_CONSTANT
) ||
1046 (pILInst
->SrcReg
[1].File
== PROGRAM_LOCAL_PARAM
) ||
1047 (pILInst
->SrcReg
[1].File
== PROGRAM_ENV_PARAM
) ||
1048 (pILInst
->SrcReg
[1].File
== PROGRAM_STATE_VAR
) )
1050 bSrcConst
[1] = GL_TRUE
;
1054 bSrcConst
[1] = GL_FALSE
;
1056 if( (pILInst
->SrcReg
[2].File
== PROGRAM_CONSTANT
) ||
1057 (pILInst
->SrcReg
[2].File
== PROGRAM_LOCAL_PARAM
) ||
1058 (pILInst
->SrcReg
[2].File
== PROGRAM_ENV_PARAM
) ||
1059 (pILInst
->SrcReg
[2].File
== PROGRAM_STATE_VAR
) )
1061 bSrcConst
[2] = GL_TRUE
;
1065 bSrcConst
[2] = GL_FALSE
;
1068 if( (GL_TRUE
== bSrcConst
[0]) &&
1069 (GL_TRUE
== bSrcConst
[1]) &&
1070 (GL_TRUE
== bSrcConst
[2]) )
1072 if( GL_FALSE
== mov_temp(pAsm
, 1) )
1076 if( GL_FALSE
== mov_temp(pAsm
, 2) )
1083 else if( (GL_TRUE
== bSrcConst
[0]) &&
1084 (GL_TRUE
== bSrcConst
[1]) )
1086 if(pILInst
->SrcReg
[0].Index
!= pILInst
->SrcReg
[1].Index
)
1088 if( GL_FALSE
== mov_temp(pAsm
, 1) )
1096 else if ( (GL_TRUE
== bSrcConst
[0]) &&
1097 (GL_TRUE
== bSrcConst
[2]) )
1099 if(pILInst
->SrcReg
[0].Index
!= pILInst
->SrcReg
[2].Index
)
1101 if( GL_FALSE
== mov_temp(pAsm
, 2) )
1109 else if( (GL_TRUE
== bSrcConst
[1]) &&
1110 (GL_TRUE
== bSrcConst
[2]) )
1112 if(pILInst
->SrcReg
[1].Index
!= pILInst
->SrcReg
[2].Index
)
1114 if( GL_FALSE
== mov_temp(pAsm
, 2) )
1126 GLboolean
assemble_src(r700_AssemblerBase
*pAsm
,
1130 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1137 if(pAsm
->aArgSubst
[1+src
] >= 0)
1139 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1140 pAsm
->S
[fld
].src
.rtype
= SRC_REG_TEMPORARY
;
1141 pAsm
->S
[fld
].src
.reg
= pAsm
->aArgSubst
[1+src
];
1145 switch (pILInst
->SrcReg
[src
].File
)
1147 case PROGRAM_TEMPORARY
:
1148 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1149 pAsm
->S
[fld
].src
.rtype
= SRC_REG_TEMPORARY
;
1150 pAsm
->S
[fld
].src
.reg
= pILInst
->SrcReg
[src
].Index
+ pAsm
->starting_temp_register_number
;
1152 case PROGRAM_CONSTANT
:
1153 case PROGRAM_LOCAL_PARAM
:
1154 case PROGRAM_ENV_PARAM
:
1155 case PROGRAM_STATE_VAR
:
1156 if (1 == pILInst
->SrcReg
[src
].RelAddr
)
1158 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_RELATIVE_A0
);
1162 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1165 pAsm
->S
[fld
].src
.rtype
= SRC_REG_CONSTANT
;
1166 pAsm
->S
[fld
].src
.reg
= pILInst
->SrcReg
[src
].Index
;
1169 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1170 pAsm
->S
[fld
].src
.rtype
= SRC_REG_INPUT
;
1171 switch (pAsm
->currentShaderType
)
1174 pAsm
->S
[fld
].src
.reg
= pAsm
->uiFP_AttributeMap
[pILInst
->SrcReg
[src
].Index
];
1177 pAsm
->S
[fld
].src
.reg
= pAsm
->ucVP_AttributeMap
[pILInst
->SrcReg
[src
].Index
];
1182 radeon_error("Invalid source argument type\n");
1187 pAsm
->S
[fld
].src
.swizzlex
= pILInst
->SrcReg
[src
].Swizzle
& 0x7;
1188 pAsm
->S
[fld
].src
.swizzley
= (pILInst
->SrcReg
[src
].Swizzle
>> 3) & 0x7;
1189 pAsm
->S
[fld
].src
.swizzlez
= (pILInst
->SrcReg
[src
].Swizzle
>> 6) & 0x7;
1190 pAsm
->S
[fld
].src
.swizzlew
= (pILInst
->SrcReg
[src
].Swizzle
>> 9) & 0x7;
1192 pAsm
->S
[fld
].src
.negx
= pILInst
->SrcReg
[src
].Negate
& 0x1;
1193 pAsm
->S
[fld
].src
.negy
= (pILInst
->SrcReg
[src
].Negate
>> 1) & 0x1;
1194 pAsm
->S
[fld
].src
.negz
= (pILInst
->SrcReg
[src
].Negate
>> 2) & 0x1;
1195 pAsm
->S
[fld
].src
.negw
= (pILInst
->SrcReg
[src
].Negate
>> 3) & 0x1;
1200 GLboolean
assemble_dst(r700_AssemblerBase
*pAsm
)
1202 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1203 switch (pILInst
->DstReg
.File
)
1205 case PROGRAM_TEMPORARY
:
1206 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1207 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
1208 pAsm
->D
.dst
.reg
= pILInst
->DstReg
.Index
+ pAsm
->starting_temp_register_number
;
1210 case PROGRAM_ADDRESS
:
1211 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1212 pAsm
->D
.dst
.rtype
= DST_REG_A0
;
1213 pAsm
->D
.dst
.reg
= 0;
1215 case PROGRAM_OUTPUT
:
1216 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1217 pAsm
->D
.dst
.rtype
= DST_REG_OUT
;
1218 switch (pAsm
->currentShaderType
)
1221 pAsm
->D
.dst
.reg
= pAsm
->uiFP_OutputMap
[pILInst
->DstReg
.Index
];
1224 pAsm
->D
.dst
.reg
= pAsm
->ucVP_OutputMap
[pILInst
->DstReg
.Index
];
1229 radeon_error("Invalid destination output argument type\n");
1233 pAsm
->D
.dst
.writex
= pILInst
->DstReg
.WriteMask
& 0x1;
1234 pAsm
->D
.dst
.writey
= (pILInst
->DstReg
.WriteMask
>> 1) & 0x1;
1235 pAsm
->D
.dst
.writez
= (pILInst
->DstReg
.WriteMask
>> 2) & 0x1;
1236 pAsm
->D
.dst
.writew
= (pILInst
->DstReg
.WriteMask
>> 3) & 0x1;
1241 GLboolean
tex_dst(r700_AssemblerBase
*pAsm
)
1243 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1245 if(PROGRAM_TEMPORARY
== pILInst
->DstReg
.File
)
1247 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
1248 pAsm
->D
.dst
.reg
= pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.Index
+ pAsm
->starting_temp_register_number
;
1250 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1252 else if(PROGRAM_OUTPUT
== pILInst
->DstReg
.File
)
1254 pAsm
->D
.dst
.rtype
= DST_REG_OUT
;
1255 switch (pAsm
->currentShaderType
)
1258 pAsm
->D
.dst
.reg
= pAsm
->uiFP_OutputMap
[pILInst
->DstReg
.Index
];
1261 pAsm
->D
.dst
.reg
= pAsm
->ucVP_OutputMap
[pILInst
->DstReg
.Index
];
1265 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1269 radeon_error("Invalid destination output argument type\n");
1273 pAsm
->D
.dst
.writex
= pILInst
->DstReg
.WriteMask
& 0x1;
1274 pAsm
->D
.dst
.writey
= (pILInst
->DstReg
.WriteMask
>> 1) & 0x1;
1275 pAsm
->D
.dst
.writez
= (pILInst
->DstReg
.WriteMask
>> 2) & 0x1;
1276 pAsm
->D
.dst
.writew
= (pILInst
->DstReg
.WriteMask
>> 3) & 0x1;
1281 GLboolean
tex_src(r700_AssemblerBase
*pAsm
)
1283 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1285 GLboolean bValidTexCoord
= GL_FALSE
;
1287 if(pAsm
->aArgSubst
[1] >= 0)
1289 bValidTexCoord
= GL_TRUE
;
1290 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
1291 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
1292 pAsm
->S
[0].src
.reg
= pAsm
->aArgSubst
[1];
1296 switch (pILInst
->SrcReg
[0].File
) {
1297 case PROGRAM_CONSTANT
:
1298 case PROGRAM_LOCAL_PARAM
:
1299 case PROGRAM_ENV_PARAM
:
1300 case PROGRAM_STATE_VAR
:
1302 case PROGRAM_TEMPORARY
:
1303 bValidTexCoord
= GL_TRUE
;
1304 pAsm
->S
[0].src
.reg
= pILInst
->SrcReg
[0].Index
+
1305 pAsm
->starting_temp_register_number
;
1306 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
1309 switch (pILInst
->SrcReg
[0].Index
)
1311 case FRAG_ATTRIB_WPOS
:
1312 case FRAG_ATTRIB_COL0
:
1313 case FRAG_ATTRIB_COL1
:
1314 case FRAG_ATTRIB_FOGC
:
1315 case FRAG_ATTRIB_TEX0
:
1316 case FRAG_ATTRIB_TEX1
:
1317 case FRAG_ATTRIB_TEX2
:
1318 case FRAG_ATTRIB_TEX3
:
1319 case FRAG_ATTRIB_TEX4
:
1320 case FRAG_ATTRIB_TEX5
:
1321 case FRAG_ATTRIB_TEX6
:
1322 case FRAG_ATTRIB_TEX7
:
1323 bValidTexCoord
= GL_TRUE
;
1324 pAsm
->S
[0].src
.reg
=
1325 pAsm
->uiFP_AttributeMap
[pILInst
->SrcReg
[0].Index
];
1326 pAsm
->S
[0].src
.rtype
= SRC_REG_INPUT
;
1328 case FRAG_ATTRIB_FACE
:
1329 fprintf(stderr
, "FRAG_ATTRIB_FACE unsupported\n");
1331 case FRAG_ATTRIB_PNTC
:
1332 fprintf(stderr
, "FRAG_ATTRIB_PNTC unsupported\n");
1334 case FRAG_ATTRIB_VAR0
:
1335 fprintf(stderr
, "FRAG_ATTRIB_VAR0 unsupported\n");
1342 if(GL_TRUE
== bValidTexCoord
)
1344 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
1348 radeon_error("Invalid source texcoord for TEX instruction\n");
1352 pAsm
->S
[0].src
.swizzlex
= pILInst
->SrcReg
[0].Swizzle
& 0x7;
1353 pAsm
->S
[0].src
.swizzley
= (pILInst
->SrcReg
[0].Swizzle
>> 3) & 0x7;
1354 pAsm
->S
[0].src
.swizzlez
= (pILInst
->SrcReg
[0].Swizzle
>> 6) & 0x7;
1355 pAsm
->S
[0].src
.swizzlew
= (pILInst
->SrcReg
[0].Swizzle
>> 9) & 0x7;
1357 pAsm
->S
[0].src
.negx
= pILInst
->SrcReg
[0].Negate
& 0x1;
1358 pAsm
->S
[0].src
.negy
= (pILInst
->SrcReg
[0].Negate
>> 1) & 0x1;
1359 pAsm
->S
[0].src
.negz
= (pILInst
->SrcReg
[0].Negate
>> 2) & 0x1;
1360 pAsm
->S
[0].src
.negw
= (pILInst
->SrcReg
[0].Negate
>> 3) & 0x1;
1365 GLboolean
assemble_tex_instruction(r700_AssemblerBase
*pAsm
, GLboolean normalized
)
1367 PVSSRC
* texture_coordinate_source
;
1368 PVSSRC
* texture_unit_source
;
1370 R700TextureInstruction
* tex_instruction_ptr
= (R700TextureInstruction
*) CALLOC_STRUCT(R700TextureInstruction
);
1371 if (tex_instruction_ptr
== NULL
)
1375 Init_R700TextureInstruction(tex_instruction_ptr
);
1377 texture_coordinate_source
= &(pAsm
->S
[0].src
);
1378 texture_unit_source
= &(pAsm
->S
[1].src
);
1380 tex_instruction_ptr
->m_Word0
.f
.tex_inst
= pAsm
->D
.dst
.opcode
;
1381 tex_instruction_ptr
->m_Word0
.f
.bc_frac_mode
= 0x0;
1382 tex_instruction_ptr
->m_Word0
.f
.fetch_whole_quad
= 0x0;
1384 tex_instruction_ptr
->m_Word0
.f
.resource_id
= texture_unit_source
->reg
;
1386 tex_instruction_ptr
->m_Word1
.f
.lod_bias
= 0x0;
1388 tex_instruction_ptr
->m_Word1
.f
.coord_type_x
= SQ_TEX_NORMALIZED
;
1389 tex_instruction_ptr
->m_Word1
.f
.coord_type_y
= SQ_TEX_NORMALIZED
;
1390 tex_instruction_ptr
->m_Word1
.f
.coord_type_z
= SQ_TEX_NORMALIZED
;
1391 tex_instruction_ptr
->m_Word1
.f
.coord_type_w
= SQ_TEX_NORMALIZED
;
1393 /* XXX: UNNORMALIZED tex coords have limited wrap modes */
1394 tex_instruction_ptr
->m_Word1
.f
.coord_type_x
= SQ_TEX_UNNORMALIZED
;
1395 tex_instruction_ptr
->m_Word1
.f
.coord_type_y
= SQ_TEX_UNNORMALIZED
;
1396 tex_instruction_ptr
->m_Word1
.f
.coord_type_z
= SQ_TEX_UNNORMALIZED
;
1397 tex_instruction_ptr
->m_Word1
.f
.coord_type_w
= SQ_TEX_UNNORMALIZED
;
1400 tex_instruction_ptr
->m_Word2
.f
.offset_x
= 0x0;
1401 tex_instruction_ptr
->m_Word2
.f
.offset_y
= 0x0;
1402 tex_instruction_ptr
->m_Word2
.f
.offset_z
= 0x0;
1404 tex_instruction_ptr
->m_Word2
.f
.sampler_id
= texture_unit_source
->reg
;
1407 if ( (pAsm
->D
.dst
.rtype
== DST_REG_TEMPORARY
) ||
1408 (pAsm
->D
.dst
.rtype
== DST_REG_OUT
) )
1410 tex_instruction_ptr
->m_Word0
.f
.src_gpr
= texture_coordinate_source
->reg
;
1411 tex_instruction_ptr
->m_Word0
.f
.src_rel
= SQ_ABSOLUTE
;
1413 tex_instruction_ptr
->m_Word1
.f
.dst_gpr
= pAsm
->D
.dst
.reg
;
1414 tex_instruction_ptr
->m_Word1
.f
.dst_rel
= SQ_ABSOLUTE
;
1416 tex_instruction_ptr
->m_Word1
.f
.dst_sel_x
= (pAsm
->D
.dst
.writex
? texture_unit_source
->swizzlex
: SQ_SEL_MASK
);
1417 tex_instruction_ptr
->m_Word1
.f
.dst_sel_y
= (pAsm
->D
.dst
.writey
? texture_unit_source
->swizzley
: SQ_SEL_MASK
);
1418 tex_instruction_ptr
->m_Word1
.f
.dst_sel_z
= (pAsm
->D
.dst
.writez
? texture_unit_source
->swizzlez
: SQ_SEL_MASK
);
1419 tex_instruction_ptr
->m_Word1
.f
.dst_sel_w
= (pAsm
->D
.dst
.writew
? texture_unit_source
->swizzlew
: SQ_SEL_MASK
);
1422 tex_instruction_ptr
->m_Word2
.f
.src_sel_x
= texture_coordinate_source
->swizzlex
;
1423 tex_instruction_ptr
->m_Word2
.f
.src_sel_y
= texture_coordinate_source
->swizzley
;
1424 tex_instruction_ptr
->m_Word2
.f
.src_sel_z
= texture_coordinate_source
->swizzlez
;
1425 tex_instruction_ptr
->m_Word2
.f
.src_sel_w
= texture_coordinate_source
->swizzlew
;
1429 radeon_error("Only temp destination registers supported for TEX dest regs.\n");
1433 if( GL_FALSE
== add_tex_instruction(pAsm
, tex_instruction_ptr
) )
1441 void initialize(r700_AssemblerBase
*pAsm
)
1443 GLuint cycle
, component
;
1445 for (cycle
=0; cycle
<NUMBER_OF_CYCLES
; cycle
++)
1447 for (component
=0; component
<NUMBER_OF_COMPONENTS
; component
++)
1449 pAsm
->hw_gpr
[cycle
][component
] = (-1);
1452 for (component
=0; component
<NUMBER_OF_COMPONENTS
; component
++)
1454 pAsm
->hw_cfile_addr
[component
] = (-1);
1455 pAsm
->hw_cfile_chan
[component
] = (-1);
/*
 * assemble_alu_src: encode one source operand (pSource) of an ALU
 * instruction for the scalar channel scalar_channel_index, and store the
 * result in the src0/src1 fields of m_Word0 or the src2 fields of
 * m_Word1_OP3, chosen by source_index.
 *
 * What the visible code does:
 *   - picks pSource's swizzle for channel 0..3 (SQ_SEL_MASK otherwise);
 *   - SQ_SEL_0 / SQ_SEL_1 swizzles become the inline constants
 *     SQ_ALU_SRC_0 / SQ_ALU_SRC_1;
 *   - otherwise src_sel is the register number for TEMPORARY/INPUT sources,
 *     or reg + CFILE_REGISTER_OFFSET for CONSTANT sources; any other
 *     register type is reported through radeon_error();
 *   - src_rel follows the source's addressing mode, src_neg its per-channel
 *     negate bit.
 *
 * NOTE(review): two parameter lines, the local declarations and several
 * case labels are missing from this listing; the parameter order should be
 * confirmed against the callers in assemble_alu_instruction().
 */
1459 GLboolean
assemble_alu_src(R700ALUInstruction
* alu_instruction_ptr
,
1462 BITS scalar_channel_index
)
1469 //--------------------------------------------------------------------------
1470 // Source for operands src0, src1.
1471 // Values [0,127] correspond to GPR[0..127].
1472 // Values [256,511] correspond to cfile constants c[0..255].
1474 //--------------------------------------------------------------------------
1475 // Other special values are shown in the list below.
1477 // 248 SQ_ALU_SRC_0: special constant 0.0.
1478 // 249 SQ_ALU_SRC_1: special constant 1.0 float.
1480 // 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
1481 // 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
1483 // 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
1484 // 253 SQ_ALU_SRC_LITERAL: literal constant.
1486 // 254 SQ_ALU_SRC_PV: previous vector result.
1487 // 255 SQ_ALU_SRC_PS: previous scalar result.
1488 //--------------------------------------------------------------------------
/* Swizzle selector that feeds the requested scalar channel. */
1490 BITS channel_swizzle
;
1491 switch (scalar_channel_index
)
1493 case 0: channel_swizzle
= pSource
->swizzlex
; break;
1494 case 1: channel_swizzle
= pSource
->swizzley
; break;
1495 case 2: channel_swizzle
= pSource
->swizzlez
; break;
1496 case 3: channel_swizzle
= pSource
->swizzlew
; break;
1497 default: channel_swizzle
= SQ_SEL_MASK
; break;
/* Constant swizzles map to inline constants; otherwise select by register type. */
1500 if(channel_swizzle
== SQ_SEL_0
)
1502 src_sel
= SQ_ALU_SRC_0
;
1504 else if (channel_swizzle
== SQ_SEL_1
)
1506 src_sel
= SQ_ALU_SRC_1
;
1510 if ( (pSource
->rtype
== SRC_REG_TEMPORARY
) ||
1511 (pSource
->rtype
== SRC_REG_INPUT
)
1514 src_sel
= pSource
->reg
;
1516 else if (pSource
->rtype
== SRC_REG_CONSTANT
)
1518 src_sel
= pSource
->reg
+ CFILE_REGISTER_OFFSET
;
1522 radeon_error("Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.\n",
1523 source_index
, pSource
->rtype
);
/* Absolute vs. relative addressing comes from the source's address mode. */
1528 if( ADDR_ABSOLUTE
== addrmode_PVSSRC(pSource
) )
1530 src_rel
= SQ_ABSOLUTE
;
1534 src_rel
= SQ_RELATIVE
;
/* Translate the swizzle selector into a hardware channel (case labels not visible here). */
1537 switch (channel_swizzle
)
1540 src_chan
= SQ_CHAN_X
;
1543 src_chan
= SQ_CHAN_Y
;
1546 src_chan
= SQ_CHAN_Z
;
1549 src_chan
= SQ_CHAN_W
;
1553 // Does not matter since src_sel controls
1554 src_chan
= SQ_CHAN_X
;
1557 radeon_error("Unknown source select value (%d) in assemble_alu_src().\n", channel_swizzle
);
/* Per-channel negate bit. */
1562 switch (scalar_channel_index
)
1564 case 0: src_neg
= pSource
->negx
; break;
1565 case 1: src_neg
= pSource
->negy
; break;
1566 case 2: src_neg
= pSource
->negz
; break;
1567 case 3: src_neg
= pSource
->negw
; break;
1568 default: src_neg
= 0; break;
/* Store the encoded operand in the slot selected by source_index. */
1571 switch (source_index
)
1574 alu_instruction_ptr
->m_Word0
.f
.src0_sel
= src_sel
;
1575 alu_instruction_ptr
->m_Word0
.f
.src0_rel
= src_rel
;
1576 alu_instruction_ptr
->m_Word0
.f
.src0_chan
= src_chan
;
1577 alu_instruction_ptr
->m_Word0
.f
.src0_neg
= src_neg
;
1580 alu_instruction_ptr
->m_Word0
.f
.src1_sel
= src_sel
;
1581 alu_instruction_ptr
->m_Word0
.f
.src1_rel
= src_rel
;
1582 alu_instruction_ptr
->m_Word0
.f
.src1_chan
= src_chan
;
1583 alu_instruction_ptr
->m_Word0
.f
.src1_neg
= src_neg
;
1586 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_sel
= src_sel
;
1587 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_rel
= src_rel
;
1588 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_chan
= src_chan
;
1589 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_neg
= src_neg
;
1592 radeon_error("Only three sources allowed in ALU opcodes.\n");
/*
 * add_alu_instruction: append alu_instruction_ptr to the shader, opening a
 * new ALU control-flow clause when needed.
 *
 * What the visible code does:
 *   - calls check_current_clause(pAsm, CF_ALU_CLAUSE) first;
 *   - a new R700ControlFlowALUClause is allocated, initialised and linked
 *     with AddCFInstruction() when there is no current ALU clause, or when
 *     the current clause's count has reached
 *     GetCFMaxInstructions(type) - contiguous_slots_needed - 1;
 *   - a fresh clause gets its kcache fields cleared, count = 0,
 *     cf_inst = SQ_CF_INST_ALU, whole_quad_mode = 0 and barrier = 1;
 *   - whole_quad_mode is set when the current IL instruction has a
 *     destination dependency (nDstDep > -1);
 *   - the instruction is marked "last" when the clause is full;
 *   - the first instruction of a clause is cross-linked with the clause.
 *
 * NOTE(review): the closing part of the clause condition and the
 * if/else structure around "count++" are not visible in this listing;
 * presumably count++ is the "reuse existing clause" branch - confirm.
 */
1600 GLboolean
add_alu_instruction(r700_AssemblerBase
* pAsm
,
1601 R700ALUInstruction
* alu_instruction_ptr
,
1602 GLuint contiguous_slots_needed
)
1604 if( GL_FALSE
== check_current_clause(pAsm
, CF_ALU_CLAUSE
) )
/* Open a new clause if none exists or the current one lacks room. */
1609 if ( pAsm
->cf_current_alu_clause_ptr
== NULL
||
1610 ( (pAsm
->cf_current_alu_clause_ptr
!= NULL
) &&
1611 (pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
>= (GetCFMaxInstructions(pAsm
->cf_current_alu_clause_ptr
->m_ShaderInstType
)-contiguous_slots_needed
-1) )
1615 //new cf inst for this clause
1616 pAsm
->cf_current_alu_clause_ptr
= (R700ControlFlowALUClause
*) CALLOC_STRUCT(R700ControlFlowALUClause
);
1618 // link the new cf to cf segment
1619 if(NULL
!= pAsm
->cf_current_alu_clause_ptr
)
1621 Init_R700ControlFlowALUClause(pAsm
->cf_current_alu_clause_ptr
);
1622 AddCFInstruction( pAsm
->pR700Shader
,
1623 (R700ControlFlowInstruction
*)pAsm
->cf_current_alu_clause_ptr
);
1627 radeon_error("Could not allocate a new ALU CF instruction.\n");
/* Default control-flow words for a fresh ALU clause. */
1631 pAsm
->cf_current_alu_clause_ptr
->m_Word0
.f
.kcache_bank0
= 0x0;
1632 pAsm
->cf_current_alu_clause_ptr
->m_Word0
.f
.kcache_bank1
= 0x0;
1633 pAsm
->cf_current_alu_clause_ptr
->m_Word0
.f
.kcache_mode0
= SQ_CF_KCACHE_NOP
;
1635 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.kcache_mode1
= SQ_CF_KCACHE_NOP
;
1636 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.kcache_addr0
= 0x0;
1637 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.kcache_addr1
= 0x0;
1639 //cf_current_alu_clause_ptr->m_Word1.f.count = number_of_scalar_operations - 1;
1640 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
= 0x0;
1641 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_ALU
;
1643 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
1645 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
1649 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
++;
1652 // If this clause contains any instruction that is forward dependent on a TEX instruction,
1653 // set the whole_quad_mode for this clause
1654 if ( pAsm
->pInstDeps
[pAsm
->uiCurInst
].nDstDep
> (-1) )
1656 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x1;
/* Clause is full: this instruction closes the current group. */
1659 if (pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
>= (GetCFMaxInstructions(pAsm
->cf_current_alu_clause_ptr
->m_ShaderInstType
)-1) )
1661 alu_instruction_ptr
->m_Word0
.f
.last
= 1;
/* First instruction of the clause: cross-link clause and instruction. */
1664 if(NULL
== pAsm
->cf_current_alu_clause_ptr
->m_pLinkedALUInstruction
)
1666 pAsm
->cf_current_alu_clause_ptr
->m_pLinkedALUInstruction
= alu_instruction_ptr
;
1667 alu_instruction_ptr
->m_pLinkedALUClause
= pAsm
->cf_current_alu_clause_ptr
;
1670 AddALUInstruction(pAsm
->pR700Shader
, alu_instruction_ptr
);
/*
 * get_src_properties: read back the sel/rel/chan/neg fields of one source
 * operand of an ALU instruction into *psrc_sel, *psrc_rel, *psrc_chan and
 * *psrc_neg.  source_index selects the operand: the src0 and src1 fields
 * live in m_Word0, the src2 fields in m_Word1_OP3.
 *
 * NOTE(review): the remaining parameter lines and the case labels are not
 * visible in this listing; presumably the three groups below are
 * case 0 / 1 / 2 - confirm.
 */
1675 void get_src_properties(R700ALUInstruction
* alu_instruction_ptr
,
1682 switch (source_index
)
1685 *psrc_sel
= alu_instruction_ptr
->m_Word0
.f
.src0_sel
;
1686 *psrc_rel
= alu_instruction_ptr
->m_Word0
.f
.src0_rel
;
1687 *psrc_chan
= alu_instruction_ptr
->m_Word0
.f
.src0_chan
;
1688 *psrc_neg
= alu_instruction_ptr
->m_Word0
.f
.src0_neg
;
1692 *psrc_sel
= alu_instruction_ptr
->m_Word0
.f
.src1_sel
;
1693 *psrc_rel
= alu_instruction_ptr
->m_Word0
.f
.src1_rel
;
1694 *psrc_chan
= alu_instruction_ptr
->m_Word0
.f
.src1_chan
;
1695 *psrc_neg
= alu_instruction_ptr
->m_Word0
.f
.src1_neg
;
1699 *psrc_sel
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_sel
;
1700 *psrc_rel
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_rel
;
1701 *psrc_chan
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_chan
;
1702 *psrc_neg
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_neg
;
1707 int is_cfile(BITS sel
)
1709 if (sel
> 255 && sel
< 512)
/*
 * is_const: classify a source select value as a constant.  The visible
 * branch accepts the special values from SQ_ALU_SRC_0 through
 * SQ_ALU_SRC_LITERAL inclusive.
 *
 * NOTE(review): the first branch of this if/else chain is not visible in
 * this listing; presumably it accepts constant-file selects - confirm.
 */
1716 int is_const(BITS sel
)
1722 else if(sel
>= SQ_ALU_SRC_0
&& sel
<= SQ_ALU_SRC_LITERAL
)
1729 int is_gpr(BITS sel
)
1731 if (sel
>= 0 && sel
< 128)
1738 const GLuint BANK_SWIZZLE_VEC
[8] = {SQ_ALU_VEC_210
, //000
1739 SQ_ALU_VEC_120
, //001
1740 SQ_ALU_VEC_102
, //010
1742 SQ_ALU_VEC_201
, //011
1743 SQ_ALU_VEC_012
, //100
1744 SQ_ALU_VEC_021
, //101
1746 SQ_ALU_VEC_012
, //110
1747 SQ_ALU_VEC_012
}; //111
1749 const GLuint BANK_SWIZZLE_SCL
[8] = {SQ_ALU_SCL_210
, //000
1750 SQ_ALU_SCL_122
, //001
1751 SQ_ALU_SCL_122
, //010
1753 SQ_ALU_SCL_221
, //011
1754 SQ_ALU_SCL_212
, //100
1755 SQ_ALU_SCL_122
, //101
1757 SQ_ALU_SCL_122
, //110
1758 SQ_ALU_SCL_122
}; //111
/*
 * reserve_cfile: reserve a constant-file read port for (sel, chan).
 *
 * What the visible code does: scans ports 3 down to 0, treating a negative
 * hw_cfile_addr entry as free and comparing used entries against sel and
 * chan.  If the pair is already reserved nothing is done; otherwise the
 * pair is stored in the port recorded in res_empty; if no port is
 * available an error is reported.
 *
 * NOTE(review): the parameter lines after pAsm and the statements that
 * assign res_match / res_empty inside the loop are not visible in this
 * listing.  The error message contains literal "$sel"/"$chan" text rather
 * than printf-style conversions.
 */
1760 GLboolean
reserve_cfile(r700_AssemblerBase
* pAsm
,
1764 int res_match
= (-1);
1765 int res_empty
= (-1);
1769 for (res
=3; res
>=0; res
--)
1771 if(pAsm
->hw_cfile_addr
[ res
] < 0)
1775 else if( (pAsm
->hw_cfile_addr
[res
] == (int)sel
)
1777 (pAsm
->hw_cfile_chan
[ res
] == (int) chan
) )
1785 // Read for this scalar component already reserved, nothing to do here.
1788 else if(res_empty
>= 0)
1790 pAsm
->hw_cfile_addr
[ res_empty
] = sel
;
1791 pAsm
->hw_cfile_chan
[ res_empty
] = chan
;
1795 radeon_error("All cfile read ports are used, cannot reference C$sel, channel $chan.\n");
1801 GLboolean
reserve_gpr(r700_AssemblerBase
* pAsm
, GLuint sel
, GLuint chan
, GLuint cycle
)
1803 if(pAsm
->hw_gpr
[cycle
][chan
] < 0)
1805 pAsm
->hw_gpr
[cycle
][chan
] = sel
;
1807 else if(pAsm
->hw_gpr
[cycle
][chan
] != (int)sel
)
1809 radeon_error("Another scalar operation has already used GPR read port for given channel\n");
1816 GLboolean
cycle_for_scalar_bank_swizzle(const int swiz
, const int sel
, GLuint
* pCycle
)
1820 case SQ_ALU_SCL_210
:
1822 int table
[3] = {2, 1, 0};
1823 *pCycle
= table
[sel
];
1827 case SQ_ALU_SCL_122
:
1829 int table
[3] = {1, 2, 2};
1830 *pCycle
= table
[sel
];
1834 case SQ_ALU_SCL_212
:
1836 int table
[3] = {2, 1, 2};
1837 *pCycle
= table
[sel
];
1841 case SQ_ALU_SCL_221
:
1843 int table
[3] = {2, 2, 1};
1844 *pCycle
= table
[sel
];
1849 radeon_error("Bad Scalar bank swizzle value\n");
1856 GLboolean
cycle_for_vector_bank_swizzle(const int swiz
, const int sel
, GLuint
* pCycle
)
1860 case SQ_ALU_VEC_012
:
1862 int table
[3] = {0, 1, 2};
1863 *pCycle
= table
[sel
];
1866 case SQ_ALU_VEC_021
:
1868 int table
[3] = {0, 2, 1};
1869 *pCycle
= table
[sel
];
1872 case SQ_ALU_VEC_120
:
1874 int table
[3] = {1, 2, 0};
1875 *pCycle
= table
[sel
];
1878 case SQ_ALU_VEC_102
:
1880 int table
[3] = {1, 0, 2};
1881 *pCycle
= table
[sel
];
1884 case SQ_ALU_VEC_201
:
1886 int table
[3] = {2, 0, 1};
1887 *pCycle
= table
[sel
];
1890 case SQ_ALU_VEC_210
:
1892 int table
[3] = {2, 1, 0};
1893 *pCycle
= table
[sel
];
1897 radeon_error("Bad Vec bank swizzle value\n");
/*
 * check_scalar: choose the bank swizzle for a scalar ALU instruction and
 * reserve the read ports its sources need.
 *
 * What the visible code does:
 *   - reads every operand's properties with get_src_properties();
 *   - builds a 3-bit key from which operands are constants and stores
 *     BANK_SWIZZLE_SCL[key] in the instruction;
 *   - first pass: for constant operands that are cfile constants, calls
 *     reserve_cfile() (its result is not checked here);
 *   - second pass: obtains the cycle for each operand with
 *     cycle_for_scalar_bank_swizzle() and, when cycle < const_count,
 *     reserves the GPR port with reserve_gpr().
 *
 * NOTE(review): several lines are missing from this listing, including
 * local declarations, the arguments of get_src_properties(), the statement
 * that updates const_count and the condition guarding the second pass.
 */
1905 GLboolean
check_scalar(r700_AssemblerBase
* pAsm
,
1906 R700ALUInstruction
* alu_instruction_ptr
)
1909 GLuint bank_swizzle
;
1910 GLuint const_count
= 0;
1919 BITS src_sel
[3] = {0,0,0};
1920 BITS src_chan
[3] = {0,0,0};
1921 BITS src_rel
[3] = {0,0,0};
1922 BITS src_neg
[3] = {0,0,0};
1926 GLuint number_of_operands
= r700GetNumOperands(pAsm
);
1928 for (src
=0; src
<number_of_operands
; src
++)
1930 get_src_properties(alu_instruction_ptr
,
/* Key bits: src0 constant -> 4, src1 constant -> 2, src2 constant -> 1. */
1939 swizzle_key
= ( (is_const( src_sel
[0] ) ? 4 : 0) +
1940 (is_const( src_sel
[1] ) ? 2 : 0) +
1941 (is_const( src_sel
[2] ) ? 1 : 0) );
1943 alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
= BANK_SWIZZLE_SCL
[ swizzle_key
];
/* Pass 1: constants. */
1945 for (src
=0; src
<number_of_operands
; src
++)
1947 sel
= src_sel
[src
];
1948 chan
= src_chan
[src
];
1949 rel
= src_rel
[src
];
1950 neg
= src_neg
[src
];
1952 if (is_const( sel
))
1954 // Any constant, including literal and inline constants
1957 if (is_cfile( sel
))
1959 reserve_cfile(pAsm
, sel
, chan
);
/* Pass 2: GPR read ports. */
1965 for (src
=0; src
<number_of_operands
; src
++)
1967 sel
= src_sel
[src
];
1968 chan
= src_chan
[src
];
1969 rel
= src_rel
[src
];
1970 neg
= src_neg
[src
];
1974 bank_swizzle
= alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
;
1976 if( GL_FALSE
== cycle_for_scalar_bank_swizzle(bank_swizzle
, src
, &cycle
) )
1981 if(cycle
< const_count
)
1983 if( GL_FALSE
== reserve_gpr(pAsm
, sel
, chan
, cycle
) )
/*
 * check_vector: choose the bank swizzle for a vector ALU instruction and
 * reserve the read ports its sources need.
 *
 * What the visible code does:
 *   - reads every operand's properties with get_src_properties();
 *   - builds a 3-bit key from which operands are constants and stores
 *     BANK_SWIZZLE_VEC[key] in the instruction;
 *   - for each operand, obtains the cycle with
 *     cycle_for_vector_bank_swizzle() and reserves the GPR port with
 *     reserve_gpr(); an operand whose sel and chan equal those of source 0
 *     is special-cased (the first part of that condition is not visible);
 *   - constant operands go through reserve_cfile().
 *
 * NOTE(review): several lines are missing from this listing, including
 * local declarations, the arguments of get_src_properties() and the
 * conditions that open the GPR / cfile branches.
 */
1994 GLboolean
check_vector(r700_AssemblerBase
* pAsm
,
1995 R700ALUInstruction
* alu_instruction_ptr
)
1998 GLuint bank_swizzle
;
1999 GLuint const_count
= 0;
2008 BITS src_sel
[3] = {0,0,0};
2009 BITS src_chan
[3] = {0,0,0};
2010 BITS src_rel
[3] = {0,0,0};
2011 BITS src_neg
[3] = {0,0,0};
2015 GLuint number_of_operands
= r700GetNumOperands(pAsm
);
2017 for (src
=0; src
<number_of_operands
; src
++)
2019 get_src_properties(alu_instruction_ptr
,
/* Key bits: src0 constant -> 4, src1 constant -> 2, src2 constant -> 1. */
2028 swizzle_key
= ( (is_const( src_sel
[0] ) ? 4 : 0) +
2029 (is_const( src_sel
[1] ) ? 2 : 0) +
2030 (is_const( src_sel
[2] ) ? 1 : 0)
2033 alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
= BANK_SWIZZLE_VEC
[swizzle_key
];
2035 for (src
=0; src
<number_of_operands
; src
++)
2037 sel
= src_sel
[src
];
2038 chan
= src_chan
[src
];
2039 rel
= src_rel
[src
];
2040 neg
= src_neg
[src
];
2043 bank_swizzle
= alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
;
2047 if( GL_FALSE
== cycle_for_vector_bank_swizzle(bank_swizzle
, src
, &cycle
) )
2053 (sel
== src_sel
[0]) &&
2054 (chan
== src_chan
[0]) )
2059 if( GL_FALSE
== reserve_gpr(pAsm
, sel
, chan
, cycle
) )
2065 else if( is_const(sel
) )
2071 if( GL_FALSE
== reserve_cfile(pAsm
, sel
, chan
) )
/*
 * assemble_alu_instruction: expand the assembler's current operation
 * (pAsm->D / pAsm->S[]) into hardware ALU instructions and append them with
 * add_alu_instruction().
 *
 * What the visible code does:
 *   - a "math" destination (D.dst.math == 1) yields a single scalar
 *     operation, anything else yields four (one per channel);
 *   - reduction opcodes request 4 contiguous slots for the first
 *     instruction; the request is cleared to 0 afterwards;
 *   - for each scalar operation an R700ALUInstruction is allocated and
 *     initialised, sources 0 and 1 are encoded with assemble_alu_src(), and
 *     for op3 opcodes source 2 as well;
 *   - "last" is set on every instruction for single-scalar or split
 *     operations, otherwise only on channel 3;
 *   - only TEMPORARY and OUT destinations are accepted;
 *   - for single-scalar operations the destination channel is the first
 *     enabled write-mask bit (x, y, z, then w);
 *   - op2 encodings fill either the .f6 or the .f layout of m_Word1_OP2;
 *   - after queuing, check_scalar() or check_vector() validates read ports.
 *
 * NOTE(review): the read-port counting code near the top uses j, pSource,
 * chan_counter and channel_swizzle whose declarations are commented out
 * just above, so it is presumably compiled out by preprocessor lines that
 * are not visible in this listing.  The conditions selecting the .f6 vs.
 * .f layout and guarding the source-1 block are not visible either.
 */
2082 GLboolean
assemble_alu_instruction(r700_AssemblerBase
*pAsm
)
2084 GLuint number_of_scalar_operations
;
2085 GLboolean is_single_scalar_operation
;
2086 GLuint scalar_channel_index
;
2088 PVSSRC
* pcurrent_source
;
2089 int current_source_index
;
2090 GLuint contiguous_slots_needed
;
2092 GLuint uNumSrc
= r700GetNumOperands(pAsm
);
2093 //GLuint channel_swizzle, j;
2094 //GLuint chan_counter[4] = {0, 0, 0, 0};
2095 //PVSSRC * pSource[3];
2096 GLboolean bSplitInst
= GL_FALSE
;
2098 if (1 == pAsm
->D
.dst
.math
)
2100 is_single_scalar_operation
= GL_TRUE
;
2101 number_of_scalar_operations
= 1;
2105 is_single_scalar_operation
= GL_FALSE
;
2106 number_of_scalar_operations
= 4;
2108 /* current assembler doesn't do more than 1 register per source */
2110 /* check read port, only very preliminary algorithm, not count in
2111 src0/1 same comp case and prev slot repeat case; also not count relative
2112 addressing. TODO: improve performance. */
2113 for(j
=0; j
<uNumSrc
; j
++)
2115 pSource
[j
] = &(pAsm
->S
[j
].src
);
2117 for(scalar_channel_index
=0; scalar_channel_index
<4; scalar_channel_index
++)
2119 for(j
=0; j
<uNumSrc
; j
++)
2121 switch (scalar_channel_index
)
2123 case 0: channel_swizzle
= pSource
[j
]->swizzlex
; break;
2124 case 1: channel_swizzle
= pSource
[j
]->swizzley
; break;
2125 case 2: channel_swizzle
= pSource
[j
]->swizzlez
; break;
2126 case 3: channel_swizzle
= pSource
[j
]->swizzlew
; break;
2127 default: channel_swizzle
= SQ_SEL_MASK
; break;
2129 if ( ((pSource
[j
]->rtype
== SRC_REG_TEMPORARY
) ||
2130 (pSource
[j
]->rtype
== SRC_REG_INPUT
))
2131 && (channel_swizzle
<= SQ_SEL_W
) )
2133 chan_counter
[channel_swizzle
]++;
2137 if( (chan_counter
[SQ_SEL_X
] > 3)
2138 || (chan_counter
[SQ_SEL_Y
] > 3)
2139 || (chan_counter
[SQ_SEL_Z
] > 3)
2140 || (chan_counter
[SQ_SEL_W
] > 3) ) /* each chan bank has only 3 ports. */
2142 bSplitInst
= GL_TRUE
;
2147 contiguous_slots_needed
= 0;
/* Reduction opcodes need all four slots in the same clause. */
2149 if(GL_TRUE
== is_reduction_opcode(&(pAsm
->D
)) )
2151 contiguous_slots_needed
= 4;
/* Emit one hardware ALU instruction per scalar operation. */
2156 for (scalar_channel_index
=0;
2157 scalar_channel_index
< number_of_scalar_operations
;
2158 scalar_channel_index
++)
2160 R700ALUInstruction
* alu_instruction_ptr
= (R700ALUInstruction
*) CALLOC_STRUCT(R700ALUInstruction
);
2161 if (alu_instruction_ptr
== NULL
)
2165 Init_R700ALUInstruction(alu_instruction_ptr
);
2168 current_source_index
= 0;
2169 pcurrent_source
= &(pAsm
->S
[0].src
);
2171 if (GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2172 current_source_index
,
2174 scalar_channel_index
) )
2182 current_source_index
= 1;
2183 pcurrent_source
= &(pAsm
->S
[current_source_index
].src
);
2185 if (GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2186 current_source_index
,
2188 scalar_channel_index
) )
2195 alu_instruction_ptr
->m_Word0
.f
.index_mode
= SQ_INDEX_AR_X
;
2197 if( (is_single_scalar_operation
== GL_TRUE
)
2198 || (GL_TRUE
== bSplitInst
) )
2200 alu_instruction_ptr
->m_Word0
.f
.last
= 1;
2204 alu_instruction_ptr
->m_Word0
.f
.last
= (scalar_channel_index
== 3) ? 1 : 0;
2207 alu_instruction_ptr
->m_Word0
.f
.pred_sel
= 0x0;
2208 alu_instruction_ptr
->m_Word1_OP2
.f
.update_pred
= 0x0;
2209 alu_instruction_ptr
->m_Word1_OP2
.f
.update_execute_mask
= 0x0;
2212 if( (pAsm
->D
.dst
.rtype
== DST_REG_TEMPORARY
) ||
2213 (pAsm
->D
.dst
.rtype
== DST_REG_OUT
) )
2215 alu_instruction_ptr
->m_Word1
.f
.dst_gpr
= pAsm
->D
.dst
.reg
;
2219 radeon_error("Only temp destination registers supported for ALU dest regs.\n");
2223 alu_instruction_ptr
->m_Word1
.f
.dst_rel
= SQ_ABSOLUTE
; //D.rtype
2225 if ( is_single_scalar_operation
== GL_TRUE
)
2227 // Override scalar_channel_index since only one scalar value will be written
2228 if(pAsm
->D
.dst
.writex
)
2230 scalar_channel_index
= 0;
2232 else if(pAsm
->D
.dst
.writey
)
2234 scalar_channel_index
= 1;
2236 else if(pAsm
->D
.dst
.writez
)
2238 scalar_channel_index
= 2;
2240 else if(pAsm
->D
.dst
.writew
)
2242 scalar_channel_index
= 3;
2246 alu_instruction_ptr
->m_Word1
.f
.dst_chan
= scalar_channel_index
;
2248 alu_instruction_ptr
->m_Word1
.f
.clamp
= pAsm
->pILInst
[pAsm
->uiCurInst
].SaturateMode
;
2250 if (pAsm
->D
.dst
.op3
)
2254 alu_instruction_ptr
->m_Word1_OP3
.f
.alu_inst
= pAsm
->D
.dst
.opcode
;
2256 //There's 3rd src for op3
2257 current_source_index
= 2;
2258 pcurrent_source
= &(pAsm
->S
[current_source_index
].src
);
2260 if ( GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2261 current_source_index
,
2263 scalar_channel_index
) )
/* OP2 encoding, .f6 layout of m_Word1_OP2. */
2273 alu_instruction_ptr
->m_Word1_OP2
.f6
.alu_inst
= pAsm
->D
.dst
.opcode
;
2275 alu_instruction_ptr
->m_Word1_OP2
.f6
.src0_abs
= 0x0;
2276 alu_instruction_ptr
->m_Word1_OP2
.f6
.src1_abs
= 0x0;
2278 //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
2279 //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0;
2280 switch (scalar_channel_index
)
2283 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writex
;
2286 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writey
;
2289 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writez
;
2292 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writew
;
2295 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= 1; //SQ_SEL_MASK;
2298 alu_instruction_ptr
->m_Word1_OP2
.f6
.omod
= SQ_ALU_OMOD_OFF
;
/* OP2 encoding, .f layout of m_Word1_OP2. */
2302 alu_instruction_ptr
->m_Word1_OP2
.f
.alu_inst
= pAsm
->D
.dst
.opcode
;
2304 alu_instruction_ptr
->m_Word1_OP2
.f
.src0_abs
= 0x0;
2305 alu_instruction_ptr
->m_Word1_OP2
.f
.src1_abs
= 0x0;
2307 //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2308 //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2309 switch (scalar_channel_index
)
2312 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writex
;
2315 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writey
;
2318 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writez
;
2321 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writew
;
2324 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= 1; //SQ_SEL_MASK;
2327 alu_instruction_ptr
->m_Word1_OP2
.f
.omod
= SQ_ALU_OMOD_OFF
;
2331 if(GL_FALSE
== add_alu_instruction(pAsm
, alu_instruction_ptr
, contiguous_slots_needed
) )
2337 * Judge the type of current instruction, is it vector or scalar
2340 if (is_single_scalar_operation
)
2342 if(GL_FALSE
== check_scalar(pAsm
, alu_instruction_ptr
) )
2349 if(GL_FALSE
== check_vector(pAsm
, alu_instruction_ptr
) )
2355 contiguous_slots_needed
= 0;
/*
 * next_ins: emit the operation currently described by pAsm->D / pAsm->S[]
 * and reset the per-instruction state.
 *
 * What the visible code does:
 *   - when pAsm->is_tex is set, assembles a TEX instruction; rectangle
 *     textures (TexSrcTarget == TEXTURE_RECT_INDEX) pass GL_FALSE for
 *     "normalized", all others GL_TRUE;
 *   - otherwise assembles an ALU instruction;
 *   - for DST_REG_OUT destinations, records which output channels were
 *     written in pAsm->pucOutMask: 0xF for OP3 instructions (no write
 *     mask), otherwise the IL instruction's WriteMask is OR-ed in;
 *   - clears S[0..2].bits, is_tex and need_tex_barrier.
 *
 * NOTE(review): the condition that selects the 0xF branch and the first
 * line of the reset sequence are not visible in this listing.
 */
2361 GLboolean
next_ins(r700_AssemblerBase
*pAsm
)
2363 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
2365 if( GL_TRUE
== pAsm
->is_tex
)
2367 if (pILInst
->TexSrcTarget
== TEXTURE_RECT_INDEX
) {
2368 if( GL_FALSE
== assemble_tex_instruction(pAsm
, GL_FALSE
) )
2370 radeon_error("Error assembling TEX instruction\n");
2374 if( GL_FALSE
== assemble_tex_instruction(pAsm
, GL_TRUE
) )
2376 radeon_error("Error assembling TEX instruction\n");
2383 if( GL_FALSE
== assemble_alu_instruction(pAsm
) )
2385 radeon_error("Error assembling ALU instruction\n");
/* Track which channels of each export register have been written. */
2390 if(pAsm
->D
.dst
.rtype
== DST_REG_OUT
)
2394 // There is no mask for OP3 instructions, so all channels are written
2395 pAsm
->pucOutMask
[pAsm
->D
.dst
.reg
- pAsm
->starting_export_register_number
] = 0xF;
2399 pAsm
->pucOutMask
[pAsm
->D
.dst
.reg
- pAsm
->starting_export_register_number
]
2400 |= (unsigned char)pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
;
2404 //reset for next inst.
2406 pAsm
->S
[0].bits
= 0;
2407 pAsm
->S
[1].bits
= 0;
2408 pAsm
->S
[2].bits
= 0;
2409 pAsm
->is_tex
= GL_FALSE
;
2410 pAsm
->need_tex_barrier
= GL_FALSE
;
/*
 * assemble_math_function: emit a scalar math opcode followed by a MOV that
 * replicates the result.
 *
 * What the visible code does:
 *   1. "opcode tmp.x, a.x": the given opcode is issued as a math (single
 *      scalar) operation writing channel x of a helper register obtained
 *      from gethelpr(), with IL source 0 as operand;
 *   2. a SQ_OP2_INST_MOV copies tmp.x (swizzle SQ_SEL_X, no negation) to
 *      the real destination set up by assemble_dst().
 *
 * NOTE(review): the source register type is assigned DST_REG_TEMPORARY, a
 * destination-type constant; other functions in this file use
 * SRC_REG_TEMPORARY for the same purpose - confirm and align.
 * NOTE(review): the declaration of tmp and the lines before gethelpr() are
 * not visible in this listing.
 */
2414 GLboolean
assemble_math_function(r700_AssemblerBase
* pAsm
, BITS opcode
)
2420 tmp
= gethelpr(pAsm
);
2422 // opcode tmp.x, a.x
2425 pAsm
->D
.dst
.opcode
= opcode
;
2426 pAsm
->D
.dst
.math
= 1;
2428 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2429 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2430 pAsm
->D
.dst
.reg
= tmp
;
2431 pAsm
->D
.dst
.writex
= 1;
2433 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2438 if ( GL_FALSE
== next_ins(pAsm
) )
2443 // Now replicate result to all necessary channels in destination
2444 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
2446 if( GL_FALSE
== assemble_dst(pAsm
) )
2451 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2452 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
2453 pAsm
->S
[0].src
.reg
= tmp
;
2455 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
2456 noneg_PVSSRC(&(pAsm
->S
[0].src
));
2458 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_ABS: absolute value, emitted as MAX(a, -a).  Source 1 is a copy
 * of source 0 with its negate bits flipped, and the opcode is
 * SQ_OP2_INST_MAX.
 *
 * NOTE(review): the lines between the signature and the opcode assignment
 * are not visible in this listing.
 */
2466 GLboolean
assemble_ABS(r700_AssemblerBase
*pAsm
)
2470 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
2472 if( GL_FALSE
== assemble_dst(pAsm
) )
2476 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
/* Second operand = first operand negated. */
2481 pAsm
->S
[1].bits
= pAsm
->S
[0].bits
;
2482 flipneg_PVSSRC(&(pAsm
->S
[1].src
));
2484 if ( GL_FALSE
== next_ins(pAsm
) )
2492 GLboolean
assemble_ADD(r700_AssemblerBase
*pAsm
)
2494 if( GL_FALSE
== checkop2(pAsm
) )
2499 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_ADD
;
2501 if( GL_FALSE
== assemble_dst(pAsm
) )
2506 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2511 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
2516 if(pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_SUB
)
2518 flipneg_PVSSRC(&(pAsm
->S
[1].src
));
2521 if( GL_FALSE
== next_ins(pAsm
) )
2529 GLboolean
assemble_ARL(r700_AssemblerBase
*pAsm
)
2530 { /* TODO: ar values dont' persist between clauses */
2531 if( GL_FALSE
== checkop1(pAsm
) )
2536 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOVA_FLOOR
;
2537 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2538 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2539 pAsm
->D
.dst
.reg
= 0;
2540 pAsm
->D
.dst
.writex
= 0;
2541 pAsm
->D
.dst
.writey
= 0;
2542 pAsm
->D
.dst
.writez
= 0;
2543 pAsm
->D
.dst
.writew
= 0;
2545 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2550 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_BAD: report an opcode that this assembler does not implement.
 * opcode_str is the opcode name printed in the error message.
 *
 * NOTE(review): the return statement is not visible in this listing;
 * presumably GL_FALSE - confirm.
 */
2558 GLboolean
assemble_BAD(char *opcode_str
)
2560 radeon_error("Not yet implemented instruction (%s)\n", opcode_str
);
/*
 * assemble_CMP: emit CMP as the three-operand SQ_OP3_INST_CNDGE.
 *
 * What the visible code does:
 *   - OP3 instructions have no write mask, so when the IL write mask is
 *     not 0xF the result goes to a helper register (all channels enabled)
 *     and a trailing MOV copies it to the real destination, where
 *     assemble_dst() applies the mask;
 *   - otherwise the real destination is used directly;
 *   - sources are loaded with assemble_src(pAsm, 0, -1),
 *     assemble_src(pAsm, 2, 1) and assemble_src(pAsm, 1, 2), i.e. IL
 *     operands 1 and 2 are routed with swapped indices.
 *     NOTE(review): presumably the third argument is the target slot in
 *     pAsm->S[] - confirm against assemble_src().
 *
 * NOTE(review): the declaration and initial value of tmp are not visible
 * in this listing.
 */
2564 GLboolean
assemble_CMP(r700_AssemblerBase
*pAsm
)
2568 if( GL_FALSE
== checkop3(pAsm
) )
2573 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_CNDGE
;
2574 pAsm
->D
.dst
.op3
= 1;
2578 if(0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
2580 //OP3 has no support for write mask
2581 tmp
= gethelpr(pAsm
);
2583 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2584 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2585 pAsm
->D
.dst
.reg
= tmp
;
2587 nomask_PVSDST(&(pAsm
->D
.dst
));
2591 if( GL_FALSE
== assemble_dst(pAsm
) )
2597 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2602 if( GL_FALSE
== assemble_src(pAsm
, 2, 1) )
2607 if( GL_FALSE
== assemble_src(pAsm
, 1, 2) )
2612 if ( GL_FALSE
== next_ins(pAsm
) )
/* Partial write mask: copy the helper register to the masked destination. */
2617 if (0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
2619 if( GL_FALSE
== assemble_dst(pAsm
) )
2624 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
2627 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2628 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
2629 pAsm
->S
[0].src
.reg
= tmp
;
2631 noneg_PVSSRC(&(pAsm
->S
[0].src
));
2632 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
2634 if( GL_FALSE
== next_ins(pAsm
) )
2643 GLboolean
assemble_COS(r700_AssemblerBase
*pAsm
)
2645 return assemble_math_function(pAsm
, SQ_OP2_INST_COS
);
2648 GLboolean
assemble_DOT(r700_AssemblerBase
*pAsm
)
2650 if( GL_FALSE
== checkop2(pAsm
) )
2655 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_DOT4
;
2657 if( GL_FALSE
== assemble_dst(pAsm
) )
2662 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2667 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
2672 if(OPCODE_DP3
== pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
)
2674 zerocomp_PVSSRC(&(pAsm
->S
[0].src
), 3);
2675 zerocomp_PVSSRC(&(pAsm
->S
[1].src
), 3);
2677 else if(pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_DPH
)
2679 onecomp_PVSSRC(&(pAsm
->S
[0].src
), 3);
2682 if ( GL_FALSE
== next_ins(pAsm
) )
2690 GLboolean
assemble_DST(r700_AssemblerBase
*pAsm
)
2692 if( GL_FALSE
== checkop2(pAsm
) )
2697 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
2699 if( GL_FALSE
== assemble_dst(pAsm
) )
2704 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2709 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
2714 onecomp_PVSSRC(&(pAsm
->S
[0].src
), 0);
2715 onecomp_PVSSRC(&(pAsm
->S
[0].src
), 3);
2717 onecomp_PVSSRC(&(pAsm
->S
[1].src
), 0);
2718 onecomp_PVSSRC(&(pAsm
->S
[1].src
), 2);
2720 if ( GL_FALSE
== next_ins(pAsm
) )
2728 GLboolean
assemble_EX2(r700_AssemblerBase
*pAsm
)
2730 return assemble_math_function(pAsm
, SQ_OP2_INST_EXP_IEEE
);
2733 GLboolean
assemble_EXP(r700_AssemblerBase
*pAsm
)
2739 tmp
= gethelpr(pAsm
);
2744 if (pAsm
->pILInst
->DstReg
.WriteMask
& 0x1) {
2745 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FLOOR
;
2747 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2748 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2749 pAsm
->D
.dst
.reg
= tmp
;
2750 pAsm
->D
.dst
.writex
= 1;
2752 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2757 if( GL_FALSE
== next_ins(pAsm
) )
2762 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
2763 pAsm
->D
.dst
.math
= 1;
2765 if( GL_FALSE
== assemble_dst(pAsm
) )
2770 pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writez
= pAsm
->D
.dst
.writew
= 0;
2772 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2773 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
2774 pAsm
->S
[0].src
.reg
= tmp
;
2776 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
2777 noneg_PVSSRC(&(pAsm
->S
[0].src
));
2779 if( GL_FALSE
== next_ins(pAsm
) )
2787 if ((pAsm
->pILInst
->DstReg
.WriteMask
>> 1) & 0x1) {
2788 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FRACT
;
2790 if( GL_FALSE
== assemble_dst(pAsm
) )
2795 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2800 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writez
= pAsm
->D
.dst
.writew
= 0;
2802 if( GL_FALSE
== next_ins(pAsm
) )
2810 if ((pAsm
->pILInst
->DstReg
.WriteMask
>> 2) & 0x1) {
2811 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
2812 pAsm
->D
.dst
.math
= 1;
2814 if( GL_FALSE
== assemble_dst(pAsm
) )
2819 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2824 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writew
= 0;
2826 if( GL_FALSE
== next_ins(pAsm
) )
2834 if ((pAsm
->pILInst
->DstReg
.WriteMask
>> 3) & 0x1) {
2835 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
2837 if( GL_FALSE
== assemble_dst(pAsm
) )
2842 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writez
= 0;
2844 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2845 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
2846 pAsm
->S
[0].src
.reg
= tmp
;
2848 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_1
);
2849 noneg_PVSSRC(&(pAsm
->S
[0].src
));
2851 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_FLR: component-wise floor.  Sets opcode SQ_OP2_INST_FLOOR, then
 * assembles the destination, IL source 0 and emits the instruction.
 *
 * NOTE(review): the lines between the signature and the opcode assignment
 * are not visible in this listing.
 */
2860 GLboolean
assemble_FLR(r700_AssemblerBase
*pAsm
)
2864 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FLOOR
;
2866 if ( GL_FALSE
== assemble_dst(pAsm
) )
2871 if ( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2876 if ( GL_FALSE
== next_ins(pAsm
) )
2884 GLboolean
assemble_FLR_INT(r700_AssemblerBase
*pAsm
)
2886 return assemble_math_function(pAsm
, SQ_OP2_INST_FLT_TO_INT
);
/*
 * assemble_FRC: component-wise fractional part.  Sets opcode
 * SQ_OP2_INST_FRACT, then assembles the destination, IL source 0 and emits
 * the instruction.
 *
 * NOTE(review): the lines between the signature and the opcode assignment
 * are not visible in this listing.
 */
2889 GLboolean
assemble_FRC(r700_AssemblerBase
*pAsm
)
2893 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FRACT
;
2895 if ( GL_FALSE
== assemble_dst(pAsm
) )
2900 if ( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2905 if ( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_KIL: emit a fragment kill as SQ_OP2_INST_KILLGT.
 *
 * What the visible code does:
 *   - the destination is temporary register 0 with every write-mask bit
 *     cleared;
 *   - source 0 is set to temporary register 0 with swizzle SQ_SEL_0 and no
 *     negation;
 *   - the IL operand is loaded with assemble_src(pAsm, 0, 1).
 *     NOTE(review): presumably this places it in the second source slot so
 *     the comparison is "0 > operand" - confirm against assemble_src();
 *   - after the instruction is emitted, pR700Shader->killIsUsed is set.
 *
 * NOTE(review): the lines between the TODO comment and the opcode
 * assignment are not visible in this listing.
 */
2913 GLboolean
assemble_KIL(r700_AssemblerBase
*pAsm
)
2915 /* TODO: doc says KILL has to be last(end) ALU clause */
2919 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_KILLGT
;
2921 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2922 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2923 pAsm
->D
.dst
.reg
= 0;
2924 pAsm
->D
.dst
.writex
= 0;
2925 pAsm
->D
.dst
.writey
= 0;
2926 pAsm
->D
.dst
.writez
= 0;
2927 pAsm
->D
.dst
.writew
= 0;
2929 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2930 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
2931 pAsm
->S
[0].src
.reg
= 0;
2933 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_0
);
2934 noneg_PVSSRC(&(pAsm
->S
[0].src
));
2936 if ( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
2941 if ( GL_FALSE
== next_ins(pAsm
) )
/* Record that this shader can discard fragments. */
2946 pAsm
->pR700Shader
->killIsUsed
= GL_TRUE
;
2951 GLboolean
assemble_LG2(r700_AssemblerBase
*pAsm
)
2953 return assemble_math_function(pAsm
, SQ_OP2_INST_LOG_IEEE
);
/*
 * assemble_LRP: linear interpolation, emitted as three instructions through
 * a helper register tmp.
 *
 * What the visible code does:
 *   1. ADD into tmp (all channels): operands loaded with
 *      assemble_src(pAsm, 1, 0) and assemble_src(pAsm, 2, 1); S[1] is then
 *      negated.
 *   2. MULADD (OP3) into tmp: S[0] is set to tmp (temporary, no swizzle),
 *      then assemble_src(pAsm, 0, 1) and assemble_src(pAsm, 2, -1) load the
 *      other two operands.
 *   3. MOV from tmp (no swizzle) to the real destination set up by
 *      assemble_dst().
 *
 * NOTE(review): presumably steps 1 and 2 compute tmp = b - c and then
 * tmp * a + c - confirm the slot semantics of assemble_src()'s third
 * argument before relying on this.
 * NOTE(review): the declaration of tmp is not visible in this listing.
 */
2956 GLboolean
assemble_LRP(r700_AssemblerBase
*pAsm
)
2960 if( GL_FALSE
== checkop3(pAsm
) )
2965 tmp
= gethelpr(pAsm
);
/* Step 1: ADD into the helper register. */
2967 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_ADD
;
2969 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2970 pAsm
->D
.dst
.reg
= tmp
;
2971 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2972 nomask_PVSDST(&(pAsm
->D
.dst
));
2975 if( GL_FALSE
== assemble_src(pAsm
, 1, 0) )
2980 if ( GL_FALSE
== assemble_src(pAsm
, 2, 1) )
2985 neg_PVSSRC(&(pAsm
->S
[1].src
));
2987 if( GL_FALSE
== next_ins(pAsm
) )
/* Step 2: MULADD into the helper register, with the helper as first operand. */
2992 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
2993 pAsm
->D
.dst
.op3
= 1;
2995 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2996 pAsm
->D
.dst
.reg
= tmp
;
2997 nomask_PVSDST(&(pAsm
->D
.dst
));
2998 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3000 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3001 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3002 pAsm
->S
[0].src
.reg
= tmp
;
3003 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3006 if( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
3010 if( GL_FALSE
== assemble_src(pAsm
, 2, -1) )
3015 if( GL_FALSE
== next_ins(pAsm
) )
/* Step 3: MOV the helper register to the real destination. */
3020 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3022 if( GL_FALSE
== assemble_dst(pAsm
) )
3027 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3028 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3029 pAsm
->S
[0].src
.reg
= tmp
;
3030 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3032 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_LOG - expand LOG into a sequence of ALU instructions that use
 * three helper registers (tmp1, tmp2, tmp3) obtained from gethelpr().
 *
 * Visible sequence:
 *   MAX   tmp1.x <- S[0], S[1]   (S[1] is a copy of S[0] with the negate
 *                                 flag flipped)
 *   LG2   tmp2.x <- tmp1.x
 *   FLOOR tmp3.x <- tmp2.x
 *   MOV   dst.x  <- tmp3.x
 *   ADD   tmp3.x <- tmp2.x, -tmp3.x
 *   EX2   dst.y  <- tmp3.x
 *   MOV   dst.z  <- tmp2.x
 *   MOV   dst.w  <- tmp1 with swizzle SQ_SEL_1
 * For the dst.* writes the destination is built by assemble_dst() and the
 * other three write-enable bits are then cleared.
 *
 * NOTE(review): most source operands below set .rtype from
 * DST_REG_TEMPORARY, whereas other functions in this file use
 * SRC_REG_TEMPORARY for sources - confirm the two enumerators are
 * interchangeable.
 * NOTE(review): failure-path statements and the final return are not visible
 * here - confirm.
 */
3040 GLboolean
assemble_LOG(r700_AssemblerBase
*pAsm
)
3042 BITS tmp1
, tmp2
, tmp3
;
3046 tmp1
= gethelpr(pAsm
);
3047 tmp2
= gethelpr(pAsm
);
3048 tmp3
= gethelpr(pAsm
);
3050 // FIXME: The hardware can do fabs() directly on input
3051 // elements, but the compiler doesn't have the
3052 // capability to use that.
3054 // MAX tmp1.x, a.x, -a.x (fabs(a.x))
3056 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
3058 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3059 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3060 pAsm
->D
.dst
.reg
= tmp1
;
3061 pAsm
->D
.dst
.writex
= 1;
3063 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
/* Second operand is the first one with its negate flag flipped. */
3068 pAsm
->S
[1].bits
= pAsm
->S
[0].bits
;
3069 flipneg_PVSSRC(&(pAsm
->S
[1].src
));
3071 if ( GL_FALSE
== next_ins(pAsm
) )
3078 // LG2 tmp2.x, tmp1.x
3079 // FLOOR tmp3.x, tmp2.x
3080 // MOV dst.x, tmp3.x
3081 // ADD tmp3.x, tmp2.x, -tmp3.x
3082 // EX2 dst.y, tmp3.x
3083 // MOV dst.z, tmp2.x
3086 // LG2 tmp2.x, tmp1.x
3087 // FLOOR tmp3.x, tmp2.x
3089 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_LOG_IEEE
;
3090 pAsm
->D
.dst
.math
= 1;
3092 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3093 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3094 pAsm
->D
.dst
.reg
= tmp2
;
3095 pAsm
->D
.dst
.writex
= 1;
3097 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3098 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3099 pAsm
->S
[0].src
.reg
= tmp1
;
3101 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3102 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3104 if( GL_FALSE
== next_ins(pAsm
) )
3109 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FLOOR
;
3111 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3112 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3113 pAsm
->D
.dst
.reg
= tmp3
;
3114 pAsm
->D
.dst
.writex
= 1;
3116 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3117 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3118 pAsm
->S
[0].src
.reg
= tmp2
;
3120 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3121 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3123 if( GL_FALSE
== next_ins(pAsm
) )
3128 // MOV dst.x, tmp3.x
3130 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3132 if( GL_FALSE
== assemble_dst(pAsm
) )
/* Restrict the write to x by clearing the y, z and w enables. */
3137 pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writez
= pAsm
->D
.dst
.writew
= 0;
3139 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3140 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3141 pAsm
->S
[0].src
.reg
= tmp3
;
3143 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3144 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3146 if( GL_FALSE
== next_ins(pAsm
) )
3151 // ADD tmp3.x, tmp2.x, -tmp3.x
3152 // EX2 dst.y, tmp3.x
3154 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_ADD
;
3156 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3157 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3158 pAsm
->D
.dst
.reg
= tmp3
;
3159 pAsm
->D
.dst
.writex
= 1;
3161 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3162 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3163 pAsm
->S
[0].src
.reg
= tmp2
;
3165 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3166 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3168 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
3169 pAsm
->S
[1].src
.rtype
= DST_REG_TEMPORARY
;
3170 pAsm
->S
[1].src
.reg
= tmp3
;
3172 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_X
);
3173 neg_PVSSRC(&(pAsm
->S
[1].src
));
3175 if( GL_FALSE
== next_ins(pAsm
) )
3180 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
3181 pAsm
->D
.dst
.math
= 1;
3183 if( GL_FALSE
== assemble_dst(pAsm
) )
/* Restrict the write to y by clearing the x, z and w enables. */
3188 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writez
= pAsm
->D
.dst
.writew
= 0;
3190 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3191 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3192 pAsm
->S
[0].src
.reg
= tmp3
;
3194 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3195 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3197 if( GL_FALSE
== next_ins(pAsm
) )
3202 // MOV dst.z, tmp2.x
3204 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3206 if( GL_FALSE
== assemble_dst(pAsm
) )
/* Restrict the write to z by clearing the x, y and w enables. */
3211 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writew
= 0;
3213 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3214 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3215 pAsm
->S
[0].src
.reg
= tmp2
;
3217 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3218 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3220 if( GL_FALSE
== next_ins(pAsm
) )
/* Final MOV: only w stays enabled; the source swizzle is SQ_SEL_1. */
3227 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3229 if( GL_FALSE
== assemble_dst(pAsm
) )
3234 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writez
= 0;
3236 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3237 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3238 pAsm
->S
[0].src
.reg
= tmp1
;
3240 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_1
);
3241 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3243 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_MAD - emit SQ_OP3_INST_MULADD for the current instruction,
 * routing the result through a helper register when required.
 *
 * bReplaceDst is raised in two visible situations:
 *   - the destination file is PROGRAM_TEMPORARY and one of the three sources
 *     is also a PROGRAM_TEMPORARY with the same Index;
 *   - the destination WriteMask is not 0xF (per the existing comment, OP3
 *     has no write-mask support).
 * When it is set, the MULADD writes all channels of a gethelpr() register
 * and a trailing MOV copies that register to the destination built by
 * assemble_dst().  The three sources are always built with
 * assemble_src(pAsm, n, -1) for n = 0..2.
 *
 * NOTE(review): an assemble_dst() call is also visible right after the
 * helper set-up; presumably it is the alternative branch of the
 * bReplaceDst test, but the keyword is not visible here - confirm.
 * NOTE(review): declarations of tmp and ii, failure-path statements and the
 * final return are not visible here - confirm.
 */
3251 GLboolean
assemble_MAD(struct r700_AssemblerBase
*pAsm
)
3254 GLboolean bReplaceDst
= GL_FALSE
;
3255 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
3257 if( GL_FALSE
== checkop3(pAsm
) )
3262 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
3263 pAsm
->D
.dst
.op3
= 1;
3267 if(PROGRAM_TEMPORARY
== pILInst
->DstReg
.File
)
3268 { /* TODO : more investigation on MAD src and dst using same register */
3269 for(ii
=0; ii
<3; ii
++)
3271 if( (PROGRAM_TEMPORARY
== pILInst
->SrcReg
[ii
].File
)
3272 && (pILInst
->DstReg
.Index
== pILInst
->SrcReg
[ii
].Index
) )
3274 bReplaceDst
= GL_TRUE
;
3279 if(0xF != pILInst
->DstReg
.WriteMask
)
3280 { /* OP3 has no support for write mask */
3281 bReplaceDst
= GL_TRUE
;
3284 if(GL_TRUE
== bReplaceDst
)
/* Redirect the MULADD result to a helper register, all channels on. */
3286 tmp
= gethelpr(pAsm
);
3288 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3289 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3290 pAsm
->D
.dst
.reg
= tmp
;
3292 nomask_PVSDST(&(pAsm
->D
.dst
));
3296 if( GL_FALSE
== assemble_dst(pAsm
) )
3302 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3307 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3312 if( GL_FALSE
== assemble_src(pAsm
, 2, -1) )
3317 if ( GL_FALSE
== next_ins(pAsm
) )
3322 if (GL_TRUE
== bReplaceDst
)
/* Copy the helper register into the real destination with a MOV. */
3324 if( GL_FALSE
== assemble_dst(pAsm
) )
3329 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3332 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3333 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3334 pAsm
->S
[0].src
.reg
= tmp
;
3336 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3337 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3339 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_LIT - expand LIT into five ALU instructions.
 *
 * The destination and source 0 are assembled once up front so that their
 * register number and type can be cached (dstReg/dstType, srcReg/srcType);
 * every later instruction re-applies those cached values by hand.
 *
 * Visible sequence:
 *   MOV     dst.xw <- helper register with all swizzles SQ_SEL_1
 *   MAX     dst.y  <- src (swizzle X), helper with all swizzles SQ_SEL_0
 *   LOG_CLAMPED dst.z <- src (swizzle Y)
 *   MUL_LIT tmp.x  <- src (swizzle W), dst (swizzle Z), src (swizzle X)
 *   EXP     dst.z  <- tmp (swizzle X)
 *
 * NOTE(review): in the MUL_LIT step S[1] reads dstReg with rtype hard-coded
 * to SRC_REG_TEMPORARY rather than derived from dstType - confirm this is
 * correct when the destination is not a temporary.
 * NOTE(review): failure-path statements and the final return are not visible
 * here - confirm.
 */
3349 GLboolean
assemble_LIT(r700_AssemblerBase
*pAsm
)
3351 unsigned int dstReg
;
3352 unsigned int dstType
;
3353 unsigned int srcReg
;
3354 unsigned int srcType
;
3356 int tmp
= gethelpr(pAsm
);
3358 if( GL_FALSE
== assemble_dst(pAsm
) )
3362 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
/* Cache the assembled destination/source identity for the steps below. */
3366 dstReg
= pAsm
->D
.dst
.reg
;
3367 dstType
= pAsm
->D
.dst
.rtype
;
3368 srcReg
= pAsm
->S
[0].src
.reg
;
3369 srcType
= pAsm
->S
[0].src
.rtype
;
3371 /* dst.xw, <- 1.0 */
3372 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3373 pAsm
->D
.dst
.rtype
= dstType
;
3374 pAsm
->D
.dst
.reg
= dstReg
;
3375 pAsm
->D
.dst
.writex
= 1;
3376 pAsm
->D
.dst
.writey
= 0;
3377 pAsm
->D
.dst
.writez
= 0;
3378 pAsm
->D
.dst
.writew
= 1;
3379 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3380 pAsm
->S
[0].src
.reg
= tmp
;
3381 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3382 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3383 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_1
;
3384 pAsm
->S
[0].src
.swizzley
= SQ_SEL_1
;
3385 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_1
;
3386 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_1
;
3387 if( GL_FALSE
== next_ins(pAsm
) )
/* Source 0 is re-assembled before each following step. */
3392 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3397 /* dst.y = max(src.x, 0.0) */
3398 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
3399 pAsm
->D
.dst
.rtype
= dstType
;
3400 pAsm
->D
.dst
.reg
= dstReg
;
3401 pAsm
->D
.dst
.writex
= 0;
3402 pAsm
->D
.dst
.writey
= 1;
3403 pAsm
->D
.dst
.writez
= 0;
3404 pAsm
->D
.dst
.writew
= 0;
3405 pAsm
->S
[0].src
.rtype
= srcType
;
3406 pAsm
->S
[0].src
.reg
= srcReg
;
3407 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3408 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
);
3409 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
3410 pAsm
->S
[1].src
.reg
= tmp
;
3411 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
3412 noneg_PVSSRC(&(pAsm
->S
[1].src
));
3413 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_0
;
3414 pAsm
->S
[1].src
.swizzley
= SQ_SEL_0
;
3415 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_0
;
3416 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_0
;
3417 if( GL_FALSE
== next_ins(pAsm
) )
3422 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3427 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Y
, SQ_SEL_Y
, SQ_SEL_Y
, SQ_SEL_Y
);
3429 /* dst.z = log(src.y) */
3430 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_LOG_CLAMPED
;
3431 pAsm
->D
.dst
.math
= 1;
3432 pAsm
->D
.dst
.rtype
= dstType
;
3433 pAsm
->D
.dst
.reg
= dstReg
;
3434 pAsm
->D
.dst
.writex
= 0;
3435 pAsm
->D
.dst
.writey
= 0;
3436 pAsm
->D
.dst
.writez
= 1;
3437 pAsm
->D
.dst
.writew
= 0;
3438 pAsm
->S
[0].src
.rtype
= srcType
;
3439 pAsm
->S
[0].src
.reg
= srcReg
;
3440 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3441 if( GL_FALSE
== next_ins(pAsm
) )
/* Program source 0 is assembled twice: once into S[0], once into S[2]. */
3446 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3451 if( GL_FALSE
== assemble_src(pAsm
, 0, 2) )
3456 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
);
3458 swizzleagain_PVSSRC(&(pAsm
->S
[2].src
), SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
);
3460 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
3461 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MUL_LIT
;
3462 pAsm
->D
.dst
.math
= 1;
3463 pAsm
->D
.dst
.op3
= 1;
3464 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3465 pAsm
->D
.dst
.reg
= tmp
;
3466 pAsm
->D
.dst
.writex
= 1;
3467 pAsm
->D
.dst
.writey
= 0;
3468 pAsm
->D
.dst
.writez
= 0;
3469 pAsm
->D
.dst
.writew
= 0;
3471 pAsm
->S
[0].src
.rtype
= srcType
;
3472 pAsm
->S
[0].src
.reg
= srcReg
;
3473 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
/* Middle operand reads back the z channel written by the previous step. */
3475 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
3476 pAsm
->S
[1].src
.reg
= dstReg
;
3477 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
3478 noneg_PVSSRC(&(pAsm
->S
[1].src
));
3479 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_Z
;
3480 pAsm
->S
[1].src
.swizzley
= SQ_SEL_Z
;
3481 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_Z
;
3482 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_Z
;
3484 pAsm
->S
[2].src
.rtype
= srcType
;
3485 pAsm
->S
[2].src
.reg
= srcReg
;
3486 setaddrmode_PVSSRC(&(pAsm
->S
[2].src
), ADDR_ABSOLUTE
);
3488 if( GL_FALSE
== next_ins(pAsm
) )
3493 /* dst.z = exp(tmp.x) */
3494 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
3495 pAsm
->D
.dst
.math
= 1;
3496 pAsm
->D
.dst
.rtype
= dstType
;
3497 pAsm
->D
.dst
.reg
= dstReg
;
3498 pAsm
->D
.dst
.writex
= 0;
3499 pAsm
->D
.dst
.writey
= 0;
3500 pAsm
->D
.dst
.writez
= 1;
3501 pAsm
->D
.dst
.writew
= 0;
3503 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3504 pAsm
->S
[0].src
.reg
= tmp
;
3505 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3506 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3507 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_X
;
3508 pAsm
->S
[0].src
.swizzley
= SQ_SEL_X
;
3509 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_X
;
3510 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_X
;
3512 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_MAX - emit a single two-operand ALU instruction using opcode
 * SQ_OP2_INST_MAX.
 *
 * Visible sequence: checkop2(), store the opcode, assemble_dst(),
 * assemble_src() for sources 0 and 1, then next_ins().  Every helper result
 * is tested against GL_FALSE.
 *
 * NOTE(review): the guarded statements and the final return are not visible
 * here; presumably failures return GL_FALSE - confirm.
 */
3520 GLboolean
assemble_MAX(r700_AssemblerBase
*pAsm
)
3522 if( GL_FALSE
== checkop2(pAsm
) )
3527 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
3529 if( GL_FALSE
== assemble_dst(pAsm
) )
3534 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3539 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3544 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_MIN - emit a single two-operand ALU instruction using opcode
 * SQ_OP2_INST_MIN.
 *
 * Visible sequence: checkop2(), store the opcode, assemble_dst(),
 * assemble_src() for sources 0 and 1, then next_ins().  Every helper result
 * is tested against GL_FALSE.
 *
 * NOTE(review): the guarded statements and the final return are not visible
 * here; presumably failures return GL_FALSE - confirm.
 */
3552 GLboolean
assemble_MIN(r700_AssemblerBase
*pAsm
)
3554 if( GL_FALSE
== checkop2(pAsm
) )
3559 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MIN
;
3561 if( GL_FALSE
== assemble_dst(pAsm
) )
3566 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3571 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3576 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_MOV - emit a single ALU instruction using opcode
 * SQ_OP2_INST_MOV.
 *
 * Visible sequence: store the opcode, assemble_dst(),
 * assemble_src(pAsm, 0, -1), then next_ins().  Every helper result is tested
 * against GL_FALSE.
 *
 * NOTE(review): the guarded statements and the final return are not visible
 * here; presumably failures return GL_FALSE - confirm.
 */
3584 GLboolean
assemble_MOV(r700_AssemblerBase
*pAsm
)
3588 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3590 if (GL_FALSE
== assemble_dst(pAsm
))
3595 if (GL_FALSE
== assemble_src(pAsm
, 0, -1))
3600 if ( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_MUL - emit a single two-operand ALU instruction using opcode
 * SQ_OP2_INST_MUL.
 *
 * Visible sequence: checkop2(), store the opcode, assemble_dst(),
 * assemble_src() for sources 0 and 1, then next_ins().  Every helper result
 * is tested against GL_FALSE.
 *
 * NOTE(review): the guarded statements and the final return are not visible
 * here; presumably failures return GL_FALSE - confirm.
 */
3608 GLboolean
assemble_MUL(r700_AssemblerBase
*pAsm
)
3610 if( GL_FALSE
== checkop2(pAsm
) )
3615 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
3617 if( GL_FALSE
== assemble_dst(pAsm
) )
3622 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3627 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3632 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_POW - expand POW into four instructions that go through one
 * helper register obtained from gethelpr().
 *
 * Visible sequence:
 *   LG2 tmp <- program source 0
 *   MUL tmp <- tmp (swizzle X), program source 1
 *   EX2 tmp <- tmp (swizzle X)
 *   MOV dst <- tmp (swizzle X), destination built by assemble_dst()
 * The three helper-register writes enable all channels via nomask_PVSDST().
 *
 * NOTE(review): the final MOV sets the source rtype from DST_REG_TEMPORARY
 * while the earlier steps use SRC_REG_TEMPORARY - confirm the two
 * enumerators are interchangeable.
 * NOTE(review): the declaration of tmp, failure-path statements and the
 * final return are not visible here - confirm.
 */
3640 GLboolean
assemble_POW(r700_AssemblerBase
*pAsm
)
3646 tmp
= gethelpr(pAsm
);
3648 // LG2 tmp.x, a.swizzle
3649 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_LOG_IEEE
;
3650 pAsm
->D
.dst
.math
= 1;
3652 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3653 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3654 pAsm
->D
.dst
.reg
= tmp
;
3655 nomask_PVSDST(&(pAsm
->D
.dst
));
3657 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3662 if( GL_FALSE
== next_ins(pAsm
) )
3667 // MUL tmp.x, tmp.x, b.swizzle
3668 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
3670 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3671 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3672 pAsm
->D
.dst
.reg
= tmp
;
3673 nomask_PVSDST(&(pAsm
->D
.dst
));
3675 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3676 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3677 pAsm
->S
[0].src
.reg
= tmp
;
3678 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3679 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3681 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3686 if( GL_FALSE
== next_ins(pAsm
) )
3691 // EX2 dst.mask, tmp.x
3693 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
3694 pAsm
->D
.dst
.math
= 1;
3696 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3697 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3698 pAsm
->D
.dst
.reg
= tmp
;
3699 nomask_PVSDST(&(pAsm
->D
.dst
));
3701 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3702 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3703 pAsm
->S
[0].src
.reg
= tmp
;
3704 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3705 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3707 if( GL_FALSE
== next_ins(pAsm
) )
3712 // Now replicate result to all necessary channels in destination
3713 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3715 if( GL_FALSE
== assemble_dst(pAsm
) )
3720 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3721 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3722 pAsm
->S
[0].src
.reg
= tmp
;
3724 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3725 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3727 if( GL_FALSE
== next_ins(pAsm
) )
3735 GLboolean
assemble_RCP(r700_AssemblerBase
*pAsm
)
3737 return assemble_math_function(pAsm
, SQ_OP2_INST_RECIP_IEEE
);
3740 GLboolean
assemble_RSQ(r700_AssemblerBase
*pAsm
)
3742 return assemble_math_function(pAsm
, SQ_OP2_INST_RECIPSQRT_IEEE
);
3745 GLboolean
assemble_SIN(r700_AssemblerBase
*pAsm
)
3747 return assemble_math_function(pAsm
, SQ_OP2_INST_SIN
);
/*
 * assemble_SCS - expand SCS into three instructions that go through one
 * helper register obtained from gethelpr().
 *
 * Visible sequence:
 *   COS tmp.x <- program source 0
 *   SIN tmp.y <- program source 0
 *   MOV dst   <- tmp, with the z and w source swizzles forced to SQ_SEL_0
 * The destination of the MOV is built by assemble_dst().
 *
 * NOTE(review): the MOV sets the source rtype from DST_REG_TEMPORARY;
 * other functions use SRC_REG_TEMPORARY for sources - confirm the two
 * enumerators are interchangeable.
 * NOTE(review): the declaration of tmp, failure-path statements and the
 * final return are not visible here - confirm.
 */
3750 GLboolean
assemble_SCS(r700_AssemblerBase
*pAsm
)
3756 tmp
= gethelpr(pAsm
);
/* COS into the x channel of the helper register. */
3759 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_COS
;
3760 pAsm
->D
.dst
.math
= 1;
3762 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3763 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3764 pAsm
->D
.dst
.reg
= tmp
;
3765 pAsm
->D
.dst
.writex
= 1;
3767 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3772 if ( GL_FALSE
== next_ins(pAsm
) )
/* SIN into the y channel of the helper register. */
3778 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_SIN
;
3779 pAsm
->D
.dst
.math
= 1;
3781 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3782 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3783 pAsm
->D
.dst
.reg
= tmp
;
3784 pAsm
->D
.dst
.writey
= 1;
3786 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3791 if( GL_FALSE
== next_ins(pAsm
) )
3796 // MOV dst.mask, tmp
3797 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3799 if( GL_FALSE
== assemble_dst(pAsm
) )
3804 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3805 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3806 pAsm
->S
[0].src
.reg
= tmp
;
/* Identity swizzle first, then z and w are overridden with SQ_SEL_0. */
3808 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3809 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_0
;
3810 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_0
;
3812 if ( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_SGE - emit a single two-operand ALU instruction using opcode
 * SQ_OP2_INST_SETGE.
 *
 * Visible sequence: checkop2(), store the opcode, assemble_dst(),
 * assemble_src() for sources 0 and 1, then next_ins().  Every helper result
 * is tested against GL_FALSE.
 *
 * NOTE(review): the guarded statements and the final return are not visible
 * here; presumably failures return GL_FALSE - confirm.
 */
3820 GLboolean
assemble_SGE(r700_AssemblerBase
*pAsm
)
3822 if( GL_FALSE
== checkop2(pAsm
) )
3827 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_SETGE
;
3829 if( GL_FALSE
== assemble_dst(pAsm
) )
3834 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3839 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3844 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_SLT - emit a single ALU instruction using opcode
 * SQ_OP2_INST_SETGT with the two program sources passed as
 * assemble_src(pAsm, 0, 1) and assemble_src(pAsm, 1, 0).
 *
 * NOTE(review): presumably the third argument of assemble_src() selects the
 * S[] slot, so the operands are swapped and "a < b" is encoded as
 * "b > a" - confirm against assemble_src().
 * NOTE(review): the guarded statements and the final return are not visible
 * here; presumably failures return GL_FALSE - confirm.
 */
3852 GLboolean
assemble_SLT(r700_AssemblerBase
*pAsm
)
3854 if( GL_FALSE
== checkop2(pAsm
) )
3859 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_SETGT
;
3861 if( GL_FALSE
== assemble_dst(pAsm
) )
3866 if( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
3871 if( GL_FALSE
== assemble_src(pAsm
, 1, 0) )
3876 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_STP - handler taking the assembler state and returning GLboolean.
 * NOTE(review): no body statements are visible in this excerpt; this looks
 * like a stub - confirm what it returns.
 */
3884 GLboolean
assemble_STP(r700_AssemblerBase
*pAsm
)
/*
 * assemble_TEX - assemble a texture sample for the current instruction,
 * emitting preparatory ALU instructions where needed.
 *
 * Visible stages:
 *   1. The register file of source 0 is inspected; for the constant-like
 *      files src_const is set to GL_TRUE and the source is copied via
 *      mov_temp(pAsm, 0), which also raises need_barrier.
 *   2. The opcode is switched on; two radeon_error() messages are visible
 *      (TXB unsupported, bad texture op).
 *   3. OPCODE_TXP: RECIP_IEEE writes helper.w from source swizzle W, a MUL
 *      writes helper.xyz from the source and helper (swizzle W), then
 *      aArgSubst[1] is pointed at the helper and need_barrier is raised.
 *   4. TEXTURE_CUBE_INDEX: a CUBE instruction followed by MAX, RECIP_IEEE,
 *      MULADD, ADD and MOV set-ups on two helper registers, after which
 *      aArgSubst[1] is pointed at tmp1 and need_barrier is raised.
 *   5. The SQ_TEX_INST_SAMPLE instruction itself: is_tex is set,
 *      need_tex_barrier is set when need_barrier is GL_TRUE, S[1] carries
 *      TexSrcUnit with an XYZW swizzle, tex_dst()/tex_src() fill the
 *      operands, and the S[0] swizzle is adjusted for TXP and cube targets.
 *
 * NOTE(review): two multi-line comments in the cube-map section have no
 * visible terminator in this excerpt, and no next_ins() call is visible
 * between several of the cube-map set-ups - confirm against the full file.
 * NOTE(review): case labels for the opcode switch, failure-path statements
 * and the final return are not visible here - confirm.
 */
3889 GLboolean
assemble_TEX(r700_AssemblerBase
*pAsm
)
3891 GLboolean src_const
;
3892 GLboolean need_barrier
= GL_FALSE
;
/* Classify the register file that source 0 comes from. */
3896 switch (pAsm
->pILInst
[pAsm
->uiCurInst
].SrcReg
[0].File
)
3898 case PROGRAM_CONSTANT
:
3899 case PROGRAM_LOCAL_PARAM
:
3900 case PROGRAM_ENV_PARAM
:
3901 case PROGRAM_STATE_VAR
:
3902 src_const
= GL_TRUE
;
3904 case PROGRAM_TEMPORARY
:
3907 src_const
= GL_FALSE
;
3911 if (GL_TRUE
== src_const
)
3913 if ( GL_FALSE
== mov_temp(pAsm
, 0) )
3915 need_barrier
= GL_TRUE
;
3918 switch (pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
)
3923 radeon_error("do not support TXB yet\n");
3929 radeon_error("Internal error: bad texture op (not TEX)\n");
/* TXP: scale the coordinate by the reciprocal of its w component. */
3934 if (pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_TXP
)
3936 GLuint tmp
= gethelpr(pAsm
);
3937 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_RECIP_IEEE
;
3938 pAsm
->D
.dst
.math
= 1;
3939 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3940 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3941 pAsm
->D
.dst
.reg
= tmp
;
3942 pAsm
->D
.dst
.writew
= 1;
3944 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3948 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
);
3949 if( GL_FALSE
== next_ins(pAsm
) )
3954 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
3955 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3956 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3957 pAsm
->D
.dst
.reg
= tmp
;
3958 pAsm
->D
.dst
.writex
= 1;
3959 pAsm
->D
.dst
.writey
= 1;
3960 pAsm
->D
.dst
.writez
= 1;
3961 pAsm
->D
.dst
.writew
= 0;
3963 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3967 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
3968 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
3969 pAsm
->S
[1].src
.reg
= tmp
;
3970 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_W
);
3972 if( GL_FALSE
== next_ins(pAsm
) )
/* Record the helper register in the argument-substitution table. */
3977 pAsm
->aArgSubst
[1] = tmp
;
3978 need_barrier
= GL_TRUE
;
/* Cube maps: derive the sample coordinate in two helper registers. */
3981 if (pAsm
->pILInst
[pAsm
->uiCurInst
].TexSrcTarget
== TEXTURE_CUBE_INDEX
)
3983 GLuint tmp1
= gethelpr(pAsm
);
3984 GLuint tmp2
= gethelpr(pAsm
);
3986 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
3987 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_CUBE
;
3988 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3989 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3990 pAsm
->D
.dst
.reg
= tmp1
;
3991 nomask_PVSDST(&(pAsm
->D
.dst
));
3993 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3998 if( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
4003 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Z
, SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_Y
);
4004 swizzleagain_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Y
, SQ_SEL_X
, SQ_SEL_Z
, SQ_SEL_Z
);
4006 if( GL_FALSE
== next_ins(pAsm
) )
4011 /* tmp1.z = ABS(tmp1.z) dont have abs support in assembler currently
4012 * have to do explicit instruction
4014 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
4015 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4016 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4017 pAsm
->D
.dst
.reg
= tmp1
;
4018 pAsm
->D
.dst
.writez
= 1;
4020 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4021 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4022 pAsm
->S
[0].src
.reg
= tmp1
;
4023 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
4024 pAsm
->S
[1].bits
= pAsm
->S
[0].bits
;
4025 flipneg_PVSSRC(&(pAsm
->S
[1].src
));
4029 /* tmp1.z = RCP_e(|tmp1.z|) */
4030 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_RECIP_IEEE
;
4031 pAsm
->D
.dst
.math
= 1;
4032 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4033 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4034 pAsm
->D
.dst
.reg
= tmp1
;
4035 pAsm
->D
.dst
.writez
= 1;
4037 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4038 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4039 pAsm
->S
[0].src
.reg
= tmp1
;
4040 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_Z
;
4044 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
4045 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
4046 * muladd has no writemask, have to use another temp
4047 * also no support for imm constants, so add 1 here
4049 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
4050 pAsm
->D
.dst
.op3
= 1;
4051 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4052 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4053 pAsm
->D
.dst
.reg
= tmp2
;
4055 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4056 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4057 pAsm
->S
[0].src
.reg
= tmp1
;
4058 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
4059 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
4060 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
4061 pAsm
->S
[1].src
.reg
= tmp1
;
4062 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Z
);
4063 setaddrmode_PVSSRC(&(pAsm
->S
[2].src
), ADDR_ABSOLUTE
);
4064 pAsm
->S
[2].src
.rtype
= SRC_REG_TEMPORARY
;
4065 pAsm
->S
[2].src
.reg
= tmp1
;
4066 setswizzle_PVSSRC(&(pAsm
->S
[2].src
), SQ_SEL_1
);
4070 /* ADD the remaining .5 */
4071 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_ADD
;
4072 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4073 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4074 pAsm
->D
.dst
.reg
= tmp2
;
4075 pAsm
->D
.dst
.writex
= 1;
4076 pAsm
->D
.dst
.writey
= 1;
4077 pAsm
->D
.dst
.writez
= 0;
4078 pAsm
->D
.dst
.writew
= 0;
4080 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4081 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4082 pAsm
->S
[0].src
.reg
= tmp2
;
4083 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
4084 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
4085 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
4086 pAsm
->S
[1].src
.reg
= 252; // SQ_ALU_SRC_0_5
4087 noswizzle_PVSSRC(&(pAsm
->S
[1].src
));
4091 /* tmp1.xy = temp2.xy */
4092 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
4093 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4094 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4095 pAsm
->D
.dst
.reg
= tmp1
;
4096 pAsm
->D
.dst
.writex
= 1;
4097 pAsm
->D
.dst
.writey
= 1;
4098 pAsm
->D
.dst
.writez
= 0;
4099 pAsm
->D
.dst
.writew
= 0;
4101 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4102 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4103 pAsm
->S
[0].src
.reg
= tmp2
;
4104 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
/* Record tmp1 in the argument-substitution table. */
4107 pAsm
->aArgSubst
[1] = tmp1
;
4108 need_barrier
= GL_TRUE
;
/* The texture sample instruction itself. */
4112 pAsm
->D
.dst
.opcode
= SQ_TEX_INST_SAMPLE
;
4113 pAsm
->is_tex
= GL_TRUE
;
4114 if ( GL_TRUE
== need_barrier
)
4116 pAsm
->need_tex_barrier
= GL_TRUE
;
4118 // Set src1 to tex unit id
4119 pAsm
->S
[1].src
.reg
= pAsm
->pILInst
[pAsm
->uiCurInst
].TexSrcUnit
;
4120 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
4122 //No sw info from mesa compiler, so hard code here.
4123 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_X
;
4124 pAsm
->S
[1].src
.swizzley
= SQ_SEL_Y
;
4125 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_Z
;
4126 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_W
;
4128 if( GL_FALSE
== tex_dst(pAsm
) )
4133 if( GL_FALSE
== tex_src(pAsm
) )
4138 if(pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_TXP
)
4140 /* hopefully did swizzles before */
4141 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
4144 if(pAsm
->pILInst
[pAsm
->uiCurInst
].TexSrcTarget
== TEXTURE_CUBE_INDEX
)
4146 /* SAMPLE dst, tmp.yxwy, CUBE */
4147 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_Y
;
4148 pAsm
->S
[0].src
.swizzley
= SQ_SEL_X
;
4149 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_W
;
4150 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_Y
;
4153 if ( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_XPD - expand XPD (cross product) into a MUL followed by a
 * MULADD, plus a trailing MOV when the write mask is partial.
 *
 * Visible sequence (after checkop2()):
 *   1. MUL    tmp <- src0 (swizzle Z,X,Y,0) * src1 (swizzle Y,Z,X,0),
 *             tmp coming from gethelpr() with all channels enabled.
 *   2. MULADD <- src0 (swizzle Y,Z,X,0), src1 (swizzle Z,X,Y,0), and the
 *             negated, unswizzled step-1 register as third operand.
 *   3. When DstReg.WriteMask is not 0xF, a MOV from tmp to the destination
 *      built by assemble_dst().
 *
 * NOTE(review): in the partial-mask case tmp is reassigned from gethelpr()
 * before S[2] is pointed at tmp, so whether S[2] still names the step-1
 * result depends on gethelpr() behaviour that is not visible here - confirm.
 * NOTE(review): an assemble_dst() call is visible right after the helper
 * set-up of step 2; presumably it is the alternative branch of the
 * WriteMask test - confirm.
 * NOTE(review): the declaration of tmp, failure-path statements and the
 * final return are not visible here - confirm.
 */
4161 GLboolean
assemble_XPD(r700_AssemblerBase
*pAsm
)
4165 if( GL_FALSE
== checkop2(pAsm
) )
4170 tmp
= gethelpr(pAsm
);
/* Step 1: first partial product into the helper register. */
4172 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
4174 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4175 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4176 pAsm
->D
.dst
.reg
= tmp
;
4177 nomask_PVSDST(&(pAsm
->D
.dst
));
4179 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4184 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4189 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_Y
, SQ_SEL_0
);
4190 swizzleagain_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Y
, SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_0
);
4192 if( GL_FALSE
== next_ins(pAsm
) )
/* Step 2: second partial product combined with the negated first one. */
4197 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
4198 pAsm
->D
.dst
.op3
= 1;
4200 if(0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
4202 tmp
= gethelpr(pAsm
);
4204 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4205 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4206 pAsm
->D
.dst
.reg
= tmp
;
4208 nomask_PVSDST(&(pAsm
->D
.dst
));
4212 if( GL_FALSE
== assemble_dst(pAsm
) )
4218 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4223 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4228 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Y
, SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_0
);
4229 swizzleagain_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_Y
, SQ_SEL_0
);
4231 // result1 + (neg) result0
4232 setaddrmode_PVSSRC(&(pAsm
->S
[2].src
),ADDR_ABSOLUTE
);
4233 pAsm
->S
[2].src
.rtype
= SRC_REG_TEMPORARY
;
4234 pAsm
->S
[2].src
.reg
= tmp
;
4236 neg_PVSSRC(&(pAsm
->S
[2].src
));
4237 noswizzle_PVSSRC(&(pAsm
->S
[2].src
));
4239 if( GL_FALSE
== next_ins(pAsm
) )
/* Step 3: with a partial write mask, copy tmp out through a MOV. */
4245 if(0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
4247 if( GL_FALSE
== assemble_dst(pAsm
) )
4252 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
4254 // Use tmp as source
4255 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4256 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4257 pAsm
->S
[0].src
.reg
= tmp
;
4259 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4260 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
4262 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_EXPORT - handler taking the assembler state and returning
 * GLboolean.
 * NOTE(review): no body statements are visible in this excerpt; this looks
 * like a stub - confirm what it returns.
 */
4271 GLboolean
assemble_EXPORT(r700_AssemblerBase
*pAsm
)
/*
 * assemble_IF - handler taking the assembler state and returning GLboolean.
 * NOTE(review): no body statements are visible in this excerpt; this looks
 * like a stub - confirm what it returns.
 */
4276 GLboolean
assemble_IF(r700_AssemblerBase
*pAsm
)
/*
 * assemble_ENDIF - handler taking the assembler state and returning
 * GLboolean.
 * NOTE(review): no body statements are visible in this excerpt; this looks
 * like a stub - confirm what it returns.
 */
4281 GLboolean
assemble_ENDIF(r700_AssemblerBase
*pAsm
)
/*
 * AssembleInstr - walk an array of Mesa program instructions and assemble
 * each one through its per-opcode assemble_*() handler.
 *
 * uiNumberInsts: number of entries in pILInst.
 * pILInst:       instruction array; also stored in pR700AsmCode->pILInst.
 * pR700AsmCode:  assembler state; uiCurInst is set to the loop index before
 *                each dispatch so handlers can find the current instruction.
 *
 * One opcode is handled by assemble_MOV() followed by a look-ahead: when the
 * next instruction exists, is not OPCODE_END and IsTex() accepts its opcode,
 * pInstDeps[i+1].nDstDep is set to i+1.  Several opcodes only report
 * "not yet implemented" or "unknown instruction" through radeon_error().
 *
 * NOTE(review): the case labels, the statements guarded by each GL_FALSE
 * comparison, the declaration of i and the final return are not visible in
 * this excerpt - confirm against the full file.
 */
4286 GLboolean
AssembleInstr(GLuint uiNumberInsts
,
4287 struct prog_instruction
*pILInst
,
4288 r700_AssemblerBase
*pR700AsmCode
)
4292 pR700AsmCode
->pILInst
= pILInst
;
4293 for(i
=0; i
<uiNumberInsts
; i
++)
/* Publish the index of the instruction being assembled. */
4295 pR700AsmCode
->uiCurInst
= i
;
4297 switch (pILInst
[i
].Opcode
)
4300 if ( GL_FALSE
== assemble_ABS(pR700AsmCode
) )
4305 if ( GL_FALSE
== assemble_ADD(pR700AsmCode
) )
4310 if ( GL_FALSE
== assemble_ARL(pR700AsmCode
) )
4314 radeon_error("Not yet implemented instruction OPCODE_ARR \n");
4315 //if ( GL_FALSE == assemble_BAD("ARR") )
4320 if ( GL_FALSE
== assemble_CMP(pR700AsmCode
) )
4324 if ( GL_FALSE
== assemble_COS(pR700AsmCode
) )
4331 if ( GL_FALSE
== assemble_DOT(pR700AsmCode
) )
4336 if ( GL_FALSE
== assemble_DST(pR700AsmCode
) )
4341 if ( GL_FALSE
== assemble_EX2(pR700AsmCode
) )
4345 if ( GL_FALSE
== assemble_EXP(pR700AsmCode
) )
4350 if ( GL_FALSE
== assemble_FLR(pR700AsmCode
) )
4354 // if ( GL_FALSE == assemble_FLR_INT() )
4359 if ( GL_FALSE
== assemble_FRC(pR700AsmCode
) )
4364 if ( GL_FALSE
== assemble_KIL(pR700AsmCode
) )
4368 if ( GL_FALSE
== assemble_LG2(pR700AsmCode
) )
4372 if ( GL_FALSE
== assemble_LIT(pR700AsmCode
) )
4376 if ( GL_FALSE
== assemble_LRP(pR700AsmCode
) )
4380 if ( GL_FALSE
== assemble_LOG(pR700AsmCode
) )
4385 if ( GL_FALSE
== assemble_MAD(pR700AsmCode
) )
4389 if ( GL_FALSE
== assemble_MAX(pR700AsmCode
) )
4393 if ( GL_FALSE
== assemble_MIN(pR700AsmCode
) )
4398 if ( GL_FALSE
== assemble_MOV(pR700AsmCode
) )
4402 if ( GL_FALSE
== assemble_MUL(pR700AsmCode
) )
4407 if ( GL_FALSE
== assemble_POW(pR700AsmCode
) )
4411 if ( GL_FALSE
== assemble_RCP(pR700AsmCode
) )
4415 if ( GL_FALSE
== assemble_RSQ(pR700AsmCode
) )
4419 if ( GL_FALSE
== assemble_SIN(pR700AsmCode
) )
4423 if ( GL_FALSE
== assemble_SCS(pR700AsmCode
) )
4428 if ( GL_FALSE
== assemble_SGE(pR700AsmCode
) )
4432 if ( GL_FALSE
== assemble_SLT(pR700AsmCode
) )
4437 // if ( GL_FALSE == assemble_STP(pR700AsmCode) )
4442 if ( GL_FALSE
== assemble_MOV(pR700AsmCode
) )
/* Look ahead: mark a dependency when a texture instruction follows. */
4448 if( (i
+1)<uiNumberInsts
)
4450 if(OPCODE_END
!= pILInst
[i
+1].Opcode
)
4452 if( GL_TRUE
== IsTex(pILInst
[i
+1].Opcode
) )
4454 pR700AsmCode
->pInstDeps
[i
+1].nDstDep
= i
+1; //=1?
4464 if ( GL_FALSE
== assemble_TEX(pR700AsmCode
) )
4469 if ( GL_FALSE
== assemble_XPD(pR700AsmCode
) )
4474 if ( GL_FALSE
== assemble_IF(pR700AsmCode
) )
4478 radeon_error("Not yet implemented instruction OPCODE_ELSE \n");
4479 //if ( GL_FALSE == assemble_BAD("ELSE") )
4483 if ( GL_FALSE
== assemble_ENDIF(pR700AsmCode
) )
4487 //case OPCODE_EXPORT:
4488 // if ( GL_FALSE == assemble_EXPORT() )
4493 //pR700AsmCode->uiCurInst = i;
4494 //This is to remind that if in later export there is depth/stencil
4495 //export, we need a mov to re-arrange DST channel, where using a
4496 //pseudo inst, we will use this end inst to do it.
4500 radeon_error("internal: unknown instruction\n");
/*
 * Process_Export - fill in the current export control-flow clause.
 *
 * export_starting_index:    added to the base selected for the export type
 *                           to form array_base (not used for depth exports).
 * export_count:             burst_count is programmed as export_count - 1.
 * starting_register_number: stored in rw_gpr; also used, relative to
 *                           starting_export_register_number, to index
 *                           pucOutMask when a single register is exported.
 * is_depth_export:          for SQ_EXPORT_PIXEL selects SQ_CF_PIXEL_Z instead
 *                           of SQ_CF_PIXEL_MRT0 + export_starting_index.
 *
 * check_current_clause() is called first for CF_EMPTY_CLAUSE and then for
 * CF_EXPORT_CLAUSE.  Word0/Word1 fields are then set as shown below.  For
 * a single-register export each sel_x..sel_w swizzle is either the matching
 * channel or SQ_SEL_MASK depending on the corresponding bit of the write
 * mask; the last group of assignments selects X, Y, Z, W unconditionally.
 * Finally cf_last_export_ptr is pointed at the clause.
 *
 * NOTE(review): `type` is read below but its declaration, the switch header
 * over it and one case label are not visible in this excerpt; neither are
 * the statements of the depth-export branch, the else keywords or the final
 * return - confirm against the full file.
 */
4508 GLboolean
Process_Export(r700_AssemblerBase
* pAsm
,
4510 GLuint export_starting_index
,
4511 GLuint export_count
,
4512 GLuint starting_register_number
,
4513 GLboolean is_depth_export
)
4515 unsigned char ucWriteMask
;
4517 check_current_clause(pAsm
, CF_EMPTY_CLAUSE
);
4518 check_current_clause(pAsm
, CF_EXPORT_CLAUSE
); //alloc the cf_current_export_clause_ptr
4520 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.type
= type
;
/* Choose array_base according to the export type. */
4524 case SQ_EXPORT_PIXEL
:
4525 if(GL_TRUE
== is_depth_export
)
4527 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= SQ_CF_PIXEL_Z
;
4531 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= SQ_CF_PIXEL_MRT0
+ export_starting_index
;
4536 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= SQ_CF_POS_0
+ export_starting_index
;
4539 case SQ_EXPORT_PARAM
:
4540 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= 0x0 + export_starting_index
;
4544 radeon_error("Unknown export type: %d\n", type
);
/* Register source and addressing for the export. */
4549 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.rw_gpr
= starting_register_number
;
4551 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.rw_rel
= SQ_ABSOLUTE
;
4552 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.index_gpr
= 0x0;
4553 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.elem_size
= 0x3;
4555 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.burst_count
= (export_count
- 1);
4556 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
4557 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
4558 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT
; // _DONE
4559 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
4560 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Single register: derive the per-channel swizzle from its write mask. */
4562 if (export_count
== 1)
4564 ucWriteMask
= pAsm
->pucOutMask
[starting_register_number
- pAsm
->starting_export_register_number
];
4565 /* exports Z as a float into Red channel */
4566 if (GL_TRUE
== is_depth_export
)
4569 if( (ucWriteMask
& 0x1) != 0)
4571 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_X
;
4575 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_MASK
;
4577 if( ((ucWriteMask
>>1) & 0x1) != 0)
4579 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_Y
;
4583 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_MASK
;
4585 if( ((ucWriteMask
>>2) & 0x1) != 0)
4587 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_Z
;
4591 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_MASK
;
4593 if( ((ucWriteMask
>>3) & 0x1) != 0)
4595 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_W
;
4599 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_MASK
;
4604 // This should only be used if all components for all registers have been written
4605 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_X
;
4606 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_Y
;
4607 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_Z
;
4608 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_W
;
/* Remember this clause as the most recent export. */
4611 pAsm
->cf_last_export_ptr
= pAsm
->cf_current_export_clause_ptr
;
/*
 * Move_Depth_Exports_To_Correct_Channels: set up a MOV inside the depth
 * export register, from the channel given by depth_channel_select to the
 * X channel, and hand it to next_ins().
 *
 * The opcode of the current IL instruction (pILInst[uiCurInst]) is saved,
 * replaced with OPCODE_MOV while the MOV is set up, and written back from
 * Opcode_save after the next_ins() check.
 *
 *   pAsm                  assembler state; D.dst and S[0].src are overwritten.
 *   depth_channel_select  passed to setswizzle_PVSSRC() for the source operand.
 *
 * Returns a GLboolean; the caller in this file compares it against GL_FALSE.
 * NOTE(review): the return statements and the body of the next_ins()
 * failure branch are not visible in these lines -- confirm whether the
 * saved opcode is also restored on failure.
 */
4616 GLboolean
Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase
*pAsm
, BITS depth_channel_select
)
4618 gl_inst_opcode Opcode_save
= pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
; //Should be OPCODE_END
4619 pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
= OPCODE_MOV
;
4621 // MOV depth_export_register.hw_depth_channel, depth_export_register.depth_channel_select
4623 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
/* Destination: the depth export register, absolute addressing mode. */
4625 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4626 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4627 pAsm
->D
.dst
.reg
= pAsm
->depth_export_register_number
;
4629 pAsm
->D
.dst
.writex
= 1; // depth goes in R channel for HW
/* Source: the same register, read through the requested channel select. */
4631 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4632 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
4633 pAsm
->S
[0].src
.reg
= pAsm
->depth_export_register_number
;
4635 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), depth_channel_select
);
4637 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4639 if( GL_FALSE
== next_ins(pAsm
) )
/* Put back the opcode that was overridden above. */
4644 pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
= Opcode_save
;
/*
 * Process_Fragment_Exports: emit the export clauses of a fragment program.
 *
 *   pR700AsmCode    assembler state.
 *   OutputsWritten  bitmask tested against (1 << FRAG_RESULT_COLOR) and
 *                   (1 << FRAG_RESULT_DEPTH).
 *
 * Steps visible below:
 *   - when depth_export_register_number >= 0, the depth value is first moved
 *     from the Z channel via Move_Depth_Exports_To_Correct_Channels();
 *   - a Process_Export() call is made for each of the colour and depth
 *     outputs that is written, using the register from uiFP_OutputMap;
 *   - if export_count is still 0, one pixel export is issued anyway;
 *   - the last export, if any, becomes EXPORT_DONE with end_of_program set.
 *
 * Returns a GLboolean.
 * NOTE(review): the declaration of unBit, several Process_Export() arguments,
 * the export_count updates and the return statements are not visible in
 * these lines -- confirm against the full source.
 */
4649 GLboolean
Process_Fragment_Exports(r700_AssemblerBase
*pR700AsmCode
,
4650 GLbitfield OutputsWritten
)
4653 GLuint export_count
= 0;
4655 if(pR700AsmCode
->depth_export_register_number
>= 0)
4657 if( GL_FALSE
== Move_Depth_Exports_To_Correct_Channels(pR700AsmCode
, SQ_SEL_Z
) ) // depth
/* Colour output. */
4663 unBit
= 1 << FRAG_RESULT_COLOR
;
4664 if(OutputsWritten
& unBit
)
4666 if( GL_FALSE
== Process_Export(pR700AsmCode
,
4670 pR700AsmCode
->uiFP_OutputMap
[FRAG_RESULT_COLOR
],
/* Depth output. */
4677 unBit
= 1 << FRAG_RESULT_DEPTH
;
4678 if(OutputsWritten
& unBit
)
4680 if( GL_FALSE
== Process_Export(pR700AsmCode
,
4684 pR700AsmCode
->uiFP_OutputMap
[FRAG_RESULT_DEPTH
],
4691 /* Need to export something, otherwise we'll hang
4692 * results are undefined anyway */
4693 if(export_count
== 0)
/* NOTE(review): the result of this fallback Process_Export() is not checked. */
4695 Process_Export(pR700AsmCode
, SQ_EXPORT_PIXEL
, 0, 1, 0, GL_FALSE
);
/* Mark the final export as EXPORT_DONE and flag the end of the program. */
4698 if(pR700AsmCode
->cf_last_export_ptr
!= NULL
)
4700 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
4701 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.end_of_program
= 0x1;
/*
 * Process_Vertex_Exports: emit the export clauses of a vertex program.
 *
 *   pR700AsmCode    assembler state; number_of_exports is read into
 *                   export_count and written back after the HPOS step.
 *   OutputsWritten  bitmask tested against VERT_RESULT_HPOS, COL0, COL1,
 *                   FOGC and VERT_RESULT_TEX0 + i.
 *
 * Each written output leads to a Process_Export() call that uses the
 * register from ucVP_OutputMap. export_starting_index is incremented after
 * the COL0, COL1, FOGC and TEX exports; no increment is visible after HPOS.
 * At the end the last export is marked EXPORT_DONE and end_of_program is
 * set.
 *
 * Returns a GLboolean.
 * NOTE(review): the declarations of unBit and i, the loop over i, several
 * Process_Export() arguments, the export_count update and the if/else
 * around the "at least one param" fallback are not visible in these lines
 * -- confirm against the full source.
 * NOTE(review): cf_last_export_ptr is dereferenced here with no NULL check
 * in view, whereas Process_Fragment_Exports() tests it against NULL first.
 */
4707 GLboolean
Process_Vertex_Exports(r700_AssemblerBase
*pR700AsmCode
,
4708 GLbitfield OutputsWritten
)
4713 GLuint export_starting_index
= 0;
4714 GLuint export_count
= pR700AsmCode
->number_of_exports
;
/* Position output; its export is marked EXPORT_DONE right away. */
4716 unBit
= 1 << VERT_RESULT_HPOS
;
4717 if(OutputsWritten
& unBit
)
4719 if( GL_FALSE
== Process_Export(pR700AsmCode
,
4721 export_starting_index
,
4723 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_HPOS
],
4731 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
4734 pR700AsmCode
->number_of_exports
= export_count
;
/* Primary colour output. */
4736 unBit
= 1 << VERT_RESULT_COL0
;
4737 if(OutputsWritten
& unBit
)
4739 if( GL_FALSE
== Process_Export(pR700AsmCode
,
4741 export_starting_index
,
4743 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_COL0
],
4749 export_starting_index
++;
/* Secondary colour output. */
4752 unBit
= 1 << VERT_RESULT_COL1
;
4753 if(OutputsWritten
& unBit
)
4755 if( GL_FALSE
== Process_Export(pR700AsmCode
,
4757 export_starting_index
,
4759 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_COL1
],
4765 export_starting_index
++;
/* Fog coordinate output. */
4768 unBit
= 1 << VERT_RESULT_FOGC
;
4769 if(OutputsWritten
& unBit
)
4771 if( GL_FALSE
== Process_Export(pR700AsmCode
,
4773 export_starting_index
,
4775 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_FOGC
],
4781 export_starting_index
++;
/* Texture coordinate outputs, indexed by i (loop header not shown). */
4786 unBit
= 1 << (VERT_RESULT_TEX0
+ i
);
4787 if(OutputsWritten
& unBit
)
4789 if( GL_FALSE
== Process_Export(pR700AsmCode
,
4791 export_starting_index
,
4793 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_TEX0
+ i
],
4799 export_starting_index
++;
4803 // At least one param should be exported
4806 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
/*
 * Fallback export from starting_export_register_number with the swizzle
 * forced to (0, 0, 0, 1); presumably taken only when no param was exported
 * above -- confirm.
 */
4810 if( GL_FALSE
== Process_Export(pR700AsmCode
,
4814 pR700AsmCode
->starting_export_register_number
,
4820 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_0
;
4821 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_0
;
4822 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_0
;
4823 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_1
;
4824 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
/* The last export also terminates the program. */
4827 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.end_of_program
= 0x1;
4832 GLboolean
Clean_Up_Assembler(r700_AssemblerBase
*pR700AsmCode
)
4834 FREE(pR700AsmCode
->pucOutMask
);
4835 FREE(pR700AsmCode
->pInstDeps
);