2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
33 #include "main/mtypes.h"
34 #include "main/imports.h"
35 #include "program/prog_parameter.h"
37 #include "radeon_debug.h"
38 #include "r600_context.h"
40 #include "r700_assembler.h"
42 #define USE_CF_FOR_CONTINUE_BREAK 1
43 #define USE_CF_FOR_POP_AFTER 1
45 struct prog_instruction noise1_insts
[12] = {
46 {OPCODE_BGNSUB
, {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
47 {OPCODE_MOV
, {{0, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 2, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
48 {OPCODE_MOV
, {{8, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 4, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
49 {OPCODE_MOV
, {{8, 0, 585, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 8, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
50 {OPCODE_SGT
, {{0, 0, 585, 0, 0, 0}, {8, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 1, 1, 0, 8, 1672, 0}, 1, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
51 {OPCODE_IF
, {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 7, 0, 0}, 0, 0, 0, 1, 0, 0, 0, 15, 0, 0, 0},
52 {OPCODE_MOV
, {{0, 0, 1755, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
53 {OPCODE_RET
, {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
54 {OPCODE_ENDIF
, {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
55 {OPCODE_MOV
, {{0, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
56 {OPCODE_RET
, {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
57 {OPCODE_ENDSUB
, {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}
59 float noise1_const
[2][4] = {
60 {0.300000f
, 0.900000f
, 0.500000f
, 0.300000f
}
63 COMPILED_SUB noise1_presub
= {
78 BITS
addrmode_PVSDST(PVSDST
* pPVSDST
)
80 return pPVSDST
->addrmode0
| ((BITS
)pPVSDST
->addrmode1
<< 1);
83 void setaddrmode_PVSDST(PVSDST
* pPVSDST
, BITS addrmode
)
85 pPVSDST
->addrmode0
= addrmode
& 1;
86 pPVSDST
->addrmode1
= (addrmode
>> 1) & 1;
89 void nomask_PVSDST(PVSDST
* pPVSDST
)
91 pPVSDST
->writex
= pPVSDST
->writey
= pPVSDST
->writez
= pPVSDST
->writew
= 1;
94 BITS
addrmode_PVSSRC(PVSSRC
* pPVSSRC
)
96 return pPVSSRC
->addrmode0
| ((BITS
)pPVSSRC
->addrmode1
<< 1);
99 void setaddrmode_PVSSRC(PVSSRC
* pPVSSRC
, BITS addrmode
)
101 pPVSSRC
->addrmode0
= addrmode
& 1;
102 pPVSSRC
->addrmode1
= (addrmode
>> 1) & 1;
106 void setswizzle_PVSSRC(PVSSRC
* pPVSSRC
, BITS swz
)
111 pPVSSRC
->swizzlew
= swz
;
114 void noswizzle_PVSSRC(PVSSRC
* pPVSSRC
)
116 pPVSSRC
->swizzlex
= SQ_SEL_X
;
117 pPVSSRC
->swizzley
= SQ_SEL_Y
;
118 pPVSSRC
->swizzlez
= SQ_SEL_Z
;
119 pPVSSRC
->swizzlew
= SQ_SEL_W
;
123 swizzleagain_PVSSRC(PVSSRC
* pPVSSRC
, BITS x
, BITS y
, BITS z
, BITS w
)
127 case SQ_SEL_X
: x
= pPVSSRC
->swizzlex
;
129 case SQ_SEL_Y
: x
= pPVSSRC
->swizzley
;
131 case SQ_SEL_Z
: x
= pPVSSRC
->swizzlez
;
133 case SQ_SEL_W
: x
= pPVSSRC
->swizzlew
;
140 case SQ_SEL_X
: y
= pPVSSRC
->swizzlex
;
142 case SQ_SEL_Y
: y
= pPVSSRC
->swizzley
;
144 case SQ_SEL_Z
: y
= pPVSSRC
->swizzlez
;
146 case SQ_SEL_W
: y
= pPVSSRC
->swizzlew
;
153 case SQ_SEL_X
: z
= pPVSSRC
->swizzlex
;
155 case SQ_SEL_Y
: z
= pPVSSRC
->swizzley
;
157 case SQ_SEL_Z
: z
= pPVSSRC
->swizzlez
;
159 case SQ_SEL_W
: z
= pPVSSRC
->swizzlew
;
166 case SQ_SEL_X
: w
= pPVSSRC
->swizzlex
;
168 case SQ_SEL_Y
: w
= pPVSSRC
->swizzley
;
170 case SQ_SEL_Z
: w
= pPVSSRC
->swizzlez
;
172 case SQ_SEL_W
: w
= pPVSSRC
->swizzlew
;
177 pPVSSRC
->swizzlex
= x
;
178 pPVSSRC
->swizzley
= y
;
179 pPVSSRC
->swizzlez
= z
;
180 pPVSSRC
->swizzlew
= w
;
183 void neg_PVSSRC(PVSSRC
* pPVSSRC
)
191 void noneg_PVSSRC(PVSSRC
* pPVSSRC
)
199 // negate argument (for SUB instead of ADD and alike)
200 void flipneg_PVSSRC(PVSSRC
* pPVSSRC
)
202 pPVSSRC
->negx
= !pPVSSRC
->negx
;
203 pPVSSRC
->negy
= !pPVSSRC
->negy
;
204 pPVSSRC
->negz
= !pPVSSRC
->negz
;
205 pPVSSRC
->negw
= !pPVSSRC
->negw
;
208 void zerocomp_PVSSRC(PVSSRC
* pPVSSRC
, int c
)
212 case 0: pPVSSRC
->swizzlex
= SQ_SEL_0
; pPVSSRC
->negx
= 0; break;
213 case 1: pPVSSRC
->swizzley
= SQ_SEL_0
; pPVSSRC
->negy
= 0; break;
214 case 2: pPVSSRC
->swizzlez
= SQ_SEL_0
; pPVSSRC
->negz
= 0; break;
215 case 3: pPVSSRC
->swizzlew
= SQ_SEL_0
; pPVSSRC
->negw
= 0; break;
220 void onecomp_PVSSRC(PVSSRC
* pPVSSRC
, int c
)
224 case 0: pPVSSRC
->swizzlex
= SQ_SEL_1
; pPVSSRC
->negx
= 0; break;
225 case 1: pPVSSRC
->swizzley
= SQ_SEL_1
; pPVSSRC
->negy
= 0; break;
226 case 2: pPVSSRC
->swizzlez
= SQ_SEL_1
; pPVSSRC
->negz
= 0; break;
227 case 3: pPVSSRC
->swizzlew
= SQ_SEL_1
; pPVSSRC
->negw
= 0; break;
232 BITS
is_misc_component_exported(VAP_OUT_VTX_FMT_0
* pOutVTXFmt0
)
234 return (pOutVTXFmt0
->point_size
|
235 pOutVTXFmt0
->edge_flag
|
236 pOutVTXFmt0
->rta_index
|
237 pOutVTXFmt0
->kill_flag
|
238 pOutVTXFmt0
->viewport_index
);
241 BITS
is_depth_component_exported(OUT_FRAGMENT_FMT_0
* pFPOutFmt
)
243 return (pFPOutFmt
->depth
|
244 pFPOutFmt
->stencil_ref
|
246 pFPOutFmt
->coverage_to_mask
);
249 GLboolean
is_reduction_opcode(PVSDWORD
* dest
)
251 if (dest
->dst
.op3
== 0)
253 if ( (dest
->dst
.opcode
== SQ_OP2_INST_DOT4
|| dest
->dst
.opcode
== SQ_OP2_INST_DOT4_IEEE
|| dest
->dst
.opcode
== SQ_OP2_INST_CUBE
) )
261 GLuint
GetSurfaceFormat(GLenum eType
, GLuint nChannels
, GLuint
* pClient_size
)
263 GLuint format
= FMT_INVALID
;
264 GLuint uiElemSize
= 0;
269 case GL_UNSIGNED_BYTE
:
274 format
= FMT_8
; break;
276 format
= FMT_8_8
; break;
278 format
= FMT_8_8_8
; break;
280 format
= FMT_8_8_8_8
; break;
286 case GL_UNSIGNED_SHORT
:
292 format
= FMT_16
; break;
294 format
= FMT_16_16
; break;
296 /* 3 comp GL_SHORT vertex format doesn't work on r700
297 4 somehow works, test - sauerbraten */
298 format
= FMT_16_16_16_16
; break;
300 format
= FMT_16_16_16_16
; break;
306 case GL_UNSIGNED_INT
:
312 format
= FMT_32
; break;
314 format
= FMT_32_32
; break;
316 format
= FMT_32_32_32
; break;
318 format
= FMT_32_32_32_32
; break;
329 format
= FMT_32_FLOAT
; break;
331 format
= FMT_32_32_FLOAT
; break;
333 format
= FMT_32_32_32_FLOAT
; break;
335 format
= FMT_32_32_32_32_FLOAT
; break;
345 format
= FMT_32_FLOAT
; break;
347 format
= FMT_32_32_FLOAT
; break;
349 format
= FMT_32_32_32_FLOAT
; break;
351 format
= FMT_32_32_32_32_FLOAT
; break;
358 //GL_ASSERT_NO_CASE();
361 if(NULL
!= pClient_size
)
363 *pClient_size
= uiElemSize
* nChannels
;
369 unsigned int r700GetNumOperands(GLuint opcode
, GLuint nIsOp3
)
378 case SQ_OP2_INST_ADD
:
379 case SQ_OP2_INST_KILLE
:
380 case SQ_OP2_INST_KILLGT
:
381 case SQ_OP2_INST_KILLGE
:
382 case SQ_OP2_INST_KILLNE
:
383 case SQ_OP2_INST_MUL
:
384 case SQ_OP2_INST_MAX
:
385 case SQ_OP2_INST_MIN
:
386 //case SQ_OP2_INST_MAX_DX10:
387 //case SQ_OP2_INST_MIN_DX10:
388 case SQ_OP2_INST_SETE
:
389 case SQ_OP2_INST_SETNE
:
390 case SQ_OP2_INST_SETGT
:
391 case SQ_OP2_INST_SETGE
:
392 case SQ_OP2_INST_PRED_SETE
:
393 case SQ_OP2_INST_PRED_SETGT
:
394 case SQ_OP2_INST_PRED_SETGE
:
395 case SQ_OP2_INST_PRED_SETNE
:
396 case SQ_OP2_INST_DOT4
:
397 case SQ_OP2_INST_DOT4_IEEE
:
398 case SQ_OP2_INST_CUBE
:
401 case SQ_OP2_INST_MOV
:
402 case SQ_OP2_INST_MOVA_FLOOR
:
403 case SQ_OP2_INST_FRACT
:
404 case SQ_OP2_INST_FLOOR
:
405 case SQ_OP2_INST_TRUNC
:
406 case SQ_OP2_INST_EXP_IEEE
:
407 case SQ_OP2_INST_LOG_CLAMPED
:
408 case SQ_OP2_INST_LOG_IEEE
:
409 case SQ_OP2_INST_RECIP_IEEE
:
410 case SQ_OP2_INST_RECIPSQRT_IEEE
:
411 case SQ_OP2_INST_FLT_TO_INT
:
412 case SQ_OP2_INST_SIN
:
413 case SQ_OP2_INST_COS
:
416 default: radeon_error(
417 "Need instruction operand number for %x.\n", opcode
);
423 int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt
, r700_AssemblerBase
* pAsm
, R700_Shader
* pShader
)
427 Init_R700_Shader(pShader
);
428 pAsm
->pR700Shader
= pShader
;
429 pAsm
->currentShaderType
= spt
;
431 pAsm
->cf_last_export_ptr
= NULL
;
433 pAsm
->cf_current_export_clause_ptr
= NULL
;
434 pAsm
->cf_current_alu_clause_ptr
= NULL
;
435 pAsm
->cf_current_tex_clause_ptr
= NULL
;
436 pAsm
->cf_current_vtx_clause_ptr
= NULL
;
437 pAsm
->cf_current_cf_clause_ptr
= NULL
;
439 // No clause has been created yet
440 pAsm
->cf_current_clause_type
= CF_EMPTY_CLAUSE
;
442 pAsm
->number_of_colorandz_exports
= 0;
443 pAsm
->number_of_exports
= 0;
444 pAsm
->number_of_export_opcodes
= 0;
446 pAsm
->alu_x_opcode
= 0;
455 pAsm
->uLastPosUpdate
= 0;
457 *(BITS
*) &pAsm
->fp_stOutFmt0
= 0;
461 pAsm
->number_used_registers
= 0;
462 pAsm
->uUsedConsts
= 256;
466 pAsm
->uBoolConsts
= 0;
467 pAsm
->uIntConsts
= 0;
472 pAsm
->fc_stack
[0].type
= FC_NONE
;
477 pAsm
->aArgSubst
[3] = (-1);
481 for (i
=0; i
<NUMBER_OF_OUTPUT_COLORS
; i
++)
483 pAsm
->color_export_register_number
[i
] = (-1);
487 pAsm
->depth_export_register_number
= (-1);
488 pAsm
->stencil_export_register_number
= (-1);
489 pAsm
->coverage_to_mask_export_register_number
= (-1);
490 pAsm
->mask_export_register_number
= (-1);
492 pAsm
->starting_export_register_number
= 0;
493 pAsm
->starting_vfetch_register_number
= 0;
494 pAsm
->starting_temp_register_number
= 0;
495 pAsm
->uFirstHelpReg
= 0;
497 pAsm
->input_position_is_used
= GL_FALSE
;
498 pAsm
->input_normal_is_used
= GL_FALSE
;
500 for (i
=0; i
<NUMBER_OF_INPUT_COLORS
; i
++)
502 pAsm
->input_color_is_used
[ i
] = GL_FALSE
;
505 for (i
=0; i
<NUMBER_OF_TEXTURE_UNITS
; i
++)
507 pAsm
->input_texture_unit_is_used
[ i
] = GL_FALSE
;
510 for (i
=0; i
<VERT_ATTRIB_MAX
; i
++)
512 pAsm
->vfetch_instruction_ptr_array
[ i
] = NULL
;
515 pAsm
->number_of_inputs
= 0;
517 pAsm
->is_tex
= GL_FALSE
;
518 pAsm
->need_tex_barrier
= GL_FALSE
;
521 pAsm
->unSubArraySize
= 0;
522 pAsm
->unSubArrayPointer
= 0;
523 pAsm
->callers
= NULL
;
524 pAsm
->unCallerArraySize
= 0;
525 pAsm
->unCallerArrayPointer
= 0;
528 pAsm
->CALLSTACK
[0].FCSP_BeforeEntry
= 0;
529 pAsm
->CALLSTACK
[0].plstCFInstructions_local
530 = &(pAsm
->pR700Shader
->lstCFInstructions
);
532 pAsm
->CALLSTACK
[0].max
= 0;
533 pAsm
->CALLSTACK
[0].current
= 0;
535 SetActiveCFlist(pAsm
->pR700Shader
, pAsm
->CALLSTACK
[0].plstCFInstructions_local
);
539 pAsm
->presubs
= NULL
;
540 pAsm
->unPresubArraySize
= 0;
541 pAsm
->unNumPresub
= 0;
542 pAsm
->unCurNumILInsts
= 0;
544 pAsm
->unVetTexBits
= 0;
549 GLboolean
IsTex(gl_inst_opcode Opcode
)
551 if( (OPCODE_TEX
==Opcode
) || (OPCODE_TXP
==Opcode
) || (OPCODE_TXB
==Opcode
) ||
552 (OPCODE_DDX
==Opcode
) || (OPCODE_DDY
==Opcode
) )
559 GLboolean
IsAlu(gl_inst_opcode Opcode
)
561 //TODO : more for fc and ex for higher spec.
569 int check_current_clause(r700_AssemblerBase
* pAsm
,
570 CF_CLAUSE_TYPE new_clause_type
)
572 if (pAsm
->cf_current_clause_type
!= new_clause_type
)
573 { //Close last open clause
574 switch (pAsm
->cf_current_clause_type
)
577 if ( pAsm
->cf_current_alu_clause_ptr
!= NULL
)
579 pAsm
->cf_current_alu_clause_ptr
= NULL
;
583 if ( pAsm
->cf_current_vtx_clause_ptr
!= NULL
)
585 pAsm
->cf_current_vtx_clause_ptr
= NULL
;
589 if ( pAsm
->cf_current_tex_clause_ptr
!= NULL
)
591 pAsm
->cf_current_tex_clause_ptr
= NULL
;
594 case CF_EXPORT_CLAUSE
:
595 if ( pAsm
->cf_current_export_clause_ptr
!= NULL
)
597 pAsm
->cf_current_export_clause_ptr
= NULL
;
600 case CF_OTHER_CLAUSE
:
601 if ( pAsm
->cf_current_cf_clause_ptr
!= NULL
)
603 pAsm
->cf_current_cf_clause_ptr
= NULL
;
606 case CF_EMPTY_CLAUSE
:
610 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type
);
614 pAsm
->cf_current_clause_type
= CF_EMPTY_CLAUSE
;
617 switch (new_clause_type
)
620 pAsm
->cf_current_clause_type
= CF_ALU_CLAUSE
;
623 pAsm
->cf_current_clause_type
= CF_VTX_CLAUSE
;
626 pAsm
->cf_current_clause_type
= CF_TEX_CLAUSE
;
628 case CF_EXPORT_CLAUSE
:
630 R700ControlFlowSXClause
* pR700ControlFlowSXClause
631 = (R700ControlFlowSXClause
*) CALLOC_STRUCT(R700ControlFlowSXClause
);
633 // Add new export instruction to control flow program
634 if (pR700ControlFlowSXClause
!= 0)
636 pAsm
->cf_current_export_clause_ptr
= pR700ControlFlowSXClause
;
637 Init_R700ControlFlowSXClause(pR700ControlFlowSXClause
);
638 AddCFInstruction( pAsm
->pR700Shader
,
639 (R700ControlFlowInstruction
*)pR700ControlFlowSXClause
);
644 "Error allocating new EXPORT CF instruction in check_current_clause. \n");
647 pAsm
->cf_current_clause_type
= CF_EXPORT_CLAUSE
;
650 case CF_EMPTY_CLAUSE
:
652 case CF_OTHER_CLAUSE
:
653 pAsm
->cf_current_clause_type
= CF_OTHER_CLAUSE
;
657 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type
);
665 GLboolean
add_cf_instruction(r700_AssemblerBase
* pAsm
)
667 if(GL_FALSE
== check_current_clause(pAsm
, CF_OTHER_CLAUSE
))
672 pAsm
->cf_current_cf_clause_ptr
=
673 (R700ControlFlowGenericClause
*) CALLOC_STRUCT(R700ControlFlowGenericClause
);
675 if (pAsm
->cf_current_cf_clause_ptr
!= NULL
)
677 Init_R700ControlFlowGenericClause(pAsm
->cf_current_cf_clause_ptr
);
678 AddCFInstruction( pAsm
->pR700Shader
,
679 (R700ControlFlowInstruction
*)pAsm
->cf_current_cf_clause_ptr
);
683 radeon_error("Could not allocate a new VFetch CF instruction.\n");
690 GLboolean
add_vfetch_instruction(r700_AssemblerBase
* pAsm
,
691 R700VertexInstruction
* vertex_instruction_ptr
)
693 if( GL_FALSE
== check_current_clause(pAsm
, CF_VTX_CLAUSE
) )
698 if( pAsm
->cf_current_vtx_clause_ptr
== NULL
||
699 ( (pAsm
->cf_current_vtx_clause_ptr
!= NULL
) &&
700 (pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.count
>= GetCFMaxInstructions(pAsm
->cf_current_vtx_clause_ptr
->m_ShaderInstType
)-1)
703 // Create new Vfetch control flow instruction for this new clause
704 pAsm
->cf_current_vtx_clause_ptr
= (R700ControlFlowGenericClause
*) CALLOC_STRUCT(R700ControlFlowGenericClause
);
706 if (pAsm
->cf_current_vtx_clause_ptr
!= NULL
)
708 Init_R700ControlFlowGenericClause(pAsm
->cf_current_vtx_clause_ptr
);
709 AddCFInstruction( pAsm
->pR700Shader
,
710 (R700ControlFlowInstruction
*)pAsm
->cf_current_vtx_clause_ptr
);
714 radeon_error("Could not allocate a new VFetch CF instruction.\n");
718 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.pop_count
= 0x0;
719 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
720 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
721 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.count
= 0x0;
722 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
723 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
724 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_VTX
;
725 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
726 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
728 LinkVertexInstruction(pAsm
->cf_current_vtx_clause_ptr
, vertex_instruction_ptr
);
732 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.count
++;
735 AddVTXInstruction(pAsm
->pR700Shader
, vertex_instruction_ptr
);
740 GLboolean
add_tex_instruction(r700_AssemblerBase
* pAsm
,
741 R700TextureInstruction
* tex_instruction_ptr
)
743 if ( GL_FALSE
== check_current_clause(pAsm
, CF_TEX_CLAUSE
) )
748 if ( pAsm
->cf_current_tex_clause_ptr
== NULL
||
749 ( (pAsm
->cf_current_tex_clause_ptr
!= NULL
) &&
750 (pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.count
>= GetCFMaxInstructions(pAsm
->cf_current_tex_clause_ptr
->m_ShaderInstType
)-1)
753 // new tex cf instruction for this new clause
754 pAsm
->cf_current_tex_clause_ptr
= (R700ControlFlowGenericClause
*) CALLOC_STRUCT(R700ControlFlowGenericClause
);
756 if (pAsm
->cf_current_tex_clause_ptr
!= NULL
)
758 Init_R700ControlFlowGenericClause(pAsm
->cf_current_tex_clause_ptr
);
759 AddCFInstruction( pAsm
->pR700Shader
,
760 (R700ControlFlowInstruction
*)pAsm
->cf_current_tex_clause_ptr
);
764 radeon_error("Could not allocate a new TEX CF instruction.\n");
768 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.pop_count
= 0x0;
769 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
770 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
772 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
773 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
774 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_TEX
;
775 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
776 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.barrier
= 0x0; //0x1;
780 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.count
++;
783 // If this clause contains any TEX instruction that is dependent on a previous instruction,
784 // set the barrier bit
785 if( pAsm
->pInstDeps
[pAsm
->uiCurInst
].nDstDep
> (-1) || pAsm
->need_tex_barrier
== GL_TRUE
)
787 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
790 if(NULL
== pAsm
->cf_current_tex_clause_ptr
->m_pLinkedTEXInstruction
)
792 pAsm
->cf_current_tex_clause_ptr
->m_pLinkedTEXInstruction
= tex_instruction_ptr
;
793 tex_instruction_ptr
->m_pLinkedGenericClause
= pAsm
->cf_current_tex_clause_ptr
;
796 AddTEXInstruction(pAsm
->pR700Shader
, tex_instruction_ptr
);
801 GLboolean
assemble_vfetch_instruction(r700_AssemblerBase
* pAsm
,
803 GLuint destination_register
,
804 GLuint number_of_elements
,
805 GLenum dataElementType
,
806 VTX_FETCH_METHOD
* pFetchMethod
)
808 GLuint client_size_inbyte
;
810 GLuint mega_fetch_count
;
811 GLuint is_mega_fetch_flag
;
813 R700VertexGenericFetch
* vfetch_instruction_ptr
;
814 R700VertexGenericFetch
* assembled_vfetch_instruction_ptr
= pAsm
->vfetch_instruction_ptr_array
[ gl_client_id
];
816 if (assembled_vfetch_instruction_ptr
== NULL
)
818 vfetch_instruction_ptr
= (R700VertexGenericFetch
*) CALLOC_STRUCT(R700VertexGenericFetch
);
819 if (vfetch_instruction_ptr
== NULL
)
823 Init_R700VertexGenericFetch(vfetch_instruction_ptr
);
827 vfetch_instruction_ptr
= assembled_vfetch_instruction_ptr
;
830 data_format
= GetSurfaceFormat(dataElementType
, number_of_elements
, &client_size_inbyte
);
832 if(GL_TRUE
== pFetchMethod
->bEnableMini
) //More conditions here
835 mega_fetch_count
= 0;
836 is_mega_fetch_flag
= 0;
840 mega_fetch_count
= MEGA_FETCH_BYTES
- 1;
841 is_mega_fetch_flag
= 0x1;
842 pFetchMethod
->mega_fetch_remainder
= MEGA_FETCH_BYTES
- client_size_inbyte
;
845 vfetch_instruction_ptr
->m_Word0
.f
.vtx_inst
= SQ_VTX_INST_FETCH
;
846 vfetch_instruction_ptr
->m_Word0
.f
.fetch_type
= SQ_VTX_FETCH_VERTEX_DATA
;
847 vfetch_instruction_ptr
->m_Word0
.f
.fetch_whole_quad
= 0x0;
849 vfetch_instruction_ptr
->m_Word0
.f
.buffer_id
= gl_client_id
;
850 vfetch_instruction_ptr
->m_Word0
.f
.src_gpr
= 0x0;
851 vfetch_instruction_ptr
->m_Word0
.f
.src_rel
= SQ_ABSOLUTE
;
852 vfetch_instruction_ptr
->m_Word0
.f
.src_sel_x
= SQ_SEL_X
;
853 vfetch_instruction_ptr
->m_Word0
.f
.mega_fetch_count
= mega_fetch_count
;
855 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_x
= (number_of_elements
< 1) ? SQ_SEL_0
: SQ_SEL_X
;
856 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_y
= (number_of_elements
< 2) ? SQ_SEL_0
: SQ_SEL_Y
;
857 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_z
= (number_of_elements
< 3) ? SQ_SEL_0
: SQ_SEL_Z
;
858 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_w
= (number_of_elements
< 4) ? SQ_SEL_1
: SQ_SEL_W
;
860 vfetch_instruction_ptr
->m_Word1
.f
.use_const_fields
= 1;
862 // Destination register
863 vfetch_instruction_ptr
->m_Word1_GPR
.f
.dst_gpr
= destination_register
;
864 vfetch_instruction_ptr
->m_Word1_GPR
.f
.dst_rel
= SQ_ABSOLUTE
;
866 vfetch_instruction_ptr
->m_Word2
.f
.offset
= 0;
867 vfetch_instruction_ptr
->m_Word2
.f
.const_buf_no_stride
= 0x0;
869 vfetch_instruction_ptr
->m_Word2
.f
.mega_fetch
= is_mega_fetch_flag
;
871 if (assembled_vfetch_instruction_ptr
== NULL
)
873 if ( GL_FALSE
== add_vfetch_instruction(pAsm
, (R700VertexInstruction
*)vfetch_instruction_ptr
) )
878 if (pAsm
->vfetch_instruction_ptr_array
[ gl_client_id
] != NULL
)
884 pAsm
->vfetch_instruction_ptr_array
[ gl_client_id
] = vfetch_instruction_ptr
;
891 GLboolean
assemble_vfetch_instruction2(r700_AssemblerBase
* pAsm
,
892 GLuint destination_register
,
899 VTX_FETCH_METHOD
* pFetchMethod
)
901 GLuint client_size_inbyte
;
903 GLuint mega_fetch_count
;
904 GLuint is_mega_fetch_flag
;
906 R700VertexGenericFetch
* vfetch_instruction_ptr
;
907 R700VertexGenericFetch
* assembled_vfetch_instruction_ptr
908 = pAsm
->vfetch_instruction_ptr_array
[element
];
910 if (assembled_vfetch_instruction_ptr
== NULL
)
912 vfetch_instruction_ptr
= (R700VertexGenericFetch
*) CALLOC_STRUCT(R700VertexGenericFetch
);
913 if (vfetch_instruction_ptr
== NULL
)
917 Init_R700VertexGenericFetch(vfetch_instruction_ptr
);
921 vfetch_instruction_ptr
= assembled_vfetch_instruction_ptr
;
924 data_format
= GetSurfaceFormat(type
, size
, &client_size_inbyte
);
926 if(GL_TRUE
== pFetchMethod
->bEnableMini
) //More conditions here
929 mega_fetch_count
= 0;
930 is_mega_fetch_flag
= 0;
934 mega_fetch_count
= MEGA_FETCH_BYTES
- 1;
935 is_mega_fetch_flag
= 0x1;
936 pFetchMethod
->mega_fetch_remainder
= MEGA_FETCH_BYTES
- client_size_inbyte
;
939 vfetch_instruction_ptr
->m_Word0
.f
.vtx_inst
= SQ_VTX_INST_FETCH
;
940 vfetch_instruction_ptr
->m_Word0
.f
.fetch_type
= SQ_VTX_FETCH_VERTEX_DATA
;
941 vfetch_instruction_ptr
->m_Word0
.f
.fetch_whole_quad
= 0x0;
943 vfetch_instruction_ptr
->m_Word0
.f
.buffer_id
= element
;
944 vfetch_instruction_ptr
->m_Word0
.f
.src_gpr
= 0x0;
945 vfetch_instruction_ptr
->m_Word0
.f
.src_rel
= SQ_ABSOLUTE
;
946 vfetch_instruction_ptr
->m_Word0
.f
.src_sel_x
= SQ_SEL_X
;
947 vfetch_instruction_ptr
->m_Word0
.f
.mega_fetch_count
= mega_fetch_count
;
949 if(format
== GL_BGRA
)
951 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_x
= (size
< 1) ? SQ_SEL_0
: SQ_SEL_Z
;
952 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_y
= (size
< 2) ? SQ_SEL_0
: SQ_SEL_Y
;
953 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_z
= (size
< 3) ? SQ_SEL_0
: SQ_SEL_X
;
954 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_w
= (size
< 4) ? SQ_SEL_1
: SQ_SEL_W
;
958 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_x
= (size
< 1) ? SQ_SEL_0
: SQ_SEL_X
;
959 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_y
= (size
< 2) ? SQ_SEL_0
: SQ_SEL_Y
;
960 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_z
= (size
< 3) ? SQ_SEL_0
: SQ_SEL_Z
;
961 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_w
= (size
< 4) ? SQ_SEL_1
: SQ_SEL_W
;
965 vfetch_instruction_ptr
->m_Word1
.f
.use_const_fields
= 1;
966 vfetch_instruction_ptr
->m_Word1
.f
.data_format
= data_format
;
967 vfetch_instruction_ptr
->m_Word2
.f
.endian_swap
= SQ_ENDIAN_NONE
;
971 vfetch_instruction_ptr
->m_Word1
.f
.format_comp_all
= SQ_FORMAT_COMP_SIGNED
;
975 vfetch_instruction_ptr
->m_Word1
.f
.format_comp_all
= SQ_FORMAT_COMP_UNSIGNED
;
978 if(GL_TRUE
== normalize
)
980 vfetch_instruction_ptr
->m_Word1
.f
.num_format_all
= SQ_NUM_FORMAT_NORM
;
984 vfetch_instruction_ptr
->m_Word1
.f
.num_format_all
= SQ_NUM_FORMAT_INT
;
987 // Destination register
988 vfetch_instruction_ptr
->m_Word1_GPR
.f
.dst_gpr
= destination_register
;
989 vfetch_instruction_ptr
->m_Word1_GPR
.f
.dst_rel
= SQ_ABSOLUTE
;
991 vfetch_instruction_ptr
->m_Word2
.f
.offset
= 0;
992 vfetch_instruction_ptr
->m_Word2
.f
.const_buf_no_stride
= 0x0;
994 vfetch_instruction_ptr
->m_Word2
.f
.mega_fetch
= is_mega_fetch_flag
;
996 if (assembled_vfetch_instruction_ptr
== NULL
)
998 if ( GL_FALSE
== add_vfetch_instruction(pAsm
, (R700VertexInstruction
*)vfetch_instruction_ptr
) )
1003 if (pAsm
->vfetch_instruction_ptr_array
[element
] != NULL
)
1009 pAsm
->vfetch_instruction_ptr_array
[element
] = vfetch_instruction_ptr
;
1016 GLboolean
cleanup_vfetch_instructions(r700_AssemblerBase
* pAsm
)
1019 pAsm
->cf_current_clause_type
= CF_EMPTY_CLAUSE
;
1020 pAsm
->cf_current_vtx_clause_ptr
= NULL
;
1022 for (i
=0; i
<VERT_ATTRIB_MAX
; i
++)
1024 pAsm
->vfetch_instruction_ptr_array
[ i
] = NULL
;
1027 cleanup_vfetch_shaderinst(pAsm
->pR700Shader
);
1032 GLuint
gethelpr(r700_AssemblerBase
* pAsm
)
1034 GLuint r
= pAsm
->uHelpReg
;
1036 if (pAsm
->uHelpReg
> pAsm
->number_used_registers
)
1038 pAsm
->number_used_registers
= pAsm
->uHelpReg
;
1042 void resethelpr(r700_AssemblerBase
* pAsm
)
1044 pAsm
->uHelpReg
= pAsm
->uFirstHelpReg
;
1047 void checkop_init(r700_AssemblerBase
* pAsm
)
1050 pAsm
->aArgSubst
[0] =
1051 pAsm
->aArgSubst
[1] =
1052 pAsm
->aArgSubst
[2] =
1053 pAsm
->aArgSubst
[3] = -1;
1056 static GLboolean
next_ins(r700_AssemblerBase
*pAsm
)
1058 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1060 if (GL_TRUE
== pAsm
->is_tex
)
1062 if (pILInst
->TexSrcTarget
== TEXTURE_RECT_INDEX
)
1064 if (GL_FALSE
== assemble_tex_instruction(pAsm
, GL_FALSE
))
1066 radeon_error("Error assembling TEX instruction\n");
1072 if (GL_FALSE
== assemble_tex_instruction(pAsm
, GL_TRUE
))
1074 radeon_error("Error assembling TEX instruction\n");
1081 if (GL_FALSE
== assemble_alu_instruction(pAsm
))
1083 radeon_error("Error assembling ALU instruction\n");
1088 if (pAsm
->D
.dst
.rtype
== DST_REG_OUT
)
1090 assert(pAsm
->D
.dst
.reg
>= pAsm
->starting_export_register_number
);
1092 if (pAsm
->D
.dst
.op3
)
1094 // There is no mask for OP3 instructions, so all channels are written
1095 pAsm
->pucOutMask
[pAsm
->D
.dst
.reg
- pAsm
->starting_export_register_number
] = 0xF;
1099 pAsm
->pucOutMask
[pAsm
->D
.dst
.reg
- pAsm
->starting_export_register_number
]
1100 |= (unsigned char)pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
;
1104 //reset for next inst.
1107 pAsm
->S
[0].bits
= 0;
1108 pAsm
->S
[1].bits
= 0;
1109 pAsm
->S
[2].bits
= 0;
1110 pAsm
->is_tex
= GL_FALSE
;
1111 pAsm
->need_tex_barrier
= GL_FALSE
;
1113 pAsm
->C
[0].bits
= pAsm
->C
[1].bits
= pAsm
->C
[2].bits
= pAsm
->C
[3].bits
= 0;
1117 GLboolean
mov_temp(r700_AssemblerBase
* pAsm
, int src
)
1119 GLuint tmp
= gethelpr(pAsm
);
1121 //mov src to temp helper gpr.
1122 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
1124 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1126 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
1127 pAsm
->D
.dst
.reg
= tmp
;
1129 nomask_PVSDST(&(pAsm
->D
.dst
));
1131 if( GL_FALSE
== assemble_src(pAsm
, src
, 0) )
1136 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
1137 noneg_PVSSRC(&(pAsm
->S
[0].src
));
1139 if( GL_FALSE
== next_ins(pAsm
) )
1144 pAsm
->aArgSubst
[1 + src
] = tmp
;
1149 GLboolean
checkop1(r700_AssemblerBase
* pAsm
)
1155 GLboolean
checkop2(r700_AssemblerBase
* pAsm
)
1157 GLboolean bSrcConst
[2];
1158 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1162 if( (pILInst
->SrcReg
[0].File
== PROGRAM_UNIFORM
) ||
1163 (pILInst
->SrcReg
[0].File
== PROGRAM_CONSTANT
) ||
1164 (pILInst
->SrcReg
[0].File
== PROGRAM_LOCAL_PARAM
) ||
1165 (pILInst
->SrcReg
[0].File
== PROGRAM_ENV_PARAM
) ||
1166 (pILInst
->SrcReg
[0].File
== PROGRAM_STATE_VAR
) )
1168 bSrcConst
[0] = GL_TRUE
;
1172 bSrcConst
[0] = GL_FALSE
;
1174 if( (pILInst
->SrcReg
[1].File
== PROGRAM_UNIFORM
) ||
1175 (pILInst
->SrcReg
[1].File
== PROGRAM_CONSTANT
) ||
1176 (pILInst
->SrcReg
[1].File
== PROGRAM_LOCAL_PARAM
) ||
1177 (pILInst
->SrcReg
[1].File
== PROGRAM_ENV_PARAM
) ||
1178 (pILInst
->SrcReg
[1].File
== PROGRAM_STATE_VAR
) )
1180 bSrcConst
[1] = GL_TRUE
;
1184 bSrcConst
[1] = GL_FALSE
;
1187 if( (bSrcConst
[0] == GL_TRUE
) && (bSrcConst
[1] == GL_TRUE
) )
1189 if(pILInst
->SrcReg
[0].Index
!= pILInst
->SrcReg
[1].Index
)
1191 if( GL_FALSE
== mov_temp(pAsm
, 1) )
1201 GLboolean
checkop3(r700_AssemblerBase
* pAsm
)
1203 GLboolean bSrcConst
[3];
1204 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1208 if( (pILInst
->SrcReg
[0].File
== PROGRAM_UNIFORM
) ||
1209 (pILInst
->SrcReg
[0].File
== PROGRAM_CONSTANT
) ||
1210 (pILInst
->SrcReg
[0].File
== PROGRAM_LOCAL_PARAM
) ||
1211 (pILInst
->SrcReg
[0].File
== PROGRAM_ENV_PARAM
) ||
1212 (pILInst
->SrcReg
[0].File
== PROGRAM_STATE_VAR
) )
1214 bSrcConst
[0] = GL_TRUE
;
1218 bSrcConst
[0] = GL_FALSE
;
1220 if( (pILInst
->SrcReg
[1].File
== PROGRAM_UNIFORM
) ||
1221 (pILInst
->SrcReg
[1].File
== PROGRAM_CONSTANT
) ||
1222 (pILInst
->SrcReg
[1].File
== PROGRAM_LOCAL_PARAM
) ||
1223 (pILInst
->SrcReg
[1].File
== PROGRAM_ENV_PARAM
) ||
1224 (pILInst
->SrcReg
[1].File
== PROGRAM_STATE_VAR
) )
1226 bSrcConst
[1] = GL_TRUE
;
1230 bSrcConst
[1] = GL_FALSE
;
1232 if( (pILInst
->SrcReg
[2].File
== PROGRAM_UNIFORM
) ||
1233 (pILInst
->SrcReg
[2].File
== PROGRAM_CONSTANT
) ||
1234 (pILInst
->SrcReg
[2].File
== PROGRAM_LOCAL_PARAM
) ||
1235 (pILInst
->SrcReg
[2].File
== PROGRAM_ENV_PARAM
) ||
1236 (pILInst
->SrcReg
[2].File
== PROGRAM_STATE_VAR
) )
1238 bSrcConst
[2] = GL_TRUE
;
1242 bSrcConst
[2] = GL_FALSE
;
1245 if( (GL_TRUE
== bSrcConst
[0]) &&
1246 (GL_TRUE
== bSrcConst
[1]) &&
1247 (GL_TRUE
== bSrcConst
[2]) )
1249 if( GL_FALSE
== mov_temp(pAsm
, 1) )
1253 if( GL_FALSE
== mov_temp(pAsm
, 2) )
1260 else if( (GL_TRUE
== bSrcConst
[0]) &&
1261 (GL_TRUE
== bSrcConst
[1]) )
1263 if(pILInst
->SrcReg
[0].Index
!= pILInst
->SrcReg
[1].Index
)
1265 if( GL_FALSE
== mov_temp(pAsm
, 1) )
1273 else if ( (GL_TRUE
== bSrcConst
[0]) &&
1274 (GL_TRUE
== bSrcConst
[2]) )
1276 if(pILInst
->SrcReg
[0].Index
!= pILInst
->SrcReg
[2].Index
)
1278 if( GL_FALSE
== mov_temp(pAsm
, 2) )
1286 else if( (GL_TRUE
== bSrcConst
[1]) &&
1287 (GL_TRUE
== bSrcConst
[2]) )
1289 if(pILInst
->SrcReg
[1].Index
!= pILInst
->SrcReg
[2].Index
)
1291 if( GL_FALSE
== mov_temp(pAsm
, 2) )
1303 GLboolean
assemble_src(r700_AssemblerBase
*pAsm
,
1307 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1314 if(pAsm
->aArgSubst
[1+src
] >= 0)
1317 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1318 pAsm
->S
[fld
].src
.rtype
= SRC_REG_TEMPORARY
;
1319 pAsm
->S
[fld
].src
.reg
= pAsm
->aArgSubst
[1+src
];
1323 switch (pILInst
->SrcReg
[src
].File
)
1325 case PROGRAM_TEMPORARY
:
1326 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1327 pAsm
->S
[fld
].src
.rtype
= SRC_REG_TEMPORARY
;
1328 pAsm
->S
[fld
].src
.reg
= pILInst
->SrcReg
[src
].Index
+ pAsm
->starting_temp_register_number
;
1330 case PROGRAM_CONSTANT
:
1331 case PROGRAM_LOCAL_PARAM
:
1332 case PROGRAM_ENV_PARAM
:
1333 case PROGRAM_STATE_VAR
:
1334 case PROGRAM_UNIFORM
:
1335 if (1 == pILInst
->SrcReg
[src
].RelAddr
)
1337 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_RELATIVE_A0
);
1341 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1344 pAsm
->S
[fld
].src
.rtype
= SRC_REG_CONSTANT
;
1345 if(pILInst
->SrcReg
[src
].Index
< 0)
1347 WARN_ONCE("Negative register offsets not supported yet!\n");
1348 pAsm
->S
[fld
].src
.reg
= 0;
1352 pAsm
->S
[fld
].src
.reg
= pILInst
->SrcReg
[src
].Index
;
1356 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1357 pAsm
->S
[fld
].src
.rtype
= SRC_REG_INPUT
;
1358 switch (pAsm
->currentShaderType
)
1361 pAsm
->S
[fld
].src
.reg
= pAsm
->uiFP_AttributeMap
[pILInst
->SrcReg
[src
].Index
];
1364 pAsm
->S
[fld
].src
.reg
= pAsm
->ucVP_AttributeMap
[pILInst
->SrcReg
[src
].Index
];
1369 radeon_error("Invalid source argument type : %d \n", pILInst
->SrcReg
[src
].File
);
1374 pAsm
->S
[fld
].src
.swizzlex
= pILInst
->SrcReg
[src
].Swizzle
& 0x7;
1375 pAsm
->S
[fld
].src
.swizzley
= (pILInst
->SrcReg
[src
].Swizzle
>> 3) & 0x7;
1376 pAsm
->S
[fld
].src
.swizzlez
= (pILInst
->SrcReg
[src
].Swizzle
>> 6) & 0x7;
1377 pAsm
->S
[fld
].src
.swizzlew
= (pILInst
->SrcReg
[src
].Swizzle
>> 9) & 0x7;
1379 pAsm
->S
[fld
].src
.negx
= pILInst
->SrcReg
[src
].Negate
& 0x1;
1380 pAsm
->S
[fld
].src
.negy
= (pILInst
->SrcReg
[src
].Negate
>> 1) & 0x1;
1381 pAsm
->S
[fld
].src
.negz
= (pILInst
->SrcReg
[src
].Negate
>> 2) & 0x1;
1382 pAsm
->S
[fld
].src
.negw
= (pILInst
->SrcReg
[src
].Negate
>> 3) & 0x1;
1387 GLboolean
assemble_dst(r700_AssemblerBase
*pAsm
)
1389 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1390 switch (pILInst
->DstReg
.File
)
1392 case PROGRAM_TEMPORARY
:
1393 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1394 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
1395 pAsm
->D
.dst
.reg
= pILInst
->DstReg
.Index
+ pAsm
->starting_temp_register_number
;
1397 case PROGRAM_ADDRESS
:
1398 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1399 pAsm
->D
.dst
.rtype
= DST_REG_A0
;
1400 pAsm
->D
.dst
.reg
= 0;
1402 case PROGRAM_OUTPUT
:
1403 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1404 pAsm
->D
.dst
.rtype
= DST_REG_OUT
;
1405 switch (pAsm
->currentShaderType
)
1408 pAsm
->D
.dst
.reg
= pAsm
->uiFP_OutputMap
[pILInst
->DstReg
.Index
];
1411 pAsm
->D
.dst
.reg
= pAsm
->ucVP_OutputMap
[pILInst
->DstReg
.Index
];
1416 radeon_error("Invalid destination output argument type\n");
1420 pAsm
->D
.dst
.writex
= pILInst
->DstReg
.WriteMask
& 0x1;
1421 pAsm
->D
.dst
.writey
= (pILInst
->DstReg
.WriteMask
>> 1) & 0x1;
1422 pAsm
->D
.dst
.writez
= (pILInst
->DstReg
.WriteMask
>> 2) & 0x1;
1423 pAsm
->D
.dst
.writew
= (pILInst
->DstReg
.WriteMask
>> 3) & 0x1;
1425 if(pILInst
->SaturateMode
== SATURATE_ZERO_ONE
)
1427 pAsm
->D2
.dst2
.SaturateMode
= 1;
1431 pAsm
->D2
.dst2
.SaturateMode
= 0;
1437 GLboolean
tex_dst(r700_AssemblerBase
*pAsm
)
1439 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1441 if(PROGRAM_TEMPORARY
== pILInst
->DstReg
.File
)
1443 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
1444 pAsm
->D
.dst
.reg
= pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.Index
+ pAsm
->starting_temp_register_number
;
1446 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1448 else if(PROGRAM_OUTPUT
== pILInst
->DstReg
.File
)
1450 pAsm
->D
.dst
.rtype
= DST_REG_OUT
;
1451 switch (pAsm
->currentShaderType
)
1454 pAsm
->D
.dst
.reg
= pAsm
->uiFP_OutputMap
[pILInst
->DstReg
.Index
];
1457 pAsm
->D
.dst
.reg
= pAsm
->ucVP_OutputMap
[pILInst
->DstReg
.Index
];
1461 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1465 radeon_error("Invalid destination output argument type\n");
1469 pAsm
->D
.dst
.writex
= pILInst
->DstReg
.WriteMask
& 0x1;
1470 pAsm
->D
.dst
.writey
= (pILInst
->DstReg
.WriteMask
>> 1) & 0x1;
1471 pAsm
->D
.dst
.writez
= (pILInst
->DstReg
.WriteMask
>> 2) & 0x1;
1472 pAsm
->D
.dst
.writew
= (pILInst
->DstReg
.WriteMask
>> 3) & 0x1;
1477 GLboolean
tex_src(r700_AssemblerBase
*pAsm
)
1479 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1481 GLboolean bValidTexCoord
= GL_FALSE
;
1483 if(pAsm
->aArgSubst
[1] >= 0)
1485 bValidTexCoord
= GL_TRUE
;
1486 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
1487 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
1488 pAsm
->S
[0].src
.reg
= pAsm
->aArgSubst
[1];
1492 switch (pILInst
->SrcReg
[0].File
) {
1493 case PROGRAM_UNIFORM
:
1494 case PROGRAM_CONSTANT
:
1495 case PROGRAM_LOCAL_PARAM
:
1496 case PROGRAM_ENV_PARAM
:
1497 case PROGRAM_STATE_VAR
:
1499 case PROGRAM_TEMPORARY
:
1500 bValidTexCoord
= GL_TRUE
;
1501 pAsm
->S
[0].src
.reg
= pILInst
->SrcReg
[0].Index
+
1502 pAsm
->starting_temp_register_number
;
1503 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
1506 if(SPT_VP
== pAsm
->currentShaderType
)
1508 switch (pILInst
->SrcReg
[0].Index
)
1510 case VERT_ATTRIB_TEX0
:
1511 case VERT_ATTRIB_TEX1
:
1512 case VERT_ATTRIB_TEX2
:
1513 case VERT_ATTRIB_TEX3
:
1514 case VERT_ATTRIB_TEX4
:
1515 case VERT_ATTRIB_TEX5
:
1516 case VERT_ATTRIB_TEX6
:
1517 case VERT_ATTRIB_TEX7
:
1518 bValidTexCoord
= GL_TRUE
;
1519 pAsm
->S
[0].src
.reg
=
1520 pAsm
->ucVP_AttributeMap
[pILInst
->SrcReg
[0].Index
];
1521 pAsm
->S
[0].src
.rtype
= SRC_REG_INPUT
;
1527 switch (pILInst
->SrcReg
[0].Index
)
1529 case FRAG_ATTRIB_WPOS
:
1530 case FRAG_ATTRIB_COL0
:
1531 case FRAG_ATTRIB_COL1
:
1532 case FRAG_ATTRIB_FOGC
:
1533 case FRAG_ATTRIB_TEX0
:
1534 case FRAG_ATTRIB_TEX1
:
1535 case FRAG_ATTRIB_TEX2
:
1536 case FRAG_ATTRIB_TEX3
:
1537 case FRAG_ATTRIB_TEX4
:
1538 case FRAG_ATTRIB_TEX5
:
1539 case FRAG_ATTRIB_TEX6
:
1540 case FRAG_ATTRIB_TEX7
:
1541 bValidTexCoord
= GL_TRUE
;
1542 pAsm
->S
[0].src
.reg
=
1543 pAsm
->uiFP_AttributeMap
[pILInst
->SrcReg
[0].Index
];
1544 pAsm
->S
[0].src
.rtype
= SRC_REG_INPUT
;
1546 case FRAG_ATTRIB_FACE
:
1547 fprintf(stderr
, "FRAG_ATTRIB_FACE unsupported\n");
1549 case FRAG_ATTRIB_PNTC
:
1550 fprintf(stderr
, "FRAG_ATTRIB_PNTC unsupported\n");
1554 if( (pILInst
->SrcReg
[0].Index
>= FRAG_ATTRIB_VAR0
) ||
1555 (pILInst
->SrcReg
[0].Index
< FRAG_ATTRIB_MAX
) )
1557 bValidTexCoord
= GL_TRUE
;
1558 pAsm
->S
[0].src
.reg
=
1559 pAsm
->uiFP_AttributeMap
[pILInst
->SrcReg
[0].Index
];
1560 pAsm
->S
[0].src
.rtype
= SRC_REG_INPUT
;
1568 if(GL_TRUE
== bValidTexCoord
)
1570 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
1574 radeon_error("Invalid source texcoord for TEX instruction\n");
1578 pAsm
->S
[0].src
.swizzlex
= pILInst
->SrcReg
[0].Swizzle
& 0x7;
1579 pAsm
->S
[0].src
.swizzley
= (pILInst
->SrcReg
[0].Swizzle
>> 3) & 0x7;
1580 pAsm
->S
[0].src
.swizzlez
= (pILInst
->SrcReg
[0].Swizzle
>> 6) & 0x7;
1581 pAsm
->S
[0].src
.swizzlew
= (pILInst
->SrcReg
[0].Swizzle
>> 9) & 0x7;
1583 pAsm
->S
[0].src
.negx
= pILInst
->SrcReg
[0].Negate
& 0x1;
1584 pAsm
->S
[0].src
.negy
= (pILInst
->SrcReg
[0].Negate
>> 1) & 0x1;
1585 pAsm
->S
[0].src
.negz
= (pILInst
->SrcReg
[0].Negate
>> 2) & 0x1;
1586 pAsm
->S
[0].src
.negw
= (pILInst
->SrcReg
[0].Negate
>> 3) & 0x1;
1591 GLboolean
assemble_tex_instruction(r700_AssemblerBase
*pAsm
, GLboolean normalized
)
1593 PVSSRC
* texture_coordinate_source
;
1594 PVSSRC
* texture_unit_source
;
1596 R700TextureInstruction
* tex_instruction_ptr
= (R700TextureInstruction
*) CALLOC_STRUCT(R700TextureInstruction
);
1597 if (tex_instruction_ptr
== NULL
)
1601 Init_R700TextureInstruction(tex_instruction_ptr
);
1603 texture_coordinate_source
= &(pAsm
->S
[0].src
);
1604 texture_unit_source
= &(pAsm
->S
[1].src
);
1606 tex_instruction_ptr
->m_Word0
.f
.tex_inst
= pAsm
->D
.dst
.opcode
;
1607 tex_instruction_ptr
->m_Word0
.f
.bc_frac_mode
= 0x0;
1608 tex_instruction_ptr
->m_Word0
.f
.fetch_whole_quad
= 0x0;
1609 tex_instruction_ptr
->m_Word0
.f
.alt_const
= 0;
1611 if(SPT_VP
== pAsm
->currentShaderType
)
1613 tex_instruction_ptr
->m_Word0
.f
.resource_id
= texture_unit_source
->reg
+ VERT_ATTRIB_MAX
;
1614 pAsm
->unVetTexBits
|= 1 << texture_unit_source
->reg
;
1618 tex_instruction_ptr
->m_Word0
.f
.resource_id
= texture_unit_source
->reg
;
1621 tex_instruction_ptr
->m_Word1
.f
.lod_bias
= 0x0;
1623 tex_instruction_ptr
->m_Word1
.f
.coord_type_x
= SQ_TEX_NORMALIZED
;
1624 tex_instruction_ptr
->m_Word1
.f
.coord_type_y
= SQ_TEX_NORMALIZED
;
1625 tex_instruction_ptr
->m_Word1
.f
.coord_type_z
= SQ_TEX_NORMALIZED
;
1626 tex_instruction_ptr
->m_Word1
.f
.coord_type_w
= SQ_TEX_NORMALIZED
;
1628 /* XXX: UNNORMALIZED tex coords have limited wrap modes */
1629 tex_instruction_ptr
->m_Word1
.f
.coord_type_x
= SQ_TEX_UNNORMALIZED
;
1630 tex_instruction_ptr
->m_Word1
.f
.coord_type_y
= SQ_TEX_UNNORMALIZED
;
1631 tex_instruction_ptr
->m_Word1
.f
.coord_type_z
= SQ_TEX_UNNORMALIZED
;
1632 tex_instruction_ptr
->m_Word1
.f
.coord_type_w
= SQ_TEX_UNNORMALIZED
;
1635 tex_instruction_ptr
->m_Word2
.f
.offset_x
= 0x0;
1636 tex_instruction_ptr
->m_Word2
.f
.offset_y
= 0x0;
1637 tex_instruction_ptr
->m_Word2
.f
.offset_z
= 0x0;
1638 tex_instruction_ptr
->m_Word2
.f
.sampler_id
= texture_unit_source
->reg
;
1641 if ( (pAsm
->D
.dst
.rtype
== DST_REG_TEMPORARY
) ||
1642 (pAsm
->D
.dst
.rtype
== DST_REG_OUT
) )
1644 tex_instruction_ptr
->m_Word0
.f
.src_gpr
= texture_coordinate_source
->reg
;
1645 tex_instruction_ptr
->m_Word0
.f
.src_rel
= SQ_ABSOLUTE
;
1647 tex_instruction_ptr
->m_Word1
.f
.dst_gpr
= pAsm
->D
.dst
.reg
;
1648 tex_instruction_ptr
->m_Word1
.f
.dst_rel
= SQ_ABSOLUTE
;
1650 tex_instruction_ptr
->m_Word1
.f
.dst_sel_x
= (pAsm
->D
.dst
.writex
? texture_unit_source
->swizzlex
: SQ_SEL_MASK
);
1651 tex_instruction_ptr
->m_Word1
.f
.dst_sel_y
= (pAsm
->D
.dst
.writey
? texture_unit_source
->swizzley
: SQ_SEL_MASK
);
1652 tex_instruction_ptr
->m_Word1
.f
.dst_sel_z
= (pAsm
->D
.dst
.writez
? texture_unit_source
->swizzlez
: SQ_SEL_MASK
);
1653 tex_instruction_ptr
->m_Word1
.f
.dst_sel_w
= (pAsm
->D
.dst
.writew
? texture_unit_source
->swizzlew
: SQ_SEL_MASK
);
1656 tex_instruction_ptr
->m_Word2
.f
.src_sel_x
= texture_coordinate_source
->swizzlex
;
1657 tex_instruction_ptr
->m_Word2
.f
.src_sel_y
= texture_coordinate_source
->swizzley
;
1658 tex_instruction_ptr
->m_Word2
.f
.src_sel_z
= texture_coordinate_source
->swizzlez
;
1659 tex_instruction_ptr
->m_Word2
.f
.src_sel_w
= texture_coordinate_source
->swizzlew
;
1663 radeon_error("Only temp destination registers supported for TEX dest regs.\n");
1667 if( GL_FALSE
== add_tex_instruction(pAsm
, tex_instruction_ptr
) )
1675 void initialize(r700_AssemblerBase
*pAsm
)
1677 GLuint cycle
, component
;
1679 for (cycle
=0; cycle
<NUMBER_OF_CYCLES
; cycle
++)
1681 for (component
=0; component
<NUMBER_OF_COMPONENTS
; component
++)
1683 pAsm
->hw_gpr
[cycle
][component
] = (-1);
1686 for (component
=0; component
<NUMBER_OF_COMPONENTS
; component
++)
1688 pAsm
->hw_cfile_addr
[component
] = (-1);
1689 pAsm
->hw_cfile_chan
[component
] = (-1);
1693 GLboolean
assemble_alu_src(R700ALUInstruction
* alu_instruction_ptr
,
1696 BITS scalar_channel_index
)
1703 //--------------------------------------------------------------------------
1704 // Source for operands src0, src1.
1705 // Values [0,127] correspond to GPR[0..127].
1706 // Values [256,511] correspond to cfile constants c[0..255].
1708 //--------------------------------------------------------------------------
1709 // Other special values are shown in the list below.
1711 // 248 SQ_ALU_SRC_0: special constant 0.0.
1712 // 249 SQ_ALU_SRC_1: special constant 1.0 float.
1714 // 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
1715 // 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
1717 // 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
1718 // 253 SQ_ALU_SRC_LITERAL: literal constant.
1720 // 254 SQ_ALU_SRC_PV: previous vector result.
1721 // 255 SQ_ALU_SRC_PS: previous scalar result.
1722 //--------------------------------------------------------------------------
1724 BITS channel_swizzle
;
1725 switch (scalar_channel_index
)
1727 case 0: channel_swizzle
= pSource
->swizzlex
; break;
1728 case 1: channel_swizzle
= pSource
->swizzley
; break;
1729 case 2: channel_swizzle
= pSource
->swizzlez
; break;
1730 case 3: channel_swizzle
= pSource
->swizzlew
; break;
1731 default: channel_swizzle
= SQ_SEL_MASK
; break;
1734 if(channel_swizzle
== SQ_SEL_0
)
1736 src_sel
= SQ_ALU_SRC_0
;
1738 else if (channel_swizzle
== SQ_SEL_1
)
1740 src_sel
= SQ_ALU_SRC_1
;
1744 if ( (pSource
->rtype
== SRC_REG_TEMPORARY
) ||
1745 (pSource
->rtype
== SRC_REG_INPUT
)
1748 src_sel
= pSource
->reg
;
1750 else if (pSource
->rtype
== SRC_REG_CONSTANT
)
1752 src_sel
= pSource
->reg
+ CFILE_REGISTER_OFFSET
;
1754 else if (pSource
->rtype
== SRC_REC_LITERAL
)
1756 src_sel
= SQ_ALU_SRC_LITERAL
;
1760 radeon_error("Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.\n",
1761 source_index
, pSource
->rtype
);
1766 if( ADDR_ABSOLUTE
== addrmode_PVSSRC(pSource
) )
1768 src_rel
= SQ_ABSOLUTE
;
1772 src_rel
= SQ_RELATIVE
;
1775 switch (channel_swizzle
)
1778 src_chan
= SQ_CHAN_X
;
1781 src_chan
= SQ_CHAN_Y
;
1784 src_chan
= SQ_CHAN_Z
;
1787 src_chan
= SQ_CHAN_W
;
1791 // Does not matter since src_sel controls
1792 src_chan
= SQ_CHAN_X
;
1795 radeon_error("Unknown source select value (%d) in assemble_alu_src().\n", channel_swizzle
);
1800 switch (scalar_channel_index
)
1802 case 0: src_neg
= pSource
->negx
; break;
1803 case 1: src_neg
= pSource
->negy
; break;
1804 case 2: src_neg
= pSource
->negz
; break;
1805 case 3: src_neg
= pSource
->negw
; break;
1806 default: src_neg
= 0; break;
1809 switch (source_index
)
1812 assert(alu_instruction_ptr
);
1813 alu_instruction_ptr
->m_Word0
.f
.src0_sel
= src_sel
;
1814 alu_instruction_ptr
->m_Word0
.f
.src0_rel
= src_rel
;
1815 alu_instruction_ptr
->m_Word0
.f
.src0_chan
= src_chan
;
1816 alu_instruction_ptr
->m_Word0
.f
.src0_neg
= src_neg
;
1819 assert(alu_instruction_ptr
);
1820 alu_instruction_ptr
->m_Word0
.f
.src1_sel
= src_sel
;
1821 alu_instruction_ptr
->m_Word0
.f
.src1_rel
= src_rel
;
1822 alu_instruction_ptr
->m_Word0
.f
.src1_chan
= src_chan
;
1823 alu_instruction_ptr
->m_Word0
.f
.src1_neg
= src_neg
;
1826 assert(alu_instruction_ptr
);
1827 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_sel
= src_sel
;
1828 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_rel
= src_rel
;
1829 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_chan
= src_chan
;
1830 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_neg
= src_neg
;
1833 radeon_error("Only three sources allowed in ALU opcodes.\n");
1841 GLboolean
add_alu_instruction(r700_AssemblerBase
* pAsm
,
1842 R700ALUInstruction
* alu_instruction_ptr
,
1843 GLuint contiguous_slots_needed
)
1845 if( GL_FALSE
== check_current_clause(pAsm
, CF_ALU_CLAUSE
) )
1850 if ( pAsm
->alu_x_opcode
!= 0 ||
1851 pAsm
->cf_current_alu_clause_ptr
== NULL
||
1852 ( (pAsm
->cf_current_alu_clause_ptr
!= NULL
) &&
1853 (pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
>= (GetCFMaxInstructions(pAsm
->cf_current_alu_clause_ptr
->m_ShaderInstType
)-contiguous_slots_needed
-1) )
1857 //new cf inst for this clause
1858 pAsm
->cf_current_alu_clause_ptr
= (R700ControlFlowALUClause
*) CALLOC_STRUCT(R700ControlFlowALUClause
);
1860 // link the new cf to cf segment
1861 if(NULL
!= pAsm
->cf_current_alu_clause_ptr
)
1863 Init_R700ControlFlowALUClause(pAsm
->cf_current_alu_clause_ptr
);
1864 AddCFInstruction( pAsm
->pR700Shader
,
1865 (R700ControlFlowInstruction
*)pAsm
->cf_current_alu_clause_ptr
);
1869 radeon_error("Could not allocate a new ALU CF instruction.\n");
1873 pAsm
->cf_current_alu_clause_ptr
->m_Word0
.f
.kcache_bank0
= 0x0;
1874 pAsm
->cf_current_alu_clause_ptr
->m_Word0
.f
.kcache_bank1
= 0x0;
1875 pAsm
->cf_current_alu_clause_ptr
->m_Word0
.f
.kcache_mode0
= SQ_CF_KCACHE_NOP
;
1877 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.kcache_mode1
= SQ_CF_KCACHE_NOP
;
1878 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.kcache_addr0
= 0x0;
1879 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.kcache_addr1
= 0x0;
1881 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
= 0x0;
1883 if(pAsm
->alu_x_opcode
!= 0)
1885 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.cf_inst
= pAsm
->alu_x_opcode
;
1886 pAsm
->alu_x_opcode
= 0;
1890 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_ALU
;
1893 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
1895 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
1899 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
+= (GetInstructionSize(alu_instruction_ptr
->m_ShaderInstType
) / 2);
1902 // If this clause contains any instruction that is forward dependent on a TEX instruction,
1903 // set the whole_quad_mode for this clause
1904 if ( pAsm
->pInstDeps
[pAsm
->uiCurInst
].nDstDep
> (-1) )
1906 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x1;
1909 if (pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
>= (GetCFMaxInstructions(pAsm
->cf_current_alu_clause_ptr
->m_ShaderInstType
)-1) )
1911 alu_instruction_ptr
->m_Word0
.f
.last
= 1;
1914 if(NULL
== pAsm
->cf_current_alu_clause_ptr
->m_pLinkedALUInstruction
)
1916 pAsm
->cf_current_alu_clause_ptr
->m_pLinkedALUInstruction
= alu_instruction_ptr
;
1917 alu_instruction_ptr
->m_pLinkedALUClause
= pAsm
->cf_current_alu_clause_ptr
;
1920 AddALUInstruction(pAsm
->pR700Shader
, alu_instruction_ptr
);
1925 void get_src_properties(R700ALUInstruction
* alu_instruction_ptr
,
1932 switch (source_index
)
1935 *psrc_sel
= alu_instruction_ptr
->m_Word0
.f
.src0_sel
;
1936 *psrc_rel
= alu_instruction_ptr
->m_Word0
.f
.src0_rel
;
1937 *psrc_chan
= alu_instruction_ptr
->m_Word0
.f
.src0_chan
;
1938 *psrc_neg
= alu_instruction_ptr
->m_Word0
.f
.src0_neg
;
1942 *psrc_sel
= alu_instruction_ptr
->m_Word0
.f
.src1_sel
;
1943 *psrc_rel
= alu_instruction_ptr
->m_Word0
.f
.src1_rel
;
1944 *psrc_chan
= alu_instruction_ptr
->m_Word0
.f
.src1_chan
;
1945 *psrc_neg
= alu_instruction_ptr
->m_Word0
.f
.src1_neg
;
1949 *psrc_sel
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_sel
;
1950 *psrc_rel
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_rel
;
1951 *psrc_chan
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_chan
;
1952 *psrc_neg
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_neg
;
1957 int is_cfile(BITS sel
)
1959 if (sel
> 255 && sel
< 512)
1966 int is_const(BITS sel
)
1972 else if(sel
>= SQ_ALU_SRC_0
&& sel
<= SQ_ALU_SRC_LITERAL
)
1979 int is_gpr(BITS sel
)
1981 if (sel
>= 0 && sel
< 128)
1988 const GLuint BANK_SWIZZLE_VEC
[8] = {SQ_ALU_VEC_210
, //000
1989 SQ_ALU_VEC_120
, //001
1990 SQ_ALU_VEC_102
, //010
1992 SQ_ALU_VEC_201
, //011
1993 SQ_ALU_VEC_012
, //100
1994 SQ_ALU_VEC_021
, //101
1996 SQ_ALU_VEC_012
, //110
1997 SQ_ALU_VEC_012
}; //111
1999 const GLuint BANK_SWIZZLE_SCL
[8] = {SQ_ALU_SCL_210
, //000
2000 SQ_ALU_SCL_122
, //001
2001 SQ_ALU_SCL_122
, //010
2003 SQ_ALU_SCL_221
, //011
2004 SQ_ALU_SCL_212
, //100
2005 SQ_ALU_SCL_122
, //101
2007 SQ_ALU_SCL_122
, //110
2008 SQ_ALU_SCL_122
}; //111
2010 GLboolean
reserve_cfile(r700_AssemblerBase
* pAsm
,
2014 int res_match
= (-1);
2015 int res_empty
= (-1);
2019 for (res
=3; res
>=0; res
--)
2021 if(pAsm
->hw_cfile_addr
[ res
] < 0)
2025 else if( (pAsm
->hw_cfile_addr
[res
] == (int)sel
)
2027 (pAsm
->hw_cfile_chan
[ res
] == (int) chan
) )
2035 // Read for this scalar component already reserved, nothing to do here.
2038 else if(res_empty
>= 0)
2040 pAsm
->hw_cfile_addr
[ res_empty
] = sel
;
2041 pAsm
->hw_cfile_chan
[ res_empty
] = chan
;
2045 radeon_error("All cfile read ports are used, cannot reference C$sel, channel $chan.\n");
2051 GLboolean
reserve_gpr(r700_AssemblerBase
* pAsm
, GLuint sel
, GLuint chan
, GLuint cycle
)
2053 if(pAsm
->hw_gpr
[cycle
][chan
] < 0)
2055 pAsm
->hw_gpr
[cycle
][chan
] = sel
;
2057 else if(pAsm
->hw_gpr
[cycle
][chan
] != (int)sel
)
2059 radeon_error("Another scalar operation has already used GPR read port for given channel\n");
2066 GLboolean
cycle_for_scalar_bank_swizzle(const int swiz
, const int sel
, GLuint
* pCycle
)
2070 case SQ_ALU_SCL_210
:
2072 int table
[3] = {2, 1, 0};
2073 *pCycle
= table
[sel
];
2077 case SQ_ALU_SCL_122
:
2079 int table
[3] = {1, 2, 2};
2080 *pCycle
= table
[sel
];
2084 case SQ_ALU_SCL_212
:
2086 int table
[3] = {2, 1, 2};
2087 *pCycle
= table
[sel
];
2091 case SQ_ALU_SCL_221
:
2093 int table
[3] = {2, 2, 1};
2094 *pCycle
= table
[sel
];
2099 radeon_error("Bad Scalar bank swizzle value\n");
2106 GLboolean
cycle_for_vector_bank_swizzle(const int swiz
, const int sel
, GLuint
* pCycle
)
2110 case SQ_ALU_VEC_012
:
2112 int table
[3] = {0, 1, 2};
2113 *pCycle
= table
[sel
];
2116 case SQ_ALU_VEC_021
:
2118 int table
[3] = {0, 2, 1};
2119 *pCycle
= table
[sel
];
2122 case SQ_ALU_VEC_120
:
2124 int table
[3] = {1, 2, 0};
2125 *pCycle
= table
[sel
];
2128 case SQ_ALU_VEC_102
:
2130 int table
[3] = {1, 0, 2};
2131 *pCycle
= table
[sel
];
2134 case SQ_ALU_VEC_201
:
2136 int table
[3] = {2, 0, 1};
2137 *pCycle
= table
[sel
];
2140 case SQ_ALU_VEC_210
:
2142 int table
[3] = {2, 1, 0};
2143 *pCycle
= table
[sel
];
2147 radeon_error("Bad Vec bank swizzle value\n");
2155 GLboolean
check_scalar(r700_AssemblerBase
* pAsm
,
2156 R700ALUInstruction
* alu_instruction_ptr
)
2159 GLuint bank_swizzle
;
2160 GLuint const_count
= 0;
2169 BITS src_sel
[3] = {0,0,0};
2170 BITS src_chan
[3] = {0,0,0};
2171 BITS src_rel
[3] = {0,0,0};
2172 BITS src_neg
[3] = {0,0,0};
2176 GLuint number_of_operands
= r700GetNumOperands(pAsm
->D
.dst
.opcode
, pAsm
->D
.dst
.op3
);
2178 for (src
=0; src
<number_of_operands
; src
++)
2180 get_src_properties(alu_instruction_ptr
,
2189 swizzle_key
= ( (is_const( src_sel
[0] ) ? 4 : 0) +
2190 (is_const( src_sel
[1] ) ? 2 : 0) +
2191 (is_const( src_sel
[2] ) ? 1 : 0) );
2193 alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
= BANK_SWIZZLE_SCL
[ swizzle_key
];
2195 for (src
=0; src
<number_of_operands
; src
++)
2197 sel
= src_sel
[src
];
2198 chan
= src_chan
[src
];
2199 rel
= src_rel
[src
];
2200 neg
= src_neg
[src
];
2202 if (is_const( sel
))
2204 // Any constant, including literal and inline constants
2207 if (is_cfile( sel
))
2209 reserve_cfile(pAsm
, sel
, chan
);
2215 for (src
=0; src
<number_of_operands
; src
++)
2217 sel
= src_sel
[src
];
2218 chan
= src_chan
[src
];
2219 rel
= src_rel
[src
];
2220 neg
= src_neg
[src
];
2224 bank_swizzle
= alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
;
2226 if( GL_FALSE
== cycle_for_scalar_bank_swizzle(bank_swizzle
, src
, &cycle
) )
2231 if(cycle
< const_count
)
2233 if( GL_FALSE
== reserve_gpr(pAsm
, sel
, chan
, cycle
) )
2244 GLboolean
check_vector(r700_AssemblerBase
* pAsm
,
2245 R700ALUInstruction
* alu_instruction_ptr
)
2248 GLuint bank_swizzle
;
2249 GLuint const_count
= 0;
2258 BITS src_sel
[3] = {0,0,0};
2259 BITS src_chan
[3] = {0,0,0};
2260 BITS src_rel
[3] = {0,0,0};
2261 BITS src_neg
[3] = {0,0,0};
2265 GLuint number_of_operands
= r700GetNumOperands(pAsm
->D
.dst
.opcode
, pAsm
->D
.dst
.op3
);
2267 for (src
=0; src
<number_of_operands
; src
++)
2269 get_src_properties(alu_instruction_ptr
,
2278 swizzle_key
= ( (is_const( src_sel
[0] ) ? 4 : 0) +
2279 (is_const( src_sel
[1] ) ? 2 : 0) +
2280 (is_const( src_sel
[2] ) ? 1 : 0)
2283 alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
= BANK_SWIZZLE_VEC
[swizzle_key
];
2285 for (src
=0; src
<number_of_operands
; src
++)
2287 sel
= src_sel
[src
];
2288 chan
= src_chan
[src
];
2289 rel
= src_rel
[src
];
2290 neg
= src_neg
[src
];
2293 bank_swizzle
= alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
;
2297 if( GL_FALSE
== cycle_for_vector_bank_swizzle(bank_swizzle
, src
, &cycle
) )
2303 (sel
== src_sel
[0]) &&
2304 (chan
== src_chan
[0]) )
2309 if( GL_FALSE
== reserve_gpr(pAsm
, sel
, chan
, cycle
) )
2315 else if( is_const(sel
) )
2321 if( GL_FALSE
== reserve_cfile(pAsm
, sel
, chan
) )
2332 GLboolean
assemble_alu_instruction(r700_AssemblerBase
*pAsm
)
2334 R700ALUInstruction
* alu_instruction_ptr
= NULL
;
2335 R700ALUInstructionHalfLiteral
* alu_instruction_ptr_hl
;
2336 R700ALUInstructionFullLiteral
* alu_instruction_ptr_fl
;
2338 GLuint number_of_scalar_operations
;
2339 GLboolean is_single_scalar_operation
;
2340 GLuint scalar_channel_index
;
2342 PVSSRC
* pcurrent_source
;
2343 int current_source_index
;
2344 GLuint contiguous_slots_needed
;
2346 GLuint uNumSrc
= r700GetNumOperands(pAsm
->D
.dst
.opcode
, pAsm
->D
.dst
.op3
);
2347 //GLuint channel_swizzle, j;
2348 //GLuint chan_counter[4] = {0, 0, 0, 0};
2349 //PVSSRC * pSource[3];
2350 GLboolean bSplitInst
= GL_FALSE
;
2352 if (1 == pAsm
->D
.dst
.math
)
2354 is_single_scalar_operation
= GL_TRUE
;
2355 number_of_scalar_operations
= 1;
2359 is_single_scalar_operation
= GL_FALSE
;
2360 number_of_scalar_operations
= 4;
2362 /* current assembler doesn't do more than 1 register per source */
2364 /* check read port, only very preliminary algorithm, not count in
2365 src0/1 same comp case and prev slot repeat case; also not count relative
2366 addressing. TODO: improve performance. */
2367 for(j
=0; j
<uNumSrc
; j
++)
2369 pSource
[j
] = &(pAsm
->S
[j
].src
);
2371 for(scalar_channel_index
=0; scalar_channel_index
<4; scalar_channel_index
++)
2373 for(j
=0; j
<uNumSrc
; j
++)
2375 switch (scalar_channel_index
)
2377 case 0: channel_swizzle
= pSource
[j
]->swizzlex
; break;
2378 case 1: channel_swizzle
= pSource
[j
]->swizzley
; break;
2379 case 2: channel_swizzle
= pSource
[j
]->swizzlez
; break;
2380 case 3: channel_swizzle
= pSource
[j
]->swizzlew
; break;
2381 default: channel_swizzle
= SQ_SEL_MASK
; break;
2383 if ( ((pSource
[j
]->rtype
== SRC_REG_TEMPORARY
) ||
2384 (pSource
[j
]->rtype
== SRC_REG_INPUT
))
2385 && (channel_swizzle
<= SQ_SEL_W
) )
2387 chan_counter
[channel_swizzle
]++;
2391 if( (chan_counter
[SQ_SEL_X
] > 3)
2392 || (chan_counter
[SQ_SEL_Y
] > 3)
2393 || (chan_counter
[SQ_SEL_Z
] > 3)
2394 || (chan_counter
[SQ_SEL_W
] > 3) ) /* each chan bank has only 3 ports. */
2396 bSplitInst
= GL_TRUE
;
2401 contiguous_slots_needed
= 0;
2403 if(!is_single_scalar_operation
)
2405 contiguous_slots_needed
= 4;
2408 contiguous_slots_needed
+= pAsm
->D2
.dst2
.literal_slots
;
2412 for (scalar_channel_index
=0;
2413 scalar_channel_index
< number_of_scalar_operations
;
2414 scalar_channel_index
++)
2416 if(scalar_channel_index
== (number_of_scalar_operations
-1))
2418 switch(pAsm
->D2
.dst2
.literal_slots
)
2421 alu_instruction_ptr
= (R700ALUInstruction
*) CALLOC_STRUCT(R700ALUInstruction
);
2422 Init_R700ALUInstruction(alu_instruction_ptr
);
2425 alu_instruction_ptr_hl
= (R700ALUInstructionHalfLiteral
*) CALLOC_STRUCT(R700ALUInstructionHalfLiteral
);
2426 Init_R700ALUInstructionHalfLiteral(alu_instruction_ptr_hl
, pAsm
->C
[0].f
, pAsm
->C
[1].f
);
2427 alu_instruction_ptr
= (R700ALUInstruction
*)alu_instruction_ptr_hl
;
2430 alu_instruction_ptr_fl
= (R700ALUInstructionFullLiteral
*) CALLOC_STRUCT(R700ALUInstructionFullLiteral
);
2431 Init_R700ALUInstructionFullLiteral(alu_instruction_ptr_fl
,pAsm
->C
[0].f
, pAsm
->C
[1].f
, pAsm
->C
[2].f
, pAsm
->C
[3].f
);
2432 alu_instruction_ptr
= (R700ALUInstruction
*)alu_instruction_ptr_fl
;
2438 alu_instruction_ptr
= (R700ALUInstruction
*) CALLOC_STRUCT(R700ALUInstruction
);
2439 Init_R700ALUInstruction(alu_instruction_ptr
);
2443 current_source_index
= 0;
2444 pcurrent_source
= &(pAsm
->S
[0].src
);
2446 if (GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2447 current_source_index
,
2449 scalar_channel_index
) )
2457 current_source_index
= 1;
2458 pcurrent_source
= &(pAsm
->S
[current_source_index
].src
);
2460 if (GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2461 current_source_index
,
2463 scalar_channel_index
) )
2470 alu_instruction_ptr
->m_Word0
.f
.index_mode
= pAsm
->D2
.dst2
.index_mode
;
2472 if( (is_single_scalar_operation
== GL_TRUE
)
2473 || (GL_TRUE
== bSplitInst
) )
2475 alu_instruction_ptr
->m_Word0
.f
.last
= 1;
2479 alu_instruction_ptr
->m_Word0
.f
.last
= (scalar_channel_index
== 3) ? 1 : 0;
2482 alu_instruction_ptr
->m_Word0
.f
.pred_sel
= (pAsm
->D
.dst
.pred_inv
> 0) ? 1 : 0;
2483 if(1 == pAsm
->D
.dst
.predicated
)
2485 alu_instruction_ptr
->m_Word1_OP2
.f
.update_pred
= 0x1;
2486 alu_instruction_ptr
->m_Word1_OP2
.f
.update_execute_mask
= 0x1;
2490 alu_instruction_ptr
->m_Word1_OP2
.f
.update_pred
= 0x0;
2491 alu_instruction_ptr
->m_Word1_OP2
.f
.update_execute_mask
= 0x0;
2495 if( (pAsm
->D
.dst
.rtype
== DST_REG_TEMPORARY
) ||
2496 (pAsm
->D
.dst
.rtype
== DST_REG_OUT
) )
2498 alu_instruction_ptr
->m_Word1
.f
.dst_gpr
= pAsm
->D
.dst
.reg
;
2502 radeon_error("Only temp destination registers supported for ALU dest regs.\n");
2506 alu_instruction_ptr
->m_Word1
.f
.dst_rel
= SQ_ABSOLUTE
; //D.rtype
2508 if ( is_single_scalar_operation
== GL_TRUE
)
2510 // Override scalar_channel_index since only one scalar value will be written
2511 if(pAsm
->D
.dst
.writex
)
2513 scalar_channel_index
= 0;
2515 else if(pAsm
->D
.dst
.writey
)
2517 scalar_channel_index
= 1;
2519 else if(pAsm
->D
.dst
.writez
)
2521 scalar_channel_index
= 2;
2523 else if(pAsm
->D
.dst
.writew
)
2525 scalar_channel_index
= 3;
2529 alu_instruction_ptr
->m_Word1
.f
.dst_chan
= scalar_channel_index
;
2531 alu_instruction_ptr
->m_Word1
.f
.clamp
= pAsm
->D2
.dst2
.SaturateMode
;
2533 if (pAsm
->D
.dst
.op3
)
2537 alu_instruction_ptr
->m_Word1_OP3
.f
.alu_inst
= pAsm
->D
.dst
.opcode
;
2539 //There's 3rd src for op3
2540 current_source_index
= 2;
2541 pcurrent_source
= &(pAsm
->S
[current_source_index
].src
);
2543 if ( GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2544 current_source_index
,
2546 scalar_channel_index
) )
2556 alu_instruction_ptr
->m_Word1_OP2
.f6
.alu_inst
= pAsm
->D
.dst
.opcode
;
2558 alu_instruction_ptr
->m_Word1_OP2
.f6
.src0_abs
= pAsm
->S
[0].src
.abs
;
2559 alu_instruction_ptr
->m_Word1_OP2
.f6
.src1_abs
= pAsm
->S
[1].src
.abs
;
2561 //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
2562 //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0;
2563 switch (scalar_channel_index
)
2566 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writex
;
2569 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writey
;
2572 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writez
;
2575 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writew
;
2578 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= 1; //SQ_SEL_MASK;
2581 alu_instruction_ptr
->m_Word1_OP2
.f6
.omod
= SQ_ALU_OMOD_OFF
;
2585 alu_instruction_ptr
->m_Word1_OP2
.f
.alu_inst
= pAsm
->D
.dst
.opcode
;
2587 alu_instruction_ptr
->m_Word1_OP2
.f
.src0_abs
= pAsm
->S
[0].src
.abs
;
2588 alu_instruction_ptr
->m_Word1_OP2
.f
.src1_abs
= pAsm
->S
[1].src
.abs
;
2590 //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2591 //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2592 switch (scalar_channel_index
)
2595 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writex
;
2598 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writey
;
2601 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writez
;
2604 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writew
;
2607 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= 1; //SQ_SEL_MASK;
2610 alu_instruction_ptr
->m_Word1_OP2
.f
.omod
= SQ_ALU_OMOD_OFF
;
2614 if(GL_FALSE
== add_alu_instruction(pAsm
, alu_instruction_ptr
, contiguous_slots_needed
) )
2620 * Judge the type of current instruction, is it vector or scalar
2623 if (is_single_scalar_operation
)
2625 if(GL_FALSE
== check_scalar(pAsm
, alu_instruction_ptr
) )
2632 if(GL_FALSE
== check_vector(pAsm
, alu_instruction_ptr
) )
2638 contiguous_slots_needed
-= 1;
2644 GLboolean
assemble_math_function(r700_AssemblerBase
* pAsm
, BITS opcode
)
2650 tmp
= gethelpr(pAsm
);
2652 // opcode tmp.x, a.x
2655 pAsm
->D
.dst
.opcode
= opcode
;
2656 pAsm
->D
.dst
.math
= 1;
2658 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2659 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2660 pAsm
->D
.dst
.reg
= tmp
;
2661 pAsm
->D
.dst
.writex
= 1;
2663 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2668 if ( GL_FALSE
== next_ins(pAsm
) )
2673 // Now replicate result to all necessary channels in destination
2674 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
2676 if( GL_FALSE
== assemble_dst(pAsm
) )
2681 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2682 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
2683 pAsm
->S
[0].src
.reg
= tmp
;
2685 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
2686 noneg_PVSSRC(&(pAsm
->S
[0].src
));
2688 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * Absolute value, emitted as MAX(src, -src): the destination and source 0 are
 * assembled normally, source slot 1 is a bit-copy of slot 0 with its negate
 * state flipped, and a single SQ_OP2_INST_MAX instruction is emitted.
 * Failure handling after the GL_FALSE comparisons is not visible here.
 */
2696 GLboolean
assemble_ABS(r700_AssemblerBase
*pAsm
)
2700 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
2702 if( GL_FALSE
== assemble_dst(pAsm
) )
2706 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
/* Second operand = first operand with the sign flipped. */
2711 pAsm
->S
[1].bits
= pAsm
->S
[0].bits
;
2712 flipneg_PVSSRC(&(pAsm
->S
[1].src
));
2714 if ( GL_FALSE
== next_ins(pAsm
) )
/*
 * Two-operand add, shared with subtract: emits SQ_OP2_INST_ADD on sources 0
 * and 1, and when the current IL instruction is OPCODE_SUB the negate state
 * of the second source is flipped first so the hardware computes a + (-b).
 * Failure handling after the GL_FALSE comparisons is not visible here.
 */
2722 GLboolean
assemble_ADD(r700_AssemblerBase
*pAsm
)
2724 if( GL_FALSE
== checkop2(pAsm
) )
2729 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_ADD
;
2731 if( GL_FALSE
== assemble_dst(pAsm
) )
2736 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2741 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
/* SUB reuses the ADD opcode with the second operand negated. */
2746 if(pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_SUB
)
2748 flipneg_PVSSRC(&(pAsm
->S
[1].src
));
2751 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * Address-register load: emits SQ_OP2_INST_MOVA_FLOOR on source 0.  The
 * destination is set to temporary register 0 with all four write enables
 * cleared, so no general-purpose register channel is written.
 * Failure handling after the GL_FALSE comparisons is not visible here.
 */
2759 GLboolean
assemble_ARL(r700_AssemblerBase
*pAsm
)
2760 { /* TODO: AR values don't persist between clauses */
2761 if( GL_FALSE
== checkop1(pAsm
) )
2766 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOVA_FLOOR
;
2767 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2768 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2769 pAsm
->D
.dst
.reg
= 0;
/* All channel write enables off. */
2770 pAsm
->D
.dst
.writex
= 0;
2771 pAsm
->D
.dst
.writey
= 0;
2772 pAsm
->D
.dst
.writez
= 0;
2773 pAsm
->D
.dst
.writew
= 0;
2775 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2780 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * Handler for opcodes the assembler does not implement: reports the opcode
 * name through radeon_error().  opcode_str is printed with "%s".
 * The returned value is not visible in this listing.
 */
2788 GLboolean
assemble_BAD(char *opcode_str
)
2790 radeon_error("Not yet implemented instruction (%s)\n", opcode_str
);
/*
 * CMP, emitted as the three-operand SQ_OP3_INST_CNDGE.
 *
 * OP3 instructions cannot honour a write mask, so when the IL destination
 * mask is not 0xF the result is first written unmasked to a helper temporary
 * and then copied to the real destination with a MOV (which applies the mask
 * assembled by assemble_dst()).
 *
 * Operand order: IL source 0 goes to slot 0, IL source 2 to slot 1 and IL
 * source 1 to slot 2 (presumably assemble_src(pAsm, il_index, slot), with -1
 * meaning "same slot as the IL index" - confirm against its definition).
 *
 * The else-branches and failure handling are not visible in this listing.
 */
2794 GLboolean
assemble_CMP(r700_AssemblerBase
*pAsm
)
2798 if( GL_FALSE
== checkop3(pAsm
) )
2803 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_CNDGE
;
2804 pAsm
->D
.dst
.op3
= 1;
2808 if(0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
2810 //OP3 has no support for write mask
2811 tmp
= gethelpr(pAsm
);
2813 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2814 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2815 pAsm
->D
.dst
.reg
= tmp
;
2817 nomask_PVSDST(&(pAsm
->D
.dst
));
2821 if( GL_FALSE
== assemble_dst(pAsm
) )
2827 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2832 if( GL_FALSE
== assemble_src(pAsm
, 2, 1) )
2837 if( GL_FALSE
== assemble_src(pAsm
, 1, 2) )
2842 if ( GL_FALSE
== next_ins(pAsm
) )
/* Partial write mask: copy the helper temporary into the real destination. */
2847 if (0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
2849 if( GL_FALSE
== assemble_dst(pAsm
) )
2854 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
2857 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2858 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
2859 pAsm
->S
[0].src
.reg
= tmp
;
2861 noneg_PVSSRC(&(pAsm
->S
[0].src
));
2862 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
2864 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * Trigonometric op (opcode supplied by the caller) with range reduction of
 * the argument, all done in the x channel of one helper temporary:
 *
 *   1. MULADD tmp = src0 * (1 / 2PI) + 0.5     (literal constants C[0], C[1])
 *   2. FRACT  tmp.x = fract(tmp.x)
 *   3. MULADD tmp = tmp.x * C[0] + C[1]        (rescale; see note below)
 *   4. `opcode` (math instruction) on tmp.x
 *
 * pAsm   - assembler state being filled in.
 * opcode - ALU opcode emitted in step 4.
 *
 * Change from the previous version: the result of assemble_src() in step 1
 * is now checked and a failure is propagated as GL_FALSE, matching every
 * other assemble_src() call site in this file instead of silently emitting
 * an instruction with an unassembled operand.
 *
 * Some original lines (braces, branches, returns) are not visible in this
 * listing; only the visible statements are described.
 */
2873 GLboolean
assemble_TRIG(r700_AssemblerBase
*pAsm
, BITS opcode
)
2876 * r600 - trunc to -PI..PI range
2877 * r700 - normalize by dividing by 2PI
2884 tmp
= gethelpr(pAsm
);
/* Step 1: tmp = src0 * 1/(2PI) + 0.5 */
2886 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
2887 pAsm
->D
.dst
.op3
= 1;
2889 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2890 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2891 pAsm
->D
.dst
.reg
= tmp
;
2893 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
{
    return GL_FALSE;
}
2895 pAsm
->S
[1].src
.rtype
= SRC_REC_LITERAL
;
2896 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_X
);
2898 pAsm
->S
[2].src
.rtype
= SRC_REC_LITERAL
;
2899 setswizzle_PVSSRC(&(pAsm
->S
[2].src
), SQ_SEL_Y
);
2901 pAsm
->D2
.dst2
.literal_slots
= 1;
2902 pAsm
->C
[0].f
= 1/(3.1415926535 * 2);
2903 pAsm
->C
[1].f
= 0.5f
;
2905 if ( GL_FALSE
== next_ins(pAsm
) )
/* Step 2: tmp.x = fract(tmp.x) */
2910 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FRACT
;
2912 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2913 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2914 pAsm
->D
.dst
.reg
= tmp
;
2915 pAsm
->D
.dst
.writex
= 1;
2917 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2918 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
2919 pAsm
->S
[0].src
.reg
= tmp
;
2920 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
2922 if(( GL_FALSE
== next_ins(pAsm
) ))
/* Step 3: tmp = tmp.x * C[0] + C[1] */
2926 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
2927 pAsm
->D
.dst
.op3
= 1;
2929 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2930 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2931 pAsm
->D
.dst
.reg
= tmp
;
2933 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2934 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
2935 pAsm
->S
[0].src
.reg
= tmp
;
2936 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
2938 pAsm
->S
[1].src
.rtype
= SRC_REC_LITERAL
;
2939 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_X
);
2941 pAsm
->S
[2].src
.rtype
= SRC_REC_LITERAL
;
2942 setswizzle_PVSSRC(&(pAsm
->S
[2].src
), SQ_SEL_Y
);
2944 pAsm
->D2
.dst2
.literal_slots
= 1;
/* Two alternative constant pairs follow: (2PI, -PI) maps the fraction to
 * -PI..PI, (1.0, -0.5) maps it to -0.5..0.5.  NOTE(review): the condition
 * selecting between them is not visible in this listing; presumably it is
 * the r600/r700 distinction mentioned in the comment above - confirm. */
2948 pAsm
->C
[0].f
= 3.1415926535897f
* 2.0f
;
2949 pAsm
->C
[1].f
= -3.1415926535897f
;
2953 pAsm
->C
[0].f
= 1.0f
;
2954 pAsm
->C
[1].f
= -0.5f
;
2957 if(( GL_FALSE
== next_ins(pAsm
) ))
/* Step 4: the requested trig opcode on tmp.x */
2962 pAsm
->D
.dst
.opcode
= opcode
;
2963 pAsm
->D
.dst
.math
= 1;
2967 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2968 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
2969 pAsm
->S
[0].src
.reg
= tmp
;
2970 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
2971 noneg_PVSSRC(&(pAsm
->S
[0].src
));
2975 //TODO - replicate if more channels set in WriteMask
/*
 * Dot products, all emitted as SQ_OP2_INST_DOT4 on sources 0 and 1:
 *   - OPCODE_DP3: component 3 of both sources is forced via zerocomp_PVSSRC,
 *     so the w term drops out.
 *   - OPCODE_DPH: component 3 of source 0 is forced via onecomp_PVSSRC, so
 *     the w term becomes 1 * src1.w.
 *   - otherwise the sources are used unchanged.
 * Failure handling after the GL_FALSE comparisons is not visible here.
 */
2980 GLboolean
assemble_DOT(r700_AssemblerBase
*pAsm
)
2982 if( GL_FALSE
== checkop2(pAsm
) )
2987 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_DOT4
;
2989 if( GL_FALSE
== assemble_dst(pAsm
) )
2994 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2999 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3004 if(OPCODE_DP3
== pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
)
3006 zerocomp_PVSSRC(&(pAsm
->S
[0].src
), 3);
3007 zerocomp_PVSSRC(&(pAsm
->S
[1].src
), 3);
3009 else if(pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_DPH
)
3011 onecomp_PVSSRC(&(pAsm
->S
[0].src
), 3);
3014 if ( GL_FALSE
== next_ins(pAsm
) )
/*
 * DST (distance vector), emitted as one SQ_OP2_INST_MUL: components 0 and 3
 * of source 0 and components 0 and 2 of source 1 are replaced through
 * onecomp_PVSSRC before the multiply, so (taking onecomp to substitute 1.0)
 * the product is (1, a.y*b.y, a.z, b.w).
 * Failure handling after the GL_FALSE comparisons is not visible here.
 */
3022 GLboolean
assemble_DST(r700_AssemblerBase
*pAsm
)
3024 if( GL_FALSE
== checkop2(pAsm
) )
3029 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
3031 if( GL_FALSE
== assemble_dst(pAsm
) )
3036 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3041 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3046 onecomp_PVSSRC(&(pAsm
->S
[0].src
), 0);
3047 onecomp_PVSSRC(&(pAsm
->S
[0].src
), 3);
3049 onecomp_PVSSRC(&(pAsm
->S
[1].src
), 0);
3050 onecomp_PVSSRC(&(pAsm
->S
[1].src
), 2);
3052 if ( GL_FALSE
== next_ins(pAsm
) )
/* EX2: delegates to assemble_math_function() with SQ_OP2_INST_EXP_IEEE and
 * returns its result. */
3060 GLboolean
assemble_EX2(r700_AssemblerBase
*pAsm
)
3062 return assemble_math_function(pAsm
, SQ_OP2_INST_EXP_IEEE
);
/*
 * EXP, expanded per destination channel according to the write mask of the
 * instruction currently being assembled:
 *
 *   bit 0 (x): FLOOR src0 into helper tmp.x, then EXP_IEEE of tmp.x -> dst.x
 *   bit 1 (y): FRACT of src0 -> dst.y
 *   bit 2 (z): EXP_IEEE of src0 -> dst.z
 *   bit 3 (w): MOV of the constant selected by SQ_SEL_1 -> dst.w
 *
 * For each part the other channel write enables are cleared after
 * assemble_dst() so only the intended channel is written.
 *
 * Fix: the write mask was read through `pAsm->pILInst->DstReg`, i.e. from the
 * first instruction of the program rather than the current one.  It is now
 * read from `pAsm->pILInst[pAsm->uiCurInst]`, as the other assemble_*
 * functions in this file do.
 *
 * Some original lines (braces, returns) are not visible in this listing;
 * only the visible statements are described.
 */
3065 GLboolean
assemble_EXP(r700_AssemblerBase
*pAsm
)
3071 tmp
= gethelpr(pAsm
);
/* dst.x = 2^floor(src.x) */
3076 if (pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
& 0x1) {
3077 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FLOOR
;
3079 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3080 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3081 pAsm
->D
.dst
.reg
= tmp
;
3082 pAsm
->D
.dst
.writex
= 1;
3084 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3089 if( GL_FALSE
== next_ins(pAsm
) )
3094 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
3095 pAsm
->D
.dst
.math
= 1;
3097 if( GL_FALSE
== assemble_dst(pAsm
) )
3102 pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writez
= pAsm
->D
.dst
.writew
= 0;
3104 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3105 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3106 pAsm
->S
[0].src
.reg
= tmp
;
3108 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3109 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3111 if( GL_FALSE
== next_ins(pAsm
) )
/* dst.y = fract(src) */
3119 if ((pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
>> 1) & 0x1) {
3120 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FRACT
;
3122 if( GL_FALSE
== assemble_dst(pAsm
) )
3127 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3132 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writez
= pAsm
->D
.dst
.writew
= 0;
3134 if( GL_FALSE
== next_ins(pAsm
) )
/* dst.z = 2^src */
3142 if ((pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
>> 2) & 0x1) {
3143 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
3144 pAsm
->D
.dst
.math
= 1;
3146 if( GL_FALSE
== assemble_dst(pAsm
) )
3151 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3156 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writew
= 0;
3158 if( GL_FALSE
== next_ins(pAsm
) )
/* dst.w = constant selected by SQ_SEL_1 */
3166 if ((pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
>> 3) & 0x1) {
3167 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3169 if( GL_FALSE
== assemble_dst(pAsm
) )
3174 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writez
= 0;
3176 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3177 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3178 pAsm
->S
[0].src
.reg
= tmp
;
3180 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_1
);
3181 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3183 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * FLR: one SQ_OP2_INST_FLOOR instruction from source 0 to the instruction's
 * own destination.  Failure handling after the GL_FALSE comparisons is not
 * visible in this listing.
 */
3192 GLboolean
assemble_FLR(r700_AssemblerBase
*pAsm
)
3196 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FLOOR
;
3198 if ( GL_FALSE
== assemble_dst(pAsm
) )
3203 if ( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3208 if ( GL_FALSE
== next_ins(pAsm
) )
/* Integer floor: delegates to assemble_math_function() with
 * SQ_OP2_INST_FLT_TO_INT and returns its result. */
3216 GLboolean
assemble_FLR_INT(r700_AssemblerBase
*pAsm
)
3218 return assemble_math_function(pAsm
, SQ_OP2_INST_FLT_TO_INT
);
/*
 * FRC: one SQ_OP2_INST_FRACT instruction from source 0 to the instruction's
 * own destination.  Failure handling after the GL_FALSE comparisons is not
 * visible in this listing.
 */
3221 GLboolean
assemble_FRC(r700_AssemblerBase
*pAsm
)
3225 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FRACT
;
3227 if ( GL_FALSE
== assemble_dst(pAsm
) )
3232 if ( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3237 if ( GL_FALSE
== next_ins(pAsm
) )
/*
 * Fragment kill.  Emits `opcode` (supplied by the caller) with a destination
 * of temporary register 0 and every channel write enable cleared, so only the
 * kill side effect remains.
 *
 * Operand 0 is register 0 with the SQ_SEL_0 swizzle.  Operand 1 depends on
 * the IL opcode: for OPCODE_KIL_NV it is register 0 with the SQ_SEL_1 swizzle
 * and negation set; the other visible path loads IL source 0 into slot 1 via
 * assemble_src(pAsm, 0, 1).  NOTE(review): the branch structure joining these
 * paths, and what the OPCODE_KIL test at the top guards, is not visible in
 * this listing.
 *
 * After the instruction is emitted the shader is flagged as using kill and
 * alu_x_opcode is set to SQ_CF_INST_ALU.
 */
3245 GLboolean
assemble_KIL(r700_AssemblerBase
*pAsm
, GLuint opcode
)
3247 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
3249 if(pILInst
->Opcode
== OPCODE_KIL
)
3252 pAsm
->D
.dst
.opcode
= opcode
;
3253 //pAsm->D.dst.math = 1;
3255 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3256 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3257 pAsm
->D
.dst
.reg
= 0;
3258 pAsm
->D
.dst
.writex
= 0;
3259 pAsm
->D
.dst
.writey
= 0;
3260 pAsm
->D
.dst
.writez
= 0;
3261 pAsm
->D
.dst
.writew
= 0;
3263 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3264 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3265 pAsm
->S
[0].src
.reg
= 0;
3266 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_0
);
3267 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3269 if(pILInst
->Opcode
== OPCODE_KIL_NV
)
3271 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
3272 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
3273 pAsm
->S
[1].src
.reg
= 0;
3274 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_1
);
3275 neg_PVSSRC(&(pAsm
->S
[1].src
));
3279 if( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
3286 if ( GL_FALSE
== next_ins(pAsm
) )
3291 /* Doc says KILL has to be last(end) ALU clause */
3292 pAsm
->pR700Shader
->killIsUsed
= GL_TRUE
;
3293 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
/* LG2: delegates to assemble_math_function() with SQ_OP2_INST_LOG_IEEE and
 * returns its result. */
3298 GLboolean
assemble_LG2(r700_AssemblerBase
*pAsm
)
3300 return assemble_math_function(pAsm
, SQ_OP2_INST_LOG_IEEE
);
/*
 * LRP (linear interpolation), computed as a * (b - c) + c in three
 * instructions through one helper temporary (a, b, c = IL sources 0, 1, 2):
 *
 *   1. ADD    tmp = b + (-c)      (IL source 1 in slot 0, IL source 2 negated
 *                                  in slot 1)
 *   2. MULADD tmp = tmp * a + c   (tmp in slot 0, IL source 0 in slot 1,
 *                                  IL source 2 in slot 2)
 *   3. MOV    dst = tmp           (applies the real destination/write mask)
 *
 * The temporary is written unmasked (nomask_PVSDST) in steps 1 and 2.
 * Failure handling after the GL_FALSE comparisons is not visible here.
 */
3303 GLboolean
assemble_LRP(r700_AssemblerBase
*pAsm
)
3307 if( GL_FALSE
== checkop3(pAsm
) )
3312 tmp
= gethelpr(pAsm
);
/* Step 1: tmp = b - c */
3314 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_ADD
;
3316 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3317 pAsm
->D
.dst
.reg
= tmp
;
3318 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3319 nomask_PVSDST(&(pAsm
->D
.dst
));
3322 if( GL_FALSE
== assemble_src(pAsm
, 1, 0) )
3327 if ( GL_FALSE
== assemble_src(pAsm
, 2, 1) )
3332 neg_PVSSRC(&(pAsm
->S
[1].src
));
3334 if( GL_FALSE
== next_ins(pAsm
) )
/* Step 2: tmp = tmp * a + c */
3339 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
3340 pAsm
->D
.dst
.op3
= 1;
3342 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3343 pAsm
->D
.dst
.reg
= tmp
;
3344 nomask_PVSDST(&(pAsm
->D
.dst
));
3345 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3347 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3348 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3349 pAsm
->S
[0].src
.reg
= tmp
;
3350 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3353 if( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
3358 if( GL_FALSE
== assemble_src(pAsm
, 2, -1) )
3363 if( GL_FALSE
== next_ins(pAsm
) )
/* Step 3: dst = tmp */
3368 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3370 if( GL_FALSE
== assemble_dst(pAsm
) )
3375 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3376 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3377 pAsm
->S
[0].src
.reg
= tmp
;
3378 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3380 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * LOG, expanded into a sequence that works on the x channels of three helper
 * temporaries:
 *
 *   MAX   tmp1.x = max(a.x, -a.x)     (|a.x|)
 *   LG2   tmp2.x = log2(tmp1.x)
 *   FLOOR tmp3.x = floor(tmp2.x)
 *   MOV   dst.x  = tmp3.x
 *   ADD   tmp3.x = tmp2.x - tmp3.x
 *   EX2   dst.y  = 2^tmp3.x
 *   MOV   dst.z  = tmp2.x
 *   MOV   dst.w  = constant selected by SQ_SEL_1
 *
 * Each write to the real destination calls assemble_dst() and then clears
 * the write enables of the three channels it must not touch.
 *
 * NOTE(review): most source operands below store DST_REG_TEMPORARY into a
 * source rtype field while the last MOV uses SRC_REG_TEMPORARY - confirm the
 * two enumerators have the same value.
 * Failure handling after the GL_FALSE comparisons is not visible here.
 */
3388 GLboolean
assemble_LOG(r700_AssemblerBase
*pAsm
)
3390 BITS tmp1
, tmp2
, tmp3
;
3394 tmp1
= gethelpr(pAsm
);
3395 tmp2
= gethelpr(pAsm
);
3396 tmp3
= gethelpr(pAsm
);
3398 // FIXME: The hardware can do fabs() directly on input
3399 // elements, but the compiler doesn't have the
3400 // capability to use that.
3402 // MAX tmp1.x, a.x, -a.x (fabs(a.x))
3404 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
3406 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3407 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3408 pAsm
->D
.dst
.reg
= tmp1
;
3409 pAsm
->D
.dst
.writex
= 1;
3411 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3416 pAsm
->S
[1].bits
= pAsm
->S
[0].bits
;
3417 flipneg_PVSSRC(&(pAsm
->S
[1].src
));
3419 if ( GL_FALSE
== next_ins(pAsm
) )
3426 // LG2 tmp2.x, tmp1.x
3427 // FLOOR tmp3.x, tmp2.x
3428 // MOV dst.x, tmp3.x
3429 // ADD tmp3.x, tmp2.x, -tmp3.x
3430 // EX2 dst.y, tmp3.x
3431 // MOV dst.z, tmp2.x
3434 // LG2 tmp2.x, tmp1.x
3435 // FLOOR tmp3.x, tmp2.x
3437 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_LOG_IEEE
;
3438 pAsm
->D
.dst
.math
= 1;
3440 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3441 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3442 pAsm
->D
.dst
.reg
= tmp2
;
3443 pAsm
->D
.dst
.writex
= 1;
3445 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3446 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3447 pAsm
->S
[0].src
.reg
= tmp1
;
3449 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3450 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3452 if( GL_FALSE
== next_ins(pAsm
) )
3457 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FLOOR
;
3459 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3460 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3461 pAsm
->D
.dst
.reg
= tmp3
;
3462 pAsm
->D
.dst
.writex
= 1;
3464 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3465 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3466 pAsm
->S
[0].src
.reg
= tmp2
;
3468 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3469 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3471 if( GL_FALSE
== next_ins(pAsm
) )
3476 // MOV dst.x, tmp3.x
3478 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3480 if( GL_FALSE
== assemble_dst(pAsm
) )
3485 pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writez
= pAsm
->D
.dst
.writew
= 0;
3487 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3488 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3489 pAsm
->S
[0].src
.reg
= tmp3
;
3491 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3492 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3494 if( GL_FALSE
== next_ins(pAsm
) )
3499 // ADD tmp3.x, tmp2.x, -tmp3.x
3500 // EX2 dst.y, tmp3.x
3502 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_ADD
;
3504 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3505 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3506 pAsm
->D
.dst
.reg
= tmp3
;
3507 pAsm
->D
.dst
.writex
= 1;
3509 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3510 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3511 pAsm
->S
[0].src
.reg
= tmp2
;
3513 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3514 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3516 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
3517 pAsm
->S
[1].src
.rtype
= DST_REG_TEMPORARY
;
3518 pAsm
->S
[1].src
.reg
= tmp3
;
3520 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_X
);
3521 neg_PVSSRC(&(pAsm
->S
[1].src
));
3523 if( GL_FALSE
== next_ins(pAsm
) )
3528 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
3529 pAsm
->D
.dst
.math
= 1;
3531 if( GL_FALSE
== assemble_dst(pAsm
) )
3536 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writez
= pAsm
->D
.dst
.writew
= 0;
3538 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3539 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3540 pAsm
->S
[0].src
.reg
= tmp3
;
3542 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3543 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3545 if( GL_FALSE
== next_ins(pAsm
) )
3550 // MOV dst.z, tmp2.x
3552 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3554 if( GL_FALSE
== assemble_dst(pAsm
) )
3559 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writew
= 0;
3561 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3562 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3563 pAsm
->S
[0].src
.reg
= tmp2
;
3565 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3566 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3568 if( GL_FALSE
== next_ins(pAsm
) )
/* MOV dst.w, <SQ_SEL_1 constant> */
3575 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3577 if( GL_FALSE
== assemble_dst(pAsm
) )
3582 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writez
= 0;
3584 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3585 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3586 pAsm
->S
[0].src
.reg
= tmp1
;
3588 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_1
);
3589 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3591 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * MAD, emitted as the three-operand SQ_OP3_INST_MULADD on IL sources 0..2.
 *
 * The result is routed through a helper temporary (bReplaceDst) in two cases:
 *   - the destination is a PROGRAM_TEMPORARY whose index equals that of any
 *     PROGRAM_TEMPORARY source, or
 *   - the destination write mask is not 0xF (OP3 cannot apply a write mask).
 * In that case the MULADD writes the helper unmasked and a following MOV
 * copies it to the real destination; otherwise the MULADD targets the
 * destination assembled by assemble_dst() directly.
 *
 * The else-branches and failure handling are not visible in this listing.
 */
3599 GLboolean
assemble_MAD(struct r700_AssemblerBase
*pAsm
)
3602 GLboolean bReplaceDst
= GL_FALSE
;
3603 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
3605 if( GL_FALSE
== checkop3(pAsm
) )
3610 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
3611 pAsm
->D
.dst
.op3
= 1;
3615 if(PROGRAM_TEMPORARY
== pILInst
->DstReg
.File
)
3616 { /* TODO : more investigation on MAD src and dst using same register */
3617 for(ii
=0; ii
<3; ii
++)
3619 if( (PROGRAM_TEMPORARY
== pILInst
->SrcReg
[ii
].File
)
3620 && (pILInst
->DstReg
.Index
== pILInst
->SrcReg
[ii
].Index
) )
3622 bReplaceDst
= GL_TRUE
;
3627 if(0xF != pILInst
->DstReg
.WriteMask
)
3628 { /* OP3 has no support for write mask */
3629 bReplaceDst
= GL_TRUE
;
3632 if(GL_TRUE
== bReplaceDst
)
3634 tmp
= gethelpr(pAsm
);
3636 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3637 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3638 pAsm
->D
.dst
.reg
= tmp
;
3640 nomask_PVSDST(&(pAsm
->D
.dst
));
3644 if( GL_FALSE
== assemble_dst(pAsm
) )
3650 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3655 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3660 if( GL_FALSE
== assemble_src(pAsm
, 2, -1) )
3665 if ( GL_FALSE
== next_ins(pAsm
) )
/* Copy the helper temporary into the real destination when it was used. */
3670 if (GL_TRUE
== bReplaceDst
)
3672 if( GL_FALSE
== assemble_dst(pAsm
) )
3677 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3680 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3681 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3682 pAsm
->S
[0].src
.reg
= tmp
;
3684 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3685 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3687 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * LIT (lighting coefficients), expanded into five ALU instructions.  The
 * destination and source register/type are captured once after
 * assemble_dst()/assemble_src() and then re-applied by hand for every step:
 *
 *   1. MOV         dst.x, dst.w <- constant selected by SQ_SEL_1
 *   2. MAX         dst.y        <- max(src.x, constant selected by SQ_SEL_0)
 *   3. LOG_CLAMPED dst.z        <- log(src.y)
 *   4. MUL_LIT     tmp.x        <- MUL_LIT(src.w, dst.z, src.x)
 *   5. EXP_IEEE    dst.z        <- exp(tmp.x)
 *
 * Source 0 is re-assembled before steps 2-4 and then narrowed to the needed
 * component with swizzleagain_PVSSRC.
 *
 * NOTE(review): step 4 reads the partially written destination back as a
 * source typed SRC_REG_TEMPORARY with reg = dstReg, regardless of dstType -
 * confirm this is only reached with temporary destinations.
 * NOTE(review): the IL write mask is not consulted anywhere in the visible
 * statements; all of x, y, z and w are written.
 * Failure handling after the GL_FALSE comparisons is not visible here.
 */
3697 GLboolean
assemble_LIT(r700_AssemblerBase
*pAsm
)
3699 unsigned int dstReg
;
3700 unsigned int dstType
;
3701 unsigned int srcReg
;
3702 unsigned int srcType
;
3704 int tmp
= gethelpr(pAsm
);
3706 if( GL_FALSE
== assemble_dst(pAsm
) )
3710 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3714 dstReg
= pAsm
->D
.dst
.reg
;
3715 dstType
= pAsm
->D
.dst
.rtype
;
3716 srcReg
= pAsm
->S
[0].src
.reg
;
3717 srcType
= pAsm
->S
[0].src
.rtype
;
3719 /* dst.xw, <- 1.0 */
3720 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3721 pAsm
->D
.dst
.rtype
= dstType
;
3722 pAsm
->D
.dst
.reg
= dstReg
;
3723 pAsm
->D
.dst
.writex
= 1;
3724 pAsm
->D
.dst
.writey
= 0;
3725 pAsm
->D
.dst
.writez
= 0;
3726 pAsm
->D
.dst
.writew
= 1;
3727 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3728 pAsm
->S
[0].src
.reg
= tmp
;
3729 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3730 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3731 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_1
;
3732 pAsm
->S
[0].src
.swizzley
= SQ_SEL_1
;
3733 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_1
;
3734 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_1
;
3735 if( GL_FALSE
== next_ins(pAsm
) )
3740 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3745 /* dst.y = max(src.x, 0.0) */
3746 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
3747 pAsm
->D
.dst
.rtype
= dstType
;
3748 pAsm
->D
.dst
.reg
= dstReg
;
3749 pAsm
->D
.dst
.writex
= 0;
3750 pAsm
->D
.dst
.writey
= 1;
3751 pAsm
->D
.dst
.writez
= 0;
3752 pAsm
->D
.dst
.writew
= 0;
3753 pAsm
->S
[0].src
.rtype
= srcType
;
3754 pAsm
->S
[0].src
.reg
= srcReg
;
3755 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3756 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
);
3757 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
3758 pAsm
->S
[1].src
.reg
= tmp
;
3759 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
3760 noneg_PVSSRC(&(pAsm
->S
[1].src
));
3761 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_0
;
3762 pAsm
->S
[1].src
.swizzley
= SQ_SEL_0
;
3763 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_0
;
3764 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_0
;
3765 if( GL_FALSE
== next_ins(pAsm
) )
3770 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3775 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Y
, SQ_SEL_Y
, SQ_SEL_Y
, SQ_SEL_Y
);
3777 /* dst.z = log(src.y) */
3778 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_LOG_CLAMPED
;
3779 pAsm
->D
.dst
.math
= 1;
3780 pAsm
->D
.dst
.rtype
= dstType
;
3781 pAsm
->D
.dst
.reg
= dstReg
;
3782 pAsm
->D
.dst
.writex
= 0;
3783 pAsm
->D
.dst
.writey
= 0;
3784 pAsm
->D
.dst
.writez
= 1;
3785 pAsm
->D
.dst
.writew
= 0;
3786 pAsm
->S
[0].src
.rtype
= srcType
;
3787 pAsm
->S
[0].src
.reg
= srcReg
;
3788 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3789 if( GL_FALSE
== next_ins(pAsm
) )
3794 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3799 if( GL_FALSE
== assemble_src(pAsm
, 0, 2) )
3804 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
);
3806 swizzleagain_PVSSRC(&(pAsm
->S
[2].src
), SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
);
3808 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
3809 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MUL_LIT
;
3810 pAsm
->D
.dst
.math
= 1;
3811 pAsm
->D
.dst
.op3
= 1;
3812 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3813 pAsm
->D
.dst
.reg
= tmp
;
3814 pAsm
->D
.dst
.writex
= 1;
3815 pAsm
->D
.dst
.writey
= 0;
3816 pAsm
->D
.dst
.writez
= 0;
3817 pAsm
->D
.dst
.writew
= 0;
3819 pAsm
->S
[0].src
.rtype
= srcType
;
3820 pAsm
->S
[0].src
.reg
= srcReg
;
3821 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3823 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
3824 pAsm
->S
[1].src
.reg
= dstReg
;
3825 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
3826 noneg_PVSSRC(&(pAsm
->S
[1].src
));
3827 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_Z
;
3828 pAsm
->S
[1].src
.swizzley
= SQ_SEL_Z
;
3829 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_Z
;
3830 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_Z
;
3832 pAsm
->S
[2].src
.rtype
= srcType
;
3833 pAsm
->S
[2].src
.reg
= srcReg
;
3834 setaddrmode_PVSSRC(&(pAsm
->S
[2].src
), ADDR_ABSOLUTE
);
3836 if( GL_FALSE
== next_ins(pAsm
) )
3841 /* dst.z = exp(tmp.x) */
3842 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
3843 pAsm
->D
.dst
.math
= 1;
3844 pAsm
->D
.dst
.rtype
= dstType
;
3845 pAsm
->D
.dst
.reg
= dstReg
;
3846 pAsm
->D
.dst
.writex
= 0;
3847 pAsm
->D
.dst
.writey
= 0;
3848 pAsm
->D
.dst
.writez
= 1;
3849 pAsm
->D
.dst
.writew
= 0;
3851 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3852 pAsm
->S
[0].src
.reg
= tmp
;
3853 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3854 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3855 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_X
;
3856 pAsm
->S
[0].src
.swizzley
= SQ_SEL_X
;
3857 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_X
;
3858 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_X
;
3860 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * MAX: one SQ_OP2_INST_MAX instruction on sources 0 and 1, written to the
 * instruction's own destination.  Failure handling after the GL_FALSE
 * comparisons is not visible in this listing.
 */
3868 GLboolean
assemble_MAX(r700_AssemblerBase
*pAsm
)
3870 if( GL_FALSE
== checkop2(pAsm
) )
3875 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
3877 if( GL_FALSE
== assemble_dst(pAsm
) )
3882 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3887 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3892 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * MIN: one SQ_OP2_INST_MIN instruction on sources 0 and 1, written to the
 * instruction's own destination.  Failure handling after the GL_FALSE
 * comparisons is not visible in this listing.
 */
3900 GLboolean
assemble_MIN(r700_AssemblerBase
*pAsm
)
3902 if( GL_FALSE
== checkop2(pAsm
) )
3907 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MIN
;
3909 if( GL_FALSE
== assemble_dst(pAsm
) )
3914 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3919 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3924 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * MOV: one SQ_OP2_INST_MOV instruction from source 0 to the instruction's
 * own destination.  Failure handling after the GL_FALSE comparisons is not
 * visible in this listing.
 */
3932 GLboolean
assemble_MOV(r700_AssemblerBase
*pAsm
)
3936 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3938 if (GL_FALSE
== assemble_dst(pAsm
))
3943 if (GL_FALSE
== assemble_src(pAsm
, 0, -1))
3948 if ( GL_FALSE
== next_ins(pAsm
) )
/*
 * MUL: one SQ_OP2_INST_MUL instruction on sources 0 and 1, written to the
 * instruction's own destination.  Failure handling after the GL_FALSE
 * comparisons is not visible in this listing.
 */
3956 GLboolean
assemble_MUL(r700_AssemblerBase
*pAsm
)
3958 if( GL_FALSE
== checkop2(pAsm
) )
3963 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
3965 if( GL_FALSE
== assemble_dst(pAsm
) )
3970 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3975 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3980 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * POW, computed as 2^(b * log2(a)) through one helper temporary:
 *
 *   1. LOG_IEEE tmp = log2(a)         (IL source 0)
 *   2. MUL      tmp = tmp.x * b       (IL source 1 in slot 1)
 *   3. EXP_IEEE tmp = 2^tmp.x
 *   4. MOV      dst = tmp.x           (applies the real destination/mask)
 *
 * Steps 1-3 write the temporary unmasked (nomask_PVSDST).
 * NOTE(review): step 4 stores DST_REG_TEMPORARY into a source rtype while
 * steps 2-3 use SRC_REG_TEMPORARY - confirm the enumerators are equal.
 * Failure handling after the GL_FALSE comparisons is not visible here.
 */
3988 GLboolean
assemble_POW(r700_AssemblerBase
*pAsm
)
3994 tmp
= gethelpr(pAsm
);
3996 // LG2 tmp.x, a.swizzle
3997 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_LOG_IEEE
;
3998 pAsm
->D
.dst
.math
= 1;
4000 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4001 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4002 pAsm
->D
.dst
.reg
= tmp
;
4003 nomask_PVSDST(&(pAsm
->D
.dst
));
4005 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4010 if( GL_FALSE
== next_ins(pAsm
) )
4015 // MUL tmp.x, tmp.x, b.swizzle
4016 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
4018 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4019 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4020 pAsm
->D
.dst
.reg
= tmp
;
4021 nomask_PVSDST(&(pAsm
->D
.dst
));
4023 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4024 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4025 pAsm
->S
[0].src
.reg
= tmp
;
4026 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4027 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4029 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4034 if( GL_FALSE
== next_ins(pAsm
) )
4039 // EX2 dst.mask, tmp.x
4041 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
4042 pAsm
->D
.dst
.math
= 1;
4044 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4045 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4046 pAsm
->D
.dst
.reg
= tmp
;
4047 nomask_PVSDST(&(pAsm
->D
.dst
));
4049 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4050 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4051 pAsm
->S
[0].src
.reg
= tmp
;
4052 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4053 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4055 if( GL_FALSE
== next_ins(pAsm
) )
4060 // Now replicate result to all necessary channels in destination
4061 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
4063 if( GL_FALSE
== assemble_dst(pAsm
) )
4068 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4069 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
4070 pAsm
->S
[0].src
.reg
= tmp
;
4072 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4073 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4075 if( GL_FALSE
== next_ins(pAsm
) )
/* RCP: delegates to assemble_math_function() with SQ_OP2_INST_RECIP_IEEE and
 * returns its result. */
4083 GLboolean
assemble_RCP(r700_AssemblerBase
*pAsm
)
4085 return assemble_math_function(pAsm
, SQ_OP2_INST_RECIP_IEEE
);
/* RSQ: delegates to assemble_math_function() with SQ_OP2_INST_RECIPSQRT_IEEE
 * and returns its result. */
4088 GLboolean
assemble_RSQ(r700_AssemblerBase
*pAsm
)
4090 return assemble_math_function(pAsm
, SQ_OP2_INST_RECIPSQRT_IEEE
);
/*
 * SCS (sine/cosine pair).  The argument is range-reduced in the x channel of
 * one helper temporary and then fed to COS and SIN:
 *
 *   1. MULADD tmp = src0 * (1 / 2PI) + 0.5     (literal constants C[0], C[1])
 *   2. FRACT  tmp.x = fract(tmp.x)
 *   3. MULADD tmp = tmp.x * C[0] + C[1]        (rescale; see note below)
 *   4. COS    on tmp.x, with the y write enable cleared
 *   5. SIN    on tmp.x, with the x write enable cleared
 *
 * Change from the previous version: the result of assemble_src() in step 1
 * is now checked and a failure is propagated as GL_FALSE, matching every
 * other assemble_src() call site in this file instead of silently emitting
 * an instruction with an unassembled operand.
 *
 * Some original lines (braces, branches, destination setup for steps 4-5,
 * returns) are not visible in this listing; only the visible statements are
 * described.
 */
4093 GLboolean
assemble_SCS(r700_AssemblerBase
*pAsm
)
4099 tmp
= gethelpr(pAsm
);
/* Step 1: tmp = src0 * 1/(2PI) + 0.5 */
4101 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
4102 pAsm
->D
.dst
.op3
= 1;
4104 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4105 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4106 pAsm
->D
.dst
.reg
= tmp
;
4108 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
{
    return GL_FALSE;
}
4110 pAsm
->S
[1].src
.rtype
= SRC_REC_LITERAL
;
4111 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_X
);
4113 pAsm
->S
[2].src
.rtype
= SRC_REC_LITERAL
;
4114 setswizzle_PVSSRC(&(pAsm
->S
[2].src
), SQ_SEL_Y
);
4116 pAsm
->D2
.dst2
.literal_slots
= 1;
4117 pAsm
->C
[0].f
= 1/(3.1415926535 * 2);
4118 pAsm
->C
[1].f
= 0.5F
;
4120 if ( GL_FALSE
== next_ins(pAsm
) )
/* Step 2: tmp.x = fract(tmp.x) */
4125 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FRACT
;
4127 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4128 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4129 pAsm
->D
.dst
.reg
= tmp
;
4130 pAsm
->D
.dst
.writex
= 1;
4132 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4133 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4134 pAsm
->S
[0].src
.reg
= tmp
;
4135 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4137 if(( GL_FALSE
== next_ins(pAsm
) ))
/* Step 3: tmp = tmp.x * C[0] + C[1] */
4141 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
4142 pAsm
->D
.dst
.op3
= 1;
4144 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4145 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4146 pAsm
->D
.dst
.reg
= tmp
;
4148 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4149 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4150 pAsm
->S
[0].src
.reg
= tmp
;
4151 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4153 pAsm
->S
[1].src
.rtype
= SRC_REC_LITERAL
;
4154 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_X
);
4156 pAsm
->S
[2].src
.rtype
= SRC_REC_LITERAL
;
4157 setswizzle_PVSSRC(&(pAsm
->S
[2].src
), SQ_SEL_Y
);
4159 pAsm
->D2
.dst2
.literal_slots
= 1;
/* Two alternative constant pairs follow: (2PI, -PI) and (1.0, -0.5).
 * NOTE(review): the condition selecting between them is not visible in this
 * listing - confirm against the full file. */
4162 pAsm
->C
[0].f
= 3.1415926535897f
* 2.0f
;
4163 pAsm
->C
[1].f
= -3.1415926535897f
;
4165 pAsm
->C
[0].f
= 1.0f
;
4166 pAsm
->C
[1].f
= -0.5f
;
4169 if(( GL_FALSE
== next_ins(pAsm
) ))
/* Step 4: COS of tmp.x; y is not written by this instruction. */
4175 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_COS
;
4176 pAsm
->D
.dst
.math
= 1;
4180 pAsm
->D
.dst
.writey
= 0;
4182 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4183 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4184 pAsm
->S
[0].src
.reg
= tmp
;
4185 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4186 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4188 if ( GL_FALSE
== next_ins(pAsm
) )
/* Step 5: SIN of tmp.x; x is not written by this instruction. */
4194 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_SIN
;
4195 pAsm
->D
.dst
.math
= 1;
4199 pAsm
->D
.dst
.writex
= 0;
4201 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4202 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4203 pAsm
->S
[0].src
.reg
= tmp
;
4204 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4205 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4207 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * Generic two-operand comparison/logic instruction: emits the caller-supplied
 * `opcode` on sources 0 and 1 into the instruction's own destination.
 * Failure handling after the GL_FALSE comparisons is not visible here.
 */
4215 GLboolean
assemble_LOGIC(r700_AssemblerBase
*pAsm
, BITS opcode
)
4217 if( GL_FALSE
== checkop2(pAsm
) )
4222 pAsm
->D
.dst
.opcode
= opcode
;
4223 //pAsm->D.dst.math = 1;
4225 if( GL_FALSE
== assemble_dst(pAsm
) )
4230 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4235 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4240 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * Predicate-setting comparison.  Emits the caller-supplied `opcode` flagged
 * as both a math and a predicated instruction:
 *
 *   destination: x channel of the assembler's help register (uHelpReg)
 *   operand 0  : temporary register last_cond_register +
 *                starting_temp_register_number, x swizzle taken from the low
 *                three bits of the IL destination's CondSwizzle
 *   operand 1  : uHelpReg with every swizzle set to SQ_SEL_0
 *
 * NOTE(review): only swizzlex of operand 0 is assigned here; the remaining
 * swizzle fields keep whatever value they already had.
 * Failure handling after the GL_FALSE comparison is not visible here.
 */
4248 GLboolean
assemble_LOGIC_PRED(r700_AssemblerBase
*pAsm
, BITS opcode
)
4250 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
4252 pAsm
->D
.dst
.opcode
= opcode
;
4253 pAsm
->D
.dst
.math
= 1;
4254 pAsm
->D
.dst
.predicated
= 1;
4256 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4257 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4258 pAsm
->D
.dst
.reg
= pAsm
->uHelpReg
;
4259 pAsm
->D
.dst
.writex
= 1;
4260 pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writez
= pAsm
->D
.dst
.writew
= 0;
4262 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4263 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4264 pAsm
->S
[0].src
.reg
= pAsm
->last_cond_register
+ pAsm
->starting_temp_register_number
;
4265 pAsm
->S
[0].src
.swizzlex
= pILInst
->DstReg
.CondSwizzle
& 0x7;
4266 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4268 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
4269 pAsm
->S
[1].src
.reg
= pAsm
->uHelpReg
;
4270 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
4271 noneg_PVSSRC(&(pAsm
->S
[1].src
));
4272 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_0
;
4273 pAsm
->S
[1].src
.swizzley
= SQ_SEL_0
;
4274 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_0
;
4275 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_0
;
4277 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * SGE (set on greater-or-equal): one SQ_OP2_INST_SETGE instruction on sources
 * 0 and 1, written to the instruction's own destination.  Failure handling
 * after the GL_FALSE comparisons is not visible in this listing.
 */
4285 GLboolean
assemble_SGE(r700_AssemblerBase
*pAsm
)
4287 if( GL_FALSE
== checkop2(pAsm
) )
4292 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_SETGE
;
4294 if( GL_FALSE
== assemble_dst(pAsm
) )
4299 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4304 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4309 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * SLT (set on less-than), emitted as SQ_OP2_INST_SETGT with the operands
 * swapped: IL source 0 is loaded into slot 1 and IL source 1 into slot 0, so
 * the hardware evaluates src1 > src0, i.e. src0 < src1.
 * Failure handling after the GL_FALSE comparisons is not visible here.
 */
4317 GLboolean
assemble_SLT(r700_AssemblerBase
*pAsm
)
4319 if( GL_FALSE
== checkop2(pAsm
) )
4324 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_SETGT
;
4326 if( GL_FALSE
== assemble_dst(pAsm
) )
4331 if( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
4336 if( GL_FALSE
== assemble_src(pAsm
, 1, 0) )
4341 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * STP handler.  NOTE(review): only the signature is visible in this listing;
 * no statement of the body is shown, so its behaviour and return value must
 * be confirmed against the full file.
 */
4349 GLboolean
assemble_STP(r700_AssemblerBase
*pAsm
)
4354 GLboolean
assemble_TEX(r700_AssemblerBase
*pAsm
)
4356 GLboolean src_const
;
4357 GLboolean need_barrier
= GL_FALSE
;
4361 switch (pAsm
->pILInst
[pAsm
->uiCurInst
].SrcReg
[0].File
)
4363 case PROGRAM_UNIFORM
:
4364 case PROGRAM_CONSTANT
:
4365 case PROGRAM_LOCAL_PARAM
:
4366 case PROGRAM_ENV_PARAM
:
4367 case PROGRAM_STATE_VAR
:
4368 src_const
= GL_TRUE
;
4370 case PROGRAM_TEMPORARY
:
4373 src_const
= GL_FALSE
;
4377 if (GL_TRUE
== src_const
)
4379 if ( GL_FALSE
== mov_temp(pAsm
, 0) )
4381 need_barrier
= GL_TRUE
;
4384 if (pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_TXP
)
4386 GLuint tmp
= gethelpr(pAsm
);
4387 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_RECIP_IEEE
;
4388 pAsm
->D
.dst
.math
= 1;
4389 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4390 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4391 pAsm
->D
.dst
.reg
= tmp
;
4392 pAsm
->D
.dst
.writew
= 1;
4394 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4398 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
);
4399 if( GL_FALSE
== next_ins(pAsm
) )
4404 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
4405 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4406 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4407 pAsm
->D
.dst
.reg
= tmp
;
4408 pAsm
->D
.dst
.writex
= 1;
4409 pAsm
->D
.dst
.writey
= 1;
4410 pAsm
->D
.dst
.writez
= 1;
4411 pAsm
->D
.dst
.writew
= 0;
4413 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4417 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
4418 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
4419 pAsm
->S
[1].src
.reg
= tmp
;
4420 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_W
);
4422 if( GL_FALSE
== next_ins(pAsm
) )
4427 pAsm
->aArgSubst
[1] = tmp
;
4428 need_barrier
= GL_TRUE
;
4431 if (pAsm
->pILInst
[pAsm
->uiCurInst
].TexSrcTarget
== TEXTURE_CUBE_INDEX
)
4433 GLuint tmp1
= gethelpr(pAsm
);
4434 GLuint tmp2
= gethelpr(pAsm
);
4436 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
4437 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_CUBE
;
4438 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4439 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4440 pAsm
->D
.dst
.reg
= tmp1
;
4441 nomask_PVSDST(&(pAsm
->D
.dst
));
4443 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4448 if( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
4453 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Z
, SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_Y
);
4454 swizzleagain_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Y
, SQ_SEL_X
, SQ_SEL_Z
, SQ_SEL_Z
);
4456 if( GL_FALSE
== next_ins(pAsm
) )
4461 /* tmp1.z = RCP_e(|tmp1.z|) */
4462 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_RECIP_IEEE
;
4463 pAsm
->D
.dst
.math
= 1;
4464 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4465 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4466 pAsm
->D
.dst
.reg
= tmp1
;
4467 pAsm
->D
.dst
.writez
= 1;
4469 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4470 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4471 pAsm
->S
[0].src
.reg
= tmp1
;
4472 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_Z
;
4473 pAsm
->S
[0].src
.abs
= 1;
4477 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
4478 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
4479 * muladd has no writemask, have to use another temp
4481 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
4482 pAsm
->D
.dst
.op3
= 1;
4483 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4484 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4485 pAsm
->D
.dst
.reg
= tmp2
;
4487 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4488 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4489 pAsm
->S
[0].src
.reg
= tmp1
;
4490 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
4491 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
4492 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
4493 pAsm
->S
[1].src
.reg
= tmp1
;
4494 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Z
);
4495 setaddrmode_PVSSRC(&(pAsm
->S
[2].src
), ADDR_ABSOLUTE
);
4496 /* immediate c 1.5 */
4497 pAsm
->D2
.dst2
.literal_slots
= 1;
4498 pAsm
->C
[0].f
= 1.5F
;
4499 pAsm
->S
[2].src
.rtype
= SRC_REC_LITERAL
;
4500 pAsm
->S
[2].src
.reg
= tmp1
;
4501 setswizzle_PVSSRC(&(pAsm
->S
[2].src
), SQ_SEL_X
);
4505 /* tmp1.xy = temp2.xy */
4506 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
4507 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4508 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4509 pAsm
->D
.dst
.reg
= tmp1
;
4510 pAsm
->D
.dst
.writex
= 1;
4511 pAsm
->D
.dst
.writey
= 1;
4512 pAsm
->D
.dst
.writez
= 0;
4513 pAsm
->D
.dst
.writew
= 0;
4515 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4516 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4517 pAsm
->S
[0].src
.reg
= tmp2
;
4518 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
4521 pAsm
->aArgSubst
[1] = tmp1
;
4522 need_barrier
= GL_TRUE
;
4526 switch(pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
)
4529 /* will these need WQM(1) on CF inst ? */
4530 pAsm
->D
.dst
.opcode
= SQ_TEX_INST_GET_GRADIENTS_H
;
4533 pAsm
->D
.dst
.opcode
= SQ_TEX_INST_GET_GRADIENTS_V
;
4536 pAsm
->D
.dst
.opcode
= SQ_TEX_INST_SAMPLE_L
;
4539 if(pAsm
->pILInst
[pAsm
->uiCurInst
].TexShadow
== 1)
4540 pAsm
->D
.dst
.opcode
= SQ_TEX_INST_SAMPLE_C
;
4542 pAsm
->D
.dst
.opcode
= SQ_TEX_INST_SAMPLE
;
4545 pAsm
->is_tex
= GL_TRUE
;
4546 if ( GL_TRUE
== need_barrier
)
4548 pAsm
->is_tex
= GL_TRUE
;
4549 if ( GL_TRUE
== need_barrier
)
4551 pAsm
->need_tex_barrier
= GL_TRUE
;
4553 // Set src1 to tex unit id
4554 pAsm
->S
[1].src
.reg
= pAsm
->SamplerUnits
[pAsm
->pILInst
[pAsm
->uiCurInst
].TexSrcUnit
];
4555 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
4557 //No sw info from mesa compiler, so hard code here.
4558 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_X
;
4559 pAsm
->S
[1].src
.swizzley
= SQ_SEL_Y
;
4560 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_Z
;
4561 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_W
;
4563 if( GL_FALSE
== tex_dst(pAsm
) )
4568 if( GL_FALSE
== tex_src(pAsm
) )
4573 if(pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_TXP
)
4575 /* hopefully did swizzles before */
4576 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
4579 if(pAsm
->pILInst
[pAsm
->uiCurInst
].TexSrcTarget
== TEXTURE_CUBE_INDEX
)
4581 /* SAMPLE dst, tmp.yxwy, CUBE */
4582 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_Y
;
4583 pAsm
->S
[0].src
.swizzley
= SQ_SEL_X
;
4584 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_W
;
4585 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_Y
;
4588 if(pAsm
->pILInst
[pAsm
->uiCurInst
].TexShadow
== 1)
4590 /* compare value goes to w chan ? */
4591 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_Z
;
4594 if ( GL_FALSE
== next_ins(pAsm
) )
4599 /* add ARB shadow ambient but clamp to 0..1 */
4600 if(pAsm
->pILInst
[pAsm
->uiCurInst
].TexShadow
== 1)
4602 /* ADD_SAT dst, dst, ambient[texunit] */
4603 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_ADD
;
4605 if( GL_FALSE
== assemble_dst(pAsm
) )
4609 pAsm
->D2
.dst2
.SaturateMode
= 1;
4611 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4612 pAsm
->S
[0].src
.reg
= pAsm
->D
.dst
.reg
;
4613 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
4614 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4616 pAsm
->S
[1].src
.rtype
= SRC_REG_CONSTANT
;
4617 pAsm
->S
[1].src
.reg
= pAsm
->shadow_regs
[pAsm
->pILInst
[pAsm
->uiCurInst
].TexSrcUnit
];
4618 noswizzle_PVSSRC(&(pAsm
->S
[1].src
));
4619 noneg_PVSSRC(&(pAsm
->S
[1].src
));
4621 if( GL_FALSE
== next_ins(pAsm
) )
4631 GLboolean
assemble_XPD(r700_AssemblerBase
*pAsm
)
4636 if( GL_FALSE
== checkop2(pAsm
) )
4641 tmp1
= gethelpr(pAsm
);
4643 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
4645 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4646 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4647 pAsm
->D
.dst
.reg
= tmp1
;
4648 nomask_PVSDST(&(pAsm
->D
.dst
));
4650 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4655 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4660 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_Y
, SQ_SEL_0
);
4661 swizzleagain_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Y
, SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_0
);
4663 if( GL_FALSE
== next_ins(pAsm
) )
4668 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
4669 pAsm
->D
.dst
.op3
= 1;
4671 if(0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
4673 tmp2
= gethelpr(pAsm
);
4675 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4676 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4677 pAsm
->D
.dst
.reg
= tmp2
;
4679 nomask_PVSDST(&(pAsm
->D
.dst
));
4683 if( GL_FALSE
== assemble_dst(pAsm
) )
4689 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4694 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4699 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Y
, SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_0
);
4700 swizzleagain_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_Y
, SQ_SEL_0
);
4702 // result1 + (neg) result0
4703 setaddrmode_PVSSRC(&(pAsm
->S
[2].src
),ADDR_ABSOLUTE
);
4704 pAsm
->S
[2].src
.rtype
= SRC_REG_TEMPORARY
;
4705 pAsm
->S
[2].src
.reg
= tmp1
;
4707 neg_PVSSRC(&(pAsm
->S
[2].src
));
4708 noswizzle_PVSSRC(&(pAsm
->S
[2].src
));
4710 if( GL_FALSE
== next_ins(pAsm
) )
4716 if(0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
4718 if( GL_FALSE
== assemble_dst(pAsm
) )
4723 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
4725 // Use tmp as source
4726 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4727 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4728 pAsm
->S
[0].src
.reg
= tmp2
;
4730 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4731 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
4733 if( GL_FALSE
== next_ins(pAsm
) )
4742 GLboolean
assemble_EXPORT(r700_AssemblerBase
*pAsm
)
4747 static inline void decreaseCurrent(r700_AssemblerBase
*pAsm
, GLuint uReason
)
4752 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
--;
4755 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
-= 4;
4758 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
-= 4;
4761 /* TODO : for 16 vp asic, should -= 2; */
4762 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
-= 1;
4767 static inline void checkStackDepth(r700_AssemblerBase
*pAsm
, GLuint uReason
, GLboolean bCheckMaxOnly
)
4769 if(GL_TRUE
== bCheckMaxOnly
)
4774 if((pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+ 1)
4775 > pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
)
4777 pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
=
4778 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+ 1;
4782 if((pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+ 4)
4783 > pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
)
4785 pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
=
4786 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+ 4;
4796 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
++;
4799 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+= 4;
4802 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+= 4;
4805 /* TODO : for 16 vp asic, should += 2; */
4806 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+= 1;
4810 if(pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
4811 > pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
)
4813 pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
=
4814 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
;
4818 GLboolean
jumpToOffest(r700_AssemblerBase
*pAsm
, GLuint pops
, GLint offset
)
4820 if(GL_FALSE
== add_cf_instruction(pAsm
) )
4825 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= pops
;
4826 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
4827 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
4829 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
4830 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
4831 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_JUMP
;
4832 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
4834 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
4836 pAsm
->cf_current_cf_clause_ptr
->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
+ offset
;
4841 GLboolean
pops(r700_AssemblerBase
*pAsm
, GLuint pops
)
4843 if(GL_FALSE
== add_cf_instruction(pAsm
) )
4848 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= pops
;
4849 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
4850 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
4852 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
4853 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
4854 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_POP
;
4856 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
4858 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
4859 pAsm
->cf_current_cf_clause_ptr
->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
+ 1;
4864 GLboolean
assemble_IF(r700_AssemblerBase
*pAsm
, GLboolean bHasElse
)
4866 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
4868 assemble_LOGIC_PRED(pAsm
, SQ_OP2_INST_PRED_SETNE
);
4871 if(GL_FALSE
== add_cf_instruction(pAsm
) )
4876 if(GL_TRUE
!= bHasElse
)
4878 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
4882 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 0;
4884 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
4885 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
4887 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
4888 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
4889 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_JUMP
;
4890 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
4892 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
4895 pAsm
->fc_stack
[pAsm
->FCSP
].type
= FC_IF
;
4896 pAsm
->fc_stack
[pAsm
->FCSP
].mid
= NULL
;
4897 pAsm
->fc_stack
[pAsm
->FCSP
].midLen
= 0;
4898 pAsm
->fc_stack
[pAsm
->FCSP
].first
= pAsm
->cf_current_cf_clause_ptr
;
4900 #ifndef USE_CF_FOR_POP_AFTER
4901 if(GL_TRUE
!= bHasElse
)
4903 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU_POP_AFTER
;
4905 #endif /* USE_CF_FOR_POP_AFTER */
4907 checkStackDepth(pAsm
, FC_PUSH_VPM
, GL_FALSE
);
4912 GLboolean
assemble_ELSE(r700_AssemblerBase
*pAsm
)
4914 if(GL_FALSE
== add_cf_instruction(pAsm
) )
4919 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1; ///
4920 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
4921 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
4923 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
4924 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
4925 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_ELSE
;
4926 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
4928 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
4930 pAsm
->fc_stack
[pAsm
->FCSP
].mid
= (R700ControlFlowGenericClause
**)_mesa_realloc( (void *)pAsm
->fc_stack
[pAsm
->FCSP
].mid
,
4932 sizeof(R700ControlFlowGenericClause
*) );
4933 pAsm
->fc_stack
[pAsm
->FCSP
].mid
[0] = pAsm
->cf_current_cf_clause_ptr
;
4934 //pAsm->fc_stack[pAsm->FCSP].unNumMid = 1;
4936 #ifndef USE_CF_FOR_POP_AFTER
4937 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU_POP_AFTER
;
4938 #endif /* USE_CF_FOR_POP_AFTER */
4940 pAsm
->fc_stack
[pAsm
->FCSP
].first
->m_Word0
.f
.addr
= pAsm
->pR700Shader
->plstCFInstructions_active
->uNumOfNode
- 1;
4945 GLboolean
assemble_ENDIF(r700_AssemblerBase
*pAsm
)
4947 #ifdef USE_CF_FOR_POP_AFTER
4949 #endif /* USE_CF_FOR_POP_AFTER */
4951 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
4953 if(NULL
== pAsm
->fc_stack
[pAsm
->FCSP
].mid
)
4955 /* no else in between */
4956 pAsm
->fc_stack
[pAsm
->FCSP
].first
->m_Word0
.f
.addr
= pAsm
->pR700Shader
->plstCFInstructions_active
->uNumOfNode
;
4960 pAsm
->fc_stack
[pAsm
->FCSP
].mid
[0]->m_Word0
.f
.addr
= pAsm
->pR700Shader
->plstCFInstructions_active
->uNumOfNode
;
4963 if(NULL
!= pAsm
->fc_stack
[pAsm
->FCSP
].mid
)
4965 FREE(pAsm
->fc_stack
[pAsm
->FCSP
].mid
);
4968 if(pAsm
->fc_stack
[pAsm
->FCSP
].type
!= FC_IF
)
4970 radeon_error("if/endif in shader code are not paired. \n");
4976 decreaseCurrent(pAsm
, FC_PUSH_VPM
);
4981 GLboolean
assemble_BGNLOOP(r700_AssemblerBase
*pAsm
)
4983 if(GL_FALSE
== add_cf_instruction(pAsm
) )
4989 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 0;
4990 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
4991 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
4993 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
4994 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
4995 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_LOOP_START_NO_AL
;
4996 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
4998 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
5001 pAsm
->fc_stack
[pAsm
->FCSP
].type
= FC_LOOP
;
5002 pAsm
->fc_stack
[pAsm
->FCSP
].mid
= NULL
;
5003 pAsm
->fc_stack
[pAsm
->FCSP
].unNumMid
= 0;
5004 pAsm
->fc_stack
[pAsm
->FCSP
].midLen
= 0;
5005 pAsm
->fc_stack
[pAsm
->FCSP
].first
= pAsm
->cf_current_cf_clause_ptr
;
5007 checkStackDepth(pAsm
, FC_LOOP
, GL_FALSE
);
5012 GLboolean
assemble_BRK(r700_AssemblerBase
*pAsm
)
5014 #ifdef USE_CF_FOR_CONTINUE_BREAK
5016 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
5018 assemble_LOGIC_PRED(pAsm
, SQ_OP2_INST_PRED_SETNE
);
5020 unsigned int unFCSP
;
5021 for(unFCSP
=pAsm
->FCSP
; unFCSP
>0; unFCSP
--)
5023 if(FC_LOOP
== pAsm
->fc_stack
[unFCSP
].type
)
5030 radeon_error("Break is not inside loop/endloop pair.\n");
5034 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5040 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
5041 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5042 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5044 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5045 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5046 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_LOOP_BREAK
;
5048 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5050 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
5052 pAsm
->fc_stack
[unFCSP
].mid
= (R700ControlFlowGenericClause
**)_mesa_realloc(
5053 (void *)pAsm
->fc_stack
[unFCSP
].mid
,
5054 sizeof(R700ControlFlowGenericClause
*) * pAsm
->fc_stack
[unFCSP
].unNumMid
,
5055 sizeof(R700ControlFlowGenericClause
*) * (pAsm
->fc_stack
[unFCSP
].unNumMid
+ 1) );
5056 pAsm
->fc_stack
[unFCSP
].mid
[pAsm
->fc_stack
[unFCSP
].unNumMid
] = pAsm
->cf_current_cf_clause_ptr
;
5057 pAsm
->fc_stack
[unFCSP
].unNumMid
++;
5059 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5064 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
5065 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5066 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5068 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5069 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5070 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_POP
;
5072 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5074 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
5075 pAsm
->cf_current_cf_clause_ptr
->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
+ 1;
5077 checkStackDepth(pAsm
, FC_PUSH_VPM
, GL_TRUE
);
5079 #endif //USE_CF_FOR_CONTINUE_BREAK
5083 GLboolean
assemble_CONT(r700_AssemblerBase
*pAsm
)
5085 #ifdef USE_CF_FOR_CONTINUE_BREAK
5086 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
5088 assemble_LOGIC_PRED(pAsm
, SQ_OP2_INST_PRED_SETNE
);
5090 unsigned int unFCSP
;
5091 for(unFCSP
=pAsm
->FCSP
; unFCSP
>0; unFCSP
--)
5093 if(FC_LOOP
== pAsm
->fc_stack
[unFCSP
].type
)
5100 radeon_error("Continue is not inside loop/endloop pair.\n");
5104 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5110 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
5111 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5112 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5114 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5115 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5116 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_LOOP_CONTINUE
;
5118 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5120 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
5122 pAsm
->fc_stack
[unFCSP
].mid
= (R700ControlFlowGenericClause
**)_mesa_realloc(
5123 (void *)pAsm
->fc_stack
[unFCSP
].mid
,
5124 sizeof(R700ControlFlowGenericClause
*) * pAsm
->fc_stack
[unFCSP
].unNumMid
,
5125 sizeof(R700ControlFlowGenericClause
*) * (pAsm
->fc_stack
[unFCSP
].unNumMid
+ 1) );
5126 pAsm
->fc_stack
[unFCSP
].mid
[pAsm
->fc_stack
[unFCSP
].unNumMid
] = pAsm
->cf_current_cf_clause_ptr
;
5127 pAsm
->fc_stack
[unFCSP
].unNumMid
++;
5129 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5134 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
5135 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5136 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5138 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5139 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5140 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_POP
;
5142 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5144 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
5145 pAsm
->cf_current_cf_clause_ptr
->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
+ 1;
5147 checkStackDepth(pAsm
, FC_PUSH_VPM
, GL_TRUE
);
5149 #endif /* USE_CF_FOR_CONTINUE_BREAK */
5154 GLboolean
assemble_ENDLOOP(r700_AssemblerBase
*pAsm
)
5158 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5164 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 0;
5165 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5166 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5168 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5169 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5170 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_LOOP_END
;
5171 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5173 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
5175 pAsm
->cf_current_cf_clause_ptr
->m_Word0
.f
.addr
= pAsm
->fc_stack
[pAsm
->FCSP
].first
->m_uIndex
+ 1;
5176 pAsm
->fc_stack
[pAsm
->FCSP
].first
->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
+ 1;
5178 #ifdef USE_CF_FOR_CONTINUE_BREAK
5179 for(i
=0; i
<pAsm
->fc_stack
[pAsm
->FCSP
].unNumMid
; i
++)
5181 pAsm
->fc_stack
[pAsm
->FCSP
].mid
[i
]->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
;
5183 if(NULL
!= pAsm
->fc_stack
[pAsm
->FCSP
].mid
)
5185 FREE(pAsm
->fc_stack
[pAsm
->FCSP
].mid
);
5189 if(pAsm
->fc_stack
[pAsm
->FCSP
].type
!= FC_LOOP
)
5191 radeon_error("loop/endloop in shader code are not paired. \n");
5197 if((pAsm
->unCFflags
& HAS_CURRENT_LOOPRET
) > 0)
5199 for(unFCSP
=(pAsm
->FCSP
-1); unFCSP
>pAsm
->CALLSTACK
[pAsm
->CALLSP
].FCSP_BeforeEntry
; unFCSP
--)
5201 if(FC_LOOP
== pAsm
->fc_stack
[unFCSP
].type
)
5203 breakLoopOnFlag(pAsm
, unFCSP
);
5206 else if(FC_IF
== pAsm
->fc_stack
[unFCSP
].type
)
5211 if(unFCSP
<= pAsm
->CALLSTACK
[pAsm
->CALLSP
].FCSP_BeforeEntry
)
5213 #ifdef USE_CF_FOR_POP_AFTER
5214 returnOnFlag(pAsm
, unIF
);
5216 returnOnFlag(pAsm
, 0);
5217 #endif /* USE_CF_FOR_POP_AFTER */
5218 pAsm
->unCFflags
&= ~HAS_CURRENT_LOOPRET
;
5224 decreaseCurrent(pAsm
, FC_LOOP
);
5229 void add_return_inst(r700_AssemblerBase
*pAsm
)
5231 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5235 //pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
5236 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 0;
5237 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5238 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5240 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5241 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5242 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_RETURN
;
5243 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5245 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
5248 GLboolean
assemble_BGNSUB(r700_AssemblerBase
*pAsm
, GLint nILindex
, GLuint uiIL_Shift
)
5251 if( (pAsm
->unSubArrayPointer
+ 1) > pAsm
->unSubArraySize
)
5253 pAsm
->subs
= (SUB_OFFSET
*)_mesa_realloc( (void *)pAsm
->subs
,
5254 sizeof(SUB_OFFSET
) * pAsm
->unSubArraySize
,
5255 sizeof(SUB_OFFSET
) * (pAsm
->unSubArraySize
+ 10) );
5256 if(NULL
== pAsm
->subs
)
5260 pAsm
->unSubArraySize
+= 10;
5263 pAsm
->subs
[pAsm
->unSubArrayPointer
].subIL_Offset
= nILindex
+ uiIL_Shift
;
5264 pAsm
->subs
[pAsm
->unSubArrayPointer
].lstCFInstructions_local
.pHead
=NULL
;
5265 pAsm
->subs
[pAsm
->unSubArrayPointer
].lstCFInstructions_local
.pTail
=NULL
;
5266 pAsm
->subs
[pAsm
->unSubArrayPointer
].lstCFInstructions_local
.uNumOfNode
=0;
5269 pAsm
->CALLSTACK
[pAsm
->CALLSP
].subDescIndex
= pAsm
->unSubArrayPointer
;
5270 pAsm
->CALLSTACK
[pAsm
->CALLSP
].FCSP_BeforeEntry
= pAsm
->FCSP
;
5271 pAsm
->CALLSTACK
[pAsm
->CALLSP
].plstCFInstructions_local
5272 = &(pAsm
->subs
[pAsm
->unSubArrayPointer
].lstCFInstructions_local
);
5273 pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
= 0;
5274 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
= 0;
5275 SetActiveCFlist(pAsm
->pR700Shader
,
5276 pAsm
->CALLSTACK
[pAsm
->CALLSP
].plstCFInstructions_local
);
5278 pAsm
->unSubArrayPointer
++;
5281 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
5284 pAsm
->fc_stack
[pAsm
->FCSP
].type
= FC_REP
;
5286 checkStackDepth(pAsm
, FC_REP
, GL_FALSE
);
5291 GLboolean
assemble_ENDSUB(r700_AssemblerBase
*pAsm
)
5293 if(pAsm
->fc_stack
[pAsm
->FCSP
].type
!= FC_REP
)
5295 radeon_error("BGNSUB/ENDSUB in shader code are not paired. \n");
5299 /* copy max to sub structure */
5300 pAsm
->subs
[pAsm
->CALLSTACK
[pAsm
->CALLSP
].subDescIndex
].unStackDepthMax
5301 = pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
;
5303 decreaseCurrent(pAsm
, FC_REP
);
5306 SetActiveCFlist(pAsm
->pR700Shader
,
5307 pAsm
->CALLSTACK
[pAsm
->CALLSP
].plstCFInstructions_local
);
5309 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
5316 GLboolean
assemble_RET(r700_AssemblerBase
*pAsm
)
5320 if(pAsm
->CALLSP
> 0)
5323 for(unFCSP
=pAsm
->FCSP
; unFCSP
>pAsm
->CALLSTACK
[pAsm
->CALLSP
].FCSP_BeforeEntry
; unFCSP
--)
5325 if(FC_LOOP
== pAsm
->fc_stack
[unFCSP
].type
)
5327 setRetInLoopFlag(pAsm
, SQ_SEL_1
);
5328 breakLoopOnFlag(pAsm
, unFCSP
);
5329 pAsm
->unCFflags
|= LOOPRET_FLAGS
;
5333 else if(FC_IF
== pAsm
->fc_stack
[unFCSP
].type
)
5340 #ifdef USE_CF_FOR_POP_AFTER
5345 #endif /* USE_CF_FOR_POP_AFTER */
5347 add_return_inst(pAsm
);
5352 GLboolean
assemble_CAL(r700_AssemblerBase
*pAsm
,
5355 GLuint uiNumberInsts
,
5356 struct prog_instruction
*pILInst
,
5357 PRESUB_DESC
* pPresubDesc
)
5361 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
5363 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5368 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.call_count
= 1;
5369 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 0;
5370 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5371 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5373 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5374 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5375 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_CALL
;
5376 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5378 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
5381 if( (pAsm
->unCallerArrayPointer
+ 1) > pAsm
->unCallerArraySize
)
5383 pAsm
->callers
= (CALLER_POINTER
*)_mesa_realloc( (void *)pAsm
->callers
,
5384 sizeof(CALLER_POINTER
) * pAsm
->unCallerArraySize
,
5385 sizeof(CALLER_POINTER
) * (pAsm
->unCallerArraySize
+ 10) );
5386 if(NULL
== pAsm
->callers
)
5390 pAsm
->unCallerArraySize
+= 10;
5393 uiIL_Offset
= nILindex
+ uiIL_Shift
;
5394 pAsm
->callers
[pAsm
->unCallerArrayPointer
].subIL_Offset
= uiIL_Offset
;
5395 pAsm
->callers
[pAsm
->unCallerArrayPointer
].cf_ptr
= pAsm
->cf_current_cf_clause_ptr
;
5397 pAsm
->callers
[pAsm
->unCallerArrayPointer
].finale_cf_ptr
= NULL
;
5398 pAsm
->callers
[pAsm
->unCallerArrayPointer
].prelude_cf_ptr
= NULL
;
5400 pAsm
->unCallerArrayPointer
++;
5406 for(j
=0; j
<pAsm
->unSubArrayPointer
; j
++)
5408 if(uiIL_Offset
== pAsm
->subs
[j
].subIL_Offset
)
5409 { /* compiled before */
5411 max
= pAsm
->subs
[j
].unStackDepthMax
5412 + pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
;
5413 if(max
> pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
)
5415 pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
= max
;
5418 pAsm
->callers
[pAsm
->unCallerArrayPointer
- 1].subDescIndex
= j
;
5423 pAsm
->callers
[pAsm
->unCallerArrayPointer
- 1].subDescIndex
= pAsm
->unSubArrayPointer
;
5424 unSubID
= pAsm
->unSubArrayPointer
;
5426 bRet
= AssembleInstr(nILindex
, uiIL_Shift
, uiNumberInsts
, pILInst
, pAsm
);
5430 max
= pAsm
->subs
[unSubID
].unStackDepthMax
5431 + pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
;
5432 if(max
> pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
)
5434 pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
= max
;
5437 pAsm
->subs
[unSubID
].pPresubDesc
= pPresubDesc
;
5443 GLboolean
setRetInLoopFlag(r700_AssemblerBase
*pAsm
, GLuint flagValue
)
5445 /*GLfloat fLiteral[2] = {0.1, 0.0};*/
5447 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
5448 pAsm
->D
.dst
.op3
= 0;
5449 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
5450 pAsm
->D
.dst
.reg
= pAsm
->flag_reg_index
;
5451 pAsm
->D
.dst
.writex
= 1;
5452 pAsm
->D
.dst
.writey
= 0;
5453 pAsm
->D
.dst
.writez
= 0;
5454 pAsm
->D
.dst
.writew
= 0;
5455 pAsm
->D2
.dst2
.literal_slots
= 1;
5456 pAsm
->D2
.dst2
.SaturateMode
= SATURATE_OFF
;
5457 pAsm
->D
.dst
.predicated
= 0;
5458 /* in reloc where dislink flag init inst, only one slot alu inst is handled. */
5459 pAsm
->D
.dst
.math
= 1; /* TODO : not math really, but one channel op, more generic alu assembler needed */
5460 pAsm
->D2
.dst2
.index_mode
= SQ_INDEX_LOOP
; /* Check this ! */
5462 pAsm
->S
[0].src
.rtype
= SRC_REC_LITERAL
;
5463 //pAsm->S[0].src.reg = 0;
5464 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
5465 noneg_PVSSRC(&(pAsm
->S
[0].src
));
5466 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_X
;
5467 pAsm
->S
[0].src
.swizzley
= SQ_SEL_Y
;
5468 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_Z
;
5469 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_W
;
5471 if( GL_FALSE
== next_ins_literal(pAsm
, &(fLiteral
[0])) )
5476 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
5477 pAsm
->S
[0].src
.reg
= 0;
5478 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
5479 noneg_PVSSRC(&(pAsm
->S
[0].src
));
5480 pAsm
->S
[0].src
.swizzlex
= flagValue
;
5481 pAsm
->S
[0].src
.swizzley
= flagValue
;
5482 pAsm
->S
[0].src
.swizzlez
= flagValue
;
5483 pAsm
->S
[0].src
.swizzlew
= flagValue
;
5485 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * testFlag: emit a predicate-setting ALU instruction that tests the
 * assembler's flag register.
 *
 * Visible behaviour:
 *  - a helper register is obtained with gethelpr() and used as destination;
 *  - the ALU clause opcode is switched to SQ_CF_INST_ALU_PUSH_BEFORE;
 *  - the instruction is SQ_OP2_INST_PRED_SETE, predicated, writing only .x;
 *  - source 0 is the temporary register pAsm->flag_reg_index with an
 *    identity (X,Y,Z,W) swizzle;
 *  - checkStackDepth() is called with FC_PUSH_VPM once the instruction has
 *    been queued.
 *
 * NOTE(review): two set-ups of source 1 appear below (a literal operand
 * emitted through next_ins_literal(), and temporary register 0 with an
 * all-SQ_SEL_1 swizzle emitted through next_ins()).  The declaration of
 * fLiteral is commented out, so presumably only the second variant is
 * compiled; the preprocessor lines selecting between them are not visible
 * in this view -- confirm.
 */
5494 GLboolean
testFlag(r700_AssemblerBase
*pAsm
)
5496 /*GLfloat fLiteral[2] = {0.1, 0.0};*/
5499 GLuint tmp
= gethelpr(pAsm
);
5500 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
/* Destination: helper register, .x only, predicated PRED_SETE. */
5502 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_PRED_SETE
;
5503 pAsm
->D
.dst
.math
= 1;
5504 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
5505 pAsm
->D
.dst
.reg
= tmp
;
5506 pAsm
->D
.dst
.writex
= 1;
5507 pAsm
->D
.dst
.writey
= 0;
5508 pAsm
->D
.dst
.writez
= 0;
5509 pAsm
->D
.dst
.writew
= 0;
5510 pAsm
->D2
.dst2
.literal_slots
= 1;
5511 pAsm
->D2
.dst2
.SaturateMode
= SATURATE_OFF
;
5512 pAsm
->D
.dst
.predicated
= 1;
5513 pAsm
->D2
.dst2
.index_mode
= SQ_INDEX_LOOP
; /* Check this ! */
/* Source 0: the flag register, unswizzled. */
5515 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
5516 pAsm
->S
[0].src
.reg
= pAsm
->flag_reg_index
;
5517 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
5518 noneg_PVSSRC(&(pAsm
->S
[0].src
));
5519 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_X
;
5520 pAsm
->S
[0].src
.swizzley
= SQ_SEL_Y
;
5521 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_Z
;
5522 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_W
;
/* Source 1, literal-operand variant. */
5524 pAsm
->S
[1].src
.rtype
= SRC_REC_LITERAL
;
5525 //pAsm->S[1].src.reg = 0;
5526 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
5527 noneg_PVSSRC(&(pAsm
->S
[1].src
));
5528 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_X
;
5529 pAsm
->S
[1].src
.swizzley
= SQ_SEL_Y
;
5530 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_Z
;
5531 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_W
;
5533 if( GL_FALSE
== next_ins_literal(pAsm
, &(fLiteral
[0])) )
/* Source 1, register variant: temporary 0 with every channel SQ_SEL_1. */
5538 pAsm
->S
[1].src
.rtype
= DST_REG_TEMPORARY
;
5539 pAsm
->S
[1].src
.reg
= 0;
5540 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
5541 noneg_PVSSRC(&(pAsm
->S
[1].src
));
5542 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_1
;
5543 pAsm
->S
[1].src
.swizzley
= SQ_SEL_1
;
5544 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_1
;
5545 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_1
;
5547 if( GL_FALSE
== next_ins(pAsm
) )
5553 checkStackDepth(pAsm
, FC_PUSH_VPM
, GL_TRUE
);
/*
 * returnOnFlag: emit the control-flow sequence for a flag-guarded return.
 *
 * The visible statements, in order: jumpToOffest(pAsm, 1, 4), reset of the
 * return-in-loop flag to SQ_SEL_0, pops(pAsm, unIF + 1), and
 * add_return_inst(pAsm).
 *
 * pAsm - assembler state.
 * unIF - added to 1 to form the count passed to pops().
 */
5558 GLboolean
returnOnFlag(r700_AssemblerBase
*pAsm
, GLuint unIF
)
5561 jumpToOffest(pAsm
, 1, 4);
5562 setRetInLoopFlag(pAsm
, SQ_SEL_0
);
5563 pops(pAsm
, unIF
+ 1);
5564 add_return_inst(pAsm
);
/*
 * breakLoopOnFlag: add a SQ_CF_INST_LOOP_BREAK control-flow instruction and
 * record it in the flow-control stack entry fc_stack[unFCSP].
 *
 * pAsm   - assembler state; cf_current_cf_clause_ptr is the clause filled in
 *          after add_cf_instruction() succeeds.
 * unFCSP - index into pAsm->fc_stack of the entry whose "mid" array receives
 *          the new clause pointer.
 *
 * The "mid" array is grown by one pointer with _mesa_realloc(), the current
 * clause is appended, and unNumMid is incremented.
 *
 * NOTE(review): no NULL check on the _mesa_realloc() result is visible here
 * before the array is indexed, and the old pointer is overwritten directly
 * -- confirm how allocation failure is meant to be handled.
 */
5569 GLboolean
breakLoopOnFlag(r700_AssemblerBase
*pAsm
, GLuint unFCSP
)
5574 if(GL_FALSE
== add_cf_instruction(pAsm
) )
/* Fill Word1 of the new clause: LOOP_BREAK, one pop, active condition. */
5579 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
5580 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5581 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5583 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5584 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5585 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_LOOP_BREAK
;
5586 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5588 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Grow the "mid" pointer array by one slot and append this clause. */
5590 pAsm
->fc_stack
[unFCSP
].mid
= (R700ControlFlowGenericClause
**)_mesa_realloc(
5591 (void *)pAsm
->fc_stack
[unFCSP
].mid
,
5592 sizeof(R700ControlFlowGenericClause
*) * pAsm
->fc_stack
[unFCSP
].unNumMid
,
5593 sizeof(R700ControlFlowGenericClause
*) * (pAsm
->fc_stack
[unFCSP
].unNumMid
+ 1) );
5594 pAsm
->fc_stack
[unFCSP
].mid
[pAsm
->fc_stack
[unFCSP
].unNumMid
] = pAsm
->cf_current_cf_clause_ptr
;
5595 pAsm
->fc_stack
[unFCSP
].unNumMid
++;
/*
 * AssembleInstr: translate a range of Mesa program instructions into R700
 * shader code by dispatching on each instruction's Opcode to the matching
 * assemble_*() helper.
 *
 * uiFirstInst   - index of the first instruction to assemble.
 * uiNumberInsts - exclusive upper bound of the loop (the loop runs while
 *                 i < uiNumberInsts, starting at uiFirstInst).
 * pILInst       - instruction array; stored into pR700AsmCode->pILInst.
 * pR700AsmCode  - assembler state; uiCurInst is updated on every iteration.
 *
 * Not every parameter line of the signature is visible in this view.
 * The helpers are compared against GL_FALSE; the statements executed on
 * failure are not visible here.
 */
5602 GLboolean
AssembleInstr(GLuint uiFirstInst
,
5604 GLuint uiNumberInsts
,
5605 struct prog_instruction
*pILInst
,
5606 r700_AssemblerBase
*pR700AsmCode
)
5610 pR700AsmCode
->pILInst
= pILInst
;
5611 for(i
=uiFirstInst
; i
<uiNumberInsts
; i
++)
5613 pR700AsmCode
->uiCurInst
= i
;
/* Only built when USE_CF_FOR_CONTINUE_BREAK is not defined: an instruction
 * directly followed by OPCODE_BRK has its comparison opcode rewritten. */
5615 #ifndef USE_CF_FOR_CONTINUE_BREAK
5616 if(OPCODE_BRK
== pILInst
[i
+1].Opcode
)
5618 switch(pILInst
[i
].Opcode
)
5621 pILInst
[i
].Opcode
= OPCODE_SGT
;
5624 pILInst
[i
].Opcode
= OPCODE_SGE
;
5627 pILInst
[i
].Opcode
= OPCODE_SLT
;
5630 pILInst
[i
].Opcode
= OPCODE_SLE
;
5633 pILInst
[i
].Opcode
= OPCODE_SNE
;
5636 pILInst
[i
].Opcode
= OPCODE_SEQ
;
5643 if(pILInst
[i
].CondUpdate
== 1)
5645 /* remember dest register used for cond evaluation */
5646 /* XXX also handle PROGRAM_OUTPUT registers here? */
5647 pR700AsmCode
->last_cond_register
= pILInst
[i
].DstReg
.Index
;
/* Main opcode dispatch. */
5650 switch (pILInst
[i
].Opcode
)
5653 if ( GL_FALSE
== assemble_ABS(pR700AsmCode
) )
5658 if ( GL_FALSE
== assemble_ADD(pR700AsmCode
) )
5663 if ( GL_FALSE
== assemble_ARL(pR700AsmCode
) )
5667 radeon_error("Not yet implemented instruction OPCODE_ARR \n");
5668 //if ( GL_FALSE == assemble_BAD("ARR") )
5673 if ( GL_FALSE
== assemble_CMP(pR700AsmCode
) )
5677 if ( GL_FALSE
== assemble_TRIG(pR700AsmCode
, SQ_OP2_INST_COS
) )
5684 if ( GL_FALSE
== assemble_DOT(pR700AsmCode
) )
5689 if ( GL_FALSE
== assemble_DST(pR700AsmCode
) )
5694 if ( GL_FALSE
== assemble_EX2(pR700AsmCode
) )
5698 if ( GL_FALSE
== assemble_EXP(pR700AsmCode
) )
5703 if ( GL_FALSE
== assemble_FLR(pR700AsmCode
) )
5706 //case OP_FLR_INT: ;
5708 // if ( GL_FALSE == assemble_FLR_INT() )
5713 if ( GL_FALSE
== assemble_FRC(pR700AsmCode
) )
5719 if ( GL_FALSE
== assemble_KIL(pR700AsmCode
, SQ_OP2_INST_KILLGT
) )
5723 if ( GL_FALSE
== assemble_LG2(pR700AsmCode
) )
5727 if ( GL_FALSE
== assemble_LIT(pR700AsmCode
) )
5731 if ( GL_FALSE
== assemble_LRP(pR700AsmCode
) )
5735 if ( GL_FALSE
== assemble_LOG(pR700AsmCode
) )
5740 if ( GL_FALSE
== assemble_MAD(pR700AsmCode
) )
5744 if ( GL_FALSE
== assemble_MAX(pR700AsmCode
) )
5748 if ( GL_FALSE
== assemble_MIN(pR700AsmCode
) )
5753 if ( GL_FALSE
== assemble_MOV(pR700AsmCode
) )
5757 if ( GL_FALSE
== assemble_MUL(pR700AsmCode
) )
/* NOTE(review): the callPreSub() argument below reads pILInst->DstReg
 * (element 0 of the array) rather than pILInst[i].DstReg -- confirm that
 * this is intended. */
5763 callPreSub(pR700AsmCode
,
5766 pILInst
->DstReg
.Index
+ pR700AsmCode
->starting_temp_register_number
,
5768 radeon_error("noise1: not yet supported shader instruction\n");
5772 radeon_error("noise2: not yet supported shader instruction\n");
5775 radeon_error("noise3: not yet supported shader instruction\n");
5778 radeon_error("noise4: not yet supported shader instruction\n");
5782 if ( GL_FALSE
== assemble_POW(pR700AsmCode
) )
5786 if ( GL_FALSE
== assemble_RCP(pR700AsmCode
) )
5790 if ( GL_FALSE
== assemble_RSQ(pR700AsmCode
) )
5794 if ( GL_FALSE
== assemble_TRIG(pR700AsmCode
, SQ_OP2_INST_SIN
) )
5798 if ( GL_FALSE
== assemble_SCS(pR700AsmCode
) )
5803 if ( GL_FALSE
== assemble_LOGIC(pR700AsmCode
, SQ_OP2_INST_SETE
) )
5810 if ( GL_FALSE
== assemble_LOGIC(pR700AsmCode
, SQ_OP2_INST_SETGT
) )
5817 if ( GL_FALSE
== assemble_SGE(pR700AsmCode
) )
5823 /* NO LT, LE, TODO : use GE => LE, GT => LT : reversing the order of the 2 srcs would be simplest. Or use SQ_CF_COND_FALSE for SQ_CF_COND_ACTIVE.*/
/* The two source operands are swapped in place, SETGT is assembled, and the
 * original operands are restored afterwards (both restore sequences below). */
5826 struct prog_src_register SrcRegSave
[2];
5827 SrcRegSave
[0] = pILInst
[i
].SrcReg
[0];
5828 SrcRegSave
[1] = pILInst
[i
].SrcReg
[1];
5829 pILInst
[i
].SrcReg
[0] = SrcRegSave
[1];
5830 pILInst
[i
].SrcReg
[1] = SrcRegSave
[0];
5831 if ( GL_FALSE
== assemble_LOGIC(pR700AsmCode
, SQ_OP2_INST_SETGT
) )
5833 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
5834 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
5837 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
5838 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
/* Same swap-and-restore scheme, assembled with SETGE. */
5844 struct prog_src_register SrcRegSave
[2];
5845 SrcRegSave
[0] = pILInst
[i
].SrcReg
[0];
5846 SrcRegSave
[1] = pILInst
[i
].SrcReg
[1];
5847 pILInst
[i
].SrcReg
[0] = SrcRegSave
[1];
5848 pILInst
[i
].SrcReg
[1] = SrcRegSave
[0];
5849 if ( GL_FALSE
== assemble_LOGIC(pR700AsmCode
, SQ_OP2_INST_SETGE
) )
5851 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
5852 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
5855 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
5856 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
5861 if ( GL_FALSE
== assemble_LOGIC(pR700AsmCode
, SQ_OP2_INST_SETNE
) )
5868 // if ( GL_FALSE == assemble_STP(pR700AsmCode) )
5873 if ( GL_FALSE
== assemble_MOV(pR700AsmCode
) )
/* If the following instruction exists, is not OPCODE_END and is a texture
 * instruction, its destination dependency is set to its own index. */
5879 if( (i
+1)<uiNumberInsts
)
5881 if(OPCODE_END
!= pILInst
[i
+1].Opcode
)
5883 if( GL_TRUE
== IsTex(pILInst
[i
+1].Opcode
) )
5885 pR700AsmCode
->pInstDeps
[i
+1].nDstDep
= i
+1; //=1?
5896 if ( GL_FALSE
== assemble_TEX(pR700AsmCode
) )
5901 if ( GL_FALSE
== assemble_math_function(pR700AsmCode
, SQ_OP2_INST_TRUNC
) )
5906 if ( GL_FALSE
== assemble_XPD(pR700AsmCode
) )
/* The instruction at BranchTarget is inspected for OPCODE_ELSE before
 * assemble_IF() is called with bHasElse. */
5912 GLboolean bHasElse
= GL_FALSE
;
5914 if(pILInst
[pILInst
[i
].BranchTarget
].Opcode
== OPCODE_ELSE
)
5919 if ( GL_FALSE
== assemble_IF(pR700AsmCode
, bHasElse
) )
5927 if ( GL_FALSE
== assemble_ELSE(pR700AsmCode
) )
5932 if ( GL_FALSE
== assemble_ENDIF(pR700AsmCode
) )
5936 case OPCODE_BGNLOOP
:
5937 if( GL_FALSE
== assemble_BGNLOOP(pR700AsmCode
) )
5944 if( GL_FALSE
== assemble_BRK(pR700AsmCode
) )
5951 if( GL_FALSE
== assemble_CONT(pR700AsmCode
) )
5957 case OPCODE_ENDLOOP
:
5958 if( GL_FALSE
== assemble_ENDLOOP(pR700AsmCode
) )
5965 if( GL_FALSE
== assemble_BGNSUB(pR700AsmCode
, i
, uiIL_Shift
) )
5972 if( GL_FALSE
== assemble_RET(pR700AsmCode
) )
5979 if( GL_FALSE
== assemble_CAL(pR700AsmCode
,
5980 pILInst
[i
].BranchTarget
,
5990 //case OPCODE_EXPORT:
5991 // if ( GL_FALSE == assemble_EXPORT() )
5996 return assemble_ENDSUB(pR700AsmCode
);
5999 //pR700AsmCode->uiCurInst = i;
6000 //This is to remind that if in later export there is depth/stencil
6001 //export, we need a mov to re-arrange DST channel, where using a
6002 //pseudo inst, we will use this end inst to do it.
6006 radeon_error("internal: unknown instruction\n");
/*
 * InitShaderProgram: initialise per-program assembler state.
 *
 * Sets the return-in-loop flag to SQ_SEL_0 via setRetInLoopFlag() and
 * selects SQ_CF_INST_ALU as the ALU clause opcode.
 */
6014 GLboolean
InitShaderProgram(r700_AssemblerBase
* pAsm
)
6016 setRetInLoopFlag(pAsm
, SQ_SEL_0
);
6017 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
/*
 * RelocProgram: relocate assembled subroutines and link them behind the main
 * control-flow instruction list.
 *
 * pAsm    - assembler state (CALLSTACK[0] main list, subs[], callers[]).
 * pILProg - source program; its Parameters->NumParameters seeds the constant
 *           offset when Parameters is non-NULL.
 *
 * Visible steps:
 *  1. When HAS_LOOPRET is not set in unCFflags, the flag-init ALU is removed
 *     from a SIT_CF_ALU clause of the main list: a clause with count 0 is
 *     turned into SQ_CF_INST_NOP, otherwise the first linked ALU instruction
 *     is unlinked and count is decremented.
 *  2. When CALLSTACK[0].max > 0 the shader stack size is set to
 *     ((max + 3) >> 2) + 2.
 *  3. For every subroutine: CF addresses of the listed branch/loop opcodes
 *     and each instruction's m_uIndex are shifted by unCFoffset; for
 *     pre-built subs (pPresubDesc != NULL) ALU GPR selectors are rebased by
 *     unRegOffset - unMinRegIndex and constant-file selectors by
 *     unConstOffset; the sub's list is then appended to the main list.
 *  4. For every caller: the call address is set to the sub's unCFoffset and,
 *     for pre-built subs, the prelude/finale ALU clauses are rebased.
 *
 * The loop constructs that walk the instruction lists are not visible in
 * this view.
 */
6021 GLboolean
RelocProgram(r700_AssemblerBase
* pAsm
, struct gl_program
* pILProg
)
6025 TypedShaderList
* plstCFmain
;
6026 TypedShaderList
* plstCFsub
;
6028 R700ShaderInstruction
* pInst
;
6029 R700ControlFlowGenericClause
* pCFInst
;
6031 R700ControlFlowALUClause
* pCF_ALU
;
6032 R700ALUInstruction
* pALU
;
6033 GLuint unConstOffset
= 0;
6035 GLuint unMinRegIndex
;
6037 plstCFmain
= pAsm
->CALLSTACK
[0].plstCFInstructions_local
;
6039 /* remove flags init if they are not used */
6040 if((pAsm
->unCFflags
& HAS_LOOPRET
) == 0)
6042 R700ControlFlowALUClause
* pCF_ALU
;
6043 pInst
= plstCFmain
->pHead
;
6046 if(SIT_CF_ALU
== pInst
->m_ShaderInstType
)
6048 pCF_ALU
= (R700ControlFlowALUClause
*)pInst
;
6049 if(0 == pCF_ALU
->m_Word1
.f
.count
)
6051 pCF_ALU
->m_Word1
.f
.cf_inst
= SQ_CF_INST_NOP
;
/* Otherwise drop the first linked ALU instruction and relink the clause to
 * the next one. */
6055 R700ALUInstruction
* pALU
= pCF_ALU
->m_pLinkedALUInstruction
;
6057 pALU
->m_pLinkedALUClause
= NULL
;
6058 pALU
= (R700ALUInstruction
*)(pALU
->pNextInst
);
6059 pALU
->m_pLinkedALUClause
= pCF_ALU
;
6060 pCF_ALU
->m_pLinkedALUInstruction
= pALU
;
6062 pCF_ALU
->m_Word1
.f
.count
--;
6066 pInst
= pInst
->pNextInst
;
6070 if(pAsm
->CALLSTACK
[0].max
> 0)
6072 pAsm
->pR700Shader
->uStackSize
= ((pAsm
->CALLSTACK
[0].max
+ 3)>>2) + 2;
6075 if(0 == pAsm
->unSubArrayPointer
)
/* Subroutines are placed after the main list, so the first CF offset is the
 * main list's node count. */
6080 unCFoffset
= plstCFmain
->uNumOfNode
;
6082 if(NULL
!= pILProg
->Parameters
)
6084 unConstOffset
= pILProg
->Parameters
->NumParameters
;
6088 for(i
=0; i
<pAsm
->unSubArrayPointer
; i
++)
6090 pAsm
->subs
[i
].unCFoffset
= unCFoffset
;
6091 plstCFsub
= &(pAsm
->subs
[i
].lstCFInstructions_local
);
6093 pInst
= plstCFsub
->pHead
;
6095 /* reloc instructions */
6098 if(SIT_CF_GENERIC
== pInst
->m_ShaderInstType
)
6100 pCFInst
= (R700ControlFlowGenericClause
*)pInst
;
6102 switch (pCFInst
->m_Word1
.f
.cf_inst
)
6104 case SQ_CF_INST_POP
:
6105 case SQ_CF_INST_JUMP
:
6106 case SQ_CF_INST_ELSE
:
6107 case SQ_CF_INST_LOOP_END
:
6108 case SQ_CF_INST_LOOP_START
:
6109 case SQ_CF_INST_LOOP_START_NO_AL
:
6110 case SQ_CF_INST_LOOP_CONTINUE
:
6111 case SQ_CF_INST_LOOP_BREAK
:
6112 pCFInst
->m_Word0
.f
.addr
+= unCFoffset
;
6119 pInst
->m_uIndex
+= unCFoffset
;
6121 pInst
= pInst
->pNextInst
;
6124 if(NULL
!= pAsm
->subs
[i
].pPresubDesc
)
6128 unMinRegIndex
= pAsm
->subs
[i
].pPresubDesc
->pCompiledSub
->MinRegIndex
;
6129 unRegOffset
= pAsm
->subs
[i
].pPresubDesc
->maxStartReg
;
/* NOTE(review): unConstOffset is accumulated with += on every pre-built sub,
 * so the offset compounds across subs -- confirm that this is intended. */
6130 unConstOffset
+= pAsm
->subs
[i
].pPresubDesc
->unConstantsStart
;
6132 pInst
= plstCFsub
->pHead
;
6135 if(SIT_CF_ALU
== pInst
->m_ShaderInstType
)
6137 pCF_ALU
= (R700ControlFlowALUClause
*)pInst
;
6139 pALU
= pCF_ALU
->m_pLinkedALUInstruction
;
/* The loop bound is inclusive: count + 1 ALU instructions are visited. */
6140 for(int j
=0; j
<=pCF_ALU
->m_Word1
.f
.count
; j
++)
6142 pALU
->m_Word1
.f
.dst_gpr
= pALU
->m_Word1
.f
.dst_gpr
+ unRegOffset
- unMinRegIndex
;
6144 if(pALU
->m_Word0
.f
.src0_sel
< SQ_ALU_SRC_GPR_SIZE
)
6146 pALU
->m_Word0
.f
.src0_sel
= pALU
->m_Word0
.f
.src0_sel
+ unRegOffset
- unMinRegIndex
;
6148 else if(pALU
->m_Word0
.f
.src0_sel
>= SQ_ALU_SRC_CFILE_BASE
)
6150 pALU
->m_Word0
.f
.src0_sel
+= unConstOffset
;
6153 if( ((pALU
->m_Word1
.val
>> SQ_ALU_WORD1_OP3_ALU_INST_SHIFT
) & 0x0000001F)
6154 >= SQ_OP3_INST_MUL_LIT
)
6155 { /* op3 : 3 srcs */
6156 if(pALU
->m_Word1_OP3
.f
.src2_sel
< SQ_ALU_SRC_GPR_SIZE
)
6158 pALU
->m_Word1_OP3
.f
.src2_sel
= pALU
->m_Word1_OP3
.f
.src2_sel
+ unRegOffset
- unMinRegIndex
;
6160 else if(pALU
->m_Word1_OP3
.f
.src2_sel
>= SQ_ALU_SRC_CFILE_BASE
)
6162 pALU
->m_Word1_OP3
.f
.src2_sel
+= unConstOffset
;
6164 if(pALU
->m_Word0
.f
.src1_sel
< SQ_ALU_SRC_GPR_SIZE
)
6166 pALU
->m_Word0
.f
.src1_sel
= pALU
->m_Word0
.f
.src1_sel
+ unRegOffset
- unMinRegIndex
;
6168 else if(pALU
->m_Word0
.f
.src1_sel
>= SQ_ALU_SRC_CFILE_BASE
)
6170 pALU
->m_Word0
.f
.src1_sel
+= unConstOffset
;
/* Two-operand form: the operand count is looked up with r700GetNumOperands()
 * from either the f6 or the f layout of Word1_OP2; the condition choosing
 * between the two is not visible in this view. */
6177 uNumSrc
= r700GetNumOperands(pALU
->m_Word1_OP2
.f6
.alu_inst
, 0);
6181 uNumSrc
= r700GetNumOperands(pALU
->m_Word1_OP2
.f
.alu_inst
, 0);
6185 if(pALU
->m_Word0
.f
.src1_sel
< SQ_ALU_SRC_GPR_SIZE
)
6187 pALU
->m_Word0
.f
.src1_sel
= pALU
->m_Word0
.f
.src1_sel
+ unRegOffset
- unMinRegIndex
;
6189 else if(pALU
->m_Word0
.f
.src1_sel
>= SQ_ALU_SRC_CFILE_BASE
)
6191 pALU
->m_Word0
.f
.src1_sel
+= unConstOffset
;
6195 pALU
= (R700ALUInstruction
*)(pALU
->pNextInst
);
6198 pInst
= pInst
->pNextInst
;
6202 /* Put sub into main */
6203 plstCFmain
->pTail
->pNextInst
= plstCFsub
->pHead
;
6204 plstCFmain
->pTail
= plstCFsub
->pTail
;
6205 plstCFmain
->uNumOfNode
+= plstCFsub
->uNumOfNode
;
6207 unCFoffset
+= plstCFsub
->uNumOfNode
;
/* Patch every recorded call site with the final address of its sub. */
6211 for(i
=0; i
<pAsm
->unCallerArrayPointer
; i
++)
6213 pAsm
->callers
[i
].cf_ptr
->m_Word0
.f
.addr
6214 = pAsm
->subs
[pAsm
->callers
[i
].subDescIndex
].unCFoffset
;
6216 if(NULL
!= pAsm
->subs
[pAsm
->callers
[i
].subDescIndex
].pPresubDesc
)
6218 unMinRegIndex
= pAsm
->subs
[pAsm
->callers
[i
].subDescIndex
].pPresubDesc
->pCompiledSub
->MinRegIndex
;
6219 unRegOffset
= pAsm
->subs
[pAsm
->callers
[i
].subDescIndex
].pPresubDesc
->maxStartReg
;
/* Prelude clause: rebase the destination GPR of each ALU instruction. */
6221 if(NULL
!= pAsm
->callers
[i
].prelude_cf_ptr
)
6223 pCF_ALU
= (R700ControlFlowALUClause
* )(pAsm
->callers
[i
].prelude_cf_ptr
);
6224 pALU
= pCF_ALU
->m_pLinkedALUInstruction
;
6225 for(int j
=0; j
<=pCF_ALU
->m_Word1
.f
.count
; j
++)
6227 pALU
->m_Word1
.f
.dst_gpr
= pALU
->m_Word1
.f
.dst_gpr
+ unRegOffset
- unMinRegIndex
;
6228 pALU
= (R700ALUInstruction
*)(pALU
->pNextInst
);
/* Finale clause: rebase source 0 of each ALU instruction. */
6231 if(NULL
!= pAsm
->callers
[i
].finale_cf_ptr
)
6233 pCF_ALU
= (R700ControlFlowALUClause
* )(pAsm
->callers
[i
].finale_cf_ptr
);
6234 pALU
= pCF_ALU
->m_pLinkedALUInstruction
;
6235 for(int j
=0; j
<=pCF_ALU
->m_Word1
.f
.count
; j
++)
6237 pALU
->m_Word0
.f
.src0_sel
= pALU
->m_Word0
.f
.src0_sel
+ unRegOffset
- unMinRegIndex
;
6238 pALU
= (R700ALUInstruction
*)(pALU
->pNextInst
);
/*
 * callPreSub: emit a call to a pre-compiled ("presub") subroutine, loading
 * the subroutine into the assembler the first time its signature is seen.
 *
 * pAsm            - assembler state.
 * scriptSigniture - identifies the subroutine; compared against
 *                   presubs[i].sptSigniture to detect an already loaded one.
 * pCompiledSub    - compiled subroutine description (instructions, register
 *                   indices, output swizzles).
 * uNumValidSrc    - number of source operands copied into the subroutine's
 *                   input registers.
 * One further parameter line of the signature is not visible in this view.
 *
 * Visible steps:
 *  1. A MOV into pCompiledSub->srcRegIndex[i] is set up for each source.
 *  2. The presubs[] table is searched for scriptSigniture.
 *  3. If not found: the assembler context is saved, presubs[] is grown by 4
 *     entries when full, a descriptor is filled in, a fresh register context
 *     is installed, assemble_CAL() is invoked and the context is restored.
 *     If found: assemble_CAL() is invoked with the existing descriptor.
 *  4. A MOV from pCompiledSub->dstRegIndex (with the sub's output swizzle)
 *     to the real destination is set up.
 *  5. The prelude/finale ALU clauses are recorded on the newest callers[]
 *     entry, and number_used_registers / maxStartReg are raised if needed.
 */
6247 GLboolean
callPreSub(r700_AssemblerBase
* pAsm
,
6248 LOADABLE_SCRIPT_SIGNITURE scriptSigniture
,
6249 COMPILED_SUB
* pCompiledSub
,
6251 GLshort uNumValidSrc
)
6253 /* save assemble context */
6254 GLuint starting_temp_register_number_save
;
6255 GLuint number_used_registers_save
;
6256 GLuint uFirstHelpReg_save
;
6257 GLuint uHelpReg_save
;
6258 GLuint uiCurInst_save
;
6259 struct prog_instruction
*pILInst_save
;
6260 PRESUB_DESC
* pPresubDesc
;
6264 R700ControlFlowGenericClause
* prelude_cf_ptr
= NULL
;
6266 /* copy srcs to presub inputs */
6267 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
6268 for(i
=0; i
<uNumValidSrc
; i
++)
6270 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
6271 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
6272 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
6273 pAsm
->D
.dst
.reg
= pCompiledSub
->srcRegIndex
[i
];
6274 pAsm
->D
.dst
.writex
= 1;
6275 pAsm
->D
.dst
.writey
= 1;
6276 pAsm
->D
.dst
.writez
= 1;
6277 pAsm
->D
.dst
.writew
= 1;
6279 if( GL_FALSE
== assemble_src(pAsm
, i
, 0) )
/* Remember the ALU clause holding the input copies so that it can be
 * recorded as the caller's prelude further down. */
6286 if(uNumValidSrc
> 0)
6288 prelude_cf_ptr
= (R700ControlFlowGenericClause
*) pAsm
->cf_current_alu_clause_ptr
;
6289 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
6292 /* browse through existing presubs. */
6293 for(i
=0; i
<pAsm
->unNumPresub
; i
++)
6295 if(pAsm
->presubs
[i
].sptSigniture
== scriptSigniture
)
6301 if(i
== pAsm
->unNumPresub
)
6302 { /* not loaded yet */
6303 /* save assemble context */
6304 number_used_registers_save
= pAsm
->number_used_registers
;
6305 uFirstHelpReg_save
= pAsm
->uFirstHelpReg
;
6306 uHelpReg_save
= pAsm
->uHelpReg
;
6307 starting_temp_register_number_save
= pAsm
->starting_temp_register_number
;
6308 pILInst_save
= pAsm
->pILInst
;
6309 uiCurInst_save
= pAsm
->uiCurInst
;
6311 /* alloc in presub */
6312 if( (pAsm
->unNumPresub
+ 1) > pAsm
->unPresubArraySize
)
6314 pAsm
->presubs
= (PRESUB_DESC
*)_mesa_realloc( (void *)pAsm
->presubs
,
6315 sizeof(PRESUB_DESC
) * pAsm
->unPresubArraySize
,
6316 sizeof(PRESUB_DESC
) * (pAsm
->unPresubArraySize
+ 4) );
6317 if(NULL
== pAsm
->presubs
)
6319 radeon_error("No memeory to allocate built in shader function description structures. \n");
6322 pAsm
->unPresubArraySize
+= 4;
6325 pPresubDesc
= &(pAsm
->presubs
[i
]);
6326 pPresubDesc
->sptSigniture
= scriptSigniture
;
6328 /* constants offsets need to be final resolved at reloc. */
6329 if(0 == pAsm
->unNumPresub
)
6331 pPresubDesc
->unConstantsStart
= 0;
/* Otherwise this sub's constants start right after the previous sub's. */
6335 pPresubDesc
->unConstantsStart
= pAsm
->presubs
[i
-1].unConstantsStart
6336 + pAsm
->presubs
[i
-1].pCompiledSub
->NumParameters
;
6339 pPresubDesc
->pCompiledSub
= pCompiledSub
;
6341 pPresubDesc
->subIL_Shift
= pAsm
->unCurNumILInsts
;
6342 pPresubDesc
->maxStartReg
= uFirstHelpReg_save
;
6343 pAsm
->unCurNumILInsts
+= pCompiledSub
->NumInstructions
;
6345 pAsm
->unNumPresub
++;
6347 /* setup new assemble context */
6348 pAsm
->starting_temp_register_number
= 0;
6349 pAsm
->number_used_registers
= pCompiledSub
->NumTemporaries
;
6350 pAsm
->uFirstHelpReg
= pAsm
->number_used_registers
;
6351 pAsm
->uHelpReg
= pAsm
->uFirstHelpReg
;
6353 bRet
= assemble_CAL(pAsm
,
6355 pPresubDesc
->subIL_Shift
,
6356 pCompiledSub
->NumInstructions
,
6357 pCompiledSub
->Instructions
,
6361 pPresubDesc
->number_used_registers
= pAsm
->number_used_registers
;
6363 /* restore assemble context */
6364 pAsm
->number_used_registers
= number_used_registers_save
;
6365 pAsm
->uFirstHelpReg
= uFirstHelpReg_save
;
6366 pAsm
->uHelpReg
= uHelpReg_save
;
6367 pAsm
->starting_temp_register_number
= starting_temp_register_number_save
;
6368 pAsm
->pILInst
= pILInst_save
;
6369 pAsm
->uiCurInst
= uiCurInst_save
;
/* Already loaded: reuse the existing descriptor. */
6373 pPresubDesc
= &(pAsm
->presubs
[i
]);
6375 bRet
= assemble_CAL(pAsm
,
6377 pPresubDesc
->subIL_Shift
,
6378 pCompiledSub
->NumInstructions
,
6379 pCompiledSub
->Instructions
,
6383 if(GL_FALSE
== bRet
)
6385 radeon_error("Shader presub assemble failed. \n");
6389 /* copy presub output to real dst */
6390 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
6391 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
6393 if( GL_FALSE
== assemble_dst(pAsm
) )
6398 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
6399 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
6400 pAsm
->S
[0].src
.reg
= pCompiledSub
->dstRegIndex
;
6401 pAsm
->S
[0].src
.swizzlex
= pCompiledSub
->outputSwizzleX
;
6402 pAsm
->S
[0].src
.swizzley
= pCompiledSub
->outputSwizzleY
;
6403 pAsm
->S
[0].src
.swizzlez
= pCompiledSub
->outputSwizzleZ
;
6404 pAsm
->S
[0].src
.swizzlew
= pCompiledSub
->outputSwizzleW
;
/* Record the finale and prelude clauses on the newest callers[] entry
 * (index unCallerArrayPointer - 1). */
6408 pAsm
->callers
[pAsm
->unCallerArrayPointer
- 1].finale_cf_ptr
= (R700ControlFlowGenericClause
*) pAsm
->cf_current_alu_clause_ptr
;
6409 pAsm
->callers
[pAsm
->unCallerArrayPointer
- 1].prelude_cf_ptr
= prelude_cf_ptr
;
6410 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
/* Raise the caller's register high-water mark and the sub's start register
 * when this call site needs more than previously recorded. */
6413 if( (pPresubDesc
->number_used_registers
+ pAsm
->uFirstHelpReg
) > pAsm
->number_used_registers
)
6415 pAsm
->number_used_registers
= pPresubDesc
->number_used_registers
+ pAsm
->uFirstHelpReg
;
6417 if(pAsm
->uFirstHelpReg
> pPresubDesc
->maxStartReg
)
6419 pPresubDesc
->maxStartReg
= pAsm
->uFirstHelpReg
;
/*
 * Process_Export: fill in one export control-flow clause.
 *
 * pAsm                     - assembler state; the clause written is
 *                            cf_current_export_clause_ptr.
 * export_starting_index    - added to the base array index for the export
 *                            type (not used for a depth export).
 * export_count             - number of registers exported; burst_count is
 *                            set to export_count - 1.
 * starting_register_number - first GPR exported (rw_gpr).
 * is_depth_export          - GL_TRUE selects SQ_CF_PIXEL_Z as array base for
 *                            a pixel export.
 * The export-type parameter ("type" in the body) is on a signature line that
 * is not visible in this view.
 *
 * For a single-register export the channel selects are derived from
 * pAsm->pucOutMask (masked-out channels become SQ_SEL_MASK); otherwise an
 * identity X/Y/Z/W swizzle is written.  The clause is remembered in
 * cf_last_export_ptr.
 */
6425 GLboolean
Process_Export(r700_AssemblerBase
* pAsm
,
6427 GLuint export_starting_index
,
6428 GLuint export_count
,
6429 GLuint starting_register_number
,
6430 GLboolean is_depth_export
)
6432 unsigned char ucWriteMask
;
6434 check_current_clause(pAsm
, CF_EMPTY_CLAUSE
);
6435 check_current_clause(pAsm
, CF_EXPORT_CLAUSE
); //alloc the cf_current_export_clause_ptr
6437 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.type
= type
;
/* Choose the array base from the export type. */
6441 case SQ_EXPORT_PIXEL
:
6442 if(GL_TRUE
== is_depth_export
)
6444 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= SQ_CF_PIXEL_Z
;
6448 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= SQ_CF_PIXEL_MRT0
+ export_starting_index
;
6453 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= SQ_CF_POS_0
+ export_starting_index
;
6456 case SQ_EXPORT_PARAM
:
6457 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= 0x0 + export_starting_index
;
6461 radeon_error("Unknown export type: %d\n", type
);
6466 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.rw_gpr
= starting_register_number
;
6468 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.rw_rel
= SQ_ABSOLUTE
;
6469 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.index_gpr
= 0x0;
6470 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.elem_size
= 0x3;
6472 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.burst_count
= (export_count
- 1);
6473 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
6474 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
6475 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT
; // _DONE
6476 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
6477 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Single-register export: build the swizzle from the output write mask,
 * one bit per channel (bit 0 = x ... bit 3 = w). */
6479 if (export_count
== 1)
6481 assert(starting_register_number
>= pAsm
->starting_export_register_number
);
6483 ucWriteMask
= pAsm
->pucOutMask
[starting_register_number
- pAsm
->starting_export_register_number
];
6484 /* exports Z as a float into Red channel */
6485 if (GL_TRUE
== is_depth_export
)
6488 if( (ucWriteMask
& 0x1) != 0)
6490 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_X
;
6494 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_MASK
;
6496 if( ((ucWriteMask
>>1) & 0x1) != 0)
6498 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_Y
;
6502 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_MASK
;
6504 if( ((ucWriteMask
>>2) & 0x1) != 0)
6506 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_Z
;
6510 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_MASK
;
6512 if( ((ucWriteMask
>>3) & 0x1) != 0)
6514 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_W
;
6518 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_MASK
;
6523 // This should only be used if all components for all registers have been written
6524 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_X
;
6525 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_Y
;
6526 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_Z
;
6527 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_W
;
6530 pAsm
->cf_last_export_ptr
= pAsm
->cf_current_export_clause_ptr
;
/*
 * Move_Depth_Exports_To_Correct_Channels: emit a MOV that copies the
 * selected channel of the depth export register into its .x channel.
 *
 * pAsm                 - assembler state; depth_export_register_number is
 *                        used as both source and destination register.
 * depth_channel_select - swizzle applied to the source via
 *                        setswizzle_PVSSRC().
 *
 * The opcode of the current IL instruction is temporarily replaced by
 * OPCODE_MOV while the instruction is emitted and restored afterwards.
 */
6535 GLboolean
Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase
*pAsm
, BITS depth_channel_select
)
6537 gl_inst_opcode Opcode_save
= pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
; //Should be OPCODE_END
6538 pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
= OPCODE_MOV
;
6540 // MOV depth_export_register.hw_depth_channel, depth_export_register.depth_channel_select
6542 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
6544 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
6545 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
6546 pAsm
->D
.dst
.reg
= pAsm
->depth_export_register_number
;
6548 pAsm
->D
.dst
.writex
= 1; // depth goes in R channel for HW
6550 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
6551 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
6552 pAsm
->S
[0].src
.reg
= pAsm
->depth_export_register_number
;
6554 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), depth_channel_select
);
6556 noneg_PVSSRC(&(pAsm
->S
[0].src
));
6558 if( GL_FALSE
== next_ins(pAsm
) )
/* Put the original opcode back. */
6563 pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
= Opcode_save
;
/*
 * Process_Fragment_Exports: emit the pixel exports of a fragment program.
 *
 * pR700AsmCode   - assembler state.
 * OutputsWritten - bitfield of fragment results written by the program;
 *                  tested against unBit for each i below FRAG_RESULT_MAX.
 *
 * Visible steps: when depth_export_register_number >= 0 the depth value is
 * first moved into place (Z channel selected); each written result is then
 * exported with Process_Export() using uiFP_OutputMap[i]; if nothing was
 * exported a dummy export is emitted; finally the last export is marked
 * SQ_CF_INST_EXPORT_DONE with end_of_program set.
 */
6568 GLboolean
Process_Fragment_Exports(r700_AssemblerBase
*pR700AsmCode
,
6569 GLbitfield OutputsWritten
)
6572 GLuint export_count
= 0;
6575 if(pR700AsmCode
->depth_export_register_number
>= 0)
6577 if( GL_FALSE
== Move_Depth_Exports_To_Correct_Channels(pR700AsmCode
, SQ_SEL_Z
) ) // depth
6583 for (i
= 0; i
< FRAG_RESULT_MAX
; ++i
)
6587 if (OutputsWritten
& unBit
)
6589 GLboolean is_depth
= i
== FRAG_RESULT_DEPTH
? GL_TRUE
: GL_FALSE
;
6590 if (!Process_Export(pR700AsmCode
, SQ_EXPORT_PIXEL
, 0, 1, pR700AsmCode
->uiFP_OutputMap
[i
], is_depth
))
6596 /* Need to export something, otherwise we'll hang
6597 * results are undefined anyway */
6598 if(export_count
== 0)
6600 Process_Export(pR700AsmCode
, SQ_EXPORT_PIXEL
, 0, 1, pR700AsmCode
->starting_export_register_number
, GL_FALSE
);
/* Mark the final export as the end of the program. */
6603 if(pR700AsmCode
->cf_last_export_ptr
!= NULL
)
6605 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
6606 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.end_of_program
= 0x1;
/*
 * Process_Vertex_Exports: emit the exports of a vertex program.
 *
 * pR700AsmCode   - assembler state; ucVP_OutputMap[] supplies the register
 *                  for each vertex result.
 * OutputsWritten - bitfield of vertex results written by the program.
 *
 * Visible order of exports: HPOS and PSIZ first (after which the last export
 * is marked SQ_CF_INST_EXPORT_DONE and export_starting_index is reset to 0),
 * then COL0, COL1, FOGC, the TEX0 + i results and the results from
 * VERT_RESULT_VAR0 up to VERT_RESULT_MAX.  A further Process_Export() from
 * starting_export_register_number with a (0,0,0,1) swizzle is also set up;
 * the condition guarding it is not visible in this view.  The last export is
 * finally flagged end_of_program.
 *
 * Several argument lines of the Process_Export() calls are not visible in
 * this view.
 */
6612 GLboolean
Process_Vertex_Exports(r700_AssemblerBase
*pR700AsmCode
,
6613 GLbitfield OutputsWritten
)
6618 GLuint export_starting_index
= 0;
6619 GLuint export_count
= pR700AsmCode
->number_of_exports
;
6621 unBit
= 1 << VERT_RESULT_HPOS
;
6622 if(OutputsWritten
& unBit
)
6624 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6626 export_starting_index
,
6628 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_HPOS
],
6633 export_starting_index
++;
6637 unBit
= 1 << VERT_RESULT_PSIZ
;
6638 if(OutputsWritten
& unBit
)
6640 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6642 export_starting_index
,
6644 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_PSIZ
],
6652 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
/* Restart indexing for the exports that follow. */
6655 pR700AsmCode
->number_of_exports
= export_count
;
6656 export_starting_index
= 0;
6658 unBit
= 1 << VERT_RESULT_COL0
;
6659 if(OutputsWritten
& unBit
)
6661 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6663 export_starting_index
,
6665 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_COL0
],
6671 export_starting_index
++;
6674 unBit
= 1 << VERT_RESULT_COL1
;
6675 if(OutputsWritten
& unBit
)
6677 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6679 export_starting_index
,
6681 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_COL1
],
6687 export_starting_index
++;
6690 unBit
= 1 << VERT_RESULT_FOGC
;
6691 if(OutputsWritten
& unBit
)
6693 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6695 export_starting_index
,
6697 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_FOGC
],
6703 export_starting_index
++;
6708 unBit
= 1 << (VERT_RESULT_TEX0
+ i
);
6709 if(OutputsWritten
& unBit
)
6711 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6713 export_starting_index
,
6715 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_TEX0
+ i
],
6721 export_starting_index
++;
6725 for(i
=VERT_RESULT_VAR0
; i
<VERT_RESULT_MAX
; i
++)
6728 if(OutputsWritten
& unBit
)
6730 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6732 export_starting_index
,
6734 pR700AsmCode
->ucVP_OutputMap
[i
],
6740 export_starting_index
++;
6744 // At least one param should be exported
6747 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
6751 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6755 pR700AsmCode
->starting_export_register_number
,
/* Swizzle of this export: constant (0, 0, 0, 1). */
6761 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_0
;
6762 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_0
;
6763 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_0
;
6764 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_1
;
6765 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
6768 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.end_of_program
= 0x1;
/*
 * Clean_Up_Assembler: release the arrays owned by the assembler state.
 *
 * pucOutMask and pInstDeps are passed to FREE() unconditionally; subs,
 * callers and presubs are passed to FREE() only when non-NULL.
 */
6773 GLboolean
Clean_Up_Assembler(r700_AssemblerBase
*pR700AsmCode
)
6775 FREE(pR700AsmCode
->pucOutMask
);
6776 FREE(pR700AsmCode
->pInstDeps
);
6778 if(NULL
!= pR700AsmCode
->subs
)
6780 FREE(pR700AsmCode
->subs
);
6782 if(NULL
!= pR700AsmCode
->callers
)
6784 FREE(pR700AsmCode
->callers
);
6787 if(NULL
!= pR700AsmCode
->presubs
)
6789 FREE(pR700AsmCode
->presubs
);