2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
33 #include "main/mtypes.h"
34 #include "main/imports.h"
35 #include "program/prog_parameter.h"
37 #include "radeon_debug.h"
38 #include "r600_context.h"
40 #include "r700_assembler.h"
41 #include "evergreen_sq.h"
43 #define USE_CF_FOR_CONTINUE_BREAK 1
44 #define USE_CF_FOR_POP_AFTER 1
46 struct prog_instruction noise1_insts
[12] = {
47 {OPCODE_BGNSUB
, {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
48 {OPCODE_MOV
, {{0, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 2, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
49 {OPCODE_MOV
, {{8, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 4, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
50 {OPCODE_MOV
, {{8, 0, 585, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 8, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
51 {OPCODE_SGT
, {{0, 0, 585, 0, 0, 0}, {8, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 1, 1, 0, 8, 1672, 0}, 1, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
52 {OPCODE_IF
, {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 7, 0, 0}, 0, 0, 0, 1, 0, 0, 0, 15, 0, 0, 0},
53 {OPCODE_MOV
, {{0, 0, 1755, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
54 {OPCODE_RET
, {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
55 {OPCODE_ENDIF
, {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
56 {OPCODE_MOV
, {{0, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
57 {OPCODE_RET
, {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
58 {OPCODE_ENDSUB
, {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}
60 float noise1_const
[2][4] = {
61 {0.300000f
, 0.900000f
, 0.500000f
, 0.300000f
}
64 COMPILED_SUB noise1_presub
= {
79 BITS
addrmode_PVSDST(PVSDST
* pPVSDST
)
81 return pPVSDST
->addrmode0
| ((BITS
)pPVSDST
->addrmode1
<< 1);
84 void setaddrmode_PVSDST(PVSDST
* pPVSDST
, BITS addrmode
)
86 pPVSDST
->addrmode0
= addrmode
& 1;
87 pPVSDST
->addrmode1
= (addrmode
>> 1) & 1;
90 void nomask_PVSDST(PVSDST
* pPVSDST
)
92 pPVSDST
->writex
= pPVSDST
->writey
= pPVSDST
->writez
= pPVSDST
->writew
= 1;
95 BITS
addrmode_PVSSRC(PVSSRC
* pPVSSRC
)
97 return pPVSSRC
->addrmode0
| ((BITS
)pPVSSRC
->addrmode1
<< 1);
100 void setaddrmode_PVSSRC(PVSSRC
* pPVSSRC
, BITS addrmode
)
102 pPVSSRC
->addrmode0
= addrmode
& 1;
103 pPVSSRC
->addrmode1
= (addrmode
>> 1) & 1;
/*
 * setswizzle_PVSSRC: stores the selector swz into the swizzle field(s) of the
 * source operand *pPVSSRC.
 * NOTE(review): only the assignment to swizzlew is visible in this extract;
 * the statements that originally preceded it are missing. Presumably the
 * x/y/z selectors are set to swz the same way -- confirm against the full file.
 */
107 void setswizzle_PVSSRC(PVSSRC
* pPVSSRC
, BITS swz
)
112 pPVSSRC
->swizzlew
= swz
;
115 void noswizzle_PVSSRC(PVSSRC
* pPVSSRC
)
117 pPVSSRC
->swizzlex
= SQ_SEL_X
;
118 pPVSSRC
->swizzley
= SQ_SEL_Y
;
119 pPVSSRC
->swizzlez
= SQ_SEL_Z
;
120 pPVSSRC
->swizzlew
= SQ_SEL_W
;
124 swizzleagain_PVSSRC(PVSSRC
* pPVSSRC
, BITS x
, BITS y
, BITS z
, BITS w
)
128 case SQ_SEL_X
: x
= pPVSSRC
->swizzlex
;
130 case SQ_SEL_Y
: x
= pPVSSRC
->swizzley
;
132 case SQ_SEL_Z
: x
= pPVSSRC
->swizzlez
;
134 case SQ_SEL_W
: x
= pPVSSRC
->swizzlew
;
141 case SQ_SEL_X
: y
= pPVSSRC
->swizzlex
;
143 case SQ_SEL_Y
: y
= pPVSSRC
->swizzley
;
145 case SQ_SEL_Z
: y
= pPVSSRC
->swizzlez
;
147 case SQ_SEL_W
: y
= pPVSSRC
->swizzlew
;
154 case SQ_SEL_X
: z
= pPVSSRC
->swizzlex
;
156 case SQ_SEL_Y
: z
= pPVSSRC
->swizzley
;
158 case SQ_SEL_Z
: z
= pPVSSRC
->swizzlez
;
160 case SQ_SEL_W
: z
= pPVSSRC
->swizzlew
;
167 case SQ_SEL_X
: w
= pPVSSRC
->swizzlex
;
169 case SQ_SEL_Y
: w
= pPVSSRC
->swizzley
;
171 case SQ_SEL_Z
: w
= pPVSSRC
->swizzlez
;
173 case SQ_SEL_W
: w
= pPVSSRC
->swizzlew
;
178 pPVSSRC
->swizzlex
= x
;
179 pPVSSRC
->swizzley
= y
;
180 pPVSSRC
->swizzlez
= z
;
181 pPVSSRC
->swizzlew
= w
;
184 void neg_PVSSRC(PVSSRC
* pPVSSRC
)
192 void noneg_PVSSRC(PVSSRC
* pPVSSRC
)
200 // negate argument (for SUB instead of ADD and alike)
201 void flipneg_PVSSRC(PVSSRC
* pPVSSRC
)
203 pPVSSRC
->negx
= !pPVSSRC
->negx
;
204 pPVSSRC
->negy
= !pPVSSRC
->negy
;
205 pPVSSRC
->negz
= !pPVSSRC
->negz
;
206 pPVSSRC
->negw
= !pPVSSRC
->negw
;
209 void zerocomp_PVSSRC(PVSSRC
* pPVSSRC
, int c
)
213 case 0: pPVSSRC
->swizzlex
= SQ_SEL_0
; pPVSSRC
->negx
= 0; break;
214 case 1: pPVSSRC
->swizzley
= SQ_SEL_0
; pPVSSRC
->negy
= 0; break;
215 case 2: pPVSSRC
->swizzlez
= SQ_SEL_0
; pPVSSRC
->negz
= 0; break;
216 case 3: pPVSSRC
->swizzlew
= SQ_SEL_0
; pPVSSRC
->negw
= 0; break;
221 void onecomp_PVSSRC(PVSSRC
* pPVSSRC
, int c
)
225 case 0: pPVSSRC
->swizzlex
= SQ_SEL_1
; pPVSSRC
->negx
= 0; break;
226 case 1: pPVSSRC
->swizzley
= SQ_SEL_1
; pPVSSRC
->negy
= 0; break;
227 case 2: pPVSSRC
->swizzlez
= SQ_SEL_1
; pPVSSRC
->negz
= 0; break;
228 case 3: pPVSSRC
->swizzlew
= SQ_SEL_1
; pPVSSRC
->negw
= 0; break;
233 BITS
is_misc_component_exported(VAP_OUT_VTX_FMT_0
* pOutVTXFmt0
)
235 return (pOutVTXFmt0
->point_size
|
236 pOutVTXFmt0
->edge_flag
|
237 pOutVTXFmt0
->rta_index
|
238 pOutVTXFmt0
->kill_flag
|
239 pOutVTXFmt0
->viewport_index
);
/*
 * is_depth_component_exported: returns the bitwise OR of fields of the
 * fragment output format *pFPOutFmt (depth, stencil_ref and coverage_to_mask
 * are the operands visible here), so the result is non-zero when any of them
 * is set.
 * NOTE(review): the embedded line numbering skips one line inside the
 * expression, so an operand appears to be missing from this extract --
 * verify against the full file.
 */
242 BITS
is_depth_component_exported(OUT_FRAGMENT_FMT_0
* pFPOutFmt
)
244 return (pFPOutFmt
->depth
|
245 pFPOutFmt
->stencil_ref
|
247 pFPOutFmt
->coverage_to_mask
);
/*
 * is_reduction_opcode: checks whether the instruction word *dest has
 * dst.op3 == 0 and a dst.opcode equal to SQ_OP2_INST_DOT4,
 * SQ_OP2_INST_DOT4_IEEE or SQ_OP2_INST_CUBE.
 * NOTE(review): the return statements are not visible in this extract;
 * presumably GL_TRUE when both conditions hold and GL_FALSE otherwise --
 * confirm against the full file.
 */
250 GLboolean
is_reduction_opcode(PVSDWORD
* dest
)
252 if (dest
->dst
.op3
== 0)
254 if ( (dest
->dst
.opcode
== SQ_OP2_INST_DOT4
|| dest
->dst
.opcode
== SQ_OP2_INST_DOT4_IEEE
|| dest
->dst
.opcode
== SQ_OP2_INST_CUBE
) )
/*
 * EG_is_reduction_opcode: same test as is_reduction_opcode but against the
 * EG_OP2_INST_* opcode values: dst.op3 == 0 and dst.opcode equal to
 * EG_OP2_INST_DOT4, EG_OP2_INST_DOT4_IEEE or EG_OP2_INST_CUBE.
 * NOTE(review): the return statements are not visible in this extract;
 * presumably GL_TRUE when both conditions hold and GL_FALSE otherwise --
 * confirm against the full file.
 */
263 GLboolean
EG_is_reduction_opcode(PVSDWORD
* dest
)
265 if (dest
->dst
.op3
== 0)
267 if ( (dest
->dst
.opcode
== EG_OP2_INST_DOT4
|| dest
->dst
.opcode
== EG_OP2_INST_DOT4_IEEE
|| dest
->dst
.opcode
== EG_OP2_INST_CUBE
) )
276 GLuint
GetSurfaceFormat(GLenum eType
, GLuint nChannels
, GLuint
* pClient_size
)
278 GLuint format
= FMT_INVALID
;
279 GLuint uiElemSize
= 0;
284 case GL_UNSIGNED_BYTE
:
289 format
= FMT_8
; break;
291 format
= FMT_8_8
; break;
293 /* for some (small/unaligned) strides using 4 comps works
294 * better, probably same as GL_SHORT below
295 * test piglit/draw-vertices */
296 format
= FMT_8_8_8_8
; break;
298 format
= FMT_8_8_8_8
; break;
304 case GL_UNSIGNED_SHORT
:
310 format
= FMT_16
; break;
312 format
= FMT_16_16
; break;
314 /* 3 comp GL_SHORT vertex format doesn't work on r700;
315 4 somehow works. Test: sauerbraten */
316 format
= FMT_16_16_16_16
; break;
318 format
= FMT_16_16_16_16
; break;
324 case GL_UNSIGNED_INT
:
330 format
= FMT_32
; break;
332 format
= FMT_32_32
; break;
334 format
= FMT_32_32_32
; break;
336 format
= FMT_32_32_32_32
; break;
347 format
= FMT_32_FLOAT
; break;
349 format
= FMT_32_32_FLOAT
; break;
351 format
= FMT_32_32_32_FLOAT
; break;
353 format
= FMT_32_32_32_32_FLOAT
; break;
363 format
= FMT_32_FLOAT
; break;
365 format
= FMT_32_32_FLOAT
; break;
367 format
= FMT_32_32_32_FLOAT
; break;
369 format
= FMT_32_32_32_32_FLOAT
; break;
376 //GL_ASSERT_NO_CASE();
379 if(NULL
!= pClient_size
)
381 *pClient_size
= uiElemSize
* nChannels
;
387 unsigned int r700GetNumOperands(GLuint opcode
, GLuint nIsOp3
)
396 case SQ_OP2_INST_ADD
:
397 case SQ_OP2_INST_KILLE
:
398 case SQ_OP2_INST_KILLGT
:
399 case SQ_OP2_INST_KILLGE
:
400 case SQ_OP2_INST_KILLNE
:
401 case SQ_OP2_INST_MUL
:
402 case SQ_OP2_INST_MAX
:
403 case SQ_OP2_INST_MIN
:
404 //case SQ_OP2_INST_MAX_DX10:
405 //case SQ_OP2_INST_MIN_DX10:
406 case SQ_OP2_INST_SETE
:
407 case SQ_OP2_INST_SETNE
:
408 case SQ_OP2_INST_SETGT
:
409 case SQ_OP2_INST_SETGE
:
410 case SQ_OP2_INST_PRED_SETE
:
411 case SQ_OP2_INST_PRED_SETGT
:
412 case SQ_OP2_INST_PRED_SETGE
:
413 case SQ_OP2_INST_PRED_SETNE
:
414 case SQ_OP2_INST_DOT4
:
415 case SQ_OP2_INST_DOT4_IEEE
:
416 case SQ_OP2_INST_CUBE
:
419 case SQ_OP2_INST_MOV
:
420 case SQ_OP2_INST_MOVA_FLOOR
:
421 case SQ_OP2_INST_FRACT
:
422 case SQ_OP2_INST_FLOOR
:
423 case SQ_OP2_INST_TRUNC
:
424 case SQ_OP2_INST_EXP_IEEE
:
425 case SQ_OP2_INST_LOG_CLAMPED
:
426 case SQ_OP2_INST_LOG_IEEE
:
427 case SQ_OP2_INST_RECIP_IEEE
:
428 case SQ_OP2_INST_RECIPSQRT_IEEE
:
429 case SQ_OP2_INST_FLT_TO_INT
:
430 case SQ_OP2_INST_SIN
:
431 case SQ_OP2_INST_COS
:
434 default: radeon_error(
435 "Need instruction operand number for %x.\n", opcode
);
441 unsigned int EG_GetNumOperands(GLuint opcode
, GLuint nIsOp3
)
450 case EG_OP2_INST_ADD
:
451 case EG_OP2_INST_KILLE
:
452 case EG_OP2_INST_KILLGT
:
453 case EG_OP2_INST_KILLGE
:
454 case EG_OP2_INST_KILLNE
:
455 case EG_OP2_INST_MUL
:
456 case EG_OP2_INST_MAX
:
457 case EG_OP2_INST_MIN
:
458 //case EG_OP2_INST_MAX_DX10:
459 //case EG_OP2_INST_MIN_DX10:
460 case EG_OP2_INST_SETE
:
461 case EG_OP2_INST_SETNE
:
462 case EG_OP2_INST_SETGT
:
463 case EG_OP2_INST_SETGE
:
464 case EG_OP2_INST_PRED_SETE
:
465 case EG_OP2_INST_PRED_SETGT
:
466 case EG_OP2_INST_PRED_SETGE
:
467 case EG_OP2_INST_PRED_SETNE
:
468 case EG_OP2_INST_DOT4
:
469 case EG_OP2_INST_DOT4_IEEE
:
470 case EG_OP2_INST_CUBE
:
473 case EG_OP2_INST_MOV
:
474 //case SQ_OP2_INST_MOVA_FLOOR:
475 case EG_OP2_INST_FRACT
:
476 case EG_OP2_INST_FLOOR
:
477 case EG_OP2_INST_TRUNC
:
478 case EG_OP2_INST_EXP_IEEE
:
479 case EG_OP2_INST_LOG_CLAMPED
:
480 case EG_OP2_INST_LOG_IEEE
:
481 case EG_OP2_INST_RECIP_IEEE
:
482 case EG_OP2_INST_RECIPSQRT_IEEE
:
483 case EG_OP2_INST_FLT_TO_INT
:
484 case EG_OP2_INST_SIN
:
485 case EG_OP2_INST_COS
:
486 case EG_OP2_INST_FLT_TO_INT_FLOOR
:
487 case EG_OP2_INST_MOVA_INT
:
490 default: radeon_error(
491 "Need instruction operand number for %x.\n", opcode
);
497 int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt
, r700_AssemblerBase
* pAsm
, R700_Shader
* pShader
)
501 Init_R700_Shader(pShader
);
502 pAsm
->pR700Shader
= pShader
;
503 pAsm
->currentShaderType
= spt
;
505 pAsm
->cf_last_export_ptr
= NULL
;
507 pAsm
->cf_current_export_clause_ptr
= NULL
;
508 pAsm
->cf_current_alu_clause_ptr
= NULL
;
509 pAsm
->cf_current_tex_clause_ptr
= NULL
;
510 pAsm
->cf_current_vtx_clause_ptr
= NULL
;
511 pAsm
->cf_current_cf_clause_ptr
= NULL
;
513 // No clause has been created yet
514 pAsm
->cf_current_clause_type
= CF_EMPTY_CLAUSE
;
516 pAsm
->number_of_colorandz_exports
= 0;
517 pAsm
->number_of_exports
= 0;
518 pAsm
->number_of_export_opcodes
= 0;
520 pAsm
->alu_x_opcode
= 0;
529 pAsm
->uLastPosUpdate
= 0;
531 *(BITS
*) &pAsm
->fp_stOutFmt0
= 0;
535 pAsm
->number_used_registers
= 0;
536 pAsm
->uUsedConsts
= 256;
540 pAsm
->uBoolConsts
= 0;
541 pAsm
->uIntConsts
= 0;
546 pAsm
->fc_stack
[0].type
= FC_NONE
;
551 pAsm
->aArgSubst
[3] = (-1);
555 for (i
=0; i
<NUMBER_OF_OUTPUT_COLORS
; i
++)
557 pAsm
->color_export_register_number
[i
] = (-1);
561 pAsm
->depth_export_register_number
= (-1);
562 pAsm
->stencil_export_register_number
= (-1);
563 pAsm
->coverage_to_mask_export_register_number
= (-1);
564 pAsm
->mask_export_register_number
= (-1);
566 pAsm
->starting_export_register_number
= 0;
567 pAsm
->starting_vfetch_register_number
= 0;
568 pAsm
->starting_temp_register_number
= 0;
569 pAsm
->uFirstHelpReg
= 0;
571 pAsm
->input_position_is_used
= GL_FALSE
;
572 pAsm
->input_normal_is_used
= GL_FALSE
;
574 for (i
=0; i
<NUMBER_OF_INPUT_COLORS
; i
++)
576 pAsm
->input_color_is_used
[ i
] = GL_FALSE
;
579 for (i
=0; i
<NUMBER_OF_TEXTURE_UNITS
; i
++)
581 pAsm
->input_texture_unit_is_used
[ i
] = GL_FALSE
;
584 for (i
=0; i
<VERT_ATTRIB_MAX
; i
++)
586 pAsm
->vfetch_instruction_ptr_array
[ i
] = NULL
;
589 pAsm
->number_of_inputs
= 0;
591 pAsm
->is_tex
= GL_FALSE
;
592 pAsm
->need_tex_barrier
= GL_FALSE
;
595 pAsm
->unSubArraySize
= 0;
596 pAsm
->unSubArrayPointer
= 0;
597 pAsm
->callers
= NULL
;
598 pAsm
->unCallerArraySize
= 0;
599 pAsm
->unCallerArrayPointer
= 0;
602 pAsm
->CALLSTACK
[0].FCSP_BeforeEntry
= 0;
603 pAsm
->CALLSTACK
[0].plstCFInstructions_local
604 = &(pAsm
->pR700Shader
->lstCFInstructions
);
606 pAsm
->CALLSTACK
[0].max
= 0;
607 pAsm
->CALLSTACK
[0].current
= 0;
609 SetActiveCFlist(pAsm
->pR700Shader
, pAsm
->CALLSTACK
[0].plstCFInstructions_local
);
613 pAsm
->presubs
= NULL
;
614 pAsm
->unPresubArraySize
= 0;
615 pAsm
->unNumPresub
= 0;
616 pAsm
->unCurNumILInsts
= 0;
618 pAsm
->unVetTexBits
= 0;
/*
 * IsTex: compares Opcode against OPCODE_TEX, OPCODE_TXP, OPCODE_TXB,
 * OPCODE_DDX, OPCODE_DDY and OPCODE_TXL.
 * NOTE(review): the branch bodies are not visible in this extract;
 * presumably GL_TRUE is returned when any comparison matches and GL_FALSE
 * otherwise -- confirm against the full file.
 */
623 GLboolean
IsTex(gl_inst_opcode Opcode
)
625 if( (OPCODE_TEX
==Opcode
) || (OPCODE_TXP
==Opcode
) || (OPCODE_TXB
==Opcode
) ||
626 (OPCODE_DDX
==Opcode
) || (OPCODE_DDY
==Opcode
) || (OPCODE_TXL
==Opcode
) )
633 GLboolean
IsAlu(gl_inst_opcode Opcode
)
635 //TODO : more for fc and ex for higher spec.
643 int check_current_clause(r700_AssemblerBase
* pAsm
,
644 CF_CLAUSE_TYPE new_clause_type
)
646 if (pAsm
->cf_current_clause_type
!= new_clause_type
)
647 { //Close last open clause
648 switch (pAsm
->cf_current_clause_type
)
651 if ( pAsm
->cf_current_alu_clause_ptr
!= NULL
)
653 pAsm
->cf_current_alu_clause_ptr
= NULL
;
657 if ( pAsm
->cf_current_vtx_clause_ptr
!= NULL
)
659 pAsm
->cf_current_vtx_clause_ptr
= NULL
;
663 if ( pAsm
->cf_current_tex_clause_ptr
!= NULL
)
665 pAsm
->cf_current_tex_clause_ptr
= NULL
;
668 case CF_EXPORT_CLAUSE
:
669 if ( pAsm
->cf_current_export_clause_ptr
!= NULL
)
671 pAsm
->cf_current_export_clause_ptr
= NULL
;
674 case CF_OTHER_CLAUSE
:
675 if ( pAsm
->cf_current_cf_clause_ptr
!= NULL
)
677 pAsm
->cf_current_cf_clause_ptr
= NULL
;
680 case CF_EMPTY_CLAUSE
:
684 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type
);
688 pAsm
->cf_current_clause_type
= CF_EMPTY_CLAUSE
;
691 switch (new_clause_type
)
694 pAsm
->cf_current_clause_type
= CF_ALU_CLAUSE
;
697 pAsm
->cf_current_clause_type
= CF_VTX_CLAUSE
;
700 pAsm
->cf_current_clause_type
= CF_TEX_CLAUSE
;
702 case CF_EXPORT_CLAUSE
:
704 R700ControlFlowSXClause
* pR700ControlFlowSXClause
705 = (R700ControlFlowSXClause
*) CALLOC_STRUCT(R700ControlFlowSXClause
);
707 // Add new export instruction to control flow program
708 if (pR700ControlFlowSXClause
!= 0)
710 pAsm
->cf_current_export_clause_ptr
= pR700ControlFlowSXClause
;
711 Init_R700ControlFlowSXClause(pR700ControlFlowSXClause
);
712 AddCFInstruction( pAsm
->pR700Shader
,
713 (R700ControlFlowInstruction
*)pR700ControlFlowSXClause
);
718 "Error allocating new EXPORT CF instruction in check_current_clause. \n");
721 pAsm
->cf_current_clause_type
= CF_EXPORT_CLAUSE
;
724 case CF_EMPTY_CLAUSE
:
726 case CF_OTHER_CLAUSE
:
727 pAsm
->cf_current_clause_type
= CF_OTHER_CLAUSE
;
731 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type
);
739 GLboolean
add_cf_instruction(r700_AssemblerBase
* pAsm
)
741 if(GL_FALSE
== check_current_clause(pAsm
, CF_OTHER_CLAUSE
))
746 pAsm
->cf_current_cf_clause_ptr
=
747 (R700ControlFlowGenericClause
*) CALLOC_STRUCT(R700ControlFlowGenericClause
);
749 if (pAsm
->cf_current_cf_clause_ptr
!= NULL
)
751 Init_R700ControlFlowGenericClause(pAsm
->cf_current_cf_clause_ptr
);
752 AddCFInstruction( pAsm
->pR700Shader
,
753 (R700ControlFlowInstruction
*)pAsm
->cf_current_cf_clause_ptr
);
757 radeon_error("Could not allocate a new VFetch CF instruction.\n");
764 GLboolean
add_vfetch_instruction(r700_AssemblerBase
* pAsm
,
765 R700VertexInstruction
* vertex_instruction_ptr
)
767 if( GL_FALSE
== check_current_clause(pAsm
, CF_VTX_CLAUSE
) )
772 if( pAsm
->cf_current_vtx_clause_ptr
== NULL
||
773 ( (pAsm
->cf_current_vtx_clause_ptr
!= NULL
) &&
774 (pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.count
>= GetCFMaxInstructions(pAsm
->cf_current_vtx_clause_ptr
->m_ShaderInstType
)-1)
777 // Create new Vfetch control flow instruction for this new clause
778 pAsm
->cf_current_vtx_clause_ptr
= (R700ControlFlowGenericClause
*) CALLOC_STRUCT(R700ControlFlowGenericClause
);
780 if (pAsm
->cf_current_vtx_clause_ptr
!= NULL
)
782 Init_R700ControlFlowGenericClause(pAsm
->cf_current_vtx_clause_ptr
);
783 AddCFInstruction( pAsm
->pR700Shader
,
784 (R700ControlFlowInstruction
*)pAsm
->cf_current_vtx_clause_ptr
);
788 radeon_error("Could not allocate a new VFetch CF instruction.\n");
792 if(8 == pAsm
->unAsic
)
794 SETfield(pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.val
, EG_CF_INST_VC
,
795 EG_CF_WORD1__CF_INST_shift
, EG_CF_WORD1__CF_INST_mask
);
796 SETfield(pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.val
, 0,
797 EG_CF_WORD1__POP_COUNT_shift
, EG_CF_WORD1__POP_COUNT_mask
);
798 SETfield(pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.val
, 0,
799 EG_CF_WORD1__CF_CONST_shift
, EG_CF_WORD1__CF_CONST_mask
);
800 SETfield(pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.val
, SQ_CF_COND_ACTIVE
,
801 EG_CF_WORD1__COND_shift
, EG_CF_WORD1__COND_mask
);
802 SETfield(pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.val
, 0,
803 EG_CF_WORD1__COUNT_shift
, EG_CF_WORD1__COUNT_mask
);
804 SETfield(pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.val
, 0,
805 EG_CF_WORD1__VPM_shift
, EG_CF_WORD1__VPM_bit
);
806 SETfield(pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.val
, 0,
807 EG_CF_WORD1__EOP_shift
, EG_CF_WORD1__EOP_bit
);
808 SETfield(pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.val
, 0,
809 EG_CF_WORD1__WQM_shift
, EG_CF_WORD1__WQM_bit
);
810 SETfield(pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.val
, 1,
811 EG_CF_WORD1__BARRIER_shift
, EG_CF_WORD1__BARRIER_bit
);
815 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.pop_count
= 0x0;
816 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
817 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
818 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.count
= 0x0;
819 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
820 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
821 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_VTX
;
822 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
823 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
826 LinkVertexInstruction(pAsm
->cf_current_vtx_clause_ptr
, vertex_instruction_ptr
);
830 if(8 == pAsm
->unAsic
)
832 unsigned int count
= GETbits(pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.val
,
833 EG_CF_WORD1__COUNT_shift
, EG_CF_WORD1__COUNT_mask
) + 1;
834 SETfield(pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.val
, count
,
835 EG_CF_WORD1__COUNT_shift
, EG_CF_WORD1__COUNT_mask
);
839 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.count
++;
843 AddVTXInstruction(pAsm
->pR700Shader
, vertex_instruction_ptr
);
848 GLboolean
add_tex_instruction(r700_AssemblerBase
* pAsm
,
849 R700TextureInstruction
* tex_instruction_ptr
)
851 if ( GL_FALSE
== check_current_clause(pAsm
, CF_TEX_CLAUSE
) )
856 if ( pAsm
->cf_current_tex_clause_ptr
== NULL
||
857 ( (pAsm
->cf_current_tex_clause_ptr
!= NULL
) &&
858 (pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.count
>= GetCFMaxInstructions(pAsm
->cf_current_tex_clause_ptr
->m_ShaderInstType
)-1)
861 // new tex cf instruction for this new clause
862 pAsm
->cf_current_tex_clause_ptr
= (R700ControlFlowGenericClause
*) CALLOC_STRUCT(R700ControlFlowGenericClause
);
864 if (pAsm
->cf_current_tex_clause_ptr
!= NULL
)
866 Init_R700ControlFlowGenericClause(pAsm
->cf_current_tex_clause_ptr
);
867 AddCFInstruction( pAsm
->pR700Shader
,
868 (R700ControlFlowInstruction
*)pAsm
->cf_current_tex_clause_ptr
);
872 radeon_error("Could not allocate a new TEX CF instruction.\n");
876 if(8 == pAsm
->unAsic
)
878 SETfield(pAsm
->cf_current_tex_clause_ptr
->m_Word1
.val
, EG_CF_INST_TC
,
879 EG_CF_WORD1__CF_INST_shift
, EG_CF_WORD1__CF_INST_mask
);
880 SETfield(pAsm
->cf_current_tex_clause_ptr
->m_Word1
.val
, 0,
881 EG_CF_WORD1__POP_COUNT_shift
, EG_CF_WORD1__POP_COUNT_mask
);
882 SETfield(pAsm
->cf_current_tex_clause_ptr
->m_Word1
.val
, 0,
883 EG_CF_WORD1__CF_CONST_shift
, EG_CF_WORD1__CF_CONST_mask
);
884 SETfield(pAsm
->cf_current_tex_clause_ptr
->m_Word1
.val
, SQ_CF_COND_ACTIVE
,
885 EG_CF_WORD1__COND_shift
, EG_CF_WORD1__COND_mask
);
886 SETfield(pAsm
->cf_current_tex_clause_ptr
->m_Word1
.val
, 0,
887 EG_CF_WORD1__COUNT_shift
, EG_CF_WORD1__COUNT_mask
);
888 SETfield(pAsm
->cf_current_tex_clause_ptr
->m_Word1
.val
, 0,
889 EG_CF_WORD1__VPM_shift
, EG_CF_WORD1__VPM_bit
);
890 SETfield(pAsm
->cf_current_tex_clause_ptr
->m_Word1
.val
, 0,
891 EG_CF_WORD1__EOP_shift
, EG_CF_WORD1__EOP_bit
);
892 SETfield(pAsm
->cf_current_tex_clause_ptr
->m_Word1
.val
, 0,
893 EG_CF_WORD1__WQM_shift
, EG_CF_WORD1__WQM_bit
);
894 #ifdef FORCE_CF_TEX_BARRIER
895 SETfield(pAsm
->cf_current_tex_clause_ptr
->m_Word1
.val
, 1,
896 EG_CF_WORD1__BARRIER_shift
, EG_CF_WORD1__BARRIER_bit
);
898 SETfield(pAsm
->cf_current_tex_clause_ptr
->m_Word1
.val
, 0,
899 EG_CF_WORD1__BARRIER_shift
, EG_CF_WORD1__BARRIER_bit
);
904 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.pop_count
= 0x0;
905 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
906 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
908 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
909 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
910 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_TEX
;
911 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
912 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.barrier
= 0x0; //0x1;
917 if(8 == pAsm
->unAsic
)
919 unsigned int count
= GETbits(pAsm
->cf_current_tex_clause_ptr
->m_Word1
.val
,
920 EG_CF_WORD1__COUNT_shift
, EG_CF_WORD1__COUNT_mask
) + 1;
921 SETfield(pAsm
->cf_current_tex_clause_ptr
->m_Word1
.val
, count
,
922 EG_CF_WORD1__COUNT_shift
, EG_CF_WORD1__COUNT_mask
);
926 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.count
++;
930 // If this clause contains any TEX instruction that is dependent on a
931 // previous instruction, set the barrier bit; it is also always set for vertex
932 // programs, as tex deps are not (yet) computed for them
933 if( pAsm
->currentShaderType
== SPT_VP
|| pAsm
->pInstDeps
[pAsm
->uiCurInst
].nDstDep
> (-1) || pAsm
->need_tex_barrier
== GL_TRUE
)
935 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
938 if(NULL
== pAsm
->cf_current_tex_clause_ptr
->m_pLinkedTEXInstruction
)
940 pAsm
->cf_current_tex_clause_ptr
->m_pLinkedTEXInstruction
= tex_instruction_ptr
;
941 tex_instruction_ptr
->m_pLinkedGenericClause
= pAsm
->cf_current_tex_clause_ptr
;
944 AddTEXInstruction(pAsm
->pR700Shader
, tex_instruction_ptr
);
949 GLboolean
assemble_vfetch_instruction(r700_AssemblerBase
* pAsm
,
951 GLuint destination_register
,
952 GLuint number_of_elements
,
953 GLenum dataElementType
,
954 VTX_FETCH_METHOD
* pFetchMethod
)
956 GLuint client_size_inbyte
;
958 GLuint mega_fetch_count
;
959 GLuint is_mega_fetch_flag
;
961 R700VertexGenericFetch
* vfetch_instruction_ptr
;
962 R700VertexGenericFetch
* assembled_vfetch_instruction_ptr
= pAsm
->vfetch_instruction_ptr_array
[ gl_client_id
];
964 if (assembled_vfetch_instruction_ptr
== NULL
)
966 vfetch_instruction_ptr
= (R700VertexGenericFetch
*) CALLOC_STRUCT(R700VertexGenericFetch
);
967 if (vfetch_instruction_ptr
== NULL
)
971 Init_R700VertexGenericFetch(vfetch_instruction_ptr
);
975 vfetch_instruction_ptr
= assembled_vfetch_instruction_ptr
;
978 data_format
= GetSurfaceFormat(dataElementType
, number_of_elements
, &client_size_inbyte
);
980 if(GL_TRUE
== pFetchMethod
->bEnableMini
) //More conditions here
983 mega_fetch_count
= 0;
984 is_mega_fetch_flag
= 0;
988 mega_fetch_count
= MEGA_FETCH_BYTES
- 1;
989 is_mega_fetch_flag
= 0x1;
990 pFetchMethod
->mega_fetch_remainder
= MEGA_FETCH_BYTES
- client_size_inbyte
;
993 vfetch_instruction_ptr
->m_Word0
.f
.vtx_inst
= SQ_VTX_INST_FETCH
;
994 vfetch_instruction_ptr
->m_Word0
.f
.fetch_type
= SQ_VTX_FETCH_VERTEX_DATA
;
995 vfetch_instruction_ptr
->m_Word0
.f
.fetch_whole_quad
= 0x0;
997 vfetch_instruction_ptr
->m_Word0
.f
.buffer_id
= gl_client_id
;
998 vfetch_instruction_ptr
->m_Word0
.f
.src_gpr
= 0x0;
999 vfetch_instruction_ptr
->m_Word0
.f
.src_rel
= SQ_ABSOLUTE
;
1000 vfetch_instruction_ptr
->m_Word0
.f
.src_sel_x
= SQ_SEL_X
;
1001 vfetch_instruction_ptr
->m_Word0
.f
.mega_fetch_count
= mega_fetch_count
;
1003 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_x
= (number_of_elements
< 1) ? SQ_SEL_0
: SQ_SEL_X
;
1004 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_y
= (number_of_elements
< 2) ? SQ_SEL_0
: SQ_SEL_Y
;
1005 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_z
= (number_of_elements
< 3) ? SQ_SEL_0
: SQ_SEL_Z
;
1006 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_w
= (number_of_elements
< 4) ? SQ_SEL_1
: SQ_SEL_W
;
1008 vfetch_instruction_ptr
->m_Word1
.f
.use_const_fields
= 1;
1010 // Destination register
1011 vfetch_instruction_ptr
->m_Word1_GPR
.f
.dst_gpr
= destination_register
;
1012 vfetch_instruction_ptr
->m_Word1_GPR
.f
.dst_rel
= SQ_ABSOLUTE
;
1014 vfetch_instruction_ptr
->m_Word2
.f
.offset
= 0;
1015 vfetch_instruction_ptr
->m_Word2
.f
.const_buf_no_stride
= 0x0;
1017 vfetch_instruction_ptr
->m_Word2
.f
.mega_fetch
= is_mega_fetch_flag
;
1019 if (assembled_vfetch_instruction_ptr
== NULL
)
1021 if ( GL_FALSE
== add_vfetch_instruction(pAsm
, (R700VertexInstruction
*)vfetch_instruction_ptr
) )
1026 if (pAsm
->vfetch_instruction_ptr_array
[ gl_client_id
] != NULL
)
1032 pAsm
->vfetch_instruction_ptr_array
[ gl_client_id
] = vfetch_instruction_ptr
;
1039 GLboolean
EG_assemble_vfetch_instruction(r700_AssemblerBase
* pAsm
,
1040 GLuint destination_register
,
1045 GLboolean normalize
,
1047 VTX_FETCH_METHOD
* pFetchMethod
)
1049 GLuint client_size_inbyte
;
1051 GLuint mega_fetch_count
;
1052 GLuint is_mega_fetch_flag
;
1054 GLuint dst_sel_x
, dst_sel_y
, dst_sel_z
, dst_sel_w
;
1056 R700VertexGenericFetch
* vfetch_instruction_ptr
;
1057 R700VertexGenericFetch
* assembled_vfetch_instruction_ptr
1058 = pAsm
->vfetch_instruction_ptr_array
[element
];
1060 if (assembled_vfetch_instruction_ptr
== NULL
)
1062 vfetch_instruction_ptr
= (R700VertexGenericFetch
*) CALLOC_STRUCT(R700VertexGenericFetch
);
1063 if (vfetch_instruction_ptr
== NULL
)
1067 Init_R700VertexGenericFetch(vfetch_instruction_ptr
);
1071 vfetch_instruction_ptr
= assembled_vfetch_instruction_ptr
;
1074 data_format
= GetSurfaceFormat(type
, size
, &client_size_inbyte
);
1076 if(GL_TRUE
== pFetchMethod
->bEnableMini
) //More conditions here
1079 mega_fetch_count
= 0;
1080 is_mega_fetch_flag
= 0;
1084 mega_fetch_count
= MEGA_FETCH_BYTES
- 1;
1085 is_mega_fetch_flag
= 0x1;
1086 pFetchMethod
->mega_fetch_remainder
= MEGA_FETCH_BYTES
- client_size_inbyte
;
1089 SETfield(vfetch_instruction_ptr
->m_Word0
.val
, EG_VC_INST_FETCH
,
1090 EG_VTX_WORD0__VC_INST_shift
,
1091 EG_VTX_WORD0__VC_INST_mask
);
1092 SETfield(vfetch_instruction_ptr
->m_Word0
.val
, EG_VTX_FETCH_VERTEX_DATA
,
1093 EG_VTX_WORD0__FETCH_TYPE_shift
,
1094 EG_VTX_WORD0__FETCH_TYPE_mask
);
1095 CLEARbit(vfetch_instruction_ptr
->m_Word0
.val
,
1096 EG_VTX_WORD0__FWQ_bit
);
1097 SETfield(vfetch_instruction_ptr
->m_Word0
.val
, element
,
1098 EG_VTX_WORD0__BUFFER_ID_shift
,
1099 EG_VTX_WORD0__BUFFER_ID_mask
);
1100 SETfield(vfetch_instruction_ptr
->m_Word0
.val
, 0x0,
1101 EG_VTX_WORD0__SRC_GPR_shift
,
1102 EG_VTX_WORD0__SRC_GPR_mask
);
1103 SETfield(vfetch_instruction_ptr
->m_Word0
.val
, SQ_ABSOLUTE
,
1104 EG_VTX_WORD0__SRC_REL_shift
,
1105 EG_VTX_WORD0__SRC_REL_bit
);
1106 SETfield(vfetch_instruction_ptr
->m_Word0
.val
, SQ_SEL_X
,
1107 EG_VTX_WORD0__SRC_SEL_X_shift
,
1108 EG_VTX_WORD0__SRC_SEL_X_mask
);
1109 SETfield(vfetch_instruction_ptr
->m_Word0
.val
, mega_fetch_count
,
1110 EG_VTX_WORD0__MFC_shift
,
1111 EG_VTX_WORD0__MFC_mask
);
1113 if(format
== GL_BGRA
)
1115 dst_sel_x
= (size
< 1) ? SQ_SEL_0
: SQ_SEL_Z
;
1116 dst_sel_y
= (size
< 2) ? SQ_SEL_0
: SQ_SEL_Y
;
1117 dst_sel_z
= (size
< 3) ? SQ_SEL_0
: SQ_SEL_X
;
1118 dst_sel_w
= (size
< 4) ? SQ_SEL_1
: SQ_SEL_W
;
1122 dst_sel_x
= (size
< 1) ? SQ_SEL_0
: SQ_SEL_X
;
1123 dst_sel_y
= (size
< 2) ? SQ_SEL_0
: SQ_SEL_Y
;
1124 dst_sel_z
= (size
< 3) ? SQ_SEL_0
: SQ_SEL_Z
;
1125 dst_sel_w
= (size
< 4) ? SQ_SEL_1
: SQ_SEL_W
;
1128 SETfield(vfetch_instruction_ptr
->m_Word1
.val
, dst_sel_x
,
1129 EG_VTX_WORD1__DST_SEL_X_shift
,
1130 EG_VTX_WORD1__DST_SEL_X_mask
);
1131 SETfield(vfetch_instruction_ptr
->m_Word1
.val
, dst_sel_y
,
1132 EG_VTX_WORD1__DST_SEL_Y_shift
,
1133 EG_VTX_WORD1__DST_SEL_Y_mask
);
1134 SETfield(vfetch_instruction_ptr
->m_Word1
.val
, dst_sel_z
,
1135 EG_VTX_WORD1__DST_SEL_Z_shift
,
1136 EG_VTX_WORD1__DST_SEL_Z_mask
);
1137 SETfield(vfetch_instruction_ptr
->m_Word1
.val
, dst_sel_w
,
1138 EG_VTX_WORD1__DST_SEL_W_shift
,
1139 EG_VTX_WORD1__DST_SEL_W_mask
);
1141 SETfield(vfetch_instruction_ptr
->m_Word1
.val
, 1,
1142 EG_VTX_WORD1__UCF_shift
,
1143 EG_VTX_WORD1__UCF_bit
);
1144 SETfield(vfetch_instruction_ptr
->m_Word1
.val
, data_format
,
1145 EG_VTX_WORD1__DATA_FORMAT_shift
,
1146 EG_VTX_WORD1__DATA_FORMAT_mask
);
1148 SETfield(vfetch_instruction_ptr
->m_Word1
.val
, SQ_FORMAT_COMP_SIGNED
,
1149 EG_VTX_WORD1__FCA_shift
,
1150 EG_VTX_WORD1__FCA_bit
);
1154 SETfield(vfetch_instruction_ptr
->m_Word1
.val
, SQ_FORMAT_COMP_SIGNED
,
1155 EG_VTX_WORD1__FCA_shift
,
1156 EG_VTX_WORD1__FCA_bit
);
1160 SETfield(vfetch_instruction_ptr
->m_Word1
.val
, SQ_FORMAT_COMP_UNSIGNED
,
1161 EG_VTX_WORD1__FCA_shift
,
1162 EG_VTX_WORD1__FCA_bit
);
1164 #endif /* TEST_VFETCH */
1166 if(GL_TRUE
== normalize
)
1168 SETfield(vfetch_instruction_ptr
->m_Word1
.val
, SQ_NUM_FORMAT_NORM
,
1169 EG_VTX_WORD1__NFA_shift
,
1170 EG_VTX_WORD1__NFA_mask
);
1174 SETfield(vfetch_instruction_ptr
->m_Word1
.val
, SQ_NUM_FORMAT_SCALED
,
1175 EG_VTX_WORD1__NFA_shift
,
1176 EG_VTX_WORD1__NFA_mask
);
1179 /* Destination register */
1180 SETfield(vfetch_instruction_ptr
->m_Word1
.val
, destination_register
,
1181 EG_VTX_WORD1_GPR__DST_GPR_shift
,
1182 EG_VTX_WORD1_GPR__DST_GPR_mask
);
1183 SETfield(vfetch_instruction_ptr
->m_Word1
.val
, SQ_ABSOLUTE
,
1184 EG_VTX_WORD1_GPR__DST_REL_shift
,
1185 EG_VTX_WORD1_GPR__DST_REL_bit
);
1188 SETfield(vfetch_instruction_ptr
->m_Word2
.val
, 0,
1189 EG_VTX_WORD2__OFFSET_shift
,
1190 EG_VTX_WORD2__OFFSET_mask
);
1191 SETfield(vfetch_instruction_ptr
->m_Word2
.val
,
1192 #ifdef MESA_BIG_ENDIAN
1197 EG_VTX_WORD2__ENDIAN_SWAP_shift
,
1198 EG_VTX_WORD2__ENDIAN_SWAP_mask
);
1199 SETfield(vfetch_instruction_ptr
->m_Word2
.val
, 0,
1200 EG_VTX_WORD2__CBNS_shift
,
1201 EG_VTX_WORD2__CBNS_bit
);
1202 SETfield(vfetch_instruction_ptr
->m_Word2
.val
, is_mega_fetch_flag
,
1203 EG_VTX_WORD2__MEGA_FETCH_shift
,
1204 EG_VTX_WORD2__MEGA_FETCH_mask
);
1206 if (assembled_vfetch_instruction_ptr
== NULL
)
1208 if ( GL_FALSE
== add_vfetch_instruction(pAsm
, (R700VertexInstruction
*)vfetch_instruction_ptr
) )
1213 if (pAsm
->vfetch_instruction_ptr_array
[element
] != NULL
)
1219 pAsm
->vfetch_instruction_ptr_array
[element
] = vfetch_instruction_ptr
;
1226 GLboolean
assemble_vfetch_instruction2(r700_AssemblerBase
* pAsm
,
1227 GLuint destination_register
,
1232 GLboolean normalize
,
1234 VTX_FETCH_METHOD
* pFetchMethod
)
1236 GLuint client_size_inbyte
;
1238 GLuint mega_fetch_count
;
1239 GLuint is_mega_fetch_flag
;
1241 R700VertexGenericFetch
* vfetch_instruction_ptr
;
1242 R700VertexGenericFetch
* assembled_vfetch_instruction_ptr
1243 = pAsm
->vfetch_instruction_ptr_array
[element
];
1245 if (assembled_vfetch_instruction_ptr
== NULL
)
1247 vfetch_instruction_ptr
= (R700VertexGenericFetch
*) CALLOC_STRUCT(R700VertexGenericFetch
);
1248 if (vfetch_instruction_ptr
== NULL
)
1252 Init_R700VertexGenericFetch(vfetch_instruction_ptr
);
1256 vfetch_instruction_ptr
= assembled_vfetch_instruction_ptr
;
1259 data_format
= GetSurfaceFormat(type
, size
, &client_size_inbyte
);
1261 if(GL_TRUE
== pFetchMethod
->bEnableMini
) //More conditions here
1264 mega_fetch_count
= 0;
1265 is_mega_fetch_flag
= 0;
1269 mega_fetch_count
= MEGA_FETCH_BYTES
- 1;
1270 is_mega_fetch_flag
= 0x1;
1271 pFetchMethod
->mega_fetch_remainder
= MEGA_FETCH_BYTES
- client_size_inbyte
;
1274 vfetch_instruction_ptr
->m_Word0
.f
.vtx_inst
= SQ_VTX_INST_FETCH
;
1275 vfetch_instruction_ptr
->m_Word0
.f
.fetch_type
= SQ_VTX_FETCH_VERTEX_DATA
;
1276 vfetch_instruction_ptr
->m_Word0
.f
.fetch_whole_quad
= 0x0;
1278 vfetch_instruction_ptr
->m_Word0
.f
.buffer_id
= element
;
1279 vfetch_instruction_ptr
->m_Word0
.f
.src_gpr
= 0x0;
1280 vfetch_instruction_ptr
->m_Word0
.f
.src_rel
= SQ_ABSOLUTE
;
1281 vfetch_instruction_ptr
->m_Word0
.f
.src_sel_x
= SQ_SEL_X
;
1282 vfetch_instruction_ptr
->m_Word0
.f
.mega_fetch_count
= mega_fetch_count
;
1284 if(format
== GL_BGRA
)
1286 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_x
= (size
< 1) ? SQ_SEL_0
: SQ_SEL_Z
;
1287 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_y
= (size
< 2) ? SQ_SEL_0
: SQ_SEL_Y
;
1288 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_z
= (size
< 3) ? SQ_SEL_0
: SQ_SEL_X
;
1289 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_w
= (size
< 4) ? SQ_SEL_1
: SQ_SEL_W
;
1293 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_x
= (size
< 1) ? SQ_SEL_0
: SQ_SEL_X
;
1294 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_y
= (size
< 2) ? SQ_SEL_0
: SQ_SEL_Y
;
1295 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_z
= (size
< 3) ? SQ_SEL_0
: SQ_SEL_Z
;
1296 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_w
= (size
< 4) ? SQ_SEL_1
: SQ_SEL_W
;
1300 vfetch_instruction_ptr
->m_Word1
.f
.use_const_fields
= 1;
1301 vfetch_instruction_ptr
->m_Word1
.f
.data_format
= data_format
;
1302 #ifdef MESA_BIG_ENDIAN
1303 vfetch_instruction_ptr
->m_Word2
.f
.endian_swap
= SQ_ENDIAN_8IN32
;
1305 vfetch_instruction_ptr
->m_Word2
.f
.endian_swap
= SQ_ENDIAN_NONE
;
1310 vfetch_instruction_ptr
->m_Word1
.f
.format_comp_all
= SQ_FORMAT_COMP_SIGNED
;
1314 vfetch_instruction_ptr
->m_Word1
.f
.format_comp_all
= SQ_FORMAT_COMP_UNSIGNED
;
1317 if(GL_TRUE
== normalize
)
1319 vfetch_instruction_ptr
->m_Word1
.f
.num_format_all
= SQ_NUM_FORMAT_NORM
;
1323 vfetch_instruction_ptr
->m_Word1
.f
.num_format_all
= SQ_NUM_FORMAT_INT
;
1326 // Destination register
1327 vfetch_instruction_ptr
->m_Word1_GPR
.f
.dst_gpr
= destination_register
;
1328 vfetch_instruction_ptr
->m_Word1_GPR
.f
.dst_rel
= SQ_ABSOLUTE
;
1330 vfetch_instruction_ptr
->m_Word2
.f
.offset
= 0;
1331 vfetch_instruction_ptr
->m_Word2
.f
.const_buf_no_stride
= 0x0;
1333 vfetch_instruction_ptr
->m_Word2
.f
.mega_fetch
= is_mega_fetch_flag
;
1335 if (assembled_vfetch_instruction_ptr
== NULL
)
1337 if ( GL_FALSE
== add_vfetch_instruction(pAsm
, (R700VertexInstruction
*)vfetch_instruction_ptr
) )
1342 if (pAsm
->vfetch_instruction_ptr_array
[element
] != NULL
)
1348 pAsm
->vfetch_instruction_ptr_array
[element
] = vfetch_instruction_ptr
;
1355 GLboolean
cleanup_vfetch_instructions(r700_AssemblerBase
* pAsm
)
1358 pAsm
->cf_current_clause_type
= CF_EMPTY_CLAUSE
;
1359 pAsm
->cf_current_vtx_clause_ptr
= NULL
;
1361 for (i
=0; i
<VERT_ATTRIB_MAX
; i
++)
1363 pAsm
->vfetch_instruction_ptr_array
[ i
] = NULL
;
1366 cleanup_vfetch_shaderinst(pAsm
->pR700Shader
);
1371 GLuint
gethelpr(r700_AssemblerBase
* pAsm
)
1373 GLuint r
= pAsm
->uHelpReg
;
1375 if (pAsm
->uHelpReg
> pAsm
->number_used_registers
)
1377 pAsm
->number_used_registers
= pAsm
->uHelpReg
;
1381 void resethelpr(r700_AssemblerBase
* pAsm
)
1383 pAsm
->uHelpReg
= pAsm
->uFirstHelpReg
;
1386 void checkop_init(r700_AssemblerBase
* pAsm
)
1389 pAsm
->aArgSubst
[0] =
1390 pAsm
->aArgSubst
[1] =
1391 pAsm
->aArgSubst
[2] =
1392 pAsm
->aArgSubst
[3] = -1;
1395 static GLboolean
next_ins(r700_AssemblerBase
*pAsm
)
1397 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1399 if (GL_TRUE
== pAsm
->is_tex
)
1401 if (pILInst
->TexSrcTarget
== TEXTURE_RECT_INDEX
)
1403 if (GL_FALSE
== assemble_tex_instruction(pAsm
, GL_FALSE
))
1405 radeon_error("Error assembling TEX instruction\n");
1411 if (GL_FALSE
== assemble_tex_instruction(pAsm
, GL_TRUE
))
1413 radeon_error("Error assembling TEX instruction\n");
1420 if (GL_FALSE
== assemble_alu_instruction(pAsm
))
1422 radeon_error("Error assembling ALU instruction\n");
1427 if (pAsm
->D
.dst
.rtype
== DST_REG_OUT
)
1429 assert(pAsm
->D
.dst
.reg
>= pAsm
->starting_export_register_number
);
1432 //reset for next inst.
1435 pAsm
->S
[0].bits
= 0;
1436 pAsm
->S
[1].bits
= 0;
1437 pAsm
->S
[2].bits
= 0;
1438 pAsm
->is_tex
= GL_FALSE
;
1439 pAsm
->need_tex_barrier
= GL_FALSE
;
1441 pAsm
->C
[0].bits
= pAsm
->C
[1].bits
= pAsm
->C
[2].bits
= pAsm
->C
[3].bits
= 0;
1445 GLboolean
mov_temp(r700_AssemblerBase
* pAsm
, int src
)
1447 GLuint tmp
= gethelpr(pAsm
);
1449 //mov src to temp helper gpr.
1450 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
1452 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1454 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
1455 pAsm
->D
.dst
.reg
= tmp
;
1457 nomask_PVSDST(&(pAsm
->D
.dst
));
1459 if( GL_FALSE
== assemble_src(pAsm
, src
, 0) )
1464 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
1465 noneg_PVSSRC(&(pAsm
->S
[0].src
));
1467 if( GL_FALSE
== next_ins(pAsm
) )
1472 pAsm
->aArgSubst
[1 + src
] = tmp
;
1477 GLboolean
checkop1(r700_AssemblerBase
* pAsm
)
1483 GLboolean
checkop2(r700_AssemblerBase
* pAsm
)
1485 GLboolean bSrcConst
[2];
1486 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1490 if( (pILInst
->SrcReg
[0].File
== PROGRAM_UNIFORM
) ||
1491 (pILInst
->SrcReg
[0].File
== PROGRAM_CONSTANT
) ||
1492 (pILInst
->SrcReg
[0].File
== PROGRAM_LOCAL_PARAM
) ||
1493 (pILInst
->SrcReg
[0].File
== PROGRAM_ENV_PARAM
) ||
1494 (pILInst
->SrcReg
[0].File
== PROGRAM_STATE_VAR
) )
1496 bSrcConst
[0] = GL_TRUE
;
1500 bSrcConst
[0] = GL_FALSE
;
1502 if( (pILInst
->SrcReg
[1].File
== PROGRAM_UNIFORM
) ||
1503 (pILInst
->SrcReg
[1].File
== PROGRAM_CONSTANT
) ||
1504 (pILInst
->SrcReg
[1].File
== PROGRAM_LOCAL_PARAM
) ||
1505 (pILInst
->SrcReg
[1].File
== PROGRAM_ENV_PARAM
) ||
1506 (pILInst
->SrcReg
[1].File
== PROGRAM_STATE_VAR
) )
1508 bSrcConst
[1] = GL_TRUE
;
1512 bSrcConst
[1] = GL_FALSE
;
1515 if( (bSrcConst
[0] == GL_TRUE
) && (bSrcConst
[1] == GL_TRUE
) )
1517 if(pILInst
->SrcReg
[0].Index
!= pILInst
->SrcReg
[1].Index
)
1519 if( GL_FALSE
== mov_temp(pAsm
, 1) )
1529 GLboolean
checkop3(r700_AssemblerBase
* pAsm
)
1531 GLboolean bSrcConst
[3];
1532 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1536 if( (pILInst
->SrcReg
[0].File
== PROGRAM_UNIFORM
) ||
1537 (pILInst
->SrcReg
[0].File
== PROGRAM_CONSTANT
) ||
1538 (pILInst
->SrcReg
[0].File
== PROGRAM_LOCAL_PARAM
) ||
1539 (pILInst
->SrcReg
[0].File
== PROGRAM_ENV_PARAM
) ||
1540 (pILInst
->SrcReg
[0].File
== PROGRAM_STATE_VAR
) )
1542 bSrcConst
[0] = GL_TRUE
;
1546 bSrcConst
[0] = GL_FALSE
;
1548 if( (pILInst
->SrcReg
[1].File
== PROGRAM_UNIFORM
) ||
1549 (pILInst
->SrcReg
[1].File
== PROGRAM_CONSTANT
) ||
1550 (pILInst
->SrcReg
[1].File
== PROGRAM_LOCAL_PARAM
) ||
1551 (pILInst
->SrcReg
[1].File
== PROGRAM_ENV_PARAM
) ||
1552 (pILInst
->SrcReg
[1].File
== PROGRAM_STATE_VAR
) )
1554 bSrcConst
[1] = GL_TRUE
;
1558 bSrcConst
[1] = GL_FALSE
;
1560 if( (pILInst
->SrcReg
[2].File
== PROGRAM_UNIFORM
) ||
1561 (pILInst
->SrcReg
[2].File
== PROGRAM_CONSTANT
) ||
1562 (pILInst
->SrcReg
[2].File
== PROGRAM_LOCAL_PARAM
) ||
1563 (pILInst
->SrcReg
[2].File
== PROGRAM_ENV_PARAM
) ||
1564 (pILInst
->SrcReg
[2].File
== PROGRAM_STATE_VAR
) )
1566 bSrcConst
[2] = GL_TRUE
;
1570 bSrcConst
[2] = GL_FALSE
;
1573 if( (GL_TRUE
== bSrcConst
[0]) &&
1574 (GL_TRUE
== bSrcConst
[1]) &&
1575 (GL_TRUE
== bSrcConst
[2]) )
1577 if( GL_FALSE
== mov_temp(pAsm
, 1) )
1581 if( GL_FALSE
== mov_temp(pAsm
, 2) )
1588 else if( (GL_TRUE
== bSrcConst
[0]) &&
1589 (GL_TRUE
== bSrcConst
[1]) )
1591 if(pILInst
->SrcReg
[0].Index
!= pILInst
->SrcReg
[1].Index
)
1593 if( GL_FALSE
== mov_temp(pAsm
, 1) )
1601 else if ( (GL_TRUE
== bSrcConst
[0]) &&
1602 (GL_TRUE
== bSrcConst
[2]) )
1604 if(pILInst
->SrcReg
[0].Index
!= pILInst
->SrcReg
[2].Index
)
1606 if( GL_FALSE
== mov_temp(pAsm
, 2) )
1614 else if( (GL_TRUE
== bSrcConst
[1]) &&
1615 (GL_TRUE
== bSrcConst
[2]) )
1617 if(pILInst
->SrcReg
[1].Index
!= pILInst
->SrcReg
[2].Index
)
1619 if( GL_FALSE
== mov_temp(pAsm
, 2) )
1631 GLboolean
assemble_src(r700_AssemblerBase
*pAsm
,
1635 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1642 if(pAsm
->aArgSubst
[1+src
] >= 0)
1645 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1646 pAsm
->S
[fld
].src
.rtype
= SRC_REG_TEMPORARY
;
1647 pAsm
->S
[fld
].src
.reg
= pAsm
->aArgSubst
[1+src
];
1651 if (1 == pILInst
->SrcReg
[src
].RelAddr
)
1653 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_RELATIVE_A0
);
1657 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1659 switch (pILInst
->SrcReg
[src
].File
)
1661 case PROGRAM_TEMPORARY
:
1662 pAsm
->S
[fld
].src
.rtype
= SRC_REG_TEMPORARY
;
1663 pAsm
->S
[fld
].src
.reg
= pILInst
->SrcReg
[src
].Index
+ pAsm
->starting_temp_register_number
;
1665 case PROGRAM_CONSTANT
:
1666 case PROGRAM_LOCAL_PARAM
:
1667 case PROGRAM_ENV_PARAM
:
1668 case PROGRAM_STATE_VAR
:
1669 case PROGRAM_UNIFORM
:
1670 pAsm
->S
[fld
].src
.rtype
= SRC_REG_CONSTANT
;
1671 if(pILInst
->SrcReg
[src
].Index
< 0)
1673 WARN_ONCE("Negative register offsets not supported yet!\n");
1674 pAsm
->S
[fld
].src
.reg
= 0;
1678 pAsm
->S
[fld
].src
.reg
= pILInst
->SrcReg
[src
].Index
;
1682 pAsm
->S
[fld
].src
.rtype
= SRC_REG_GPR
;
1683 switch (pAsm
->currentShaderType
)
1686 pAsm
->S
[fld
].src
.reg
= pAsm
->uiFP_AttributeMap
[pILInst
->SrcReg
[src
].Index
];
1689 pAsm
->S
[fld
].src
.reg
= pAsm
->ucVP_AttributeMap
[pILInst
->SrcReg
[src
].Index
];
1693 case PROGRAM_OUTPUT
:
1694 pAsm
->S
[fld
].src
.rtype
= SRC_REG_GPR
;
1695 switch (pAsm
->currentShaderType
)
1698 pAsm
->S
[fld
].src
.reg
= pAsm
->uiFP_OutputMap
[pILInst
->SrcReg
[src
].Index
];
1701 pAsm
->S
[fld
].src
.reg
= pAsm
->ucVP_OutputMap
[pILInst
->SrcReg
[src
].Index
];
1706 radeon_error("Invalid source argument type : %d \n", pILInst
->SrcReg
[src
].File
);
1711 pAsm
->S
[fld
].src
.swizzlex
= pILInst
->SrcReg
[src
].Swizzle
& 0x7;
1712 pAsm
->S
[fld
].src
.swizzley
= (pILInst
->SrcReg
[src
].Swizzle
>> 3) & 0x7;
1713 pAsm
->S
[fld
].src
.swizzlez
= (pILInst
->SrcReg
[src
].Swizzle
>> 6) & 0x7;
1714 pAsm
->S
[fld
].src
.swizzlew
= (pILInst
->SrcReg
[src
].Swizzle
>> 9) & 0x7;
1716 pAsm
->S
[fld
].src
.negx
= pILInst
->SrcReg
[src
].Negate
& 0x1;
1717 pAsm
->S
[fld
].src
.negy
= (pILInst
->SrcReg
[src
].Negate
>> 1) & 0x1;
1718 pAsm
->S
[fld
].src
.negz
= (pILInst
->SrcReg
[src
].Negate
>> 2) & 0x1;
1719 pAsm
->S
[fld
].src
.negw
= (pILInst
->SrcReg
[src
].Negate
>> 3) & 0x1;
1724 GLboolean
assemble_dst(r700_AssemblerBase
*pAsm
)
1726 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1727 switch (pILInst
->DstReg
.File
)
1729 case PROGRAM_TEMPORARY
:
1730 if (1 == pILInst
->DstReg
.RelAddr
)
1732 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_RELATIVE_A0
);
1736 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1738 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
1739 pAsm
->D
.dst
.reg
= pILInst
->DstReg
.Index
+ pAsm
->starting_temp_register_number
;
1741 case PROGRAM_ADDRESS
:
1742 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1743 pAsm
->D
.dst
.rtype
= DST_REG_A0
;
1744 pAsm
->D
.dst
.reg
= 0;
1746 case PROGRAM_OUTPUT
:
1747 if (1 == pILInst
->DstReg
.RelAddr
)
1749 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_RELATIVE_A0
);
1753 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1755 pAsm
->D
.dst
.rtype
= DST_REG_OUT
;
1756 switch (pAsm
->currentShaderType
)
1759 pAsm
->D
.dst
.reg
= pAsm
->uiFP_OutputMap
[pILInst
->DstReg
.Index
];
1762 pAsm
->D
.dst
.reg
= pAsm
->ucVP_OutputMap
[pILInst
->DstReg
.Index
];
1767 radeon_error("Invalid destination output argument type\n");
1771 pAsm
->D
.dst
.writex
= pILInst
->DstReg
.WriteMask
& 0x1;
1772 pAsm
->D
.dst
.writey
= (pILInst
->DstReg
.WriteMask
>> 1) & 0x1;
1773 pAsm
->D
.dst
.writez
= (pILInst
->DstReg
.WriteMask
>> 2) & 0x1;
1774 pAsm
->D
.dst
.writew
= (pILInst
->DstReg
.WriteMask
>> 3) & 0x1;
1776 if(pILInst
->SaturateMode
== SATURATE_ZERO_ONE
)
1778 pAsm
->D2
.dst2
.SaturateMode
= 1;
1782 pAsm
->D2
.dst2
.SaturateMode
= 0;
1788 GLboolean
tex_dst(r700_AssemblerBase
*pAsm
)
1790 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1792 if(PROGRAM_TEMPORARY
== pILInst
->DstReg
.File
)
1794 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
1795 pAsm
->D
.dst
.reg
= pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.Index
+ pAsm
->starting_temp_register_number
;
1797 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1799 else if(PROGRAM_OUTPUT
== pILInst
->DstReg
.File
)
1801 pAsm
->D
.dst
.rtype
= DST_REG_OUT
;
1802 switch (pAsm
->currentShaderType
)
1805 pAsm
->D
.dst
.reg
= pAsm
->uiFP_OutputMap
[pILInst
->DstReg
.Index
];
1808 pAsm
->D
.dst
.reg
= pAsm
->ucVP_OutputMap
[pILInst
->DstReg
.Index
];
1812 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1816 radeon_error("Invalid destination output argument type\n");
1820 pAsm
->D
.dst
.writex
= pILInst
->DstReg
.WriteMask
& 0x1;
1821 pAsm
->D
.dst
.writey
= (pILInst
->DstReg
.WriteMask
>> 1) & 0x1;
1822 pAsm
->D
.dst
.writez
= (pILInst
->DstReg
.WriteMask
>> 2) & 0x1;
1823 pAsm
->D
.dst
.writew
= (pILInst
->DstReg
.WriteMask
>> 3) & 0x1;
1828 GLboolean
tex_src(r700_AssemblerBase
*pAsm
)
1830 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1832 GLboolean bValidTexCoord
= GL_FALSE
;
1834 if(pAsm
->aArgSubst
[1] >= 0)
1836 bValidTexCoord
= GL_TRUE
;
1837 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
1838 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
1839 pAsm
->S
[0].src
.reg
= pAsm
->aArgSubst
[1];
1843 switch (pILInst
->SrcReg
[0].File
) {
1844 case PROGRAM_UNIFORM
:
1845 case PROGRAM_CONSTANT
:
1846 case PROGRAM_LOCAL_PARAM
:
1847 case PROGRAM_ENV_PARAM
:
1848 case PROGRAM_STATE_VAR
:
1850 case PROGRAM_TEMPORARY
:
1851 bValidTexCoord
= GL_TRUE
;
1852 pAsm
->S
[0].src
.reg
= pILInst
->SrcReg
[0].Index
+
1853 pAsm
->starting_temp_register_number
;
1854 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
1857 if(SPT_VP
== pAsm
->currentShaderType
)
1859 switch (pILInst
->SrcReg
[0].Index
)
1861 case VERT_ATTRIB_TEX0
:
1862 case VERT_ATTRIB_TEX1
:
1863 case VERT_ATTRIB_TEX2
:
1864 case VERT_ATTRIB_TEX3
:
1865 case VERT_ATTRIB_TEX4
:
1866 case VERT_ATTRIB_TEX5
:
1867 case VERT_ATTRIB_TEX6
:
1868 case VERT_ATTRIB_TEX7
:
1869 bValidTexCoord
= GL_TRUE
;
1870 pAsm
->S
[0].src
.reg
=
1871 pAsm
->ucVP_AttributeMap
[pILInst
->SrcReg
[0].Index
];
1872 pAsm
->S
[0].src
.rtype
= SRC_REG_GPR
;
1878 switch (pILInst
->SrcReg
[0].Index
)
1880 case FRAG_ATTRIB_WPOS
:
1881 case FRAG_ATTRIB_COL0
:
1882 case FRAG_ATTRIB_COL1
:
1883 case FRAG_ATTRIB_FOGC
:
1884 case FRAG_ATTRIB_TEX0
:
1885 case FRAG_ATTRIB_TEX1
:
1886 case FRAG_ATTRIB_TEX2
:
1887 case FRAG_ATTRIB_TEX3
:
1888 case FRAG_ATTRIB_TEX4
:
1889 case FRAG_ATTRIB_TEX5
:
1890 case FRAG_ATTRIB_TEX6
:
1891 case FRAG_ATTRIB_TEX7
:
1892 bValidTexCoord
= GL_TRUE
;
1893 pAsm
->S
[0].src
.reg
=
1894 pAsm
->uiFP_AttributeMap
[pILInst
->SrcReg
[0].Index
];
1895 pAsm
->S
[0].src
.rtype
= SRC_REG_GPR
;
1897 case FRAG_ATTRIB_FACE
:
1898 fprintf(stderr
, "FRAG_ATTRIB_FACE unsupported\n");
1900 case FRAG_ATTRIB_PNTC
:
1901 fprintf(stderr
, "FRAG_ATTRIB_PNTC unsupported\n");
1905 if( (pILInst
->SrcReg
[0].Index
>= FRAG_ATTRIB_VAR0
) ||
1906 (pILInst
->SrcReg
[0].Index
< FRAG_ATTRIB_MAX
) )
1908 bValidTexCoord
= GL_TRUE
;
1909 pAsm
->S
[0].src
.reg
=
1910 pAsm
->uiFP_AttributeMap
[pILInst
->SrcReg
[0].Index
];
1911 pAsm
->S
[0].src
.rtype
= SRC_REG_GPR
;
1919 if(GL_TRUE
== bValidTexCoord
)
1921 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
1925 radeon_error("Invalid source texcoord for TEX instruction\n");
1929 pAsm
->S
[0].src
.swizzlex
= pILInst
->SrcReg
[0].Swizzle
& 0x7;
1930 pAsm
->S
[0].src
.swizzley
= (pILInst
->SrcReg
[0].Swizzle
>> 3) & 0x7;
1931 pAsm
->S
[0].src
.swizzlez
= (pILInst
->SrcReg
[0].Swizzle
>> 6) & 0x7;
1932 pAsm
->S
[0].src
.swizzlew
= (pILInst
->SrcReg
[0].Swizzle
>> 9) & 0x7;
1934 pAsm
->S
[0].src
.negx
= pILInst
->SrcReg
[0].Negate
& 0x1;
1935 pAsm
->S
[0].src
.negy
= (pILInst
->SrcReg
[0].Negate
>> 1) & 0x1;
1936 pAsm
->S
[0].src
.negz
= (pILInst
->SrcReg
[0].Negate
>> 2) & 0x1;
1937 pAsm
->S
[0].src
.negw
= (pILInst
->SrcReg
[0].Negate
>> 3) & 0x1;
1942 GLboolean
assemble_tex_instruction(r700_AssemblerBase
*pAsm
, GLboolean normalized
)
1944 PVSSRC
* texture_coordinate_source
;
1945 PVSSRC
* texture_unit_source
;
1947 R700TextureInstruction
* tex_instruction_ptr
= (R700TextureInstruction
*) CALLOC_STRUCT(R700TextureInstruction
);
1948 if (tex_instruction_ptr
== NULL
)
1952 Init_R700TextureInstruction(tex_instruction_ptr
);
1954 texture_coordinate_source
= &(pAsm
->S
[0].src
);
1955 texture_unit_source
= &(pAsm
->S
[1].src
);
1957 if(8 == pAsm
->unAsic
) /* evergreen */
1960 SETfield(tex_instruction_ptr
->m_Word0
.val
, pAsm
->D
.dst
.opcode
,
1961 EG_TEX_WORD0__TEX_INST_shift
,
1962 EG_TEX_WORD0__TEX_INST_mask
);
1964 if( (SQ_TEX_INST_GET_GRADIENTS_H
== pAsm
->D
.dst
.opcode
)
1965 ||(SQ_TEX_INST_GET_GRADIENTS_V
== pAsm
->D
.dst
.opcode
) )
1967 /* Use fine texel derivative calculation rather than use quad derivative */
1968 SETfield(tex_instruction_ptr
->m_Word0
.val
, 1,
1969 EG_TEX_WORD0__INST_MOD_shift
,
1970 EG_TEX_WORD0__INST_MOD_mask
);
1974 SETfield(tex_instruction_ptr
->m_Word0
.val
, 0,
1975 EG_TEX_WORD0__INST_MOD_shift
,
1976 EG_TEX_WORD0__INST_MOD_mask
);
1979 CLEARbit(tex_instruction_ptr
->m_Word0
.val
, EG_TEX_WORD0__FWQ_bit
);
1981 if(SPT_VP
== pAsm
->currentShaderType
)
1983 SETfield(tex_instruction_ptr
->m_Word0
.val
, (texture_unit_source
->reg
+ VERT_ATTRIB_MAX
),
1984 EG_TEX_WORD0__RESOURCE_ID_shift
,
1985 EG_TEX_WORD0__RESOURCE_ID_mask
);
1986 pAsm
->unVetTexBits
|= 1 << texture_unit_source
->reg
;
1990 SETfield(tex_instruction_ptr
->m_Word0
.val
, texture_unit_source
->reg
,
1991 EG_TEX_WORD0__RESOURCE_ID_shift
,
1992 EG_TEX_WORD0__RESOURCE_ID_mask
);
1995 CLEARbit(tex_instruction_ptr
->m_Word0
.val
, EG_TEX_WORD0__ALT_CONST_bit
);
1996 SETfield(tex_instruction_ptr
->m_Word0
.val
, 0,
1997 EG_TEX_WORD0__RIM_shift
,
1998 EG_TEX_WORD0__RIM_mask
);
1999 SETfield(tex_instruction_ptr
->m_Word0
.val
, 0,
2000 EG_TEX_WORD0__SIM_shift
,
2001 EG_TEX_WORD0__SIM_mask
);
2005 tex_instruction_ptr
->m_Word0
.f
.tex_inst
= pAsm
->D
.dst
.opcode
;
2006 tex_instruction_ptr
->m_Word0
.f
.bc_frac_mode
= 0x0;
2007 tex_instruction_ptr
->m_Word0
.f
.fetch_whole_quad
= 0x0;
2008 tex_instruction_ptr
->m_Word0
.f
.alt_const
= 0;
2010 if(SPT_VP
== pAsm
->currentShaderType
)
2012 tex_instruction_ptr
->m_Word0
.f
.resource_id
= texture_unit_source
->reg
+ VERT_ATTRIB_MAX
;
2013 pAsm
->unVetTexBits
|= 1 << texture_unit_source
->reg
;
2017 tex_instruction_ptr
->m_Word0
.f
.resource_id
= texture_unit_source
->reg
;
2021 tex_instruction_ptr
->m_Word1
.f
.lod_bias
= 0x0;
2023 tex_instruction_ptr
->m_Word1
.f
.coord_type_x
= SQ_TEX_NORMALIZED
;
2024 tex_instruction_ptr
->m_Word1
.f
.coord_type_y
= SQ_TEX_NORMALIZED
;
2025 tex_instruction_ptr
->m_Word1
.f
.coord_type_z
= SQ_TEX_NORMALIZED
;
2026 tex_instruction_ptr
->m_Word1
.f
.coord_type_w
= SQ_TEX_NORMALIZED
;
2028 /* XXX: UNNORMALIZED tex coords have limited wrap modes */
2029 tex_instruction_ptr
->m_Word1
.f
.coord_type_x
= SQ_TEX_UNNORMALIZED
;
2030 tex_instruction_ptr
->m_Word1
.f
.coord_type_y
= SQ_TEX_UNNORMALIZED
;
2031 tex_instruction_ptr
->m_Word1
.f
.coord_type_z
= SQ_TEX_UNNORMALIZED
;
2032 tex_instruction_ptr
->m_Word1
.f
.coord_type_w
= SQ_TEX_UNNORMALIZED
;
2035 tex_instruction_ptr
->m_Word2
.f
.offset_x
= 0x0;
2036 tex_instruction_ptr
->m_Word2
.f
.offset_y
= 0x0;
2037 tex_instruction_ptr
->m_Word2
.f
.offset_z
= 0x0;
2038 tex_instruction_ptr
->m_Word2
.f
.sampler_id
= texture_unit_source
->reg
;
2041 if ( (pAsm
->D
.dst
.rtype
== DST_REG_TEMPORARY
) ||
2042 (pAsm
->D
.dst
.rtype
== DST_REG_OUT
) )
2044 if(8 == pAsm
->unAsic
) /* evergreen */
2046 SETfield(tex_instruction_ptr
->m_Word0
.val
, texture_coordinate_source
->reg
,
2047 EG_TEX_WORD0__SRC_GPR_shift
,
2048 EG_TEX_WORD0__SRC_GPR_mask
);
2049 SETfield(tex_instruction_ptr
->m_Word0
.val
, SQ_ABSOLUTE
,
2050 EG_TEX_WORD0__SRC_REL_shift
,
2051 EG_TEX_WORD0__SRC_REL_bit
);
2055 tex_instruction_ptr
->m_Word0
.f
.src_gpr
= texture_coordinate_source
->reg
;
2056 tex_instruction_ptr
->m_Word0
.f
.src_rel
= SQ_ABSOLUTE
;
2059 tex_instruction_ptr
->m_Word1
.f
.dst_gpr
= pAsm
->D
.dst
.reg
;
2060 tex_instruction_ptr
->m_Word1
.f
.dst_rel
= SQ_ABSOLUTE
;
2062 tex_instruction_ptr
->m_Word1
.f
.dst_sel_x
= (pAsm
->D
.dst
.writex
? texture_unit_source
->swizzlex
: SQ_SEL_MASK
);
2063 tex_instruction_ptr
->m_Word1
.f
.dst_sel_y
= (pAsm
->D
.dst
.writey
? texture_unit_source
->swizzley
: SQ_SEL_MASK
);
2064 tex_instruction_ptr
->m_Word1
.f
.dst_sel_z
= (pAsm
->D
.dst
.writez
? texture_unit_source
->swizzlez
: SQ_SEL_MASK
);
2065 tex_instruction_ptr
->m_Word1
.f
.dst_sel_w
= (pAsm
->D
.dst
.writew
? texture_unit_source
->swizzlew
: SQ_SEL_MASK
);
2068 tex_instruction_ptr
->m_Word2
.f
.src_sel_x
= texture_coordinate_source
->swizzlex
;
2069 tex_instruction_ptr
->m_Word2
.f
.src_sel_y
= texture_coordinate_source
->swizzley
;
2070 tex_instruction_ptr
->m_Word2
.f
.src_sel_z
= texture_coordinate_source
->swizzlez
;
2071 tex_instruction_ptr
->m_Word2
.f
.src_sel_w
= texture_coordinate_source
->swizzlew
;
2075 radeon_error("Only temp destination registers supported for TEX dest regs.\n");
2079 if( GL_FALSE
== add_tex_instruction(pAsm
, tex_instruction_ptr
) )
2087 void initialize(r700_AssemblerBase
*pAsm
)
2089 GLuint cycle
, component
;
2091 for (cycle
=0; cycle
<NUMBER_OF_CYCLES
; cycle
++)
2093 for (component
=0; component
<NUMBER_OF_COMPONENTS
; component
++)
2095 pAsm
->hw_gpr
[cycle
][component
] = (-1);
2098 for (component
=0; component
<NUMBER_OF_COMPONENTS
; component
++)
2100 pAsm
->hw_cfile_addr
[component
] = (-1);
2101 pAsm
->hw_cfile_chan
[component
] = (-1);
2105 GLboolean
assemble_alu_src(R700ALUInstruction
* alu_instruction_ptr
,
2108 BITS scalar_channel_index
,
2109 r700_AssemblerBase
*pAsm
)
2116 //--------------------------------------------------------------------------
2117 // Source for operands src0, src1.
2118 // Values [0,127] correspond to GPR[0..127].
2119 // Values [256,511] correspond to cfile constants c[0..255].
2121 //--------------------------------------------------------------------------
2122 // Other special values are shown in the list below.
2124 // 248 SQ_ALU_SRC_0: special constant 0.0.
2125 // 249 SQ_ALU_SRC_1: special constant 1.0 float.
2127 // 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
2128 // 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
2130 // 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
2131 // 253 SQ_ALU_SRC_LITERAL: literal constant.
2133 // 254 SQ_ALU_SRC_PV: previous vector result.
2134 // 255 SQ_ALU_SRC_PS: previous scalar result.
2135 //--------------------------------------------------------------------------
2137 BITS channel_swizzle
;
2138 switch (scalar_channel_index
)
2140 case 0: channel_swizzle
= pSource
->swizzlex
; break;
2141 case 1: channel_swizzle
= pSource
->swizzley
; break;
2142 case 2: channel_swizzle
= pSource
->swizzlez
; break;
2143 case 3: channel_swizzle
= pSource
->swizzlew
; break;
2144 default: channel_swizzle
= SQ_SEL_MASK
; break;
2147 if(channel_swizzle
== SQ_SEL_0
)
2149 src_sel
= SQ_ALU_SRC_0
;
2151 else if (channel_swizzle
== SQ_SEL_1
)
2153 src_sel
= SQ_ALU_SRC_1
;
2157 if ( (pSource
->rtype
== SRC_REG_TEMPORARY
) ||
2158 (pSource
->rtype
== SRC_REG_GPR
)
2161 src_sel
= pSource
->reg
;
2163 else if (pSource
->rtype
== SRC_REG_CONSTANT
)
2165 /* TODO : 4 const buffers */
2166 if(GL_TRUE
== pAsm
->bUseMemConstant
)
2168 src_sel
= pSource
->reg
+ SQ_ALU_SRC_KCACHE0_BASE
;
2169 pAsm
->kcacheUsed
= SQ_ALU_SRC_KCACHE0_BASE
;
2173 src_sel
= pSource
->reg
+ CFILE_REGISTER_OFFSET
;
2176 else if (pSource
->rtype
== SRC_REC_LITERAL
)
2178 src_sel
= SQ_ALU_SRC_LITERAL
;
2182 radeon_error("Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.\n",
2183 source_index
, pSource
->rtype
);
2188 if( ADDR_ABSOLUTE
== addrmode_PVSSRC(pSource
) )
2190 src_rel
= SQ_ABSOLUTE
;
2194 src_rel
= SQ_RELATIVE
;
2197 switch (channel_swizzle
)
2200 src_chan
= SQ_CHAN_X
;
2203 src_chan
= SQ_CHAN_Y
;
2206 src_chan
= SQ_CHAN_Z
;
2209 src_chan
= SQ_CHAN_W
;
2213 // Does not matter since src_sel controls
2214 src_chan
= SQ_CHAN_X
;
2217 radeon_error("Unknown source select value (%d) in assemble_alu_src().\n", channel_swizzle
);
2222 switch (scalar_channel_index
)
2224 case 0: src_neg
= pSource
->negx
; break;
2225 case 1: src_neg
= pSource
->negy
; break;
2226 case 2: src_neg
= pSource
->negz
; break;
2227 case 3: src_neg
= pSource
->negw
; break;
2228 default: src_neg
= 0; break;
2231 switch (source_index
)
2234 assert(alu_instruction_ptr
);
2235 alu_instruction_ptr
->m_Word0
.f
.src0_sel
= src_sel
;
2236 alu_instruction_ptr
->m_Word0
.f
.src0_rel
= src_rel
;
2237 alu_instruction_ptr
->m_Word0
.f
.src0_chan
= src_chan
;
2238 alu_instruction_ptr
->m_Word0
.f
.src0_neg
= src_neg
;
2241 assert(alu_instruction_ptr
);
2242 alu_instruction_ptr
->m_Word0
.f
.src1_sel
= src_sel
;
2243 alu_instruction_ptr
->m_Word0
.f
.src1_rel
= src_rel
;
2244 alu_instruction_ptr
->m_Word0
.f
.src1_chan
= src_chan
;
2245 alu_instruction_ptr
->m_Word0
.f
.src1_neg
= src_neg
;
2248 assert(alu_instruction_ptr
);
2249 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_sel
= src_sel
;
2250 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_rel
= src_rel
;
2251 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_chan
= src_chan
;
2252 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_neg
= src_neg
;
2255 radeon_error("Only three sources allowed in ALU opcodes.\n");
2263 GLboolean
add_alu_instruction(r700_AssemblerBase
* pAsm
,
2264 R700ALUInstruction
* alu_instruction_ptr
,
2265 GLuint contiguous_slots_needed
)
2267 if( GL_FALSE
== check_current_clause(pAsm
, CF_ALU_CLAUSE
) )
2272 if ( pAsm
->alu_x_opcode
!= 0 ||
2273 pAsm
->cf_current_alu_clause_ptr
== NULL
||
2274 ( (pAsm
->cf_current_alu_clause_ptr
!= NULL
) &&
2275 (pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
>= (GetCFMaxInstructions(pAsm
->cf_current_alu_clause_ptr
->m_ShaderInstType
)-contiguous_slots_needed
-1) )
2279 //new cf inst for this clause
2280 pAsm
->cf_current_alu_clause_ptr
= (R700ControlFlowALUClause
*) CALLOC_STRUCT(R700ControlFlowALUClause
);
2282 // link the new cf to cf segment
2283 if(NULL
!= pAsm
->cf_current_alu_clause_ptr
)
2285 Init_R700ControlFlowALUClause(pAsm
->cf_current_alu_clause_ptr
);
2286 AddCFInstruction( pAsm
->pR700Shader
,
2287 (R700ControlFlowInstruction
*)pAsm
->cf_current_alu_clause_ptr
);
2291 radeon_error("Could not allocate a new ALU CF instruction.\n");
2295 pAsm
->cf_current_alu_clause_ptr
->m_Word0
.f
.kcache_bank0
= 0x0;
2296 pAsm
->cf_current_alu_clause_ptr
->m_Word0
.f
.kcache_bank1
= 0x0;
2297 pAsm
->cf_current_alu_clause_ptr
->m_Word0
.f
.kcache_mode0
= SQ_CF_KCACHE_NOP
;
2299 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.kcache_mode1
= SQ_CF_KCACHE_NOP
;
2300 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.kcache_addr0
= 0x0;
2301 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.kcache_addr1
= 0x0;
2303 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
= 0x0;
2305 if(pAsm
->alu_x_opcode
!= 0)
2307 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.cf_inst
= pAsm
->alu_x_opcode
;
2308 pAsm
->alu_x_opcode
= 0;
2312 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_ALU
;
2315 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
2317 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
2321 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
+= (GetInstructionSize(alu_instruction_ptr
->m_ShaderInstType
) / 2);
2324 /* TODO : handle 4 bufs */
2325 if( (pAsm
->kcacheUsed
> 0) && (GL_TRUE
== pAsm
->bUseMemConstant
) )
2327 pAsm
->cf_current_alu_clause_ptr
->m_Word0
.f
.kcache_bank0
= 0x0;
2328 pAsm
->cf_current_alu_clause_ptr
->m_Word0
.f
.kcache_bank1
= 0x0;
2329 pAsm
->cf_current_alu_clause_ptr
->m_Word0
.f
.kcache_mode0
= SQ_CF_KCACHE_LOCK_2
;
2330 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.kcache_mode1
= SQ_CF_KCACHE_NOP
;
2331 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.kcache_addr0
= 0x0;
2332 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.kcache_addr1
= 0x0;
2335 // If this clause constains any instruction that is forward dependent on a TEX instruction,
2336 // set the whole_quad_mode for this clause
2337 if ( pAsm
->pInstDeps
[pAsm
->uiCurInst
].nDstDep
> (-1) )
2339 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x1;
2342 if (pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
>= (GetCFMaxInstructions(pAsm
->cf_current_alu_clause_ptr
->m_ShaderInstType
)-1) )
2344 alu_instruction_ptr
->m_Word0
.f
.last
= 1;
2347 if(NULL
== pAsm
->cf_current_alu_clause_ptr
->m_pLinkedALUInstruction
)
2349 pAsm
->cf_current_alu_clause_ptr
->m_pLinkedALUInstruction
= alu_instruction_ptr
;
2350 alu_instruction_ptr
->m_pLinkedALUClause
= pAsm
->cf_current_alu_clause_ptr
;
2353 AddALUInstruction(pAsm
->pR700Shader
, alu_instruction_ptr
);
2358 GLboolean
EG_add_ps_interp(r700_AssemblerBase
* pAsm
)
2360 R700ALUInstruction
* alu_instruction_ptr
= NULL
;
2364 unsigned int unWord0Temp
= 0x380C00;
2365 unsigned int unWord1Temp
= 0x146B10; //SQ_SEL_X
2369 for(ui
=(pAsm
->uIIns
-1); ui
>=0; ui
--)
2371 for(uj
=0; uj
<8; uj
++)
2373 alu_instruction_ptr
= (R700ALUInstruction
*) CALLOC_STRUCT(R700ALUInstruction
);
2374 Init_R700ALUInstruction(alu_instruction_ptr
);
2375 alu_instruction_ptr
->m_Word0
.val
= unWord0Temp
;
2376 alu_instruction_ptr
->m_Word1
.val
= unWord1Temp
;
2380 SETfield(alu_instruction_ptr
->m_Word1
.val
, EG_OP2_INST_INTERP_ZW
,
2381 EG_ALU_WORD1_OP2__ALU_INST_shift
, EG_ALU_WORD1_OP2__ALU_INST_mask
);
2385 SETfield(alu_instruction_ptr
->m_Word1
.val
, EG_OP2_INST_INTERP_XY
,
2386 EG_ALU_WORD1_OP2__ALU_INST_shift
, EG_ALU_WORD1_OP2__ALU_INST_mask
);
2388 if( (uj
> 1) && (uj
< 6) )
2390 SETfield(alu_instruction_ptr
->m_Word1
.val
, 1,
2391 EG_ALU_WORD1_OP2__WRITE_MASK_shift
, EG_ALU_WORD1_OP2__WRITE_MASK_bit
);
2395 SETfield(alu_instruction_ptr
->m_Word1
.val
, 0,
2396 EG_ALU_WORD1_OP2__WRITE_MASK_shift
, EG_ALU_WORD1_OP2__WRITE_MASK_bit
);
2398 if( (uj
> 1) && (uj
< 6) )
2400 SETfield(alu_instruction_ptr
->m_Word1
.val
, ui
,
2401 EG_ALU_WORD1__DST_GPR_shift
, EG_ALU_WORD1__DST_GPR_mask
);
2405 SETfield(alu_instruction_ptr
->m_Word1
.val
, 111,
2406 EG_ALU_WORD1__DST_GPR_shift
, EG_ALU_WORD1__DST_GPR_mask
);
2409 SETfield(alu_instruction_ptr
->m_Word1
.val
, (uj
% 4),
2410 EG_ALU_WORD1__DST_CHAN_shift
, EG_ALU_WORD1__DST_CHAN_mask
);
2411 SETfield(alu_instruction_ptr
->m_Word0
.val
, (1 - (uj
% 2)),
2412 EG_ALU_WORD0__SRC0_CHAN_shift
, EG_ALU_WORD0__SRC0_CHAN_mask
);
2413 SETfield(alu_instruction_ptr
->m_Word0
.val
, (EG_ALU_SRC_PARAM_BASE
+ ui
),
2414 EG_ALU_WORD0__SRC1_SEL_shift
, EG_ALU_WORD0__SRC1_SEL_mask
);
2417 SETfield(alu_instruction_ptr
->m_Word0
.val
, 1,
2418 EG_ALU_WORD0__LAST_shift
, EG_ALU_WORD0__LAST_bit
);
2421 if(GL_FALSE
== add_alu_instruction(pAsm
, alu_instruction_ptr
, 4) )
2432 void get_src_properties(R700ALUInstruction
* alu_instruction_ptr
,
2439 switch (source_index
)
2442 *psrc_sel
= alu_instruction_ptr
->m_Word0
.f
.src0_sel
;
2443 *psrc_rel
= alu_instruction_ptr
->m_Word0
.f
.src0_rel
;
2444 *psrc_chan
= alu_instruction_ptr
->m_Word0
.f
.src0_chan
;
2445 *psrc_neg
= alu_instruction_ptr
->m_Word0
.f
.src0_neg
;
2449 *psrc_sel
= alu_instruction_ptr
->m_Word0
.f
.src1_sel
;
2450 *psrc_rel
= alu_instruction_ptr
->m_Word0
.f
.src1_rel
;
2451 *psrc_chan
= alu_instruction_ptr
->m_Word0
.f
.src1_chan
;
2452 *psrc_neg
= alu_instruction_ptr
->m_Word0
.f
.src1_neg
;
2456 *psrc_sel
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_sel
;
2457 *psrc_rel
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_rel
;
2458 *psrc_chan
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_chan
;
2459 *psrc_neg
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_neg
;
2464 int is_cfile(BITS sel
)
2466 if (sel
> 255 && sel
< 512)
2473 int is_const(BITS sel
)
2479 else if(sel
>= SQ_ALU_SRC_0
&& sel
<= SQ_ALU_SRC_LITERAL
)
2486 int is_gpr(BITS sel
)
2488 if (sel
>= 0 && sel
< 128)
2495 const GLuint BANK_SWIZZLE_VEC
[8] = {SQ_ALU_VEC_210
, //000
2496 SQ_ALU_VEC_120
, //001
2497 SQ_ALU_VEC_102
, //010
2499 SQ_ALU_VEC_201
, //011
2500 SQ_ALU_VEC_012
, //100
2501 SQ_ALU_VEC_021
, //101
2503 SQ_ALU_VEC_012
, //110
2504 SQ_ALU_VEC_012
}; //111
2506 const GLuint BANK_SWIZZLE_SCL
[8] = {SQ_ALU_SCL_210
, //000
2507 SQ_ALU_SCL_122
, //001
2508 SQ_ALU_SCL_122
, //010
2510 SQ_ALU_SCL_221
, //011
2511 SQ_ALU_SCL_212
, //100
2512 SQ_ALU_SCL_122
, //101
2514 SQ_ALU_SCL_122
, //110
2515 SQ_ALU_SCL_122
}; //111
2517 GLboolean
reserve_cfile(r700_AssemblerBase
* pAsm
,
2521 int res_match
= (-1);
2522 int res_empty
= (-1);
2526 for (res
=3; res
>=0; res
--)
2528 if(pAsm
->hw_cfile_addr
[ res
] < 0)
2532 else if( (pAsm
->hw_cfile_addr
[res
] == (int)sel
)
2534 (pAsm
->hw_cfile_chan
[ res
] == (int) chan
) )
2542 // Read for this scalar component already reserved, nothing to do here.
2545 else if(res_empty
>= 0)
2547 pAsm
->hw_cfile_addr
[ res_empty
] = sel
;
2548 pAsm
->hw_cfile_chan
[ res_empty
] = chan
;
2552 radeon_error("All cfile read ports are used, cannot reference C$sel, channel $chan.\n");
2558 GLboolean
reserve_gpr(r700_AssemblerBase
* pAsm
, GLuint sel
, GLuint chan
, GLuint cycle
)
2560 if(pAsm
->hw_gpr
[cycle
][chan
] < 0)
2562 pAsm
->hw_gpr
[cycle
][chan
] = sel
;
2564 else if(pAsm
->hw_gpr
[cycle
][chan
] != (int)sel
)
2566 radeon_error("Another scalar operation has already used GPR read port for given channel\n");
2573 GLboolean
cycle_for_scalar_bank_swizzle(const int swiz
, const int sel
, GLuint
* pCycle
)
2577 case SQ_ALU_SCL_210
:
2579 int table
[3] = {2, 1, 0};
2580 *pCycle
= table
[sel
];
2584 case SQ_ALU_SCL_122
:
2586 int table
[3] = {1, 2, 2};
2587 *pCycle
= table
[sel
];
2591 case SQ_ALU_SCL_212
:
2593 int table
[3] = {2, 1, 2};
2594 *pCycle
= table
[sel
];
2598 case SQ_ALU_SCL_221
:
2600 int table
[3] = {2, 2, 1};
2601 *pCycle
= table
[sel
];
2606 radeon_error("Bad Scalar bank swizzle value\n");
2613 GLboolean
cycle_for_vector_bank_swizzle(const int swiz
, const int sel
, GLuint
* pCycle
)
2617 case SQ_ALU_VEC_012
:
2619 int table
[3] = {0, 1, 2};
2620 *pCycle
= table
[sel
];
2623 case SQ_ALU_VEC_021
:
2625 int table
[3] = {0, 2, 1};
2626 *pCycle
= table
[sel
];
2629 case SQ_ALU_VEC_120
:
2631 int table
[3] = {1, 2, 0};
2632 *pCycle
= table
[sel
];
2635 case SQ_ALU_VEC_102
:
2637 int table
[3] = {1, 0, 2};
2638 *pCycle
= table
[sel
];
2641 case SQ_ALU_VEC_201
:
2643 int table
[3] = {2, 0, 1};
2644 *pCycle
= table
[sel
];
2647 case SQ_ALU_VEC_210
:
2649 int table
[3] = {2, 1, 0};
2650 *pCycle
= table
[sel
];
2654 radeon_error("Bad Vec bank swizzle value\n");
2662 GLboolean
check_scalar(r700_AssemblerBase
* pAsm
,
2663 R700ALUInstruction
* alu_instruction_ptr
)
2666 GLuint bank_swizzle
;
2667 GLuint const_count
= 0;
2676 BITS src_sel
[3] = {0,0,0};
2677 BITS src_chan
[3] = {0,0,0};
2678 BITS src_rel
[3] = {0,0,0};
2679 BITS src_neg
[3] = {0,0,0};
2682 GLuint number_of_operands
;
2684 if(8 == pAsm
->unAsic
)
2686 number_of_operands
= EG_GetNumOperands(pAsm
->D
.dst
.opcode
, pAsm
->D
.dst
.op3
);
2690 number_of_operands
= r700GetNumOperands(pAsm
->D
.dst
.opcode
, pAsm
->D
.dst
.op3
);
2693 for (src
=0; src
<number_of_operands
; src
++)
2695 get_src_properties(alu_instruction_ptr
,
2704 swizzle_key
= ( (is_const( src_sel
[0] ) ? 4 : 0) +
2705 (is_const( src_sel
[1] ) ? 2 : 0) +
2706 (is_const( src_sel
[2] ) ? 1 : 0) );
2708 alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
= BANK_SWIZZLE_SCL
[ swizzle_key
];
2710 for (src
=0; src
<number_of_operands
; src
++)
2712 sel
= src_sel
[src
];
2713 chan
= src_chan
[src
];
2714 rel
= src_rel
[src
];
2715 neg
= src_neg
[src
];
2717 if (is_const( sel
))
2719 // Any constant, including literal and inline constants
2722 if (is_cfile( sel
))
2724 reserve_cfile(pAsm
, sel
, chan
);
2730 for (src
=0; src
<number_of_operands
; src
++)
2732 sel
= src_sel
[src
];
2733 chan
= src_chan
[src
];
2734 rel
= src_rel
[src
];
2735 neg
= src_neg
[src
];
2739 bank_swizzle
= alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
;
2741 if( GL_FALSE
== cycle_for_scalar_bank_swizzle(bank_swizzle
, src
, &cycle
) )
2746 if(cycle
< const_count
)
2748 if( GL_FALSE
== reserve_gpr(pAsm
, sel
, chan
, cycle
) )
2759 GLboolean
check_vector(r700_AssemblerBase
* pAsm
,
2760 R700ALUInstruction
* alu_instruction_ptr
)
2763 GLuint bank_swizzle
;
2764 GLuint const_count
= 0;
2773 BITS src_sel
[3] = {0,0,0};
2774 BITS src_chan
[3] = {0,0,0};
2775 BITS src_rel
[3] = {0,0,0};
2776 BITS src_neg
[3] = {0,0,0};
2779 GLuint number_of_operands
;
2781 if(8 == pAsm
->unAsic
)
2783 number_of_operands
= EG_GetNumOperands(pAsm
->D
.dst
.opcode
, pAsm
->D
.dst
.op3
);
2787 number_of_operands
= r700GetNumOperands(pAsm
->D
.dst
.opcode
, pAsm
->D
.dst
.op3
);
2790 for (src
=0; src
<number_of_operands
; src
++)
2792 get_src_properties(alu_instruction_ptr
,
2801 swizzle_key
= ( (is_const( src_sel
[0] ) ? 4 : 0) +
2802 (is_const( src_sel
[1] ) ? 2 : 0) +
2803 (is_const( src_sel
[2] ) ? 1 : 0)
2806 alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
= BANK_SWIZZLE_VEC
[swizzle_key
];
2808 for (src
=0; src
<number_of_operands
; src
++)
2810 sel
= src_sel
[src
];
2811 chan
= src_chan
[src
];
2812 rel
= src_rel
[src
];
2813 neg
= src_neg
[src
];
2816 bank_swizzle
= alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
;
2820 if( GL_FALSE
== cycle_for_vector_bank_swizzle(bank_swizzle
, src
, &cycle
) )
2826 (sel
== src_sel
[0]) &&
2827 (chan
== src_chan
[0]) )
2832 if( GL_FALSE
== reserve_gpr(pAsm
, sel
, chan
, cycle
) )
2838 else if( is_const(sel
) )
2844 if( GL_FALSE
== reserve_cfile(pAsm
, sel
, chan
) )
2855 GLboolean
assemble_alu_instruction(r700_AssemblerBase
*pAsm
)
2857 R700ALUInstruction
* alu_instruction_ptr
= NULL
;
2858 R700ALUInstructionHalfLiteral
* alu_instruction_ptr_hl
;
2859 R700ALUInstructionFullLiteral
* alu_instruction_ptr_fl
;
2861 GLuint number_of_scalar_operations
;
2862 GLboolean is_single_scalar_operation
;
2863 GLuint scalar_channel_index
;
2865 PVSSRC
* pcurrent_source
;
2866 int current_source_index
;
2867 GLuint contiguous_slots_needed
;
2869 GLboolean bSplitInst
;
2871 if(8 == pAsm
->unAsic
)
2873 uNumSrc
= EG_GetNumOperands(pAsm
->D
.dst
.opcode
, pAsm
->D
.dst
.op3
);
2877 uNumSrc
= r700GetNumOperands(pAsm
->D
.dst
.opcode
, pAsm
->D
.dst
.op3
);
2880 //GLuint channel_swizzle, j;
2881 //GLuint chan_counter[4] = {0, 0, 0, 0};
2882 //PVSSRC * pSource[3];
2883 bSplitInst
= GL_FALSE
;
2884 pAsm
->kcacheUsed
= 0;
2886 if (1 == pAsm
->D
.dst
.math
)
2888 is_single_scalar_operation
= GL_TRUE
;
2889 number_of_scalar_operations
= 1;
2893 is_single_scalar_operation
= GL_FALSE
;
2894 number_of_scalar_operations
= 4;
2896 /* current assembler doesn't do more than 1 register per source */
2898 /* check read port, only very preliminary algorithm, not count in
2899 src0/1 same comp case and prev slot repeat case; also not count relative
2900 addressing. TODO: improve performance. */
2901 for(j
=0; j
<uNumSrc
; j
++)
2903 pSource
[j
] = &(pAsm
->S
[j
].src
);
2905 for(scalar_channel_index
=0; scalar_channel_index
<4; scalar_channel_index
++)
2907 for(j
=0; j
<uNumSrc
; j
++)
2909 switch (scalar_channel_index
)
2911 case 0: channel_swizzle
= pSource
[j
]->swizzlex
; break;
2912 case 1: channel_swizzle
= pSource
[j
]->swizzley
; break;
2913 case 2: channel_swizzle
= pSource
[j
]->swizzlez
; break;
2914 case 3: channel_swizzle
= pSource
[j
]->swizzlew
; break;
2915 default: channel_swizzle
= SQ_SEL_MASK
; break;
2917 if ( ((pSource
[j
]->rtype
== SRC_REG_TEMPORARY
) ||
2918 (pSource
[j
]->rtype
== SRC_REG_GPR
))
2919 && (channel_swizzle
<= SQ_SEL_W
) )
2921 chan_counter
[channel_swizzle
]++;
2925 if( (chan_counter
[SQ_SEL_X
] > 3)
2926 || (chan_counter
[SQ_SEL_Y
] > 3)
2927 || (chan_counter
[SQ_SEL_Z
] > 3)
2928 || (chan_counter
[SQ_SEL_W
] > 3) ) /* each chan bank has only 3 ports. */
2930 bSplitInst
= GL_TRUE
;
2935 contiguous_slots_needed
= 0;
2937 if(!is_single_scalar_operation
)
2939 contiguous_slots_needed
= 4;
2942 contiguous_slots_needed
+= pAsm
->D2
.dst2
.literal_slots
;
2946 for (scalar_channel_index
=0;
2947 scalar_channel_index
< number_of_scalar_operations
;
2948 scalar_channel_index
++)
2950 if(scalar_channel_index
== (number_of_scalar_operations
-1))
2952 switch(pAsm
->D2
.dst2
.literal_slots
)
2955 alu_instruction_ptr
= (R700ALUInstruction
*) CALLOC_STRUCT(R700ALUInstruction
);
2956 Init_R700ALUInstruction(alu_instruction_ptr
);
2959 alu_instruction_ptr_hl
= (R700ALUInstructionHalfLiteral
*) CALLOC_STRUCT(R700ALUInstructionHalfLiteral
);
2960 Init_R700ALUInstructionHalfLiteral(alu_instruction_ptr_hl
, pAsm
->C
[0].f
, pAsm
->C
[1].f
);
2961 alu_instruction_ptr
= (R700ALUInstruction
*)alu_instruction_ptr_hl
;
2964 alu_instruction_ptr_fl
= (R700ALUInstructionFullLiteral
*) CALLOC_STRUCT(R700ALUInstructionFullLiteral
);
2965 Init_R700ALUInstructionFullLiteral(alu_instruction_ptr_fl
,pAsm
->C
[0].f
, pAsm
->C
[1].f
, pAsm
->C
[2].f
, pAsm
->C
[3].f
);
2966 alu_instruction_ptr
= (R700ALUInstruction
*)alu_instruction_ptr_fl
;
2972 alu_instruction_ptr
= (R700ALUInstruction
*) CALLOC_STRUCT(R700ALUInstruction
);
2973 Init_R700ALUInstruction(alu_instruction_ptr
);
2977 current_source_index
= 0;
2978 pcurrent_source
= &(pAsm
->S
[0].src
);
2980 if (GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2981 current_source_index
,
2983 scalar_channel_index
,
2992 current_source_index
= 1;
2993 pcurrent_source
= &(pAsm
->S
[current_source_index
].src
);
2995 if (GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2996 current_source_index
,
2998 scalar_channel_index
,
3006 alu_instruction_ptr
->m_Word0
.f
.index_mode
= pAsm
->D2
.dst2
.index_mode
;
3008 if( (is_single_scalar_operation
== GL_TRUE
)
3009 || (GL_TRUE
== bSplitInst
) )
3011 alu_instruction_ptr
->m_Word0
.f
.last
= 1;
3015 alu_instruction_ptr
->m_Word0
.f
.last
= (scalar_channel_index
== 3) ? 1 : 0;
3018 alu_instruction_ptr
->m_Word0
.f
.pred_sel
= (pAsm
->D
.dst
.pred_inv
> 0) ? 1 : 0;
3019 if(1 == pAsm
->D
.dst
.predicated
)
3021 alu_instruction_ptr
->m_Word1_OP2
.f
.update_pred
= 0x1;
3022 alu_instruction_ptr
->m_Word1_OP2
.f
.update_execute_mask
= 0x1;
3026 alu_instruction_ptr
->m_Word1_OP2
.f
.update_pred
= 0x0;
3027 alu_instruction_ptr
->m_Word1_OP2
.f
.update_execute_mask
= 0x0;
3031 if( (pAsm
->D
.dst
.rtype
== DST_REG_TEMPORARY
) ||
3032 (pAsm
->D
.dst
.rtype
== DST_REG_OUT
) )
3034 alu_instruction_ptr
->m_Word1
.f
.dst_gpr
= pAsm
->D
.dst
.reg
;
3038 radeon_error("Only temp destination registers supported for ALU dest regs.\n");
3042 if ( ADDR_RELATIVE_A0
== addrmode_PVSDST(&(pAsm
->D
.dst
)) )
3044 alu_instruction_ptr
->m_Word1
.f
.dst_rel
= SQ_RELATIVE
;
3048 alu_instruction_ptr
->m_Word1
.f
.dst_rel
= SQ_ABSOLUTE
;
3051 if ( is_single_scalar_operation
== GL_TRUE
)
3053 // Override scalar_channel_index since only one scalar value will be written
3054 if(pAsm
->D
.dst
.writex
)
3056 scalar_channel_index
= 0;
3058 else if(pAsm
->D
.dst
.writey
)
3060 scalar_channel_index
= 1;
3062 else if(pAsm
->D
.dst
.writez
)
3064 scalar_channel_index
= 2;
3066 else if(pAsm
->D
.dst
.writew
)
3068 scalar_channel_index
= 3;
3072 alu_instruction_ptr
->m_Word1
.f
.dst_chan
= scalar_channel_index
;
3074 alu_instruction_ptr
->m_Word1
.f
.clamp
= pAsm
->D2
.dst2
.SaturateMode
;
3076 if (pAsm
->D
.dst
.op3
)
3080 alu_instruction_ptr
->m_Word1_OP3
.f
.alu_inst
= pAsm
->D
.dst
.opcode
;
3082 //There's 3rd src for op3
3083 current_source_index
= 2;
3084 pcurrent_source
= &(pAsm
->S
[current_source_index
].src
);
3086 if ( GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
3087 current_source_index
,
3089 scalar_channel_index
,
3100 alu_instruction_ptr
->m_Word1_OP2
.f6
.alu_inst
= pAsm
->D
.dst
.opcode
;
3102 alu_instruction_ptr
->m_Word1_OP2
.f6
.src0_abs
= pAsm
->S
[0].src
.abs
;
3103 alu_instruction_ptr
->m_Word1_OP2
.f6
.src1_abs
= pAsm
->S
[1].src
.abs
;
3105 //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
3106 //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0;
3107 switch (scalar_channel_index
)
3110 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writex
;
3113 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writey
;
3116 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writez
;
3119 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writew
;
3122 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= 1; //SQ_SEL_MASK;
3125 alu_instruction_ptr
->m_Word1_OP2
.f6
.omod
= SQ_ALU_OMOD_OFF
;
3129 alu_instruction_ptr
->m_Word1_OP2
.f
.alu_inst
= pAsm
->D
.dst
.opcode
;
3131 alu_instruction_ptr
->m_Word1_OP2
.f
.src0_abs
= pAsm
->S
[0].src
.abs
;
3132 alu_instruction_ptr
->m_Word1_OP2
.f
.src1_abs
= pAsm
->S
[1].src
.abs
;
3134 //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
3135 //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
3136 switch (scalar_channel_index
)
3139 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writex
;
3142 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writey
;
3145 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writez
;
3148 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writew
;
3151 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= 1; //SQ_SEL_MASK;
3154 alu_instruction_ptr
->m_Word1_OP2
.f
.omod
= SQ_ALU_OMOD_OFF
;
3158 if(GL_FALSE
== add_alu_instruction(pAsm
, alu_instruction_ptr
, contiguous_slots_needed
) )
3164 * Judge the type of current instruction, is it vector or scalar
3167 if (is_single_scalar_operation
)
3169 if(GL_FALSE
== check_scalar(pAsm
, alu_instruction_ptr
) )
3176 if(GL_FALSE
== check_vector(pAsm
, alu_instruction_ptr
) )
3182 contiguous_slots_needed
-= 1;
3188 GLboolean
assemble_math_function(r700_AssemblerBase
* pAsm
, BITS opcode
)
3194 tmp
= gethelpr(pAsm
);
3196 // opcode tmp.x, a.x
3199 pAsm
->D
.dst
.opcode
= opcode
;
3200 pAsm
->D
.dst
.math
= 1;
3202 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3203 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3204 pAsm
->D
.dst
.reg
= tmp
;
3205 pAsm
->D
.dst
.writex
= 1;
3207 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3212 if( pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_RSQ
)
3213 pAsm
->S
[0].src
.abs
= 1;
3215 if ( GL_FALSE
== next_ins(pAsm
) )
3220 // Now replicate result to all necessary channels in destination
3221 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3223 if( GL_FALSE
== assemble_dst(pAsm
) )
3228 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3229 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3230 pAsm
->S
[0].src
.reg
= tmp
;
3232 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3233 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3235 if( GL_FALSE
== next_ins(pAsm
) )
3243 GLboolean
assemble_ABS(r700_AssemblerBase
*pAsm
)
3247 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
3249 if( GL_FALSE
== assemble_dst(pAsm
) )
3253 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3258 pAsm
->S
[1].bits
= pAsm
->S
[0].bits
;
3259 flipneg_PVSSRC(&(pAsm
->S
[1].src
));
3261 if ( GL_FALSE
== next_ins(pAsm
) )
3269 GLboolean
assemble_ADD(r700_AssemblerBase
*pAsm
)
3271 if( GL_FALSE
== checkop2(pAsm
) )
3276 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_ADD
;
3278 if( GL_FALSE
== assemble_dst(pAsm
) )
3283 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3288 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3293 if(pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_SUB
)
3295 flipneg_PVSSRC(&(pAsm
->S
[1].src
));
3298 if( GL_FALSE
== next_ins(pAsm
) )
3306 GLboolean
assemble_ARL(r700_AssemblerBase
*pAsm
)
3307 { /* TODO: ar values don't persist between clauses */
3308 if( GL_FALSE
== checkop1(pAsm
) )
3313 if(8 == pAsm
->unAsic
)
3317 /* Float to Signed Integer Using FLOOR */
3318 pAsm
->D
.dst
.opcode
= EG_OP2_INST_FLT_TO_INT_FLOOR
;
3319 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3320 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3321 pAsm
->D
.dst
.reg
= 0;
3322 pAsm
->D
.dst
.writex
= 0;
3323 pAsm
->D
.dst
.writey
= 0;
3324 pAsm
->D
.dst
.writez
= 0;
3325 pAsm
->D
.dst
.writew
= 0;
3327 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3332 if( GL_FALSE
== next_ins(pAsm
) )
3337 /* Copy Signed Integer To Integer in AR and GPR */
3338 pAsm
->D
.dst
.opcode
= EG_OP2_INST_MOVA_INT
;
3339 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3340 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3341 pAsm
->D
.dst
.reg
= 0;
3342 pAsm
->D
.dst
.writex
= 0;
3343 pAsm
->D
.dst
.writey
= 0;
3344 pAsm
->D
.dst
.writez
= 0;
3345 pAsm
->D
.dst
.writew
= 0;
3347 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3352 if( GL_FALSE
== next_ins(pAsm
) )
3361 /* Truncate floating-point to the nearest integer
3362 in the range [-256, +255], and copy to AR and
3365 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOVA_FLOOR
;
3366 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3367 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3368 pAsm
->D
.dst
.reg
= 0;
3369 pAsm
->D
.dst
.writex
= 0;
3370 pAsm
->D
.dst
.writey
= 0;
3371 pAsm
->D
.dst
.writez
= 0;
3372 pAsm
->D
.dst
.writew
= 0;
3374 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3379 if( GL_FALSE
== next_ins(pAsm
) )
3388 GLboolean
assemble_BAD(char *opcode_str
)
3390 radeon_error("Not yet implemented instruction (%s)\n", opcode_str
);
3394 GLboolean
assemble_CMP(r700_AssemblerBase
*pAsm
)
3398 if( GL_FALSE
== checkop3(pAsm
) )
3403 if(8 == pAsm
->unAsic
)
3405 pAsm
->D
.dst
.opcode
= EG_OP3_INST_CNDGE
;
3409 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_CNDGE
;
3411 pAsm
->D
.dst
.op3
= 1;
3415 if(0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
3417 //OP3 has no support for write mask
3418 tmp
= gethelpr(pAsm
);
3420 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3421 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3422 pAsm
->D
.dst
.reg
= tmp
;
3424 nomask_PVSDST(&(pAsm
->D
.dst
));
3428 if( GL_FALSE
== assemble_dst(pAsm
) )
3434 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3439 if( GL_FALSE
== assemble_src(pAsm
, 2, 1) )
3444 if( GL_FALSE
== assemble_src(pAsm
, 1, 2) )
3449 if ( GL_FALSE
== next_ins(pAsm
) )
3454 if (0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
3456 if( GL_FALSE
== assemble_dst(pAsm
) )
3461 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3464 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3465 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3466 pAsm
->S
[0].src
.reg
= tmp
;
3468 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3469 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3471 if( GL_FALSE
== next_ins(pAsm
) )
3480 GLboolean
assemble_TRIG(r700_AssemblerBase
*pAsm
, BITS opcode
)
3483 * r600 - trunc to -PI..PI range
3484 * r700 - normalize by dividing by 2PI
3491 tmp
= gethelpr(pAsm
);
3492 if(8 == pAsm
->unAsic
)
3494 pAsm
->D
.dst
.opcode
= EG_OP3_INST_MULADD
;
3498 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
3500 pAsm
->D
.dst
.op3
= 1;
3502 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3503 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3504 pAsm
->D
.dst
.reg
= tmp
;
3506 assemble_src(pAsm
, 0, -1);
3508 pAsm
->S
[1].src
.rtype
= SRC_REC_LITERAL
;
3509 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_X
);
3511 pAsm
->S
[2].src
.rtype
= SRC_REC_LITERAL
;
3512 setswizzle_PVSSRC(&(pAsm
->S
[2].src
), SQ_SEL_Y
);
3514 pAsm
->D2
.dst2
.literal_slots
= 1;
3515 pAsm
->C
[0].f
= 1/(3.1415926535 * 2);
3516 pAsm
->C
[1].f
= 0.5f
;
3518 if ( GL_FALSE
== next_ins(pAsm
) )
3523 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FRACT
;
3525 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3526 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3527 pAsm
->D
.dst
.reg
= tmp
;
3528 pAsm
->D
.dst
.writex
= 1;
3530 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3531 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3532 pAsm
->S
[0].src
.reg
= tmp
;
3533 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3535 if(( GL_FALSE
== next_ins(pAsm
) ))
3539 if(8 == pAsm
->unAsic
)
3541 pAsm
->D
.dst
.opcode
= EG_OP3_INST_MULADD
;
3545 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
3547 pAsm
->D
.dst
.op3
= 1;
3549 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3550 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3551 pAsm
->D
.dst
.reg
= tmp
;
3553 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3554 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3555 pAsm
->S
[0].src
.reg
= tmp
;
3556 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3558 pAsm
->S
[1].src
.rtype
= SRC_REC_LITERAL
;
3559 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_X
);
3561 pAsm
->S
[2].src
.rtype
= SRC_REC_LITERAL
;
3562 setswizzle_PVSSRC(&(pAsm
->S
[2].src
), SQ_SEL_Y
);
3564 pAsm
->D2
.dst2
.literal_slots
= 1;
3568 pAsm
->C
[0].f
= 3.1415926535897f
* 2.0f
;
3569 pAsm
->C
[1].f
= -3.1415926535897f
;
3573 pAsm
->C
[0].f
= 1.0f
;
3574 pAsm
->C
[1].f
= -0.5f
;
3577 if(( GL_FALSE
== next_ins(pAsm
) ))
3582 pAsm
->D
.dst
.opcode
= opcode
;
3583 pAsm
->D
.dst
.math
= 1;
3587 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3588 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3589 pAsm
->S
[0].src
.reg
= tmp
;
3590 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3591 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3595 //TODO - replicate if more channels set in WriteMask
3600 GLboolean
assemble_DOT(r700_AssemblerBase
*pAsm
)
3602 if( GL_FALSE
== checkop2(pAsm
) )
3607 if(8 == pAsm
->unAsic
)
3609 pAsm
->D
.dst
.opcode
= EG_OP2_INST_DOT4
;
3613 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_DOT4
;
3616 if( GL_FALSE
== assemble_dst(pAsm
) )
3621 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3626 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3631 if(OPCODE_DP2
== pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
)
3633 zerocomp_PVSSRC(&(pAsm
->S
[0].src
),2);
3634 zerocomp_PVSSRC(&(pAsm
->S
[0].src
),3);
3635 zerocomp_PVSSRC(&(pAsm
->S
[1].src
),2);
3636 zerocomp_PVSSRC(&(pAsm
->S
[1].src
),3);
3638 else if(OPCODE_DP3
== pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
)
3640 zerocomp_PVSSRC(&(pAsm
->S
[0].src
), 3);
3641 zerocomp_PVSSRC(&(pAsm
->S
[1].src
), 3);
3643 else if(pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_DPH
)
3645 onecomp_PVSSRC(&(pAsm
->S
[0].src
), 3);
3648 if ( GL_FALSE
== next_ins(pAsm
) )
3656 GLboolean
assemble_DST(r700_AssemblerBase
*pAsm
)
3658 if( GL_FALSE
== checkop2(pAsm
) )
3663 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
3665 if( GL_FALSE
== assemble_dst(pAsm
) )
3670 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3675 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3680 onecomp_PVSSRC(&(pAsm
->S
[0].src
), 0);
3681 onecomp_PVSSRC(&(pAsm
->S
[0].src
), 3);
3683 onecomp_PVSSRC(&(pAsm
->S
[1].src
), 0);
3684 onecomp_PVSSRC(&(pAsm
->S
[1].src
), 2);
3686 if ( GL_FALSE
== next_ins(pAsm
) )
3694 GLboolean
assemble_EX2(r700_AssemblerBase
*pAsm
)
3696 if(8 == pAsm
->unAsic
)
3698 return assemble_math_function(pAsm
, EG_OP2_INST_EXP_IEEE
);
3701 return assemble_math_function(pAsm
, SQ_OP2_INST_EXP_IEEE
);
3704 GLboolean
assemble_EXP(r700_AssemblerBase
*pAsm
)
3710 tmp
= gethelpr(pAsm
);
3715 if (pAsm
->pILInst
->DstReg
.WriteMask
& 0x1) {
3716 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FLOOR
;
3718 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3719 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3720 pAsm
->D
.dst
.reg
= tmp
;
3721 pAsm
->D
.dst
.writex
= 1;
3723 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3728 if( GL_FALSE
== next_ins(pAsm
) )
3733 if(8 == pAsm
->unAsic
)
3735 pAsm
->D
.dst
.opcode
= EG_OP2_INST_EXP_IEEE
;
3739 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
3741 pAsm
->D
.dst
.math
= 1;
3743 if( GL_FALSE
== assemble_dst(pAsm
) )
3748 pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writez
= pAsm
->D
.dst
.writew
= 0;
3750 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3751 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3752 pAsm
->S
[0].src
.reg
= tmp
;
3754 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3755 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3757 if( GL_FALSE
== next_ins(pAsm
) )
3765 if ((pAsm
->pILInst
->DstReg
.WriteMask
>> 1) & 0x1) {
3766 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FRACT
;
3768 if( GL_FALSE
== assemble_dst(pAsm
) )
3773 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3778 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writez
= pAsm
->D
.dst
.writew
= 0;
3780 if( GL_FALSE
== next_ins(pAsm
) )
3788 if ((pAsm
->pILInst
->DstReg
.WriteMask
>> 2) & 0x1) {
3789 if(8 == pAsm
->unAsic
)
3791 pAsm
->D
.dst
.opcode
= EG_OP2_INST_EXP_IEEE
;
3795 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
3797 pAsm
->D
.dst
.math
= 1;
3799 if( GL_FALSE
== assemble_dst(pAsm
) )
3804 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3809 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writew
= 0;
3811 if( GL_FALSE
== next_ins(pAsm
) )
3819 if ((pAsm
->pILInst
->DstReg
.WriteMask
>> 3) & 0x1) {
3820 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3822 if( GL_FALSE
== assemble_dst(pAsm
) )
3827 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writez
= 0;
3829 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3830 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3831 pAsm
->S
[0].src
.reg
= tmp
;
3833 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_1
);
3834 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3836 if( GL_FALSE
== next_ins(pAsm
) )
3845 GLboolean
assemble_FLR(r700_AssemblerBase
*pAsm
)
3849 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FLOOR
;
3851 if ( GL_FALSE
== assemble_dst(pAsm
) )
3856 if ( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3861 if ( GL_FALSE
== next_ins(pAsm
) )
3869 GLboolean
assemble_FLR_INT(r700_AssemblerBase
*pAsm
)
3871 if(8 == pAsm
->unAsic
)
3873 return assemble_math_function(pAsm
, EG_OP2_INST_FLT_TO_INT
);
3876 return assemble_math_function(pAsm
, SQ_OP2_INST_FLT_TO_INT
);
3879 GLboolean
assemble_FRC(r700_AssemblerBase
*pAsm
)
3883 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FRACT
;
3885 if ( GL_FALSE
== assemble_dst(pAsm
) )
3890 if ( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3895 if ( GL_FALSE
== next_ins(pAsm
) )
3903 GLboolean
assemble_KIL(r700_AssemblerBase
*pAsm
, GLuint opcode
)
3905 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
3907 if(pILInst
->Opcode
== OPCODE_KIL
)
3910 pAsm
->D
.dst
.opcode
= opcode
;
3911 //pAsm->D.dst.math = 1;
3913 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3914 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3915 pAsm
->D
.dst
.reg
= 0;
3916 pAsm
->D
.dst
.writex
= 0;
3917 pAsm
->D
.dst
.writey
= 0;
3918 pAsm
->D
.dst
.writez
= 0;
3919 pAsm
->D
.dst
.writew
= 0;
3921 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3922 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3923 pAsm
->S
[0].src
.reg
= 0;
3924 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_0
);
3925 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3927 if(pILInst
->Opcode
== OPCODE_KIL_NV
)
3929 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
3930 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
3931 pAsm
->S
[1].src
.reg
= 0;
3932 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_1
);
3933 neg_PVSSRC(&(pAsm
->S
[1].src
));
3937 if( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
3944 if ( GL_FALSE
== next_ins(pAsm
) )
3949 /* Doc says KILL has to be last(end) ALU clause */
3950 pAsm
->pR700Shader
->killIsUsed
= GL_TRUE
;
3951 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
3956 GLboolean
assemble_LG2(r700_AssemblerBase
*pAsm
)
3958 if(8 == pAsm
->unAsic
)
3960 return assemble_math_function(pAsm
, EG_OP2_INST_LOG_IEEE
);
3963 return assemble_math_function(pAsm
, SQ_OP2_INST_LOG_IEEE
);
3966 GLboolean
assemble_LRP(r700_AssemblerBase
*pAsm
)
3970 if( GL_FALSE
== checkop3(pAsm
) )
3975 tmp
= gethelpr(pAsm
);
3977 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_ADD
;
3979 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3980 pAsm
->D
.dst
.reg
= tmp
;
3981 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3982 nomask_PVSDST(&(pAsm
->D
.dst
));
3985 if( GL_FALSE
== assemble_src(pAsm
, 1, 0) )
3990 if ( GL_FALSE
== assemble_src(pAsm
, 2, 1) )
3995 neg_PVSSRC(&(pAsm
->S
[1].src
));
3997 if( GL_FALSE
== next_ins(pAsm
) )
4002 if(8 == pAsm
->unAsic
)
4004 pAsm
->D
.dst
.opcode
= EG_OP3_INST_MULADD
;
4008 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
4010 pAsm
->D
.dst
.op3
= 1;
4012 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4013 pAsm
->D
.dst
.reg
= tmp
;
4014 nomask_PVSDST(&(pAsm
->D
.dst
));
4015 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4017 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4018 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4019 pAsm
->S
[0].src
.reg
= tmp
;
4020 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
4023 if( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
4028 if( GL_FALSE
== assemble_src(pAsm
, 2, -1) )
4033 if( GL_FALSE
== next_ins(pAsm
) )
4038 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
4040 if( GL_FALSE
== assemble_dst(pAsm
) )
4045 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4046 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4047 pAsm
->S
[0].src
.reg
= tmp
;
4048 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
4050 if( GL_FALSE
== next_ins(pAsm
) )
4058 GLboolean
assemble_LOG(r700_AssemblerBase
*pAsm
)
4060 BITS tmp1
, tmp2
, tmp3
;
4064 tmp1
= gethelpr(pAsm
);
4065 tmp2
= gethelpr(pAsm
);
4066 tmp3
= gethelpr(pAsm
);
4068 // FIXME: The hardware can do fabs() directly on input
4069 // elements, but the compiler doesn't have the
4070 // capability to use that.
4072 // MAX tmp1.x, a.x, -a.x (fabs(a.x))
4074 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
4076 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4077 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4078 pAsm
->D
.dst
.reg
= tmp1
;
4079 pAsm
->D
.dst
.writex
= 1;
4081 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4086 pAsm
->S
[1].bits
= pAsm
->S
[0].bits
;
4087 flipneg_PVSSRC(&(pAsm
->S
[1].src
));
4089 if ( GL_FALSE
== next_ins(pAsm
) )
4096 // LG2 tmp2.x, tmp1.x
4097 // FLOOR tmp3.x, tmp2.x
4098 // MOV dst.x, tmp3.x
4099 // ADD tmp3.x, tmp2.x, -tmp3.x
4100 // EX2 dst.y, tmp3.x
4101 // MOV dst.z, tmp2.x
4104 // LG2 tmp2.x, tmp1.x
4105 // FLOOR tmp3.x, tmp2.x
4107 if(8 == pAsm
->unAsic
)
4109 pAsm
->D
.dst
.opcode
= EG_OP2_INST_LOG_IEEE
;
4113 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_LOG_IEEE
;
4115 pAsm
->D
.dst
.math
= 1;
4117 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4118 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4119 pAsm
->D
.dst
.reg
= tmp2
;
4120 pAsm
->D
.dst
.writex
= 1;
4122 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4123 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
4124 pAsm
->S
[0].src
.reg
= tmp1
;
4126 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4127 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4129 if( GL_FALSE
== next_ins(pAsm
) )
4134 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FLOOR
;
4136 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4137 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4138 pAsm
->D
.dst
.reg
= tmp3
;
4139 pAsm
->D
.dst
.writex
= 1;
4141 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4142 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
4143 pAsm
->S
[0].src
.reg
= tmp2
;
4145 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4146 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4148 if( GL_FALSE
== next_ins(pAsm
) )
4153 // MOV dst.x, tmp3.x
4155 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
4157 if( GL_FALSE
== assemble_dst(pAsm
) )
4162 pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writez
= pAsm
->D
.dst
.writew
= 0;
4164 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4165 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
4166 pAsm
->S
[0].src
.reg
= tmp3
;
4168 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4169 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4171 if( GL_FALSE
== next_ins(pAsm
) )
4176 // ADD tmp3.x, tmp2.x, -tmp3.x
4177 // EX2 dst.y, tmp3.x
4179 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_ADD
;
4181 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4182 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4183 pAsm
->D
.dst
.reg
= tmp3
;
4184 pAsm
->D
.dst
.writex
= 1;
4186 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4187 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
4188 pAsm
->S
[0].src
.reg
= tmp2
;
4190 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4191 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4193 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
4194 pAsm
->S
[1].src
.rtype
= DST_REG_TEMPORARY
;
4195 pAsm
->S
[1].src
.reg
= tmp3
;
4197 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_X
);
4198 neg_PVSSRC(&(pAsm
->S
[1].src
));
4200 if( GL_FALSE
== next_ins(pAsm
) )
4205 if(8 == pAsm
->unAsic
)
4207 pAsm
->D
.dst
.opcode
= EG_OP2_INST_EXP_IEEE
;
4211 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
4213 pAsm
->D
.dst
.math
= 1;
4215 if( GL_FALSE
== assemble_dst(pAsm
) )
4220 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writez
= pAsm
->D
.dst
.writew
= 0;
4222 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4223 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
4224 pAsm
->S
[0].src
.reg
= tmp3
;
4226 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4227 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4229 if( GL_FALSE
== next_ins(pAsm
) )
4234 // MOV dst.z, tmp2.x
4236 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
4238 if( GL_FALSE
== assemble_dst(pAsm
) )
4243 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writew
= 0;
4245 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4246 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
4247 pAsm
->S
[0].src
.reg
= tmp2
;
4249 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4250 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4252 if( GL_FALSE
== next_ins(pAsm
) )
4259 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
4261 if( GL_FALSE
== assemble_dst(pAsm
) )
4266 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writez
= 0;
4268 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4269 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4270 pAsm
->S
[0].src
.reg
= tmp1
;
4272 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_1
);
4273 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4275 if( GL_FALSE
== next_ins(pAsm
) )
4283 GLboolean
assemble_MAD(struct r700_AssemblerBase
*pAsm
)
4286 GLboolean bReplaceDst
= GL_FALSE
;
4287 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
4289 if( GL_FALSE
== checkop3(pAsm
) )
4294 if(8 == pAsm
->unAsic
)
4296 pAsm
->D
.dst
.opcode
= EG_OP3_INST_MULADD
;
4300 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
4302 pAsm
->D
.dst
.op3
= 1;
4306 if(PROGRAM_TEMPORARY
== pILInst
->DstReg
.File
)
4307 { /* TODO : more investigation on MAD src and dst using same register */
4308 for(ii
=0; ii
<3; ii
++)
4310 if( (PROGRAM_TEMPORARY
== pILInst
->SrcReg
[ii
].File
)
4311 && (pILInst
->DstReg
.Index
== pILInst
->SrcReg
[ii
].Index
) )
4313 bReplaceDst
= GL_TRUE
;
4318 if(0xF != pILInst
->DstReg
.WriteMask
)
4319 { /* OP3 has no support for write mask */
4320 bReplaceDst
= GL_TRUE
;
4323 if(GL_TRUE
== bReplaceDst
)
4325 tmp
= gethelpr(pAsm
);
4327 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4328 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4329 pAsm
->D
.dst
.reg
= tmp
;
4331 nomask_PVSDST(&(pAsm
->D
.dst
));
4335 if( GL_FALSE
== assemble_dst(pAsm
) )
4341 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4346 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4351 if( GL_FALSE
== assemble_src(pAsm
, 2, -1) )
4356 if ( GL_FALSE
== next_ins(pAsm
) )
4361 if (GL_TRUE
== bReplaceDst
)
4363 if( GL_FALSE
== assemble_dst(pAsm
) )
4368 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
4371 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4372 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4373 pAsm
->S
[0].src
.reg
= tmp
;
4375 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4376 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
4378 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_LIT: expands the LIT instruction into several hardware
 * instructions, one group per enabled destination channel, using one helper
 * register (tmp):
 *   - x and/or w: MOV with every source swizzle set to SQ_SEL_1;
 *   - y:          MAX of source 0 (swizzled to .x) and SQ_SEL_0;
 *   - z:          LOG_CLAMPED of source 0 .y, then MUL_LIT into tmp.x,
 *                 then EXP_IEEE of tmp.x.
 * NOTE(review): this listing omits some lines (gaps in the embedded line
 * numbers), so the statements following each GL_FALSE test are not visible.
 */
4388 GLboolean
assemble_LIT(r700_AssemblerBase
*pAsm
)
4390 unsigned int dstReg
;
4391 unsigned int dstType
;
4393 int tmp
= gethelpr(pAsm
);
4395 if( GL_FALSE
== assemble_dst(pAsm
) )
/* Remember the assembled destination register; dstReg is reused below as a
 * source of MUL_LIT. dstType is stored but not read in the visible lines. */
4399 dstReg
= pAsm
->D
.dst
.reg
;
4400 dstType
= pAsm
->D
.dst
.rtype
;
4402 /* dst.xw, <- 1.0 */
4403 if( pAsm
->D
.dst
.writex
|| pAsm
->D
.dst
.writew
)
4405 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4410 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
4411 pAsm
->D
.dst
.writey
= 0;
4412 pAsm
->D
.dst
.writez
= 0;
/* Source 0 is redirected to the helper register, but every channel selects
 * SQ_SEL_1, so the helper's contents are not what gets selected. */
4413 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4414 pAsm
->S
[0].src
.reg
= tmp
;
4415 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4416 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4417 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_1
;
4418 pAsm
->S
[0].src
.swizzley
= SQ_SEL_1
;
4419 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_1
;
4420 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_1
;
4421 if( GL_FALSE
== next_ins(pAsm
) )
/* The destination is assembled again before the y channel is handled. */
4427 if( GL_FALSE
== assemble_dst(pAsm
) )
4432 if( pAsm
->D
.dst
.writey
) {
4434 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4439 /* dst.y = max(src.x, 0.0) */
4440 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
4441 pAsm
->D
.dst
.writex
= 0;
4442 pAsm
->D
.dst
.writey
= 1;
4443 pAsm
->D
.dst
.writez
= 0;
4444 pAsm
->D
.dst
.writew
= 0;
4445 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
);
/* Second operand: helper register with all channels selecting SQ_SEL_0. */
4446 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
4447 pAsm
->S
[1].src
.reg
= tmp
;
4448 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
4449 noneg_PVSSRC(&(pAsm
->S
[1].src
));
4450 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_0
;
4451 pAsm
->S
[1].src
.swizzley
= SQ_SEL_0
;
4452 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_0
;
4453 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_0
;
4454 if( GL_FALSE
== next_ins(pAsm
) )
/* The destination is assembled again before the z channel is handled. */
4460 if( GL_FALSE
== assemble_dst(pAsm
) )
4464 if ( pAsm
->D
.dst
.writez
) {
4466 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4471 /* dst.z = log(src.y) */
4472 if(8 == pAsm
->unAsic
)
4474 pAsm
->D
.dst
.opcode
= EG_OP2_INST_LOG_CLAMPED
;
4478 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_LOG_CLAMPED
;
4480 pAsm
->D
.dst
.math
= 1;
4481 pAsm
->D
.dst
.writex
= 0;
4482 pAsm
->D
.dst
.writey
= 0;
4483 pAsm
->D
.dst
.writez
= 1;
4484 pAsm
->D
.dst
.writew
= 0;
4485 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Y
, SQ_SEL_Y
, SQ_SEL_Y
, SQ_SEL_Y
);
4486 if( GL_FALSE
== next_ins(pAsm
) )
/* IL source 0 is assembled twice; the second call passes 2 as its last
 * argument and S[2] is swizzled right after it. */
4491 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4496 if( GL_FALSE
== assemble_src(pAsm
, 0, 2) )
4501 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
);
4503 swizzleagain_PVSSRC(&(pAsm
->S
[2].src
), SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
);
4505 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
4506 if(8 == pAsm
->unAsic
)
4508 pAsm
->D
.dst
.opcode
= EG_OP3_INST_MUL_LIT
;
4512 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MUL_LIT
;
4514 pAsm
->D
.dst
.math
= 1;
4515 pAsm
->D
.dst
.op3
= 1;
4516 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4517 pAsm
->D
.dst
.reg
= tmp
;
4518 pAsm
->D
.dst
.writex
= 1;
4519 pAsm
->D
.dst
.writey
= 0;
4520 pAsm
->D
.dst
.writez
= 0;
4521 pAsm
->D
.dst
.writew
= 0;
/* Middle operand: the saved destination register, read as a temporary with
 * every channel selecting .z (the value written by the LOG step above). */
4524 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
4525 pAsm
->S
[1].src
.reg
= dstReg
;
4526 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
4527 noneg_PVSSRC(&(pAsm
->S
[1].src
));
4528 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_Z
;
4529 pAsm
->S
[1].src
.swizzley
= SQ_SEL_Z
;
4530 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_Z
;
4531 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_Z
;
4533 if( GL_FALSE
== next_ins(pAsm
) )
4538 /* dst.z = exp(tmp.x) */
4539 if( GL_FALSE
== assemble_dst(pAsm
) )
4543 if(8 == pAsm
->unAsic
)
4545 pAsm
->D
.dst
.opcode
= EG_OP2_INST_EXP_IEEE
;
4549 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
4551 pAsm
->D
.dst
.math
= 1;
4552 pAsm
->D
.dst
.writex
= 0;
4553 pAsm
->D
.dst
.writey
= 0;
4554 pAsm
->D
.dst
.writez
= 1;
4555 pAsm
->D
.dst
.writew
= 0;
4557 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4558 pAsm
->S
[0].src
.reg
= tmp
;
4559 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4560 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4561 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_X
;
4562 pAsm
->S
[0].src
.swizzley
= SQ_SEL_X
;
4563 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_X
;
4564 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_X
;
4566 if( GL_FALSE
== next_ins(pAsm
) )
4574 GLboolean
assemble_MAX(r700_AssemblerBase
*pAsm
)
4576 if( GL_FALSE
== checkop2(pAsm
) )
4581 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
4583 if( GL_FALSE
== assemble_dst(pAsm
) )
4588 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4593 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4598 if( GL_FALSE
== next_ins(pAsm
) )
4606 GLboolean
assemble_MIN(r700_AssemblerBase
*pAsm
)
4608 if( GL_FALSE
== checkop2(pAsm
) )
4613 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MIN
;
4615 if( GL_FALSE
== assemble_dst(pAsm
) )
4620 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4625 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4630 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_MOV: selects SQ_OP2_INST_MOV, then assembles the destination and
 * source operand 0 and calls next_ins(); each of those results is compared
 * against GL_FALSE.
 * NOTE(review): the embedded numbering skips two lines before the opcode
 * assignment and the lines after each test, so the function prologue and
 * the failure handling are not visible in this listing.
 */
4638 GLboolean
assemble_MOV(r700_AssemblerBase
*pAsm
)
4642 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
4644 if (GL_FALSE
== assemble_dst(pAsm
))
4649 if (GL_FALSE
== assemble_src(pAsm
, 0, -1))
4654 if ( GL_FALSE
== next_ins(pAsm
) )
4662 GLboolean
assemble_MUL(r700_AssemblerBase
*pAsm
)
4664 if( GL_FALSE
== checkop2(pAsm
) )
4669 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
4671 if( GL_FALSE
== assemble_dst(pAsm
) )
4676 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4681 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4686 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_POW: builds the power function from four instructions that all
 * work through one helper register (tmp):
 *   LOG_IEEE tmp, a  ->  MUL tmp, tmp.x, b  ->  EXP_IEEE tmp, tmp.x
 *   ->  MOV dst, tmp.x
 * The EG_ opcode variants are chosen when pAsm->unAsic is 8.
 * NOTE(review): the declaration of tmp and the lines following each
 * GL_FALSE test are missing from this listing (gaps in the embedded line
 * numbers).
 */
4694 GLboolean
assemble_POW(r700_AssemblerBase
*pAsm
)
4700 tmp
= gethelpr(pAsm
);
4702 // LG2 tmp.x, a.swizzle
4703 if(8 == pAsm
->unAsic
)
4705 pAsm
->D
.dst
.opcode
= EG_OP2_INST_LOG_IEEE
;
4709 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_LOG_IEEE
;
4711 pAsm
->D
.dst
.math
= 1;
4713 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4714 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4715 pAsm
->D
.dst
.reg
= tmp
;
4716 nomask_PVSDST(&(pAsm
->D
.dst
));
4718 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4723 if( GL_FALSE
== next_ins(pAsm
) )
4728 // MUL tmp.x, tmp.x, b.swizzle
4729 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
4731 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4732 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4733 pAsm
->D
.dst
.reg
= tmp
;
4734 nomask_PVSDST(&(pAsm
->D
.dst
));
/* First factor: tmp.x, with negation cleared. */
4736 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4737 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4738 pAsm
->S
[0].src
.reg
= tmp
;
4739 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4740 noneg_PVSSRC(&(pAsm
->S
[0].src
));
/* Second factor: IL source operand 1. */
4742 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4747 if( GL_FALSE
== next_ins(pAsm
) )
4752 // EX2 dst.mask, tmp.x
/* NOTE(review): despite the comment above, the visible code writes this
 * result back into tmp (unmasked); the real destination is only written by
 * the final MOV. */
4754 if(8 == pAsm
->unAsic
)
4756 pAsm
->D
.dst
.opcode
= EG_OP2_INST_EXP_IEEE
;
4760 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
4762 pAsm
->D
.dst
.math
= 1;
4764 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4765 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4766 pAsm
->D
.dst
.reg
= tmp
;
4767 nomask_PVSDST(&(pAsm
->D
.dst
));
4769 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4770 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4771 pAsm
->S
[0].src
.reg
= tmp
;
4772 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4773 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4775 if( GL_FALSE
== next_ins(pAsm
) )
4780 // Now replicate result to all necessary channels in destination
4781 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
4783 if( GL_FALSE
== assemble_dst(pAsm
) )
/* NOTE(review): this source rtype is set with DST_REG_TEMPORARY, whereas the
 * other source setups in this function use SRC_REG_TEMPORARY -- confirm the
 * two constants have the same value. */
4788 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4789 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
4790 pAsm
->S
[0].src
.reg
= tmp
;
4792 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4793 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4795 if( GL_FALSE
== next_ins(pAsm
) )
4803 GLboolean
assemble_RCP(r700_AssemblerBase
*pAsm
)
4805 if(8 == pAsm
->unAsic
)
4807 return assemble_math_function(pAsm
, EG_OP2_INST_RECIP_IEEE
);
4810 return assemble_math_function(pAsm
, SQ_OP2_INST_RECIP_IEEE
);
4813 GLboolean
assemble_RSQ(r700_AssemblerBase
*pAsm
)
4815 if(8 == pAsm
->unAsic
)
4817 return assemble_math_function(pAsm
, EG_OP2_INST_RECIPSQRT_IEEE
);
4820 return assemble_math_function(pAsm
, SQ_OP2_INST_RECIPSQRT_IEEE
);
/*
 * assemble_SCS: emits a five-instruction sequence through one helper
 * register (tmp):
 *   1. MULADD tmp <- src0, literal.x, literal.y   (literals 1/(2*pi), 0.5)
 *   2. FRACT  tmp.x <- tmp.x
 *   3. MULADD tmp <- tmp.x, literal.x, literal.y  (second literal pair)
 *   4. COS of tmp.x, with the destination's y write disabled
 *   5. SIN of tmp.x, with the destination's x write disabled
 * NOTE(review): this listing omits lines (gaps in the embedded numbering):
 * the declaration of tmp, the statements after each next_ins() test, and
 * whatever sets up the destination before steps 4 and 5 are not visible.
 */
4823 GLboolean
assemble_SCS(r700_AssemblerBase
*pAsm
)
4829 tmp
= gethelpr(pAsm
);
4831 if(8 == pAsm
->unAsic
)
4833 pAsm
->D
.dst
.opcode
= EG_OP3_INST_MULADD
;
4837 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
4839 pAsm
->D
.dst
.op3
= 1;
4841 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4842 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4843 pAsm
->D
.dst
.reg
= tmp
;
/* NOTE(review): unlike the other call sites in this file, the result of
 * assemble_src() is not tested here. */
4845 assemble_src(pAsm
, 0, -1);
/* Operands 1 and 2 read the .x and .y slots of the literal set below. */
4847 pAsm
->S
[1].src
.rtype
= SRC_REC_LITERAL
;
4848 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_X
);
4850 pAsm
->S
[2].src
.rtype
= SRC_REC_LITERAL
;
4851 setswizzle_PVSSRC(&(pAsm
->S
[2].src
), SQ_SEL_Y
);
4853 pAsm
->D2
.dst2
.literal_slots
= 1;
4854 pAsm
->C
[0].f
= 1/(3.1415926535 * 2);
4855 pAsm
->C
[1].f
= 0.5F
;
4857 if ( GL_FALSE
== next_ins(pAsm
) )
4862 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FRACT
;
4864 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4865 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4866 pAsm
->D
.dst
.reg
= tmp
;
4867 pAsm
->D
.dst
.writex
= 1;
4869 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4870 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4871 pAsm
->S
[0].src
.reg
= tmp
;
4872 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4874 if(( GL_FALSE
== next_ins(pAsm
) ))
4878 if(8 == pAsm
->unAsic
)
4880 pAsm
->D
.dst
.opcode
= EG_OP3_INST_MULADD
;
4884 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
4886 pAsm
->D
.dst
.op3
= 1;
4888 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4889 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4890 pAsm
->D
.dst
.reg
= tmp
;
4892 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4893 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4894 pAsm
->S
[0].src
.reg
= tmp
;
4895 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4897 pAsm
->S
[1].src
.rtype
= SRC_REC_LITERAL
;
4898 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_X
);
4900 pAsm
->S
[2].src
.rtype
= SRC_REC_LITERAL
;
4901 setswizzle_PVSSRC(&(pAsm
->S
[2].src
), SQ_SEL_Y
);
4903 pAsm
->D2
.dst2
.literal_slots
= 1;
/* NOTE(review): two alternative literal pairs follow, (2*pi, -pi) and
 * (1.0, -0.5); the condition that selects between them is on lines missing
 * from this listing. */
4906 pAsm
->C
[0].f
= 3.1415926535897f
* 2.0f
;
4907 pAsm
->C
[1].f
= -3.1415926535897f
;
4909 pAsm
->C
[0].f
= 1.0f
;
4910 pAsm
->C
[1].f
= -0.5f
;
4913 if(( GL_FALSE
== next_ins(pAsm
) ))
/* Cosine of tmp.x; the y channel of the destination is masked off. */
4919 if(8 == pAsm
->unAsic
)
4921 pAsm
->D
.dst
.opcode
= EG_OP2_INST_COS
;
4925 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_COS
;
4927 pAsm
->D
.dst
.math
= 1;
4931 pAsm
->D
.dst
.writey
= 0;
4933 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4934 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4935 pAsm
->S
[0].src
.reg
= tmp
;
4936 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4937 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4939 if ( GL_FALSE
== next_ins(pAsm
) )
/* Sine of tmp.x; the x channel of the destination is masked off. */
4945 if(8 == pAsm
->unAsic
)
4947 pAsm
->D
.dst
.opcode
= EG_OP2_INST_SIN
;
4951 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_SIN
;
4953 pAsm
->D
.dst
.math
= 1;
4957 pAsm
->D
.dst
.writex
= 0;
4959 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4960 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4961 pAsm
->S
[0].src
.reg
= tmp
;
4962 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4963 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4965 if( GL_FALSE
== next_ins(pAsm
) )
4973 GLboolean
assemble_LOGIC(r700_AssemblerBase
*pAsm
, BITS opcode
)
4975 if( GL_FALSE
== checkop2(pAsm
) )
4980 pAsm
->D
.dst
.opcode
= opcode
;
4981 //pAsm->D.dst.math = 1;
4983 if( GL_FALSE
== assemble_dst(pAsm
) )
4988 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4993 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4998 if( GL_FALSE
== next_ins(pAsm
) )
5006 GLboolean
assemble_LOGIC_PRED(r700_AssemblerBase
*pAsm
, BITS opcode
)
5008 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
5010 pAsm
->D
.dst
.opcode
= opcode
;
5011 pAsm
->D
.dst
.math
= 1;
5012 pAsm
->D
.dst
.predicated
= 1;
5014 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
5015 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
5016 pAsm
->D
.dst
.reg
= pAsm
->uHelpReg
;
5017 pAsm
->D
.dst
.writex
= 1;
5018 pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writez
= pAsm
->D
.dst
.writew
= 0;
5020 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
5021 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
5022 pAsm
->S
[0].src
.reg
= pAsm
->last_cond_register
+ pAsm
->starting_temp_register_number
;
5023 pAsm
->S
[0].src
.swizzlex
= pILInst
->DstReg
.CondSwizzle
& 0x7;
5024 noneg_PVSSRC(&(pAsm
->S
[0].src
));
5026 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
5027 pAsm
->S
[1].src
.reg
= pAsm
->uHelpReg
;
5028 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
5029 noneg_PVSSRC(&(pAsm
->S
[1].src
));
5030 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_0
;
5031 pAsm
->S
[1].src
.swizzley
= SQ_SEL_0
;
5032 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_0
;
5033 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_0
;
5035 if( GL_FALSE
== next_ins(pAsm
) )
5043 GLboolean
assemble_SGE(r700_AssemblerBase
*pAsm
)
5045 if( GL_FALSE
== checkop2(pAsm
) )
5050 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_SETGE
;
5052 if( GL_FALSE
== assemble_dst(pAsm
) )
5057 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
5062 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
5067 if( GL_FALSE
== next_ins(pAsm
) )
5075 GLboolean
assemble_SLT(r700_AssemblerBase
*pAsm
)
5077 if( GL_FALSE
== checkop2(pAsm
) )
5082 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_SETGT
;
5084 if( GL_FALSE
== assemble_dst(pAsm
) )
5089 if( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
5094 if( GL_FALSE
== assemble_src(pAsm
, 1, 0) )
5099 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_SSG: emits two CNDGT (OP3) instructions through a helper
 * register, following the formulas in the inline comments below:
 *   tmp = (src > 0 ? 1 : src)
 *   dst = (-tmp > 0 ? -1 : tmp)
 * The EG_ opcode is chosen when pAsm->unAsic is 8.
 * NOTE(review): the embedded numbering skips two lines before the
 * declaration of tmp and the lines after each GL_FALSE test; those are not
 * visible in this listing.
 */
5107 GLboolean
assemble_SSG(r700_AssemblerBase
*pAsm
)
5111 GLuint tmp
= gethelpr(pAsm
);
5112 /* tmp = (src > 0 ? 1 : src) */
5113 if(8 == pAsm
->unAsic
)
5115 pAsm
->D
.dst
.opcode
= EG_OP3_INST_CNDGT
;
5119 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_CNDGT
;
5121 pAsm
->D
.dst
.op3
= 1;
5122 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
5123 pAsm
->D
.dst
.reg
= tmp
;
/* Operand 0 is IL source 0; operand 1 selects SQ_SEL_1; IL source 0 is then
 * assembled a second time with 2 as the last argument. */
5125 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
5130 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_1
);
5132 if( GL_FALSE
== assemble_src(pAsm
, 0, 2) )
5137 if( GL_FALSE
== next_ins(pAsm
) )
5142 /* dst = (-tmp > 0 ? -1 : tmp) */
5143 if(8 == pAsm
->unAsic
)
5145 pAsm
->D
.dst
.opcode
= EG_OP3_INST_CNDGT
;
5149 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_CNDGT
;
5151 pAsm
->D
.dst
.op3
= 1;
5153 if( GL_FALSE
== assemble_dst(pAsm
) )
/* Operand 0: negated tmp. */
5158 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
5159 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
5160 pAsm
->S
[0].src
.reg
= tmp
;
5161 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
5162 neg_PVSSRC(&(pAsm
->S
[0].src
));
/* Operand 1: SQ_SEL_1 with the negate helper applied. */
5164 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_1
);
5165 neg_PVSSRC(&(pAsm
->S
[1].src
));
/* Operand 2: tmp, unswizzled. */
5167 setaddrmode_PVSSRC(&(pAsm
->S
[2].src
), ADDR_ABSOLUTE
);
5168 pAsm
->S
[2].src
.rtype
= SRC_REG_TEMPORARY
;
5169 pAsm
->S
[2].src
.reg
= tmp
;
5170 noswizzle_PVSSRC(&(pAsm
->S
[2].src
));
5172 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_STP: takes the assembler state and returns a GLboolean.
 * NOTE(review): only the signature is present in this listing; the body
 * lines are missing, so its behaviour cannot be described from here.
 */
5180 GLboolean
assemble_STP(r700_AssemblerBase
*pAsm
)
/*
 * assemble_TEX: assembles a texture fetch for the current IL instruction.
 * Visible stages:
 *   1. Classify the file of IL source 0; for the constant-like files the
 *      operand is first passed through mov_temp().
 *   2. OPCODE_TXP: RECIP_IEEE of src.w into tmp.w, then MUL tmp.xyz by
 *      tmp.w; tmp is recorded in aArgSubst[1].
 *   3. TEXTURE_CUBE_INDEX: CUBE, RECIP_IEEE of |tmp1.z|, MULADD into tmp2,
 *      MOV back into tmp1.xy; tmp1 is recorded in aArgSubst[1].
 *   4. Select the texture opcode, set is_tex / need_tex_barrier, fill in
 *      the sampler operand, then tex_dst(), tex_src() and next_ins().
 *   5. When TexShadow is 1, an extra saturated ADD of the per-unit shadow
 *      register is emitted.
 * NOTE(review): this listing omits lines (gaps in the embedded numbering),
 * including case labels, the statements after each GL_FALSE test, and
 * whatever emits the instructions prepared in stage 3.
 */
5185 GLboolean
assemble_TEX(r700_AssemblerBase
*pAsm
)
5187 GLboolean src_const
;
5188 GLboolean need_barrier
= GL_FALSE
;
5192 switch (pAsm
->pILInst
[pAsm
->uiCurInst
].SrcReg
[0].File
)
5194 case PROGRAM_UNIFORM
:
5195 case PROGRAM_CONSTANT
:
5196 case PROGRAM_LOCAL_PARAM
:
5197 case PROGRAM_ENV_PARAM
:
5198 case PROGRAM_STATE_VAR
:
5199 src_const
= GL_TRUE
;
5201 case PROGRAM_TEMPORARY
:
5204 src_const
= GL_FALSE
;
/* Constant-file coordinates are copied via mov_temp() first; a barrier is
 * then requested. */
5208 if (GL_TRUE
== src_const
)
5210 if ( GL_FALSE
== mov_temp(pAsm
, 0) )
5212 need_barrier
= GL_TRUE
;
/* Projective fetch: tmp.w = 1 / src.w, then tmp.xyz = src * tmp.w. */
5215 if (pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_TXP
)
5217 GLuint tmp
= gethelpr(pAsm
);
5218 if(8 == pAsm
->unAsic
)
5220 pAsm
->D
.dst
.opcode
= EG_OP2_INST_RECIP_IEEE
;
5224 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_RECIP_IEEE
;
5226 pAsm
->D
.dst
.math
= 1;
5227 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
5228 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
5229 pAsm
->D
.dst
.reg
= tmp
;
5230 pAsm
->D
.dst
.writew
= 1;
5232 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
5236 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
);
5237 if( GL_FALSE
== next_ins(pAsm
) )
5242 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
5243 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
5244 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
5245 pAsm
->D
.dst
.reg
= tmp
;
5246 pAsm
->D
.dst
.writex
= 1;
5247 pAsm
->D
.dst
.writey
= 1;
5248 pAsm
->D
.dst
.writez
= 1;
5249 pAsm
->D
.dst
.writew
= 0;
5251 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
5255 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
5256 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
5257 pAsm
->S
[1].src
.reg
= tmp
;
5258 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_W
);
5260 if( GL_FALSE
== next_ins(pAsm
) )
/* Record the helper register in aArgSubst[1]. */
5265 pAsm
->aArgSubst
[1] = tmp
;
5266 need_barrier
= GL_TRUE
;
/* Cube-map coordinate preparation using two helper registers. */
5269 if (pAsm
->pILInst
[pAsm
->uiCurInst
].TexSrcTarget
== TEXTURE_CUBE_INDEX
)
5271 GLuint tmp1
= gethelpr(pAsm
);
5272 GLuint tmp2
= gethelpr(pAsm
);
5274 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
5275 if(8 == pAsm
->unAsic
)
5277 pAsm
->D
.dst
.opcode
= EG_OP2_INST_CUBE
;
5281 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_CUBE
;
5283 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
5284 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
5285 pAsm
->D
.dst
.reg
= tmp1
;
5286 nomask_PVSDST(&(pAsm
->D
.dst
));
5288 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
5293 if( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
5298 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Z
, SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_Y
);
5299 swizzleagain_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Y
, SQ_SEL_X
, SQ_SEL_Z
, SQ_SEL_Z
);
5301 if( GL_FALSE
== next_ins(pAsm
) )
5306 /* tmp1.z = RCP_e(|tmp1.z|) */
5307 if(8 == pAsm
->unAsic
)
5309 pAsm
->D
.dst
.opcode
= EG_OP2_INST_RECIP_IEEE
;
5313 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_RECIP_IEEE
;
5315 pAsm
->D
.dst
.math
= 1;
5316 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
5317 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
5318 pAsm
->D
.dst
.reg
= tmp1
;
5319 pAsm
->D
.dst
.writez
= 1;
5321 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
5322 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
5323 pAsm
->S
[0].src
.reg
= tmp1
;
5324 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_Z
;
5325 pAsm
->S
[0].src
.abs
= 1;
/* NOTE(review): the block comment opened on the next line has no visible
 * terminator in this listing (its closing line is missing), so no
 * annotations are added until after the "immediate c 1.5" comment. The code
 * in between prepares a MULADD into tmp2 from tmp1, tmp1.z and the literal
 * 1.5. */
5329 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
5330 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
5331 * muladd has no writemask, have to use another temp
5333 if(8 == pAsm
->unAsic
)
5335 pAsm
->D
.dst
.opcode
= EG_OP3_INST_MULADD
;
5339 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
5341 pAsm
->D
.dst
.op3
= 1;
5342 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
5343 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
5344 pAsm
->D
.dst
.reg
= tmp2
;
5346 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
5347 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
5348 pAsm
->S
[0].src
.reg
= tmp1
;
5349 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
5350 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
5351 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
5352 pAsm
->S
[1].src
.reg
= tmp1
;
5353 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Z
);
5354 setaddrmode_PVSSRC(&(pAsm
->S
[2].src
), ADDR_ABSOLUTE
);
5355 /* immediate c 1.5 */
5356 pAsm
->D2
.dst2
.literal_slots
= 1;
5357 pAsm
->C
[0].f
= 1.5F
;
5358 pAsm
->S
[2].src
.rtype
= SRC_REC_LITERAL
;
5359 pAsm
->S
[2].src
.reg
= tmp1
;
5360 setswizzle_PVSSRC(&(pAsm
->S
[2].src
), SQ_SEL_X
);
5364 /* tmp1.xy = temp2.xy */
5365 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
5366 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
5367 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
5368 pAsm
->D
.dst
.reg
= tmp1
;
5369 pAsm
->D
.dst
.writex
= 1;
5370 pAsm
->D
.dst
.writey
= 1;
5371 pAsm
->D
.dst
.writez
= 0;
5372 pAsm
->D
.dst
.writew
= 0;
5374 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
5375 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
5376 pAsm
->S
[0].src
.reg
= tmp2
;
5377 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
5380 pAsm
->aArgSubst
[1] = tmp1
;
5381 need_barrier
= GL_TRUE
;
/* Texture opcode selection by IL opcode; the case labels are not present in
 * this listing. */
5385 switch(pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
)
5388 /* will these need WQM(1) on CF inst ? */
5389 pAsm
->D
.dst
.opcode
= SQ_TEX_INST_GET_GRADIENTS_H
;
5392 pAsm
->D
.dst
.opcode
= SQ_TEX_INST_GET_GRADIENTS_V
;
5395 /* this should actually be SAMPLE_LB but that needs bias to be
5396 * embedded in the instruction - cant do here */
5397 pAsm
->D
.dst
.opcode
= SQ_TEX_INST_SAMPLE_L
;
5400 pAsm
->D
.dst
.opcode
= SQ_TEX_INST_SAMPLE_L
;
5403 if(pAsm
->pILInst
[pAsm
->uiCurInst
].TexShadow
== 1)
5404 pAsm
->D
.dst
.opcode
= SQ_TEX_INST_SAMPLE_C
;
5406 pAsm
->D
.dst
.opcode
= SQ_TEX_INST_SAMPLE
;
/* NOTE(review): the is_tex assignment and the need_barrier test appear
 * twice in a row below; the first test has no visible body of its own, so
 * it looks like it guards the repeated assignment -- check for a merge
 * leftover. */
5409 pAsm
->is_tex
= GL_TRUE
;
5410 if ( GL_TRUE
== need_barrier
)
5412 pAsm
->is_tex
= GL_TRUE
;
5413 if ( GL_TRUE
== need_barrier
)
5415 pAsm
->need_tex_barrier
= GL_TRUE
;
5417 // Set src1 to tex unit id
5418 pAsm
->S
[1].src
.reg
= pAsm
->SamplerUnits
[pAsm
->pILInst
[pAsm
->uiCurInst
].TexSrcUnit
];
5419 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
5421 //No sw info from mesa compiler, so hard code here.
5422 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_X
;
5423 pAsm
->S
[1].src
.swizzley
= SQ_SEL_Y
;
5424 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_Z
;
5425 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_W
;
5427 if( GL_FALSE
== tex_dst(pAsm
) )
5432 if( GL_FALSE
== tex_src(pAsm
) )
/* Per-case adjustments of the coordinate swizzle after tex_src(). */
5437 if(pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_TXP
)
5439 /* hopefully did swizzles before */
5440 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
5443 if(pAsm
->pILInst
[pAsm
->uiCurInst
].TexSrcTarget
== TEXTURE_CUBE_INDEX
)
5445 /* SAMPLE dst, tmp.yxwy, CUBE */
5446 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_Y
;
5447 pAsm
->S
[0].src
.swizzley
= SQ_SEL_X
;
5448 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_W
;
5449 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_Y
;
5452 if(pAsm
->pILInst
[pAsm
->uiCurInst
].TexShadow
== 1)
5454 /* compare value goes to w chan ? */
5455 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_Z
;
5458 if ( GL_FALSE
== next_ins(pAsm
) )
5463 /* add ARB shadow ambient but clamp to 0..1 */
5464 if(pAsm
->pILInst
[pAsm
->uiCurInst
].TexShadow
== 1)
5466 /* ADD_SAT dst, dst, ambient[texunit] */
5467 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_ADD
;
5469 if( GL_FALSE
== assemble_dst(pAsm
) )
5473 pAsm
->D2
.dst2
.SaturateMode
= 1;
/* Operand 0 re-reads the destination register that was just written. */
5475 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
5476 pAsm
->S
[0].src
.reg
= pAsm
->D
.dst
.reg
;
5477 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
5478 noneg_PVSSRC(&(pAsm
->S
[0].src
));
/* Operand 1 is the constant register listed in shadow_regs for this unit. */
5480 pAsm
->S
[1].src
.rtype
= SRC_REG_CONSTANT
;
5481 pAsm
->S
[1].src
.reg
= pAsm
->shadow_regs
[pAsm
->pILInst
[pAsm
->uiCurInst
].TexSrcUnit
];
5482 noswizzle_PVSSRC(&(pAsm
->S
[1].src
));
5483 noneg_PVSSRC(&(pAsm
->S
[1].src
));
5485 if( GL_FALSE
== next_ins(pAsm
) )
5495 GLboolean
assemble_XPD(r700_AssemblerBase
*pAsm
)
5500 if( GL_FALSE
== checkop2(pAsm
) )
5505 tmp1
= gethelpr(pAsm
);
5507 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
5509 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
5510 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
5511 pAsm
->D
.dst
.reg
= tmp1
;
5512 nomask_PVSDST(&(pAsm
->D
.dst
));
5514 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
5519 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
5524 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_Y
, SQ_SEL_0
);
5525 swizzleagain_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Y
, SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_0
);
5527 if( GL_FALSE
== next_ins(pAsm
) )
5532 if(8 == pAsm
->unAsic
)
5534 pAsm
->D
.dst
.opcode
= EG_OP3_INST_MULADD
;
5538 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
5540 pAsm
->D
.dst
.op3
= 1;
5542 if(0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
5544 tmp2
= gethelpr(pAsm
);
5546 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
5547 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
5548 pAsm
->D
.dst
.reg
= tmp2
;
5550 nomask_PVSDST(&(pAsm
->D
.dst
));
5554 if( GL_FALSE
== assemble_dst(pAsm
) )
5560 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
5565 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
5570 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Y
, SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_0
);
5571 swizzleagain_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_Y
, SQ_SEL_0
);
5573 // result1 + (neg) result0
5574 setaddrmode_PVSSRC(&(pAsm
->S
[2].src
),ADDR_ABSOLUTE
);
5575 pAsm
->S
[2].src
.rtype
= SRC_REG_TEMPORARY
;
5576 pAsm
->S
[2].src
.reg
= tmp1
;
5578 neg_PVSSRC(&(pAsm
->S
[2].src
));
5579 noswizzle_PVSSRC(&(pAsm
->S
[2].src
));
5581 if( GL_FALSE
== next_ins(pAsm
) )
5587 if(0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
5589 if( GL_FALSE
== assemble_dst(pAsm
) )
5594 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
5596 // Use tmp as source
5597 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
5598 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
5599 pAsm
->S
[0].src
.reg
= tmp2
;
5601 noneg_PVSSRC(&(pAsm
->S
[0].src
));
5602 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
5604 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_EXPORT: takes the assembler state and returns a GLboolean.
 * NOTE(review): only the signature is present in this listing; the body
 * lines are missing, so its behaviour cannot be described from here.
 */
5613 GLboolean
assemble_EXPORT(r700_AssemblerBase
*pAsm
)
/*
 * decreaseCurrent: lowers the `current` counter of the active call-stack
 * entry (pAsm->CALLSTACK[pAsm->CALLSP]) by 1, 4, 4 or 1.
 * NOTE(review): the conditions that choose between the four adjustments
 * (presumably keyed on uReason) are on lines missing from this listing.
 */
5618 static inline void decreaseCurrent(r700_AssemblerBase
*pAsm
, GLuint uReason
)
5623 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
--;
5626 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
-= 4;
5629 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
-= 4;
5632 /* TODO : for 16 vp asic, should -= 2; */
5633 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
-= 1;
/*
 * checkStackDepth: tracks the depth of the active call-stack entry
 * (pAsm->CALLSTACK[pAsm->CALLSP]).
 *   - With bCheckMaxOnly set, only `max` is raised, to current + 1 or
 *     current + 4, when that value exceeds it.
 *   - Otherwise `current` is increased by 1, 4, 4 or 1 and `max` is raised
 *     to the new `current` if it was exceeded.
 * NOTE(review): the conditions choosing between the increments (presumably
 * keyed on uReason) are on lines missing from this listing, as is whatever
 * separates the two modes.
 */
5638 static inline void checkStackDepth(r700_AssemblerBase
*pAsm
, GLuint uReason
, GLboolean bCheckMaxOnly
)
5640 if(GL_TRUE
== bCheckMaxOnly
)
5645 if((pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+ 1)
5646 > pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
)
5648 pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
=
5649 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+ 1;
5653 if((pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+ 4)
5654 > pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
)
5656 pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
=
5657 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+ 4;
/* Increments applied to `current`, mirroring decreaseCurrent(). */
5667 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
++;
5670 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+= 4;
5673 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+= 4;
5676 /* TODO : for 16 vp asic, should += 2; */
5677 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+= 1;
/* Keep `max` as the high-water mark of `current`. */
5681 if(pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
5682 > pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
)
5684 pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
=
5685 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
;
/*
 * jumpToOffest: adds a control-flow instruction and fills it in as a jump.
 *   - When pAsm->unAsic is 8, the fields of m_Word1.val are written with
 *     SETfield() using the EG_CF_WORD1__* shift/mask constants.
 *   - The bit-field form sets pop_count to `pops`, cond to
 *     SQ_CF_COND_ACTIVE, cf_inst to SQ_CF_INST_JUMP and barrier to 1.
 *   - The target address is this clause's m_uIndex plus `offset`.
 * NOTE(review): the value argument of every SETfield() call, the lines after
 * the add_cf_instruction() test and the branch structure between the two
 * forms are on lines missing from this listing.
 */
5689 GLboolean
jumpToOffest(r700_AssemblerBase
*pAsm
, GLuint pops
, GLint offset
)
5691 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5696 if(8 == pAsm
->unAsic
)
5698 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5700 EG_CF_WORD1__CF_INST_shift
, EG_CF_WORD1__CF_INST_mask
);
5701 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5703 EG_CF_WORD1__POP_COUNT_shift
, EG_CF_WORD1__POP_COUNT_mask
);
5704 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5706 EG_CF_WORD1__CF_CONST_shift
, EG_CF_WORD1__CF_CONST_mask
);
5707 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5709 EG_CF_WORD1__COND_shift
, EG_CF_WORD1__COND_mask
);
5710 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5712 EG_CF_WORD1__COUNT_shift
, EG_CF_WORD1__COUNT_mask
);
5713 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5715 EG_CF_WORD1__VPM_shift
, EG_CF_WORD1__VPM_bit
);
5716 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5718 EG_CF_WORD1__EOP_shift
, EG_CF_WORD1__EOP_bit
);
5719 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5721 EG_CF_WORD1__WQM_shift
, EG_CF_WORD1__WQM_bit
);
5722 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5724 EG_CF_WORD1__BARRIER_shift
, EG_CF_WORD1__BARRIER_bit
);
/* Bit-field form of the same control word. */
5728 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= pops
;
5729 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5730 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5732 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5733 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5734 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_JUMP
;
5735 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5737 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Jump target is relative to this CF instruction's own index. */
5740 pAsm
->cf_current_cf_clause_ptr
->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
+ offset
;
/*
 * pops: request a new CF instruction with add_cf_instruction() and encode
 * it as a POP.
 *
 * pAsm - assembler state; the clause being filled is
 *        pAsm->cf_current_cf_clause_ptr.
 * pops - value stored in pop_count (the parameter shares the function's
 *        name and shadows it inside the body).
 *
 * Returns a GLboolean.
 *
 * NOTE(review): the listing omits some original lines (returns, braces/else,
 * SETfield() value arguments) - confirm against the complete file.
 */
5745 GLboolean
pops(r700_AssemblerBase
*pAsm
, GLuint pops
)
5747 if(GL_FALSE
== add_cf_instruction(pAsm
) )
/* unAsic == 8: fill word1 via SETfield() and the EG_CF_WORD1__* constants. */
5752 if(8 == pAsm
->unAsic
)
5754 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5756 EG_CF_WORD1__CF_INST_shift
, EG_CF_WORD1__CF_INST_mask
);
5757 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5759 EG_CF_WORD1__POP_COUNT_shift
, EG_CF_WORD1__POP_COUNT_mask
);
5760 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5762 EG_CF_WORD1__CF_CONST_shift
, EG_CF_WORD1__CF_CONST_mask
);
5763 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5765 EG_CF_WORD1__COND_shift
, EG_CF_WORD1__COND_mask
);
5766 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5768 EG_CF_WORD1__EOP_shift
, EG_CF_WORD1__EOP_bit
);
5769 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5771 EG_CF_WORD1__VPM_shift
, EG_CF_WORD1__VPM_bit
);
5772 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5774 EG_CF_WORD1__WQM_shift
, EG_CF_WORD1__WQM_bit
);
5775 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5777 EG_CF_WORD1__BARRIER_shift
, EG_CF_WORD1__BARRIER_bit
);
5778 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5780 EG_CF_WORD1__COUNT_shift
, EG_CF_WORD1__COUNT_mask
);
/* Bitfield form of word1 (presumably the non-8 branch): POP with
 * pop_count = pops and the barrier bit set. */
5784 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= pops
;
5785 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5786 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5788 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5789 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5790 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_POP
;
5792 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5794 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* The POP's address field points at the clause right after this one. */
5796 pAsm
->cf_current_cf_clause_ptr
->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
+ 1;
/*
 * assemble_IF: emit the predicate test and the conditional JUMP that open
 * an IF construct, and record the construct in pAsm->fc_stack.
 *
 * pAsm     - assembler state.
 * bHasElse - when it is not GL_TRUE the JUMP is given pop_count 1,
 *            otherwise pop_count 0 (see the bitfield assignments below).
 *
 * Returns a GLboolean.
 *
 * NOTE(review): the listing omits some original lines (returns, braces/else,
 * SETfield() value arguments and any update of pAsm->FCSP) - confirm against
 * the complete file.
 */
5801 GLboolean
assemble_IF(r700_AssemblerBase
*pAsm
, GLboolean bHasElse
)
/* The predicate ALU op is emitted under an ALU_PUSH_BEFORE clause opcode. */
5803 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
5805 assemble_LOGIC_PRED(pAsm
, SQ_OP2_INST_PRED_SETNE
);
5808 if(GL_FALSE
== add_cf_instruction(pAsm
) )
/* unAsic == 8: fill word1 via SETfield(); POP_COUNT is written in one of
 * two ways depending on bHasElse. */
5813 if(8 == pAsm
->unAsic
)
5815 if(GL_TRUE
!= bHasElse
)
5817 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5819 EG_CF_WORD1__POP_COUNT_shift
, EG_CF_WORD1__POP_COUNT_mask
);
5823 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5825 EG_CF_WORD1__POP_COUNT_shift
, EG_CF_WORD1__POP_COUNT_mask
);
5828 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5830 EG_CF_WORD1__CF_INST_shift
, EG_CF_WORD1__CF_INST_mask
);
5831 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5833 EG_CF_WORD1__CF_CONST_shift
, EG_CF_WORD1__CF_CONST_mask
);
5834 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5836 EG_CF_WORD1__COND_shift
, EG_CF_WORD1__COND_mask
);
5837 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5839 EG_CF_WORD1__EOP_shift
, EG_CF_WORD1__EOP_bit
);
5840 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5842 EG_CF_WORD1__VPM_shift
, EG_CF_WORD1__VPM_bit
);
5843 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5845 EG_CF_WORD1__WQM_shift
, EG_CF_WORD1__WQM_bit
);
5846 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5848 EG_CF_WORD1__BARRIER_shift
, EG_CF_WORD1__BARRIER_bit
);
5849 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5851 EG_CF_WORD1__COUNT_shift
, EG_CF_WORD1__COUNT_mask
);
/* Bitfield form of word1 (presumably the non-8 branch): pop_count is 1
 * without an ELSE and 0 with one. */
5855 if(GL_TRUE
!= bHasElse
)
5857 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
5861 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 0;
5863 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5864 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5866 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5867 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5868 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_JUMP
;
5869 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5871 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Record the IF in the flow-control stack entry at FCSP: no ELSE clause yet
 * (mid == NULL) and `first` remembers the JUMP so its address can be patched
 * later by assemble_ELSE / assemble_ENDIF. */
5875 pAsm
->fc_stack
[pAsm
->FCSP
].type
= FC_IF
;
5876 pAsm
->fc_stack
[pAsm
->FCSP
].mid
= NULL
;
5877 pAsm
->fc_stack
[pAsm
->FCSP
].midLen
= 0;
5878 pAsm
->fc_stack
[pAsm
->FCSP
].first
= pAsm
->cf_current_cf_clause_ptr
;
/* USE_CF_FOR_POP_AFTER is #defined at the top of the file, so this #ifndef
 * section is compiled out. */
5880 #ifndef USE_CF_FOR_POP_AFTER
5881 if(GL_TRUE
!= bHasElse
)
5883 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU_POP_AFTER
;
5885 #endif /* USE_CF_FOR_POP_AFTER */
5887 checkStackDepth(pAsm
, FC_PUSH_VPM
, GL_FALSE
);
/*
 * assemble_ELSE: emit the ELSE control-flow instruction for the IF recorded
 * at pAsm->fc_stack[pAsm->FCSP], remember it as that entry's mid[0] and
 * patch the IF's opening clause (`first`) with its jump address.
 *
 * pAsm - assembler state.
 *
 * Returns a GLboolean.
 *
 * NOTE(review): the listing omits some original lines (returns, braces/else,
 * SETfield() value arguments, one _mesa_realloc() argument) - confirm
 * against the complete file.
 */
5892 GLboolean
assemble_ELSE(r700_AssemblerBase
*pAsm
)
5894 if(GL_FALSE
== add_cf_instruction(pAsm
) )
/* unAsic == 8: fill word1 via SETfield() and the EG_CF_WORD1__* constants. */
5899 if(8 == pAsm
->unAsic
)
5901 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5903 EG_CF_WORD1__POP_COUNT_shift
, EG_CF_WORD1__POP_COUNT_mask
);
5904 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5906 EG_CF_WORD1__CF_INST_shift
, EG_CF_WORD1__CF_INST_mask
);
5907 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5909 EG_CF_WORD1__CF_CONST_shift
, EG_CF_WORD1__CF_CONST_mask
);
5910 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5912 EG_CF_WORD1__COND_shift
, EG_CF_WORD1__COND_mask
);
5913 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5915 EG_CF_WORD1__EOP_shift
, EG_CF_WORD1__EOP_bit
);
5916 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5918 EG_CF_WORD1__VPM_shift
, EG_CF_WORD1__VPM_bit
);
5919 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5921 EG_CF_WORD1__WQM_shift
, EG_CF_WORD1__WQM_bit
);
5922 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5924 EG_CF_WORD1__BARRIER_shift
, EG_CF_WORD1__BARRIER_bit
);
5925 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5927 EG_CF_WORD1__COUNT_shift
, EG_CF_WORD1__COUNT_mask
);
/* Bitfield form of word1 (presumably the non-8 branch): ELSE with
 * pop_count 1 and the barrier bit set. */
5931 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1; ///
5932 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5933 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5935 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5936 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5937 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_ELSE
;
5938 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5940 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Resize `mid` to hold one clause pointer and store the ELSE clause there.
 * NOTE(review): the pointer returned by _mesa_realloc() is indexed on the
 * next statement with no visible NULL check, and the old pointer is
 * overwritten directly - confirm whether allocation failure is handled. */
5943 pAsm
->fc_stack
[pAsm
->FCSP
].mid
= (R700ControlFlowGenericClause
**)_mesa_realloc( (void *)pAsm
->fc_stack
[pAsm
->FCSP
].mid
,
5945 sizeof(R700ControlFlowGenericClause
*) );
5946 pAsm
->fc_stack
[pAsm
->FCSP
].mid
[0] = pAsm
->cf_current_cf_clause_ptr
;
5947 //pAsm->fc_stack[pAsm->FCSP].unNumMid = 1;
/* USE_CF_FOR_POP_AFTER is #defined at the top of the file, so this #ifndef
 * section is compiled out. */
5949 #ifndef USE_CF_FOR_POP_AFTER
5950 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU_POP_AFTER
;
5951 #endif /* USE_CF_FOR_POP_AFTER */
/* Patch the IF's opening clause: its address becomes the node count of the
 * active CF instruction list minus one. */
5953 pAsm
->fc_stack
[pAsm
->FCSP
].first
->m_Word0
.f
.addr
= pAsm
->pR700Shader
->plstCFInstructions_active
->uNumOfNode
- 1;
/*
 * assemble_ENDIF: close the IF recorded at pAsm->fc_stack[pAsm->FCSP] by
 * patching the pending jump address with the current node count of the
 * active CF instruction list, then release the entry's `mid` array.
 *
 * pAsm - assembler state.
 *
 * Returns a GLboolean.
 *
 * NOTE(review): the listing omits some original lines (the body of the
 * #ifdef USE_CF_FOR_POP_AFTER section, braces/else, returns and any update
 * of pAsm->FCSP) - confirm against the complete file.
 */
5958 GLboolean
assemble_ENDIF(r700_AssemblerBase
*pAsm
)
5960 #ifdef USE_CF_FOR_POP_AFTER
5962 #endif /* USE_CF_FOR_POP_AFTER */
5964 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
/* Without an ELSE (mid == NULL) the IF's own clause is patched; otherwise
 * the ELSE clause stored in mid[0] is patched. */
5966 if(NULL
== pAsm
->fc_stack
[pAsm
->FCSP
].mid
)
5968 /* no else in between */
5969 pAsm
->fc_stack
[pAsm
->FCSP
].first
->m_Word0
.f
.addr
= pAsm
->pR700Shader
->plstCFInstructions_active
->uNumOfNode
;
5973 pAsm
->fc_stack
[pAsm
->FCSP
].mid
[0]->m_Word0
.f
.addr
= pAsm
->pR700Shader
->plstCFInstructions_active
->uNumOfNode
;
5976 if(NULL
!= pAsm
->fc_stack
[pAsm
->FCSP
].mid
)
5978 FREE(pAsm
->fc_stack
[pAsm
->FCSP
].mid
);
/* NOTE(review): the FC_IF pairing check comes only after the entry has
 * already been patched and its mid array freed - confirm this ordering is
 * intended. */
5981 if(pAsm
->fc_stack
[pAsm
->FCSP
].type
!= FC_IF
)
5983 radeon_error("if/endif in shader code are not paired. \n");
5989 decreaseCurrent(pAsm
, FC_PUSH_VPM
);
/*
 * assemble_BGNLOOP: emit the LOOP_START_NO_AL control-flow instruction that
 * opens a loop and record the loop in pAsm->fc_stack.
 *
 * pAsm - assembler state.
 *
 * Returns a GLboolean.
 *
 * NOTE(review): the listing omits some original lines (returns, braces/else,
 * most SETfield() value arguments and any update of pAsm->FCSP) - confirm
 * against the complete file.
 */
5994 GLboolean
assemble_BGNLOOP(r700_AssemblerBase
*pAsm
)
5996 if(GL_FALSE
== add_cf_instruction(pAsm
) )
/* unAsic == 8: fill word1 via SETfield(); the instruction field receives
 * EG_CF_INST_LOOP_START_NO_AL. */
6001 if(8 == pAsm
->unAsic
)
6003 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6005 EG_CF_WORD1__POP_COUNT_shift
, EG_CF_WORD1__POP_COUNT_mask
);
6006 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6007 EG_CF_INST_LOOP_START_NO_AL
,
6008 EG_CF_WORD1__CF_INST_shift
, EG_CF_WORD1__CF_INST_mask
);
6009 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6011 EG_CF_WORD1__CF_CONST_shift
, EG_CF_WORD1__CF_CONST_mask
);
6012 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6014 EG_CF_WORD1__COND_shift
, EG_CF_WORD1__COND_mask
);
6015 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6017 EG_CF_WORD1__EOP_shift
, EG_CF_WORD1__EOP_bit
);
6018 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6020 EG_CF_WORD1__VPM_shift
, EG_CF_WORD1__VPM_bit
);
6021 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6023 EG_CF_WORD1__WQM_shift
, EG_CF_WORD1__WQM_bit
);
6024 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6026 EG_CF_WORD1__BARRIER_shift
, EG_CF_WORD1__BARRIER_bit
);
6027 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6029 EG_CF_WORD1__COUNT_shift
, EG_CF_WORD1__COUNT_mask
);
/* Bitfield form of word1 (presumably the non-8 branch): LOOP_START_NO_AL
 * with pop_count 0 and the barrier bit set. */
6033 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 0;
6034 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
6035 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
6037 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
6038 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
6039 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_LOOP_START_NO_AL
;
6040 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
6042 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Record the loop in the flow-control stack entry at FCSP: empty mid list,
 * and `first` remembers the LOOP_START clause for assemble_ENDLOOP. */
6046 pAsm
->fc_stack
[pAsm
->FCSP
].type
= FC_LOOP
;
6047 pAsm
->fc_stack
[pAsm
->FCSP
].mid
= NULL
;
6048 pAsm
->fc_stack
[pAsm
->FCSP
].unNumMid
= 0;
6049 pAsm
->fc_stack
[pAsm
->FCSP
].midLen
= 0;
6050 pAsm
->fc_stack
[pAsm
->FCSP
].first
= pAsm
->cf_current_cf_clause_ptr
;
6052 checkStackDepth(pAsm
, FC_LOOP
, GL_FALSE
);
/*
 * assemble_BRK: emit a loop break.
 *
 * Under USE_CF_FOR_CONTINUE_BREAK (defined at the top of the file) the
 * visible steps are: emit a PRED_SETNE predicate op under ALU_PUSH_BEFORE,
 * search pAsm->fc_stack downward from FCSP for an FC_LOOP entry, emit a
 * LOOP_BREAK CF instruction and append it to that loop's `mid` array, then
 * emit a POP CF instruction.
 *
 * pAsm - assembler state.
 *
 * Returns a GLboolean.
 *
 * NOTE(review): the listing omits some original lines (returns, braces,
 * loop body/break, the condition guarding radeon_error, most SETfield()
 * value arguments) - confirm against the complete file.
 */
6057 GLboolean
assemble_BRK(r700_AssemblerBase
*pAsm
)
6059 #ifdef USE_CF_FOR_CONTINUE_BREAK
6061 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
6063 assemble_LOGIC_PRED(pAsm
, SQ_OP2_INST_PRED_SETNE
);
/* Walk the flow-control stack from the top down to index 1 looking for the
 * enclosing loop. */
6065 unsigned int unFCSP
;
6066 for(unFCSP
=pAsm
->FCSP
; unFCSP
>0; unFCSP
--)
6068 if(FC_LOOP
== pAsm
->fc_stack
[unFCSP
].type
)
6075 radeon_error("Break is not inside loop/endloop pair.\n");
/* First CF instruction: LOOP_BREAK. */
6079 if(GL_FALSE
== add_cf_instruction(pAsm
) )
6084 if(8 == pAsm
->unAsic
)
6086 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6088 EG_CF_WORD1__POP_COUNT_shift
, EG_CF_WORD1__POP_COUNT_mask
);
6089 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6090 EG_CF_INST_LOOP_BREAK
,
6091 EG_CF_WORD1__CF_INST_shift
, EG_CF_WORD1__CF_INST_mask
);
6092 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6094 EG_CF_WORD1__CF_CONST_shift
, EG_CF_WORD1__CF_CONST_mask
);
6095 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6097 EG_CF_WORD1__COND_shift
, EG_CF_WORD1__COND_mask
);
6098 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6100 EG_CF_WORD1__EOP_shift
, EG_CF_WORD1__EOP_bit
);
6101 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6103 EG_CF_WORD1__VPM_shift
, EG_CF_WORD1__VPM_bit
);
6104 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6106 EG_CF_WORD1__WQM_shift
, EG_CF_WORD1__WQM_bit
);
6107 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6109 EG_CF_WORD1__BARRIER_shift
, EG_CF_WORD1__BARRIER_bit
);
6110 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6112 EG_CF_WORD1__COUNT_shift
, EG_CF_WORD1__COUNT_mask
);
/* Bitfield form of word1 (presumably the non-8 branch): LOOP_BREAK with
 * pop_count 1. */
6116 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
6117 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
6118 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
6120 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
6121 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
6122 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_LOOP_BREAK
;
6124 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
6126 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Grow the loop's mid array by one slot and append the LOOP_BREAK clause so
 * assemble_ENDLOOP can patch its address.
 * NOTE(review): the _mesa_realloc() result is indexed on the next statement
 * with no visible NULL check - confirm whether allocation failure is
 * handled. */
6129 pAsm
->fc_stack
[unFCSP
].mid
= (R700ControlFlowGenericClause
**)_mesa_realloc(
6130 (void *)pAsm
->fc_stack
[unFCSP
].mid
,
6131 sizeof(R700ControlFlowGenericClause
*) * pAsm
->fc_stack
[unFCSP
].unNumMid
,
6132 sizeof(R700ControlFlowGenericClause
*) * (pAsm
->fc_stack
[unFCSP
].unNumMid
+ 1) );
6133 pAsm
->fc_stack
[unFCSP
].mid
[pAsm
->fc_stack
[unFCSP
].unNumMid
] = pAsm
->cf_current_cf_clause_ptr
;
6134 pAsm
->fc_stack
[unFCSP
].unNumMid
++;
/* Second CF instruction: POP. */
6136 if(GL_FALSE
== add_cf_instruction(pAsm
) )
6141 if(8 == pAsm
->unAsic
)
6143 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6145 EG_CF_WORD1__POP_COUNT_shift
, EG_CF_WORD1__POP_COUNT_mask
);
6146 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6148 EG_CF_WORD1__CF_INST_shift
, EG_CF_WORD1__CF_INST_mask
);
6149 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6151 EG_CF_WORD1__CF_CONST_shift
, EG_CF_WORD1__CF_CONST_mask
);
6152 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6154 EG_CF_WORD1__COND_shift
, EG_CF_WORD1__COND_mask
);
6155 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6157 EG_CF_WORD1__EOP_shift
, EG_CF_WORD1__EOP_bit
);
6158 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6160 EG_CF_WORD1__VPM_shift
, EG_CF_WORD1__VPM_bit
);
6161 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6163 EG_CF_WORD1__WQM_shift
, EG_CF_WORD1__WQM_bit
);
6164 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6166 EG_CF_WORD1__BARRIER_shift
, EG_CF_WORD1__BARRIER_bit
);
6167 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6169 EG_CF_WORD1__COUNT_shift
, EG_CF_WORD1__COUNT_mask
);
/* Bitfield form of word1 (presumably the non-8 branch): POP with
 * pop_count 1. */
6173 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
6174 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
6175 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
6177 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
6178 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
6179 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_POP
;
6181 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
6183 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* The POP's address field points at the clause right after it. */
6186 pAsm
->cf_current_cf_clause_ptr
->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
+ 1;
6188 checkStackDepth(pAsm
, FC_PUSH_VPM
, GL_TRUE
);
6190 #endif //USE_CF_FOR_CONTINUE_BREAK
/*
 * assemble_CONT: emit a loop continue.
 *
 * Under USE_CF_FOR_CONTINUE_BREAK (defined at the top of the file) the
 * visible steps mirror assemble_BRK: emit a PRED_SETNE predicate op under
 * ALU_PUSH_BEFORE, search pAsm->fc_stack downward from FCSP for an FC_LOOP
 * entry, emit a LOOP_CONTINUE CF instruction and append it to that loop's
 * `mid` array, then emit a POP CF instruction.
 *
 * pAsm - assembler state.
 *
 * Returns a GLboolean.
 *
 * NOTE(review): the listing omits some original lines (returns, braces,
 * loop body/break, the condition guarding radeon_error, most SETfield()
 * value arguments) - confirm against the complete file.
 */
6194 GLboolean
assemble_CONT(r700_AssemblerBase
*pAsm
)
6196 #ifdef USE_CF_FOR_CONTINUE_BREAK
6197 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
6199 assemble_LOGIC_PRED(pAsm
, SQ_OP2_INST_PRED_SETNE
);
/* Walk the flow-control stack from the top down to index 1 looking for the
 * enclosing loop. */
6201 unsigned int unFCSP
;
6202 for(unFCSP
=pAsm
->FCSP
; unFCSP
>0; unFCSP
--)
6204 if(FC_LOOP
== pAsm
->fc_stack
[unFCSP
].type
)
6211 radeon_error("Continue is not inside loop/endloop pair.\n");
/* First CF instruction: LOOP_CONTINUE. */
6215 if(GL_FALSE
== add_cf_instruction(pAsm
) )
6220 if(8 == pAsm
->unAsic
)
6222 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6224 EG_CF_WORD1__POP_COUNT_shift
, EG_CF_WORD1__POP_COUNT_mask
);
6225 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6226 EG_CF_INST_LOOP_CONTINUE
,
6227 EG_CF_WORD1__CF_INST_shift
, EG_CF_WORD1__CF_INST_mask
);
6228 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6230 EG_CF_WORD1__CF_CONST_shift
, EG_CF_WORD1__CF_CONST_mask
);
6231 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6233 EG_CF_WORD1__COND_shift
, EG_CF_WORD1__COND_mask
);
6234 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6236 EG_CF_WORD1__EOP_shift
, EG_CF_WORD1__EOP_bit
);
6237 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6239 EG_CF_WORD1__VPM_shift
, EG_CF_WORD1__VPM_bit
);
6240 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6242 EG_CF_WORD1__WQM_shift
, EG_CF_WORD1__WQM_bit
);
6243 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6245 EG_CF_WORD1__BARRIER_shift
, EG_CF_WORD1__BARRIER_bit
);
6246 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6248 EG_CF_WORD1__COUNT_shift
, EG_CF_WORD1__COUNT_mask
);
/* Bitfield form of word1 (presumably the non-8 branch): LOOP_CONTINUE with
 * pop_count 1. */
6252 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
6253 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
6254 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
6256 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
6257 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
6258 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_LOOP_CONTINUE
;
6260 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
6262 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Grow the loop's mid array by one slot and append the LOOP_CONTINUE clause
 * so assemble_ENDLOOP can patch its address.
 * NOTE(review): the _mesa_realloc() result is indexed on the next statement
 * with no visible NULL check - confirm whether allocation failure is
 * handled. */
6265 pAsm
->fc_stack
[unFCSP
].mid
= (R700ControlFlowGenericClause
**)_mesa_realloc(
6266 (void *)pAsm
->fc_stack
[unFCSP
].mid
,
6267 sizeof(R700ControlFlowGenericClause
*) * pAsm
->fc_stack
[unFCSP
].unNumMid
,
6268 sizeof(R700ControlFlowGenericClause
*) * (pAsm
->fc_stack
[unFCSP
].unNumMid
+ 1) );
6269 pAsm
->fc_stack
[unFCSP
].mid
[pAsm
->fc_stack
[unFCSP
].unNumMid
] = pAsm
->cf_current_cf_clause_ptr
;
6270 pAsm
->fc_stack
[unFCSP
].unNumMid
++;
/* Second CF instruction: POP. */
6272 if(GL_FALSE
== add_cf_instruction(pAsm
) )
6277 if(8 == pAsm
->unAsic
)
6279 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6281 EG_CF_WORD1__POP_COUNT_shift
, EG_CF_WORD1__POP_COUNT_mask
);
6282 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6284 EG_CF_WORD1__CF_INST_shift
, EG_CF_WORD1__CF_INST_mask
);
6285 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6287 EG_CF_WORD1__CF_CONST_shift
, EG_CF_WORD1__CF_CONST_mask
);
6288 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6290 EG_CF_WORD1__COND_shift
, EG_CF_WORD1__COND_mask
);
6291 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6293 EG_CF_WORD1__EOP_shift
, EG_CF_WORD1__EOP_bit
);
6294 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6296 EG_CF_WORD1__VPM_shift
, EG_CF_WORD1__VPM_bit
);
6297 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6299 EG_CF_WORD1__WQM_shift
, EG_CF_WORD1__WQM_bit
);
6300 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6302 EG_CF_WORD1__BARRIER_shift
, EG_CF_WORD1__BARRIER_bit
);
6303 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6305 EG_CF_WORD1__COUNT_shift
, EG_CF_WORD1__COUNT_mask
);
/* Bitfield form of word1 (presumably the non-8 branch): POP with
 * pop_count 1. */
6309 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
6310 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
6311 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
6313 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
6314 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
6315 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_POP
;
6317 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
6319 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* The POP's address field points at the clause right after it. */
6322 pAsm
->cf_current_cf_clause_ptr
->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
+ 1;
6324 checkStackDepth(pAsm
, FC_PUSH_VPM
, GL_TRUE
);
6326 #endif /* USE_CF_FOR_CONTINUE_BREAK */
/*
 * assemble_ENDLOOP: emit the LOOP_END control-flow instruction for the loop
 * recorded at pAsm->fc_stack[pAsm->FCSP], cross-link it with the loop's
 * opening clause, patch every break/continue clause stored in `mid`, and
 * handle a pending return-inside-loop flag.
 *
 * pAsm - assembler state.
 *
 * Returns a GLboolean.
 *
 * NOTE(review): the listing omits some original lines (local declarations
 * of i / unFCSP / unIF, returns, braces/else, most SETfield() value
 * arguments, the body of the FC_IF branch and any update of pAsm->FCSP) -
 * confirm against the complete file.
 */
6331 GLboolean
assemble_ENDLOOP(r700_AssemblerBase
*pAsm
)
6335 if(GL_FALSE
== add_cf_instruction(pAsm
) )
/* unAsic == 8: fill word1 via SETfield(); the instruction field receives
 * EG_CF_INST_LOOP_END. */
6340 if(8 == pAsm
->unAsic
)
6342 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6344 EG_CF_WORD1__POP_COUNT_shift
, EG_CF_WORD1__POP_COUNT_mask
);
6345 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6346 EG_CF_INST_LOOP_END
,
6347 EG_CF_WORD1__CF_INST_shift
, EG_CF_WORD1__CF_INST_mask
);
6348 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6350 EG_CF_WORD1__CF_CONST_shift
, EG_CF_WORD1__CF_CONST_mask
);
6351 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6353 EG_CF_WORD1__COND_shift
, EG_CF_WORD1__COND_mask
);
6354 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6356 EG_CF_WORD1__EOP_shift
, EG_CF_WORD1__EOP_bit
);
6357 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6359 EG_CF_WORD1__VPM_shift
, EG_CF_WORD1__VPM_bit
);
6360 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6362 EG_CF_WORD1__WQM_shift
, EG_CF_WORD1__WQM_bit
);
6363 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6365 EG_CF_WORD1__BARRIER_shift
, EG_CF_WORD1__BARRIER_bit
);
6366 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6368 EG_CF_WORD1__COUNT_shift
, EG_CF_WORD1__COUNT_mask
);
/* Bitfield form of word1 (presumably the non-8 branch): LOOP_END with
 * pop_count 0 and the barrier bit set. */
6372 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 0;
6373 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
6374 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
6376 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
6377 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
6378 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_LOOP_END
;
6379 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
6381 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Cross-link the pair: LOOP_END points just past the loop's opening clause,
 * and the opening clause points just past LOOP_END. */
6384 pAsm
->cf_current_cf_clause_ptr
->m_Word0
.f
.addr
= pAsm
->fc_stack
[pAsm
->FCSP
].first
->m_uIndex
+ 1;
6385 pAsm
->fc_stack
[pAsm
->FCSP
].first
->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
+ 1;
/* Every break/continue clause collected in mid[] is pointed at the LOOP_END
 * clause itself. */
6387 #ifdef USE_CF_FOR_CONTINUE_BREAK
6388 for(i
=0; i
<pAsm
->fc_stack
[pAsm
->FCSP
].unNumMid
; i
++)
6390 pAsm
->fc_stack
[pAsm
->FCSP
].mid
[i
]->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
;
6392 if(NULL
!= pAsm
->fc_stack
[pAsm
->FCSP
].mid
)
6394 FREE(pAsm
->fc_stack
[pAsm
->FCSP
].mid
);
6398 if(pAsm
->fc_stack
[pAsm
->FCSP
].type
!= FC_LOOP
)
6400 radeon_error("loop/endloop in shader code are not paired. \n");
/* A return was issued inside this loop: look through the outer entries
 * (down to the subroutine's FCSP_BeforeEntry) for an enclosing loop to break
 * out of; once the scan reaches FCSP_BeforeEntry, return on the flag and
 * clear HAS_CURRENT_LOOPRET. */
6406 if((pAsm
->unCFflags
& HAS_CURRENT_LOOPRET
) > 0)
6408 for(unFCSP
=(pAsm
->FCSP
-1); unFCSP
>pAsm
->CALLSTACK
[pAsm
->CALLSP
].FCSP_BeforeEntry
; unFCSP
--)
6410 if(FC_LOOP
== pAsm
->fc_stack
[unFCSP
].type
)
6412 breakLoopOnFlag(pAsm
, unFCSP
);
6415 else if(FC_IF
== pAsm
->fc_stack
[unFCSP
].type
)
6420 if(unFCSP
<= pAsm
->CALLSTACK
[pAsm
->CALLSP
].FCSP_BeforeEntry
)
6422 #ifdef USE_CF_FOR_POP_AFTER
6423 returnOnFlag(pAsm
, unIF
);
6425 returnOnFlag(pAsm
, 0);
6426 #endif /* USE_CF_FOR_POP_AFTER */
6427 pAsm
->unCFflags
&= ~HAS_CURRENT_LOOPRET
;
6433 decreaseCurrent(pAsm
, FC_LOOP
);
/*
 * add_return_inst: request a new CF instruction with add_cf_instruction()
 * and encode it as a RETURN with pop_count 0.
 *
 * pAsm - assembler state; the clause being filled is
 *        pAsm->cf_current_cf_clause_ptr.
 *
 * Returns nothing, so a failing add_cf_instruction() cannot be reported to
 * the caller.
 *
 * NOTE(review): the listing omits some original lines (braces/else, the
 * statement under the GL_FALSE check, SETfield() value arguments) - confirm
 * against the complete file.
 */
6438 void add_return_inst(r700_AssemblerBase
*pAsm
)
6440 if(GL_FALSE
== add_cf_instruction(pAsm
) )
/* unAsic == 8: fill word1 via SETfield() and the EG_CF_WORD1__* constants. */
6445 if(8 == pAsm
->unAsic
)
6447 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6449 EG_CF_WORD1__POP_COUNT_shift
, EG_CF_WORD1__POP_COUNT_mask
);
6450 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6452 EG_CF_WORD1__CF_INST_shift
, EG_CF_WORD1__CF_INST_mask
);
6453 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6455 EG_CF_WORD1__CF_CONST_shift
, EG_CF_WORD1__CF_CONST_mask
);
6456 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6458 EG_CF_WORD1__COND_shift
, EG_CF_WORD1__COND_mask
);
6459 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6461 EG_CF_WORD1__EOP_shift
, EG_CF_WORD1__EOP_bit
);
6462 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6464 EG_CF_WORD1__VPM_shift
, EG_CF_WORD1__VPM_bit
);
6465 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6467 EG_CF_WORD1__WQM_shift
, EG_CF_WORD1__WQM_bit
);
6468 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6470 EG_CF_WORD1__BARRIER_shift
, EG_CF_WORD1__BARRIER_bit
);
6471 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6473 EG_CF_WORD1__COUNT_shift
, EG_CF_WORD1__COUNT_mask
);
/* Bitfield form of word1 (presumably the non-8 branch): RETURN with
 * pop_count 0 (an earlier pop_count = 1 variant is left commented out). */
6477 //pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
6478 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 0;
6479 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
6480 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
6482 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
6483 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
6484 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_RETURN
;
6485 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
6487 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/*
 * assemble_BGNSUB: begin assembling a subroutine. Registers a new entry in
 * pAsm->subs (growing the array by 10 slots when full), points the current
 * CALLSTACK entry at it, makes the subroutine's local CF instruction list
 * the active list, and marks the flow-control stack entry as FC_REP.
 *
 * pAsm       - assembler state.
 * nILindex   - added to uiIL_Shift to form the subroutine's IL offset.
 * uiIL_Shift - see nILindex.
 *
 * Returns a GLboolean.
 *
 * NOTE(review): the listing omits some original lines (returns, braces and
 * any updates of pAsm->CALLSP / pAsm->FCSP) - confirm against the complete
 * file.
 */
6491 GLboolean
assemble_BGNSUB(r700_AssemblerBase
*pAsm
, GLint nILindex
, GLuint uiIL_Shift
)
/* Grow the subs array in steps of 10 entries; the result is checked against
 * NULL before the size is bumped. */
6494 if( (pAsm
->unSubArrayPointer
+ 1) > pAsm
->unSubArraySize
)
6496 pAsm
->subs
= (SUB_OFFSET
*)_mesa_realloc( (void *)pAsm
->subs
,
6497 sizeof(SUB_OFFSET
) * pAsm
->unSubArraySize
,
6498 sizeof(SUB_OFFSET
) * (pAsm
->unSubArraySize
+ 10) );
6499 if(NULL
== pAsm
->subs
)
6503 pAsm
->unSubArraySize
+= 10;
/* Initialise the new sub descriptor: IL offset and an empty local CF list. */
6506 pAsm
->subs
[pAsm
->unSubArrayPointer
].subIL_Offset
= nILindex
+ uiIL_Shift
;
6507 pAsm
->subs
[pAsm
->unSubArrayPointer
].lstCFInstructions_local
.pHead
=NULL
;
6508 pAsm
->subs
[pAsm
->unSubArrayPointer
].lstCFInstructions_local
.pTail
=NULL
;
6509 pAsm
->subs
[pAsm
->unSubArrayPointer
].lstCFInstructions_local
.uNumOfNode
=0;
/* Bind the current call-stack entry to this sub and reset its stack-depth
 * counters. */
6512 pAsm
->CALLSTACK
[pAsm
->CALLSP
].subDescIndex
= pAsm
->unSubArrayPointer
;
6513 pAsm
->CALLSTACK
[pAsm
->CALLSP
].FCSP_BeforeEntry
= pAsm
->FCSP
;
6514 pAsm
->CALLSTACK
[pAsm
->CALLSP
].plstCFInstructions_local
6515 = &(pAsm
->subs
[pAsm
->unSubArrayPointer
].lstCFInstructions_local
);
6516 pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
= 0;
6517 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
= 0;
6518 SetActiveCFlist(pAsm
->pR700Shader
,
6519 pAsm
->CALLSTACK
[pAsm
->CALLSP
].plstCFInstructions_local
);
6521 pAsm
->unSubArrayPointer
++;
6524 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
6527 pAsm
->fc_stack
[pAsm
->FCSP
].type
= FC_REP
;
6529 checkStackDepth(pAsm
, FC_REP
, GL_FALSE
);
/*
 * assemble_ENDSUB: finish assembling a subroutine. Verifies that the
 * flow-control stack entry at FCSP is FC_REP, copies the call-stack entry's
 * maximum stack depth into the sub descriptor, and re-selects the active CF
 * instruction list from the CALLSTACK entry at CALLSP.
 *
 * pAsm - assembler state.
 *
 * Returns a GLboolean.
 *
 * NOTE(review): the listing omits some original lines (returns, braces and
 * any updates of pAsm->CALLSP / pAsm->FCSP between the statements below) -
 * confirm against the complete file.
 */
6534 GLboolean
assemble_ENDSUB(r700_AssemblerBase
*pAsm
)
6536 if(pAsm
->fc_stack
[pAsm
->FCSP
].type
!= FC_REP
)
6538 radeon_error("BGNSUB/ENDSUB in shader code are not paired. \n");
6542 /* copy max to sub structure */
6543 pAsm
->subs
[pAsm
->CALLSTACK
[pAsm
->CALLSP
].subDescIndex
].unStackDepthMax
6544 = pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
;
6546 decreaseCurrent(pAsm
, FC_REP
);
6549 SetActiveCFlist(pAsm
->pR700Shader
,
6550 pAsm
->CALLSTACK
[pAsm
->CALLSP
].plstCFInstructions_local
);
6552 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
/*
 * assemble_RET: emit a subroutine return.
 *
 * When inside a subroutine (CALLSP > 0) the flow-control stack is scanned
 * from FCSP down to the entry's FCSP_BeforeEntry. If an FC_LOOP entry is
 * found, the return-in-loop flag is set with SQ_SEL_1, a break on that flag
 * is emitted for the loop and LOOPRET_FLAGS is ORed into pAsm->unCFflags.
 * add_return_inst() emits the RETURN CF instruction.
 *
 * pAsm - assembler state.
 *
 * Returns a GLboolean.
 *
 * NOTE(review): the listing omits some original lines (local declarations,
 * returns, braces, the FC_IF branch body and the body of the
 * #ifdef USE_CF_FOR_POP_AFTER section) - confirm against the complete file.
 */
6559 GLboolean
assemble_RET(r700_AssemblerBase
*pAsm
)
6563 if(pAsm
->CALLSP
> 0)
6566 for(unFCSP
=pAsm
->FCSP
; unFCSP
>pAsm
->CALLSTACK
[pAsm
->CALLSP
].FCSP_BeforeEntry
; unFCSP
--)
6568 if(FC_LOOP
== pAsm
->fc_stack
[unFCSP
].type
)
6570 setRetInLoopFlag(pAsm
, SQ_SEL_1
);
6571 breakLoopOnFlag(pAsm
, unFCSP
);
6572 pAsm
->unCFflags
|= LOOPRET_FLAGS
;
6576 else if(FC_IF
== pAsm
->fc_stack
[unFCSP
].type
)
6583 #ifdef USE_CF_FOR_POP_AFTER
6588 #endif /* USE_CF_FOR_POP_AFTER */
6590 add_return_inst(pAsm
);
/*
 * assemble_CAL: emit a CALL control-flow instruction, register the call
 * site in pAsm->callers, and make sure the callee has a sub descriptor:
 * if a sub with the same IL offset already exists it is reused, otherwise
 * the callee is assembled through AssembleInstr(). In both cases the
 * callee's unStackDepthMax plus the caller's current depth is folded into
 * the caller's CALLSTACK max.
 *
 * pAsm          - assembler state.
 * uiNumberInsts - passed through to AssembleInstr().
 * pILInst       - passed through to AssembleInstr().
 * pPresubDesc   - stored in the sub descriptor of a newly assembled callee.
 *
 * Returns a GLboolean.
 *
 * NOTE(review): the listing omits some original lines, including two
 * parameter lines (nILindex and uiIL_Shift are used in the body but their
 * declarations are not visible), local declarations, returns, braces/else
 * and SETfield() value arguments - confirm against the complete file.
 */
6595 GLboolean
assemble_CAL(r700_AssemblerBase
*pAsm
,
6598 GLuint uiNumberInsts
,
6599 struct prog_instruction
*pILInst
,
6600 PRESUB_DESC
* pPresubDesc
)
6604 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
6606 if(GL_FALSE
== add_cf_instruction(pAsm
) )
/* unAsic == 8: fill word1 via SETfield() and the EG_CF_WORD1__* constants. */
6611 if(8 == pAsm
->unAsic
)
6613 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6615 EG_CF_WORD1__POP_COUNT_shift
, EG_CF_WORD1__POP_COUNT_mask
);
6616 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6618 EG_CF_WORD1__CF_INST_shift
, EG_CF_WORD1__CF_INST_mask
);
6619 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6621 EG_CF_WORD1__CF_CONST_shift
, EG_CF_WORD1__CF_CONST_mask
);
6622 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6624 EG_CF_WORD1__COND_shift
, EG_CF_WORD1__COND_mask
);
6625 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6627 EG_CF_WORD1__EOP_shift
, EG_CF_WORD1__EOP_bit
);
6628 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6630 EG_CF_WORD1__VPM_shift
, EG_CF_WORD1__VPM_bit
);
6631 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6633 EG_CF_WORD1__WQM_shift
, EG_CF_WORD1__WQM_bit
);
6634 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6636 EG_CF_WORD1__BARRIER_shift
, EG_CF_WORD1__BARRIER_bit
);
6637 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6639 EG_CF_WORD1__COUNT_shift
, EG_CF_WORD1__COUNT_mask
);
/* Bitfield form of word1 (presumably the non-8 branch): CALL with
 * call_count 1 and pop_count 0. */
6643 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.call_count
= 1;
6644 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 0;
6645 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
6646 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
6648 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
6649 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
6650 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_CALL
;
6651 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
6653 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Grow the callers array in steps of 10 entries; the result is checked
 * against NULL before the size is bumped. */
6657 if( (pAsm
->unCallerArrayPointer
+ 1) > pAsm
->unCallerArraySize
)
6659 pAsm
->callers
= (CALLER_POINTER
*)_mesa_realloc( (void *)pAsm
->callers
,
6660 sizeof(CALLER_POINTER
) * pAsm
->unCallerArraySize
,
6661 sizeof(CALLER_POINTER
) * (pAsm
->unCallerArraySize
+ 10) );
6662 if(NULL
== pAsm
->callers
)
6666 pAsm
->unCallerArraySize
+= 10;
/* Record this call site: callee IL offset and the CALL clause itself. */
6669 uiIL_Offset
= nILindex
+ uiIL_Shift
;
6670 pAsm
->callers
[pAsm
->unCallerArrayPointer
].subIL_Offset
= uiIL_Offset
;
6671 pAsm
->callers
[pAsm
->unCallerArrayPointer
].cf_ptr
= pAsm
->cf_current_cf_clause_ptr
;
6673 pAsm
->callers
[pAsm
->unCallerArrayPointer
].finale_cf_ptr
= NULL
;
6674 pAsm
->callers
[pAsm
->unCallerArrayPointer
].prelude_cf_ptr
= NULL
;
6676 pAsm
->unCallerArrayPointer
++;
/* Look for an already assembled sub with the same IL offset. */
6682 for(j
=0; j
<pAsm
->unSubArrayPointer
; j
++)
6684 if(uiIL_Offset
== pAsm
->subs
[j
].subIL_Offset
)
6685 { /* compiled before */
6687 max
= pAsm
->subs
[j
].unStackDepthMax
6688 + pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
;
6689 if(max
> pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
)
6691 pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
= max
;
6694 pAsm
->callers
[pAsm
->unCallerArrayPointer
- 1].subDescIndex
= j
;
/* Callee not assembled yet: it will take the next free sub slot, so that
 * index is recorded before AssembleInstr() is invoked for it. */
6699 pAsm
->callers
[pAsm
->unCallerArrayPointer
- 1].subDescIndex
= pAsm
->unSubArrayPointer
;
6700 unSubID
= pAsm
->unSubArrayPointer
;
6702 bRet
= AssembleInstr(nILindex
, uiIL_Shift
, uiNumberInsts
, pILInst
, pAsm
);
6706 max
= pAsm
->subs
[unSubID
].unStackDepthMax
6707 + pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
;
6708 if(max
> pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
)
6710 pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
= max
;
6713 pAsm
->subs
[unSubID
].pPresubDesc
= pPresubDesc
;
/*
 * setRetInLoopFlag: emit a MOV that writes the x channel of the temporary
 * register pAsm->flag_reg_index.
 *
 * pAsm      - assembler state; the instruction is described through
 *             pAsm->D, pAsm->D2 and pAsm->S[0] and submitted with next_ins().
 * flagValue - used as the swizzle selector for all four channels of the
 *             source operand (assemble_RET passes SQ_SEL_1).
 *
 * Returns a GLboolean.
 *
 * NOTE(review): S[0] is set up twice below, first as a literal source and
 * then as temporary register 0. The declaration of fLiteral is commented
 * out, so the literal variant is presumably disabled by preprocessor lines
 * that this listing omits (it also omits returns and braces) - confirm
 * against the complete file.
 */
6719 GLboolean
setRetInLoopFlag(r700_AssemblerBase
*pAsm
, GLuint flagValue
)
6721 /*GLfloat fLiteral[2] = {0.1, 0.0};*/
/* Destination: flag register, x channel only, no saturation, unpredicated. */
6723 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
6724 pAsm
->D
.dst
.op3
= 0;
6725 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
6726 pAsm
->D
.dst
.reg
= pAsm
->flag_reg_index
;
6727 pAsm
->D
.dst
.writex
= 1;
6728 pAsm
->D
.dst
.writey
= 0;
6729 pAsm
->D
.dst
.writez
= 0;
6730 pAsm
->D
.dst
.writew
= 0;
6731 pAsm
->D2
.dst2
.literal_slots
= 1;
6732 pAsm
->D2
.dst2
.SaturateMode
= SATURATE_OFF
;
6733 pAsm
->D
.dst
.predicated
= 0;
6734 /* in reloc where dislink flag init inst, only one slot alu inst is handled. */
6735 pAsm
->D
.dst
.math
= 1; /* TODO : not math really, but one channel op, more generic alu assembler needed */
6736 pAsm
->D2
.dst2
.index_mode
= SQ_INDEX_LOOP
; /* Check this ! */
/* Source variant 1: literal operand with identity swizzle, submitted through
 * next_ins_literal(). */
6738 pAsm
->S
[0].src
.rtype
= SRC_REC_LITERAL
;
6739 //pAsm->S[0].src.reg = 0;
6740 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
6741 noneg_PVSSRC(&(pAsm
->S
[0].src
));
6742 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_X
;
6743 pAsm
->S
[0].src
.swizzley
= SQ_SEL_Y
;
6744 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_Z
;
6745 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_W
;
6747 if( GL_FALSE
== next_ins_literal(pAsm
, &(fLiteral
[0])) )
/* Source variant 2: temporary register 0 with every channel's swizzle set
 * to flagValue, submitted through next_ins(). */
6752 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
6753 pAsm
->S
[0].src
.reg
= 0;
6754 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
6755 noneg_PVSSRC(&(pAsm
->S
[0].src
));
6756 pAsm
->S
[0].src
.swizzlex
= flagValue
;
6757 pAsm
->S
[0].src
.swizzley
= flagValue
;
6758 pAsm
->S
[0].src
.swizzlez
= flagValue
;
6759 pAsm
->S
[0].src
.swizzlew
= flagValue
;
6761 if( GL_FALSE
== next_ins(pAsm
) )
6770 GLboolean
testFlag(r700_AssemblerBase
*pAsm
)
6772 /*GLfloat fLiteral[2] = {0.1, 0.0};*/
6775 GLuint tmp
= gethelpr(pAsm
);
6776 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
6778 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_PRED_SETE
;
6779 pAsm
->D
.dst
.math
= 1;
6780 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
6781 pAsm
->D
.dst
.reg
= tmp
;
6782 pAsm
->D
.dst
.writex
= 1;
6783 pAsm
->D
.dst
.writey
= 0;
6784 pAsm
->D
.dst
.writez
= 0;
6785 pAsm
->D
.dst
.writew
= 0;
6786 pAsm
->D2
.dst2
.literal_slots
= 1;
6787 pAsm
->D2
.dst2
.SaturateMode
= SATURATE_OFF
;
6788 pAsm
->D
.dst
.predicated
= 1;
6789 pAsm
->D2
.dst2
.index_mode
= SQ_INDEX_LOOP
; /* Check this ! */
6791 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
6792 pAsm
->S
[0].src
.reg
= pAsm
->flag_reg_index
;
6793 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
6794 noneg_PVSSRC(&(pAsm
->S
[0].src
));
6795 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_X
;
6796 pAsm
->S
[0].src
.swizzley
= SQ_SEL_Y
;
6797 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_Z
;
6798 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_W
;
6800 pAsm
->S
[1].src
.rtype
= SRC_REC_LITERAL
;
6801 //pAsm->S[1].src.reg = 0;
6802 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
6803 noneg_PVSSRC(&(pAsm
->S
[1].src
));
6804 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_X
;
6805 pAsm
->S
[1].src
.swizzley
= SQ_SEL_Y
;
6806 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_Z
;
6807 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_W
;
6809 if( GL_FALSE
== next_ins_literal(pAsm
, &(fLiteral
[0])) )
6814 pAsm
->S
[1].src
.rtype
= DST_REG_TEMPORARY
;
6815 pAsm
->S
[1].src
.reg
= 0;
6816 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
6817 noneg_PVSSRC(&(pAsm
->S
[1].src
));
6818 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_1
;
6819 pAsm
->S
[1].src
.swizzley
= SQ_SEL_1
;
6820 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_1
;
6821 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_1
;
6823 if( GL_FALSE
== next_ins(pAsm
) )
6829 checkStackDepth(pAsm
, FC_PUSH_VPM
, GL_TRUE
);
6834 GLboolean
returnOnFlag(r700_AssemblerBase
*pAsm
, GLuint unIF
)
6837 jumpToOffest(pAsm
, 1, 4);
6838 setRetInLoopFlag(pAsm
, SQ_SEL_0
);
6839 pops(pAsm
, unIF
+ 1);
6840 add_return_inst(pAsm
);
6845 GLboolean
breakLoopOnFlag(r700_AssemblerBase
*pAsm
, GLuint unFCSP
)
6850 if(GL_FALSE
== add_cf_instruction(pAsm
) )
6855 if(8 == pAsm
->unAsic
)
6857 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6859 EG_CF_WORD1__POP_COUNT_shift
, EG_CF_WORD1__POP_COUNT_mask
);
6860 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6861 EG_CF_INST_LOOP_BREAK
,
6862 EG_CF_WORD1__CF_INST_shift
, EG_CF_WORD1__CF_INST_mask
);
6863 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6865 EG_CF_WORD1__CF_CONST_shift
, EG_CF_WORD1__CF_CONST_mask
);
6866 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6868 EG_CF_WORD1__COND_shift
, EG_CF_WORD1__COND_mask
);
6869 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6871 EG_CF_WORD1__EOP_shift
, EG_CF_WORD1__EOP_bit
);
6872 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6874 EG_CF_WORD1__VPM_shift
, EG_CF_WORD1__VPM_bit
);
6875 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6877 EG_CF_WORD1__WQM_shift
, EG_CF_WORD1__WQM_bit
);
6878 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6880 EG_CF_WORD1__BARRIER_shift
, EG_CF_WORD1__BARRIER_bit
);
6881 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6883 EG_CF_WORD1__COUNT_shift
, EG_CF_WORD1__COUNT_mask
);
6887 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
6888 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
6889 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
6891 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
6892 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
6893 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_LOOP_BREAK
;
6894 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
6896 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
6899 pAsm
->fc_stack
[unFCSP
].mid
= (R700ControlFlowGenericClause
**)_mesa_realloc(
6900 (void *)pAsm
->fc_stack
[unFCSP
].mid
,
6901 sizeof(R700ControlFlowGenericClause
*) * pAsm
->fc_stack
[unFCSP
].unNumMid
,
6902 sizeof(R700ControlFlowGenericClause
*) * (pAsm
->fc_stack
[unFCSP
].unNumMid
+ 1) );
6903 pAsm
->fc_stack
[unFCSP
].mid
[pAsm
->fc_stack
[unFCSP
].unNumMid
] = pAsm
->cf_current_cf_clause_ptr
;
6904 pAsm
->fc_stack
[unFCSP
].unNumMid
++;
6911 GLboolean
AssembleInstr(GLuint uiFirstInst
,
6913 GLuint uiNumberInsts
,
6914 struct prog_instruction
*pILInst
,
6915 r700_AssemblerBase
*pR700AsmCode
)
6919 pR700AsmCode
->pILInst
= pILInst
;
6920 for(i
=uiFirstInst
; i
<uiNumberInsts
; i
++)
6922 pR700AsmCode
->uiCurInst
= i
;
6924 #ifndef USE_CF_FOR_CONTINUE_BREAK
6925 if(OPCODE_BRK
== pILInst
[i
+1].Opcode
)
6927 switch(pILInst
[i
].Opcode
)
6930 pILInst
[i
].Opcode
= OPCODE_SGT
;
6933 pILInst
[i
].Opcode
= OPCODE_SGE
;
6936 pILInst
[i
].Opcode
= OPCODE_SLT
;
6939 pILInst
[i
].Opcode
= OPCODE_SLE
;
6942 pILInst
[i
].Opcode
= OPCODE_SNE
;
6945 pILInst
[i
].Opcode
= OPCODE_SEQ
;
6952 if(pILInst
[i
].CondUpdate
== 1)
6954 /* remember dest register used for cond evaluation */
6955 /* XXX also handle PROGRAM_OUTPUT registers here? */
6956 pR700AsmCode
->last_cond_register
= pILInst
[i
].DstReg
.Index
;
6959 switch (pILInst
[i
].Opcode
)
6962 if ( GL_FALSE
== assemble_ABS(pR700AsmCode
) )
6967 if ( GL_FALSE
== assemble_ADD(pR700AsmCode
) )
6972 if ( GL_FALSE
== assemble_ARL(pR700AsmCode
) )
6976 radeon_error("Not yet implemented instruction OPCODE_ARR \n");
6977 //if ( GL_FALSE == assemble_BAD("ARR") )
6982 if ( GL_FALSE
== assemble_CMP(pR700AsmCode
) )
6986 if(8 == pR700AsmCode
->unAsic
)
6988 if ( GL_FALSE
== assemble_TRIG(pR700AsmCode
, EG_OP2_INST_COS
) )
6993 if ( GL_FALSE
== assemble_TRIG(pR700AsmCode
, SQ_OP2_INST_COS
) )
7002 if ( GL_FALSE
== assemble_DOT(pR700AsmCode
) )
7007 if ( GL_FALSE
== assemble_DST(pR700AsmCode
) )
7012 if ( GL_FALSE
== assemble_EX2(pR700AsmCode
) )
7016 if ( GL_FALSE
== assemble_EXP(pR700AsmCode
) )
7021 if ( GL_FALSE
== assemble_FLR(pR700AsmCode
) )
7024 //case OP_FLR_INT: ;
7026 // if ( GL_FALSE == assemble_FLR_INT() )
7031 if ( GL_FALSE
== assemble_FRC(pR700AsmCode
) )
7037 if ( GL_FALSE
== assemble_KIL(pR700AsmCode
, SQ_OP2_INST_KILLGT
) )
7041 if ( GL_FALSE
== assemble_LG2(pR700AsmCode
) )
7045 if ( GL_FALSE
== assemble_LIT(pR700AsmCode
) )
7049 if ( GL_FALSE
== assemble_LRP(pR700AsmCode
) )
7053 if ( GL_FALSE
== assemble_LOG(pR700AsmCode
) )
7058 if ( GL_FALSE
== assemble_MAD(pR700AsmCode
) )
7062 if ( GL_FALSE
== assemble_MAX(pR700AsmCode
) )
7066 if ( GL_FALSE
== assemble_MIN(pR700AsmCode
) )
7071 if ( GL_FALSE
== assemble_MOV(pR700AsmCode
) )
7075 if ( GL_FALSE
== assemble_MUL(pR700AsmCode
) )
7081 callPreSub(pR700AsmCode
,
7084 pILInst
->DstReg
.Index
+ pR700AsmCode
->starting_temp_register_number
,
7086 radeon_error("noise1: not yet supported shader instruction\n");
7090 radeon_error("noise2: not yet supported shader instruction\n");
7093 radeon_error("noise3: not yet supported shader instruction\n");
7096 radeon_error("noise4: not yet supported shader instruction\n");
7100 if ( GL_FALSE
== assemble_POW(pR700AsmCode
) )
7104 if ( GL_FALSE
== assemble_RCP(pR700AsmCode
) )
7108 if ( GL_FALSE
== assemble_RSQ(pR700AsmCode
) )
7112 if(8 == pR700AsmCode
->unAsic
)
7114 if ( GL_FALSE
== assemble_TRIG(pR700AsmCode
, EG_OP2_INST_SIN
) )
7119 if ( GL_FALSE
== assemble_TRIG(pR700AsmCode
, SQ_OP2_INST_SIN
) )
7124 if ( GL_FALSE
== assemble_SCS(pR700AsmCode
) )
7129 if ( GL_FALSE
== assemble_LOGIC(pR700AsmCode
, SQ_OP2_INST_SETE
) )
7136 if ( GL_FALSE
== assemble_LOGIC(pR700AsmCode
, SQ_OP2_INST_SETGT
) )
7143 if ( GL_FALSE
== assemble_SGE(pR700AsmCode
) )
7149 /* NO LT, LE, TODO : use GE => LE, GT => LT : reverse 2 src order would be simpliest. Or use SQ_CF_COND_FALSE for SQ_CF_COND_ACTIVE.*/
7152 struct prog_src_register SrcRegSave
[2];
7153 SrcRegSave
[0] = pILInst
[i
].SrcReg
[0];
7154 SrcRegSave
[1] = pILInst
[i
].SrcReg
[1];
7155 pILInst
[i
].SrcReg
[0] = SrcRegSave
[1];
7156 pILInst
[i
].SrcReg
[1] = SrcRegSave
[0];
7157 if ( GL_FALSE
== assemble_LOGIC(pR700AsmCode
, SQ_OP2_INST_SETGT
) )
7159 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
7160 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
7163 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
7164 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
7170 struct prog_src_register SrcRegSave
[2];
7171 SrcRegSave
[0] = pILInst
[i
].SrcReg
[0];
7172 SrcRegSave
[1] = pILInst
[i
].SrcReg
[1];
7173 pILInst
[i
].SrcReg
[0] = SrcRegSave
[1];
7174 pILInst
[i
].SrcReg
[1] = SrcRegSave
[0];
7175 if ( GL_FALSE
== assemble_LOGIC(pR700AsmCode
, SQ_OP2_INST_SETGE
) )
7177 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
7178 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
7181 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
7182 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
7187 if ( GL_FALSE
== assemble_LOGIC(pR700AsmCode
, SQ_OP2_INST_SETNE
) )
7194 // if ( GL_FALSE == assemble_STP(pR700AsmCode) )
7199 if ( GL_FALSE
== assemble_SSG(pR700AsmCode
) )
7206 if ( GL_FALSE
== assemble_MOV(pR700AsmCode
) )
7212 if( (i
+1)<uiNumberInsts
)
7214 if(OPCODE_END
!= pILInst
[i
+1].Opcode
)
7216 if( GL_TRUE
== IsTex(pILInst
[i
+1].Opcode
) )
7218 pR700AsmCode
->pInstDeps
[i
+1].nDstDep
= i
+1; //=1?
7230 if ( GL_FALSE
== assemble_TEX(pR700AsmCode
) )
7235 if ( GL_FALSE
== assemble_math_function(pR700AsmCode
, SQ_OP2_INST_TRUNC
) )
7240 if ( GL_FALSE
== assemble_XPD(pR700AsmCode
) )
7246 GLboolean bHasElse
= GL_FALSE
;
7248 if(pILInst
[pILInst
[i
].BranchTarget
].Opcode
== OPCODE_ELSE
)
7253 if ( GL_FALSE
== assemble_IF(pR700AsmCode
, bHasElse
) )
7261 if ( GL_FALSE
== assemble_ELSE(pR700AsmCode
) )
7266 if ( GL_FALSE
== assemble_ENDIF(pR700AsmCode
) )
7270 case OPCODE_BGNLOOP
:
7271 if( GL_FALSE
== assemble_BGNLOOP(pR700AsmCode
) )
7278 if( GL_FALSE
== assemble_BRK(pR700AsmCode
) )
7285 if( GL_FALSE
== assemble_CONT(pR700AsmCode
) )
7291 case OPCODE_ENDLOOP
:
7292 if( GL_FALSE
== assemble_ENDLOOP(pR700AsmCode
) )
7299 if( GL_FALSE
== assemble_BGNSUB(pR700AsmCode
, i
, uiIL_Shift
) )
7306 if( GL_FALSE
== assemble_RET(pR700AsmCode
) )
7313 if( GL_FALSE
== assemble_CAL(pR700AsmCode
,
7314 pILInst
[i
].BranchTarget
,
7324 //case OPCODE_EXPORT:
7325 // if ( GL_FALSE == assemble_EXPORT() )
7330 return assemble_ENDSUB(pR700AsmCode
);
7333 //pR700AsmCode->uiCurInst = i;
7334 //This is to remaind that if in later exoort there is depth/stencil
7335 //export, we need a mov to re-arrange DST channel, where using a
7336 //psuedo inst, we will use this end inst to do it.
7340 radeon_error("r600: unknown instruction %d\n", pILInst
[i
].Opcode
);
7348 GLboolean
InitShaderProgram(r700_AssemblerBase
* pAsm
)
7350 #ifndef GENERATE_SHADER_FOR_2D
7351 setRetInLoopFlag(pAsm
, SQ_SEL_0
);
7354 if((SPT_FP
== pAsm
->currentShaderType
) && (8 == pAsm
->unAsic
))
7356 EG_add_ps_interp(pAsm
);
7359 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
7363 GLboolean
RelocProgram(r700_AssemblerBase
* pAsm
, struct gl_program
* pILProg
)
7367 TypedShaderList
* plstCFmain
;
7368 TypedShaderList
* plstCFsub
;
7370 R700ShaderInstruction
* pInst
;
7371 R700ControlFlowGenericClause
* pCFInst
;
7373 R700ControlFlowALUClause
* pCF_ALU
;
7374 R700ALUInstruction
* pALU
;
7375 GLuint unConstOffset
= 0;
7377 GLuint unMinRegIndex
;
7379 plstCFmain
= pAsm
->CALLSTACK
[0].plstCFInstructions_local
;
7381 #ifndef GENERATE_SHADER_FOR_2D
7382 /* remove flags init if they are not used */
7383 if((pAsm
->unCFflags
& HAS_LOOPRET
) == 0)
7385 R700ControlFlowALUClause
* pCF_ALU
;
7386 pInst
= plstCFmain
->pHead
;
7389 if(SIT_CF_ALU
== pInst
->m_ShaderInstType
)
7391 pCF_ALU
= (R700ControlFlowALUClause
*)pInst
;
7392 if(0 == pCF_ALU
->m_Word1
.f
.count
)
7394 pCF_ALU
->m_Word1
.f
.cf_inst
= SQ_CF_INST_NOP
;
7398 R700ALUInstruction
* pALU
= pCF_ALU
->m_pLinkedALUInstruction
;
7400 pALU
->m_pLinkedALUClause
= NULL
;
7401 pALU
= (R700ALUInstruction
*)(pALU
->pNextInst
);
7402 pALU
->m_pLinkedALUClause
= pCF_ALU
;
7403 pCF_ALU
->m_pLinkedALUInstruction
= pALU
;
7405 pCF_ALU
->m_Word1
.f
.count
--;
7409 pInst
= pInst
->pNextInst
;
7412 #endif /* GENERATE_SHADER_FOR_2D */
7414 if(pAsm
->CALLSTACK
[0].max
> 0)
7416 pAsm
->pR700Shader
->uStackSize
= ((pAsm
->CALLSTACK
[0].max
+ 3)>>2) + 2;
7419 if(0 == pAsm
->unSubArrayPointer
)
7424 unCFoffset
= plstCFmain
->uNumOfNode
;
7426 if(NULL
!= pILProg
->Parameters
)
7428 unConstOffset
= pILProg
->Parameters
->NumParameters
;
7432 for(i
=0; i
<pAsm
->unSubArrayPointer
; i
++)
7434 pAsm
->subs
[i
].unCFoffset
= unCFoffset
;
7435 plstCFsub
= &(pAsm
->subs
[i
].lstCFInstructions_local
);
7437 pInst
= plstCFsub
->pHead
;
7439 /* reloc instructions */
7442 if(SIT_CF_GENERIC
== pInst
->m_ShaderInstType
)
7444 pCFInst
= (R700ControlFlowGenericClause
*)pInst
;
7446 switch (pCFInst
->m_Word1
.f
.cf_inst
)
7448 case SQ_CF_INST_POP
:
7449 case SQ_CF_INST_JUMP
:
7450 case SQ_CF_INST_ELSE
:
7451 case SQ_CF_INST_LOOP_END
:
7452 case SQ_CF_INST_LOOP_START
:
7453 case SQ_CF_INST_LOOP_START_NO_AL
:
7454 case SQ_CF_INST_LOOP_CONTINUE
:
7455 case SQ_CF_INST_LOOP_BREAK
:
7456 pCFInst
->m_Word0
.f
.addr
+= unCFoffset
;
7463 pInst
->m_uIndex
+= unCFoffset
;
7465 pInst
= pInst
->pNextInst
;
7468 if(NULL
!= pAsm
->subs
[i
].pPresubDesc
)
7472 unMinRegIndex
= pAsm
->subs
[i
].pPresubDesc
->pCompiledSub
->MinRegIndex
;
7473 unRegOffset
= pAsm
->subs
[i
].pPresubDesc
->maxStartReg
;
7474 unConstOffset
+= pAsm
->subs
[i
].pPresubDesc
->unConstantsStart
;
7476 pInst
= plstCFsub
->pHead
;
7479 if(SIT_CF_ALU
== pInst
->m_ShaderInstType
)
7481 pCF_ALU
= (R700ControlFlowALUClause
*)pInst
;
7483 pALU
= pCF_ALU
->m_pLinkedALUInstruction
;
7484 for(int j
=0; j
<=pCF_ALU
->m_Word1
.f
.count
; j
++)
7486 pALU
->m_Word1
.f
.dst_gpr
= pALU
->m_Word1
.f
.dst_gpr
+ unRegOffset
- unMinRegIndex
;
7488 if(pALU
->m_Word0
.f
.src0_sel
< SQ_ALU_SRC_GPR_SIZE
)
7490 pALU
->m_Word0
.f
.src0_sel
= pALU
->m_Word0
.f
.src0_sel
+ unRegOffset
- unMinRegIndex
;
7492 else if(pALU
->m_Word0
.f
.src0_sel
>= SQ_ALU_SRC_CFILE_BASE
)
7494 pALU
->m_Word0
.f
.src0_sel
+= unConstOffset
;
7497 if( ((pALU
->m_Word1
.val
>> SQ_ALU_WORD1_OP3_ALU_INST_SHIFT
) & 0x0000001F)
7498 >= SQ_OP3_INST_MUL_LIT
)
7499 { /* op3 : 3 srcs */
7500 if(pALU
->m_Word1_OP3
.f
.src2_sel
< SQ_ALU_SRC_GPR_SIZE
)
7502 pALU
->m_Word1_OP3
.f
.src2_sel
= pALU
->m_Word1_OP3
.f
.src2_sel
+ unRegOffset
- unMinRegIndex
;
7504 else if(pALU
->m_Word1_OP3
.f
.src2_sel
>= SQ_ALU_SRC_CFILE_BASE
)
7506 pALU
->m_Word1_OP3
.f
.src2_sel
+= unConstOffset
;
7508 if(pALU
->m_Word0
.f
.src1_sel
< SQ_ALU_SRC_GPR_SIZE
)
7510 pALU
->m_Word0
.f
.src1_sel
= pALU
->m_Word0
.f
.src1_sel
+ unRegOffset
- unMinRegIndex
;
7512 else if(pALU
->m_Word0
.f
.src1_sel
>= SQ_ALU_SRC_CFILE_BASE
)
7514 pALU
->m_Word0
.f
.src1_sel
+= unConstOffset
;
7519 if(8 == pAsm
->unAsic
)
7521 uNumSrc
= EG_GetNumOperands(pALU
->m_Word1_OP2
.f
.alu_inst
, 0);
7527 uNumSrc
= r700GetNumOperands(pALU
->m_Word1_OP2
.f6
.alu_inst
, 0);
7531 uNumSrc
= r700GetNumOperands(pALU
->m_Word1_OP2
.f
.alu_inst
, 0);
7536 if(pALU
->m_Word0
.f
.src1_sel
< SQ_ALU_SRC_GPR_SIZE
)
7538 pALU
->m_Word0
.f
.src1_sel
= pALU
->m_Word0
.f
.src1_sel
+ unRegOffset
- unMinRegIndex
;
7540 else if(pALU
->m_Word0
.f
.src1_sel
>= SQ_ALU_SRC_CFILE_BASE
)
7542 pALU
->m_Word0
.f
.src1_sel
+= unConstOffset
;
7546 pALU
= (R700ALUInstruction
*)(pALU
->pNextInst
);
7549 pInst
= pInst
->pNextInst
;
7553 /* Put sub into main */
7554 plstCFmain
->pTail
->pNextInst
= plstCFsub
->pHead
;
7555 plstCFmain
->pTail
= plstCFsub
->pTail
;
7556 plstCFmain
->uNumOfNode
+= plstCFsub
->uNumOfNode
;
7558 unCFoffset
+= plstCFsub
->uNumOfNode
;
7562 for(i
=0; i
<pAsm
->unCallerArrayPointer
; i
++)
7564 pAsm
->callers
[i
].cf_ptr
->m_Word0
.f
.addr
7565 = pAsm
->subs
[pAsm
->callers
[i
].subDescIndex
].unCFoffset
;
7567 if(NULL
!= pAsm
->subs
[pAsm
->callers
[i
].subDescIndex
].pPresubDesc
)
7569 unMinRegIndex
= pAsm
->subs
[pAsm
->callers
[i
].subDescIndex
].pPresubDesc
->pCompiledSub
->MinRegIndex
;
7570 unRegOffset
= pAsm
->subs
[pAsm
->callers
[i
].subDescIndex
].pPresubDesc
->maxStartReg
;
7572 if(NULL
!= pAsm
->callers
[i
].prelude_cf_ptr
)
7574 pCF_ALU
= (R700ControlFlowALUClause
* )(pAsm
->callers
[i
].prelude_cf_ptr
);
7575 pALU
= pCF_ALU
->m_pLinkedALUInstruction
;
7576 for(int j
=0; j
<=pCF_ALU
->m_Word1
.f
.count
; j
++)
7578 pALU
->m_Word1
.f
.dst_gpr
= pALU
->m_Word1
.f
.dst_gpr
+ unRegOffset
- unMinRegIndex
;
7579 pALU
= (R700ALUInstruction
*)(pALU
->pNextInst
);
7582 if(NULL
!= pAsm
->callers
[i
].finale_cf_ptr
)
7584 pCF_ALU
= (R700ControlFlowALUClause
* )(pAsm
->callers
[i
].finale_cf_ptr
);
7585 pALU
= pCF_ALU
->m_pLinkedALUInstruction
;
7586 for(int j
=0; j
<=pCF_ALU
->m_Word1
.f
.count
; j
++)
7588 pALU
->m_Word0
.f
.src0_sel
= pALU
->m_Word0
.f
.src0_sel
+ unRegOffset
- unMinRegIndex
;
7589 pALU
= (R700ALUInstruction
*)(pALU
->pNextInst
);
7598 GLboolean
callPreSub(r700_AssemblerBase
* pAsm
,
7599 LOADABLE_SCRIPT_SIGNITURE scriptSigniture
,
7600 COMPILED_SUB
* pCompiledSub
,
7602 GLshort uNumValidSrc
)
7604 /* save assemble context */
7605 GLuint starting_temp_register_number_save
;
7606 GLuint number_used_registers_save
;
7607 GLuint uFirstHelpReg_save
;
7608 GLuint uHelpReg_save
;
7609 GLuint uiCurInst_save
;
7610 struct prog_instruction
*pILInst_save
;
7611 PRESUB_DESC
* pPresubDesc
;
7615 R700ControlFlowGenericClause
* prelude_cf_ptr
= NULL
;
7617 /* copy srcs to presub inputs */
7618 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
7619 for(i
=0; i
<uNumValidSrc
; i
++)
7621 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
7622 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
7623 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
7624 pAsm
->D
.dst
.reg
= pCompiledSub
->srcRegIndex
[i
];
7625 pAsm
->D
.dst
.writex
= 1;
7626 pAsm
->D
.dst
.writey
= 1;
7627 pAsm
->D
.dst
.writez
= 1;
7628 pAsm
->D
.dst
.writew
= 1;
7630 if( GL_FALSE
== assemble_src(pAsm
, i
, 0) )
7637 if(uNumValidSrc
> 0)
7639 prelude_cf_ptr
= (R700ControlFlowGenericClause
*) pAsm
->cf_current_alu_clause_ptr
;
7640 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
7643 /* browse thro existing presubs. */
7644 for(i
=0; i
<pAsm
->unNumPresub
; i
++)
7646 if(pAsm
->presubs
[i
].sptSigniture
== scriptSigniture
)
7652 if(i
== pAsm
->unNumPresub
)
7653 { /* not loaded yet */
7654 /* save assemble context */
7655 number_used_registers_save
= pAsm
->number_used_registers
;
7656 uFirstHelpReg_save
= pAsm
->uFirstHelpReg
;
7657 uHelpReg_save
= pAsm
->uHelpReg
;
7658 starting_temp_register_number_save
= pAsm
->starting_temp_register_number
;
7659 pILInst_save
= pAsm
->pILInst
;
7660 uiCurInst_save
= pAsm
->uiCurInst
;
7662 /* alloc in presub */
7663 if( (pAsm
->unNumPresub
+ 1) > pAsm
->unPresubArraySize
)
7665 pAsm
->presubs
= (PRESUB_DESC
*)_mesa_realloc( (void *)pAsm
->presubs
,
7666 sizeof(PRESUB_DESC
) * pAsm
->unPresubArraySize
,
7667 sizeof(PRESUB_DESC
) * (pAsm
->unPresubArraySize
+ 4) );
7668 if(NULL
== pAsm
->presubs
)
7670 radeon_error("No memeory to allocate built in shader function description structures. \n");
7673 pAsm
->unPresubArraySize
+= 4;
7676 pPresubDesc
= &(pAsm
->presubs
[i
]);
7677 pPresubDesc
->sptSigniture
= scriptSigniture
;
7679 /* constants offsets need to be final resolved at reloc. */
7680 if(0 == pAsm
->unNumPresub
)
7682 pPresubDesc
->unConstantsStart
= 0;
7686 pPresubDesc
->unConstantsStart
= pAsm
->presubs
[i
-1].unConstantsStart
7687 + pAsm
->presubs
[i
-1].pCompiledSub
->NumParameters
;
7690 pPresubDesc
->pCompiledSub
= pCompiledSub
;
7692 pPresubDesc
->subIL_Shift
= pAsm
->unCurNumILInsts
;
7693 pPresubDesc
->maxStartReg
= uFirstHelpReg_save
;
7694 pAsm
->unCurNumILInsts
+= pCompiledSub
->NumInstructions
;
7696 pAsm
->unNumPresub
++;
7698 /* setup new assemble context */
7699 pAsm
->starting_temp_register_number
= 0;
7700 pAsm
->number_used_registers
= pCompiledSub
->NumTemporaries
;
7701 pAsm
->uFirstHelpReg
= pAsm
->number_used_registers
;
7702 pAsm
->uHelpReg
= pAsm
->uFirstHelpReg
;
7704 bRet
= assemble_CAL(pAsm
,
7706 pPresubDesc
->subIL_Shift
,
7707 pCompiledSub
->NumInstructions
,
7708 pCompiledSub
->Instructions
,
7712 pPresubDesc
->number_used_registers
= pAsm
->number_used_registers
;
7714 /* restore assemble context */
7715 pAsm
->number_used_registers
= number_used_registers_save
;
7716 pAsm
->uFirstHelpReg
= uFirstHelpReg_save
;
7717 pAsm
->uHelpReg
= uHelpReg_save
;
7718 pAsm
->starting_temp_register_number
= starting_temp_register_number_save
;
7719 pAsm
->pILInst
= pILInst_save
;
7720 pAsm
->uiCurInst
= uiCurInst_save
;
7724 pPresubDesc
= &(pAsm
->presubs
[i
]);
7726 bRet
= assemble_CAL(pAsm
,
7728 pPresubDesc
->subIL_Shift
,
7729 pCompiledSub
->NumInstructions
,
7730 pCompiledSub
->Instructions
,
7734 if(GL_FALSE
== bRet
)
7736 radeon_error("Shader presub assemble failed. \n");
7740 /* copy presub output to real dst */
7741 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
7742 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
7744 if( GL_FALSE
== assemble_dst(pAsm
) )
7749 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
7750 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
7751 pAsm
->S
[0].src
.reg
= pCompiledSub
->dstRegIndex
;
7752 pAsm
->S
[0].src
.swizzlex
= pCompiledSub
->outputSwizzleX
;
7753 pAsm
->S
[0].src
.swizzley
= pCompiledSub
->outputSwizzleY
;
7754 pAsm
->S
[0].src
.swizzlez
= pCompiledSub
->outputSwizzleZ
;
7755 pAsm
->S
[0].src
.swizzlew
= pCompiledSub
->outputSwizzleW
;
7759 pAsm
->callers
[pAsm
->unCallerArrayPointer
- 1].finale_cf_ptr
= (R700ControlFlowGenericClause
*) pAsm
->cf_current_alu_clause_ptr
;
7760 pAsm
->callers
[pAsm
->unCallerArrayPointer
- 1].prelude_cf_ptr
= prelude_cf_ptr
;
7761 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
7764 if( (pPresubDesc
->number_used_registers
+ pAsm
->uFirstHelpReg
) > pAsm
->number_used_registers
)
7766 pAsm
->number_used_registers
= pPresubDesc
->number_used_registers
+ pAsm
->uFirstHelpReg
;
7768 if(pAsm
->uFirstHelpReg
> pPresubDesc
->maxStartReg
)
7770 pPresubDesc
->maxStartReg
= pAsm
->uFirstHelpReg
;
7776 GLboolean
Process_Export(r700_AssemblerBase
* pAsm
,
7778 GLuint export_starting_index
,
7779 GLuint export_count
,
7780 GLuint starting_register_number
,
7781 GLboolean is_depth_export
)
7783 check_current_clause(pAsm
, CF_EMPTY_CLAUSE
);
7784 check_current_clause(pAsm
, CF_EXPORT_CLAUSE
); //alloc the cf_current_export_clause_ptr
7786 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.type
= type
;
7790 case SQ_EXPORT_PIXEL
:
7791 if(GL_TRUE
== is_depth_export
)
7793 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= SQ_CF_PIXEL_Z
;
7797 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= SQ_CF_PIXEL_MRT0
+ export_starting_index
;
7802 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= SQ_CF_POS_0
+ export_starting_index
;
7805 case SQ_EXPORT_PARAM
:
7806 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= 0x0 + export_starting_index
;
7810 radeon_error("Unknown export type: %d\n", type
);
7815 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.rw_gpr
= starting_register_number
;
7817 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.rw_rel
= SQ_ABSOLUTE
;
7818 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.index_gpr
= 0x0;
7819 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.elem_size
= 0x3;
7821 if(8 == pAsm
->unAsic
)
7823 SETfield(pAsm
->cf_current_export_clause_ptr
->m_Word1
.val
,
7825 EG_CF_ALLOC_EXPORT_WORD1__BURST_COUNT_shift
,
7826 EG_CF_ALLOC_EXPORT_WORD1__BURST_COUNT_mask
);
7827 SETfield(pAsm
->cf_current_export_clause_ptr
->m_Word1
.val
,
7829 EG_CF_ALLOC_EXPORT_WORD1__EOP_shift
,
7830 EG_CF_ALLOC_EXPORT_WORD1__EOP_bit
);
7831 SETfield(pAsm
->cf_current_export_clause_ptr
->m_Word1
.val
,
7833 EG_CF_ALLOC_EXPORT_WORD1__VPM_shift
,
7834 EG_CF_ALLOC_EXPORT_WORD1__VPM_bit
);
7835 SETfield(pAsm
->cf_current_export_clause_ptr
->m_Word1
.val
,
7837 EG_CF_WORD1__CF_INST_shift
,
7838 EG_CF_WORD1__CF_INST_mask
);
7839 SETfield(pAsm
->cf_current_export_clause_ptr
->m_Word1
.val
,
7841 EG_CF_ALLOC_EXPORT_WORD1__MARK_shift
,
7842 EG_CF_ALLOC_EXPORT_WORD1__MARK_bit
);
7843 SETfield(pAsm
->cf_current_export_clause_ptr
->m_Word1
.val
,
7845 EG_CF_ALLOC_EXPORT_WORD1__BARRIER_shift
,
7846 EG_CF_ALLOC_EXPORT_WORD1__BARRIER_bit
);
7850 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.burst_count
= (export_count
- 1);
7851 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
7852 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
7853 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT
; // _DONE
7854 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
7855 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
7858 if (export_count
== 1)
7860 assert(starting_register_number
>= pAsm
->starting_export_register_number
);
7862 /* exports Z as a float into Red channel */
7863 if (GL_TRUE
== is_depth_export
)
7865 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_Z
;
7866 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_MASK
;
7867 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_MASK
;
7868 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_MASK
;
7872 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_X
;
7873 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_Y
;
7874 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_Z
;
7875 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_W
;
7880 // This should only be used if all components for all registers have been written
7881 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_X
;
7882 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_Y
;
7883 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_Z
;
7884 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_W
;
7887 pAsm
->cf_last_export_ptr
= pAsm
->cf_current_export_clause_ptr
;
7892 GLboolean
Process_Fragment_Exports(r700_AssemblerBase
*pR700AsmCode
,
7893 GLbitfield OutputsWritten
)
7896 GLuint export_count
= 0;
7899 for (i
= 0; i
< FRAG_RESULT_MAX
; ++i
)
7903 if (OutputsWritten
& unBit
)
7905 GLboolean is_depth
= i
== FRAG_RESULT_DEPTH
? GL_TRUE
: GL_FALSE
;
7906 if (!Process_Export(pR700AsmCode
, SQ_EXPORT_PIXEL
, 0, 1, pR700AsmCode
->uiFP_OutputMap
[i
], is_depth
))
7912 /* Need to export something, otherwise we'll hang
7913 * results are undefined anyway */
7914 if(export_count
== 0)
7916 Process_Export(pR700AsmCode
, SQ_EXPORT_PIXEL
, 0, 1, pR700AsmCode
->starting_export_register_number
, GL_FALSE
);
7919 if(pR700AsmCode
->cf_last_export_ptr
!= NULL
)
7921 if(8 == pR700AsmCode
->unAsic
)
7923 SETfield(pR700AsmCode
->cf_last_export_ptr
->m_Word1
.val
,
7925 EG_CF_ALLOC_EXPORT_WORD1__EOP_shift
,
7926 EG_CF_ALLOC_EXPORT_WORD1__EOP_bit
);
7927 SETfield(pR700AsmCode
->cf_last_export_ptr
->m_Word1
.val
,
7928 EG_CF_INST_EXPORT_DONE
,
7929 EG_CF_WORD1__CF_INST_shift
,
7930 EG_CF_WORD1__CF_INST_mask
);
7934 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
7935 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.end_of_program
= 0x1;
7942 GLboolean
Process_Vertex_Exports(r700_AssemblerBase
*pR700AsmCode
,
7943 GLbitfield OutputsWritten
)
7948 GLuint export_starting_index
= 0;
7949 GLuint export_count
= pR700AsmCode
->number_of_exports
;
7951 unBit
= 1 << VERT_RESULT_HPOS
;
7952 if(OutputsWritten
& unBit
)
7954 if( GL_FALSE
== Process_Export(pR700AsmCode
,
7956 export_starting_index
,
7958 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_HPOS
],
7963 export_starting_index
++;
7967 unBit
= 1 << VERT_RESULT_PSIZ
;
7968 if(OutputsWritten
& unBit
)
7970 if( GL_FALSE
== Process_Export(pR700AsmCode
,
7972 export_starting_index
,
7974 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_PSIZ
],
7982 if(8 == pR700AsmCode
->unAsic
)
7984 SETfield(pR700AsmCode
->cf_last_export_ptr
->m_Word1
.val
,
7985 EG_CF_INST_EXPORT_DONE
,
7986 EG_CF_WORD1__CF_INST_shift
,
7987 EG_CF_WORD1__CF_INST_mask
);
7991 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
7995 pR700AsmCode
->number_of_exports
= export_count
;
7996 export_starting_index
= 0;
7998 unBit
= 1 << VERT_RESULT_COL0
;
7999 if(OutputsWritten
& unBit
)
8001 if( GL_FALSE
== Process_Export(pR700AsmCode
,
8003 export_starting_index
,
8005 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_COL0
],
8011 export_starting_index
++;
8014 unBit
= 1 << VERT_RESULT_COL1
;
8015 if(OutputsWritten
& unBit
)
8017 if( GL_FALSE
== Process_Export(pR700AsmCode
,
8019 export_starting_index
,
8021 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_COL1
],
8027 export_starting_index
++;
8030 unBit
= 1 << VERT_RESULT_FOGC
;
8031 if(OutputsWritten
& unBit
)
8033 if( GL_FALSE
== Process_Export(pR700AsmCode
,
8035 export_starting_index
,
8037 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_FOGC
],
8043 export_starting_index
++;
8048 unBit
= 1 << (VERT_RESULT_TEX0
+ i
);
8049 if(OutputsWritten
& unBit
)
8051 if( GL_FALSE
== Process_Export(pR700AsmCode
,
8053 export_starting_index
,
8055 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_TEX0
+ i
],
8061 export_starting_index
++;
8065 for(i
=VERT_RESULT_VAR0
; i
<VERT_RESULT_MAX
; i
++)
8068 if(OutputsWritten
& unBit
)
8070 if( GL_FALSE
== Process_Export(pR700AsmCode
,
8072 export_starting_index
,
8074 pR700AsmCode
->ucVP_OutputMap
[i
],
8080 export_starting_index
++;
8084 // At least one param should be exported
8087 if(8 == pR700AsmCode
->unAsic
)
8089 SETfield(pR700AsmCode
->cf_last_export_ptr
->m_Word1
.val
,
8090 EG_CF_INST_EXPORT_DONE
,
8091 EG_CF_WORD1__CF_INST_shift
,
8092 EG_CF_WORD1__CF_INST_mask
);
8096 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
8101 if( GL_FALSE
== Process_Export(pR700AsmCode
,
8105 pR700AsmCode
->starting_export_register_number
,
8111 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_0
;
8112 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_0
;
8113 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_0
;
8114 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_1
;
8115 if(8 == pR700AsmCode
->unAsic
)
8117 SETfield(pR700AsmCode
->cf_last_export_ptr
->m_Word1
.val
,
8118 EG_CF_INST_EXPORT_DONE
,
8119 EG_CF_WORD1__CF_INST_shift
,
8120 EG_CF_WORD1__CF_INST_mask
);
8124 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
8128 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.end_of_program
= 0x1;
8133 GLboolean
Clean_Up_Assembler(r700_AssemblerBase
*pR700AsmCode
)
8135 if(NULL
!= pR700AsmCode
->pInstDeps
)
8137 FREE(pR700AsmCode
->pInstDeps
);
8138 pR700AsmCode
->pInstDeps
= NULL
;
8141 if(NULL
!= pR700AsmCode
->subs
)
8143 FREE(pR700AsmCode
->subs
);
8144 pR700AsmCode
->subs
= NULL
;
8146 if(NULL
!= pR700AsmCode
->callers
)
8148 FREE(pR700AsmCode
->callers
);
8149 pR700AsmCode
->callers
= NULL
;
8152 if(NULL
!= pR700AsmCode
->presubs
)
8154 FREE(pR700AsmCode
->presubs
);
8155 pR700AsmCode
->presubs
= NULL
;