2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
27 #ifndef _R700_ASSEMBLER_H_
28 #define _R700_ASSEMBLER_H_
30 #include "main/mtypes.h"
31 #include "shader/prog_instruction.h"
33 #include "r700_chip.h"
34 #include "r700_shaderinst.h"
35 #include "r700_shader.h"
37 typedef enum SHADER_PIPE_TYPE
43 typedef enum ConstantCycles
46 NUMBER_OF_COMPONENTS
= 4
49 typedef enum HARDWARE_LIMIT_VALUES
51 TEMPORARY_REGISTER_OFFSET
= SQ_ALU_SRC_GPR_BASE
,
52 MAX_TEMPORARY_REGISTERS
= SQ_ALU_SRC_GPR_SIZE
,
53 MAX_CONSTANT_REGISTERS
= SQ_ALU_SRC_CFILE_SIZE
,
54 CFILE_REGISTER_OFFSET
= SQ_ALU_SRC_CFILE_BASE
,
55 NUMBER_OF_INPUT_COLORS
= 2,
56 NUMBER_OF_OUTPUT_COLORS
= 8,
57 NUMBER_OF_TEXTURE_UNITS
= 16,
59 } HARDWARE_LIMIT_VALUES
;
61 typedef enum AddressMode
65 ADDR_RELATIVE_FLI_0
= 2,
66 NUMBER_OF_ADDR_MOD
= 3
69 typedef enum SrcRegisterType
71 SRC_REG_TEMPORARY
= 0,
74 SRC_REG_ALT_TEMPORARY
= 3,
76 NUMBER_OF_SRC_REG_TYPE
= 5
79 typedef enum DstRegisterType
81 DST_REG_TEMPORARY
= 0,
84 DST_REG_OUT_X_REPL
= 3,
85 DST_REG_ALT_TEMPORARY
= 4,
87 NUMBER_OF_DST_REG_TYPE
= 6
/* Unsigned word type used for the bit-field members and opcode/selector
 * arguments declared in this header. */
typedef unsigned BITS;
92 typedef struct PVSDSTtag
94 BITS opcode
:8; //(:6) //@@@ really should be 10 bits for OP2
96 BITS predicated
:1; //10 //8
97 BITS pred_inv
:1; //11 //8
100 BITS reg
:10; //24 //20
107 BITS op3
:1; // 29 Represents *_OP3_* ALU opcode
109 BITS dualop
:1; // 30 //26
111 BITS addrmode0
:1; //31 //29
112 BITS addrmode1
:1; //32
115 typedef struct PVSINSTtag
118 BITS SaturateMode
:2;
121 typedef struct PVSSRCtag
125 BITS reg
:10; //15 (8)
129 BITS swizzlew
:3; //27
136 BITS addrmode1
:1; //32
139 typedef struct PVSMATHtag
146 BITS dstoff
:2; // 2 bits of dest offset into alt ram
150 BITS dstcomp
:2; // select dest component
154 typedef union PVSDWORDtag
164 typedef struct VAP_OUT_VTX_FMT_0tag
170 BITS pos_param
:1; // 4
185 BITS point_size
:1; // 15
187 BITS rta_index
:1; // shares same channel as kill_flag
189 BITS viewport_index
:1; // 19
191 BITS resvd1
:12; // 20
194 typedef struct VAP_OUT_VTX_FMT_1tag
208 typedef struct VAP_OUT_VTX_FMT_2tag
222 typedef struct OUT_FRAGMENT_FMT_0tag
235 BITS coverage_to_mask
:1;
239 } OUT_FRAGMENT_FMT_0
;
241 typedef enum CF_CLAUSE_TYPE
249 NUMBER_CF_CLAUSE_TYPES
254 MAX_BOOL_CONSTANTS
= 32,
255 MAX_INT_CONSTANTS
= 32,
256 MAX_FLOAT_CONSTANTS
= 256,
270 SAFEDIST_TEX
= 6, ///< safe distance for using result of texture lookup in alu or another tex lookup
271 SAFEDIST_ALU
= 6 ///< the same for alu->fc
274 typedef struct FC_LEVEL
276 R700ControlFlowGenericClause
* first
;
277 R700ControlFlowGenericClause
** mid
;
278 unsigned int unNumMid
;
283 int id
; ///< id of bool or int variable
286 typedef struct VTX_FETCH_METHOD
288 GLboolean bEnableMini
;
289 GLuint mega_fetch_remainder
;
292 typedef struct SUB_OFFSET
296 GLuint unStackDepthMax
;
297 TypedShaderList lstCFInstructions_local
;
300 typedef struct CALLER_POINTER
304 R700ControlFlowGenericClause
* cf_ptr
;
/* Number of entries in the CALLSTACK array of r700_AssemblerBase (32). */
#define SQ_MAX_CALL_DEPTH 0x20
309 typedef struct CALL_LEVEL
311 unsigned int FCSP_BeforeEntry
;
315 TypedShaderList
* plstCFInstructions_local
;
/*
 * Bit flags describing RET-inside-loop state.
 *   HAS_CURRENT_LOOPRET - bit 0
 *   HAS_LOOPRET         - bit 1
 *   LOOPRET_FLAGS       - mask covering both bits
 *
 * NOTE(review): the users of these flags are not visible in this header;
 * confirm their exact meaning against the assembler implementation.
 */
#define HAS_CURRENT_LOOPRET 0x1L
#define HAS_LOOPRET         0x2L
/* Parenthesized so the mask expands as a single operand: without the
 * parentheses, `x & LOOPRET_FLAGS` parses as `(x & HAS_LOOPRET) |
 * HAS_CURRENT_LOOPRET` and `~LOOPRET_FLAGS` only inverts HAS_LOOPRET. */
#define LOOPRET_FLAGS       (HAS_LOOPRET | HAS_CURRENT_LOOPRET)
322 typedef struct r700_AssemblerBase
324 R700ControlFlowSXClause
* cf_last_export_ptr
;
325 R700ControlFlowSXClause
* cf_current_export_clause_ptr
;
326 R700ControlFlowALUClause
* cf_current_alu_clause_ptr
;
327 R700ControlFlowGenericClause
* cf_current_tex_clause_ptr
;
328 R700ControlFlowGenericClause
* cf_current_vtx_clause_ptr
;
329 R700ControlFlowGenericClause
* cf_current_cf_clause_ptr
;
332 R700_Shader
* pR700Shader
;
334 // No clause has been created yet
335 CF_CLAUSE_TYPE cf_current_clause_type
;
339 GLuint number_of_exports
;
340 GLuint number_of_colorandz_exports
;
341 GLuint number_of_export_opcodes
;
347 unsigned int uLastPosUpdate
;
349 OUT_FRAGMENT_FMT_0 fp_stOutFmt0
;
353 unsigned int number_used_registers
;
354 unsigned int uUsedConsts
;
356 unsigned int flag_reg_index
;
359 unsigned int uiFP_AttributeMap
[FRAG_ATTRIB_MAX
];
360 unsigned int uiFP_OutputMap
[FRAG_RESULT_MAX
];
361 unsigned int uBoolConsts
;
362 unsigned int uIntConsts
;
364 unsigned int uConsts
;
367 unsigned char ucVP_AttributeMap
[VERT_ATTRIB_MAX
];
368 unsigned char ucVP_OutputMap
[VERT_RESULT_MAX
];
370 unsigned char * pucOutMask
;
372 //-----------------------------------------------------------------------------------
373 // flow control members
374 //-----------------------------------------------------------------------------------
376 FC_LEVEL fc_stack
[32];
378 //-----------------------------------------------------------------------------------
379 // ArgSubst used in Assemble_Source() function
380 //-----------------------------------------------------------------------------------
383 GLint hw_gpr
[ NUMBER_OF_CYCLES
][ NUMBER_OF_COMPONENTS
];
384 GLint hw_cfile_addr
[ NUMBER_OF_COMPONENTS
];
385 GLint hw_cfile_chan
[ NUMBER_OF_COMPONENTS
];
389 GLint color_export_register_number
[NUMBER_OF_OUTPUT_COLORS
];
390 GLint depth_export_register_number
;
392 GLint stencil_export_register_number
;
393 GLint coverage_to_mask_export_register_number
;
394 GLint mask_export_register_number
;
396 GLuint starting_export_register_number
;
397 GLuint starting_vfetch_register_number
;
398 GLuint starting_temp_register_number
;
400 GLuint uFirstHelpReg
;
402 GLboolean input_position_is_used
;
403 GLboolean input_normal_is_used
;
405 GLboolean input_color_is_used
[NUMBER_OF_INPUT_COLORS
];
407 GLboolean input_texture_unit_is_used
[NUMBER_OF_TEXTURE_UNITS
];
409 R700VertexGenericFetch
* vfetch_instruction_ptr_array
[VERT_ATTRIB_MAX
];
411 GLuint number_of_inputs
;
415 SHADER_PIPE_TYPE currentShaderType
;
416 struct prog_instruction
* pILInst
;
419 /* helper to decide which type of instruction to assemble */
421 /* we inserted helper instructions and need barrier on next TEX ins */
422 GLboolean need_tex_barrier
;
425 GLuint unSubArraySize
;
426 GLuint unSubArrayPointer
;
427 CALLER_POINTER
* callers
;
428 GLuint unCallerArraySize
;
429 GLuint unCallerArrayPointer
;
431 CALL_LEVEL CALLSTACK
[SQ_MAX_CALL_DEPTH
];
435 } r700_AssemblerBase
;
438 BITS
addrmode_PVSDST(PVSDST
* pPVSDST
);
439 void setaddrmode_PVSDST(PVSDST
* pPVSDST
, BITS addrmode
);
440 void nomask_PVSDST(PVSDST
* pPVSDST
);
441 BITS
addrmode_PVSSRC(PVSSRC
* pPVSSRC
);
442 void setaddrmode_PVSSRC(PVSSRC
* pPVSSRC
, BITS addrmode
);
443 void setswizzle_PVSSRC(PVSSRC
* pPVSSRC
, BITS swz
);
444 void noswizzle_PVSSRC(PVSSRC
* pPVSSRC
);
445 void swizzleagain_PVSSRC(PVSSRC
* pPVSSRC
, BITS x
, BITS y
, BITS z
, BITS w
);
446 void neg_PVSSRC(PVSSRC
* pPVSSRC
);
447 void noneg_PVSSRC(PVSSRC
* pPVSSRC
);
448 void flipneg_PVSSRC(PVSSRC
* pPVSSRC
);
449 void zerocomp_PVSSRC(PVSSRC
* pPVSSRC
, int c
);
450 void onecomp_PVSSRC(PVSSRC
* pPVSSRC
, int c
);
451 BITS
is_misc_component_exported(VAP_OUT_VTX_FMT_0
* pOutVTXFmt0
);
452 BITS
is_depth_component_exported(OUT_FRAGMENT_FMT_0
* pFPOutFmt
) ;
453 GLboolean
is_reduction_opcode(PVSDWORD
* dest
);
454 GLuint
GetSurfaceFormat(GLenum eType
, GLuint nChannels
, GLuint
* pClient_size
);
456 unsigned int r700GetNumOperands(r700_AssemblerBase
* pAsm
);
458 GLboolean
IsTex(gl_inst_opcode Opcode
);
459 GLboolean
IsAlu(gl_inst_opcode Opcode
);
460 int check_current_clause(r700_AssemblerBase
* pAsm
,
461 CF_CLAUSE_TYPE new_clause_type
);
462 GLboolean
add_vfetch_instruction(r700_AssemblerBase
* pAsm
,
463 R700VertexInstruction
* vertex_instruction_ptr
);
464 GLboolean
add_tex_instruction(r700_AssemblerBase
* pAsm
,
465 R700TextureInstruction
* tex_instruction_ptr
);
466 GLboolean
assemble_vfetch_instruction(r700_AssemblerBase
* pAsm
,
468 GLuint destination_register
,
469 GLuint number_of_elements
,
470 GLenum dataElementType
,
471 VTX_FETCH_METHOD
* pFetchMethod
);
472 GLboolean
assemble_vfetch_instruction2(r700_AssemblerBase
* pAsm
,
473 GLuint destination_register
,
479 VTX_FETCH_METHOD
* pFetchMethod
);
480 GLboolean
cleanup_vfetch_instructions(r700_AssemblerBase
* pAsm
);
481 GLuint
gethelpr(r700_AssemblerBase
* pAsm
);
482 void resethelpr(r700_AssemblerBase
* pAsm
);
483 void checkop_init(r700_AssemblerBase
* pAsm
);
484 GLboolean
mov_temp(r700_AssemblerBase
* pAsm
, int src
);
485 GLboolean
checkop1(r700_AssemblerBase
* pAsm
);
486 GLboolean
checkop2(r700_AssemblerBase
* pAsm
);
487 GLboolean
checkop3(r700_AssemblerBase
* pAsm
);
488 GLboolean
assemble_src(r700_AssemblerBase
*pAsm
,
491 GLboolean
assemble_dst(r700_AssemblerBase
*pAsm
);
492 GLboolean
tex_dst(r700_AssemblerBase
*pAsm
);
493 GLboolean
tex_src(r700_AssemblerBase
*pAsm
);
494 GLboolean
assemble_tex_instruction(r700_AssemblerBase
*pAsm
, GLboolean normalized
);
495 void initialize(r700_AssemblerBase
*pAsm
);
496 GLboolean
assemble_alu_src(R700ALUInstruction
* alu_instruction_ptr
,
499 BITS scalar_channel_index
);
500 GLboolean
add_alu_instruction(r700_AssemblerBase
* pAsm
,
501 R700ALUInstruction
* alu_instruction_ptr
,
502 GLuint contiguous_slots_needed
);
504 GLboolean
add_cf_instruction(r700_AssemblerBase
* pAsm
);
505 void add_return_inst(r700_AssemblerBase
*pAsm
);
507 void get_src_properties(R700ALUInstruction
* alu_instruction_ptr
,
513 int is_cfile(BITS sel
);
514 int is_const(BITS sel
);
515 int is_gpr(BITS sel
);
516 GLboolean
reserve_cfile(r700_AssemblerBase
* pAsm
,
519 GLboolean
reserve_gpr(r700_AssemblerBase
* pAsm
, GLuint sel
, GLuint chan
, GLuint cycle
);
520 GLboolean
cycle_for_scalar_bank_swizzle(const int swiz
, const int sel
, GLuint
* pCycle
);
521 GLboolean
cycle_for_vector_bank_swizzle(const int swiz
, const int sel
, GLuint
* pCycle
);
522 GLboolean
check_scalar(r700_AssemblerBase
* pAsm
,
523 R700ALUInstruction
* alu_instruction_ptr
);
524 GLboolean
check_vector(r700_AssemblerBase
* pAsm
,
525 R700ALUInstruction
* alu_instruction_ptr
);
526 GLboolean
assemble_alu_instruction(r700_AssemblerBase
*pAsm
);
527 GLboolean
next_ins(r700_AssemblerBase
*pAsm
);
529 GLboolean
next_ins2(r700_AssemblerBase
*pAsm
);
530 GLboolean
assemble_alu_instruction2(r700_AssemblerBase
*pAsm
);
532 /* TODO : merge next_ins/2/literal, assemble_alu_instruction/2/literal */
533 GLboolean
next_ins_literal(r700_AssemblerBase
*pAsm
, GLfloat
* pLiteral
);
534 GLboolean
assemble_alu_instruction_literal(r700_AssemblerBase
*pAsm
, GLfloat
* pLiteral
);
536 GLboolean
pops(r700_AssemblerBase
*pAsm
, GLuint pops
);
537 GLboolean
jumpToOffest(r700_AssemblerBase
*pAsm
, GLuint pops
, GLint offset
);
538 GLboolean
setRetInLoopFlag(r700_AssemblerBase
*pAsm
, GLuint flagValue
);
539 GLboolean
testFlag(r700_AssemblerBase
*pAsm
);
540 GLboolean
breakLoopOnFlag(r700_AssemblerBase
*pAsm
, GLuint unFCSP
);
541 GLboolean
returnOnFlag(r700_AssemblerBase
*pAsm
, GLuint unIF
);
543 GLboolean
assemble_math_function(r700_AssemblerBase
* pAsm
, BITS opcode
);
544 GLboolean
assemble_ABS(r700_AssemblerBase
*pAsm
);
545 GLboolean
assemble_ADD(r700_AssemblerBase
*pAsm
);
546 GLboolean
assemble_ARL(r700_AssemblerBase
*pAsm
);
547 GLboolean
assemble_BAD(char *opcode_str
);
548 GLboolean
assemble_CMP(r700_AssemblerBase
*pAsm
);
549 GLboolean
assemble_COS(r700_AssemblerBase
*pAsm
);
550 GLboolean
assemble_DOT(r700_AssemblerBase
*pAsm
);
551 GLboolean
assemble_DST(r700_AssemblerBase
*pAsm
);
552 GLboolean
assemble_EX2(r700_AssemblerBase
*pAsm
);
553 GLboolean
assemble_EXP(r700_AssemblerBase
*pAsm
);
554 GLboolean
assemble_FLR(r700_AssemblerBase
*pAsm
);
555 GLboolean
assemble_FLR_INT(r700_AssemblerBase
*pAsm
);
556 GLboolean
assemble_FRC(r700_AssemblerBase
*pAsm
);
557 GLboolean
assemble_KIL(r700_AssemblerBase
*pAsm
, GLuint opcode
);
558 GLboolean
assemble_LG2(r700_AssemblerBase
*pAsm
);
559 GLboolean
assemble_LRP(r700_AssemblerBase
*pAsm
);
560 GLboolean
assemble_LOG(r700_AssemblerBase
*pAsm
);
561 GLboolean
assemble_MAD(r700_AssemblerBase
*pAsm
);
562 GLboolean
assemble_LIT(r700_AssemblerBase
*pAsm
);
563 GLboolean
assemble_MAX(r700_AssemblerBase
*pAsm
);
564 GLboolean
assemble_MIN(r700_AssemblerBase
*pAsm
);
565 GLboolean
assemble_MOV(r700_AssemblerBase
*pAsm
);
566 GLboolean
assemble_MUL(r700_AssemblerBase
*pAsm
);
567 GLboolean
assemble_POW(r700_AssemblerBase
*pAsm
);
568 GLboolean
assemble_RCP(r700_AssemblerBase
*pAsm
);
569 GLboolean
assemble_RSQ(r700_AssemblerBase
*pAsm
);
570 GLboolean
assemble_SIN(r700_AssemblerBase
*pAsm
);
571 GLboolean
assemble_SCS(r700_AssemblerBase
*pAsm
);
572 GLboolean
assemble_SGE(r700_AssemblerBase
*pAsm
);
574 GLboolean
assemble_LOGIC(r700_AssemblerBase
*pAsm
, BITS opcode
);
575 GLboolean
assemble_LOGIC_PRED(r700_AssemblerBase
*pAsm
, BITS opcode
);
577 GLboolean
assemble_SLT(r700_AssemblerBase
*pAsm
);
578 GLboolean
assemble_STP(r700_AssemblerBase
*pAsm
);
579 GLboolean
assemble_TEX(r700_AssemblerBase
*pAsm
);
580 GLboolean
assemble_XPD(r700_AssemblerBase
*pAsm
);
581 GLboolean
assemble_EXPORT(r700_AssemblerBase
*pAsm
);
582 GLboolean
assemble_IF(r700_AssemblerBase
*pAsm
, GLboolean bHasElse
);
583 GLboolean
assemble_ELSE(r700_AssemblerBase
*pAsm
);
584 GLboolean
assemble_ENDIF(r700_AssemblerBase
*pAsm
);
586 GLboolean
assemble_BGNLOOP(r700_AssemblerBase
*pAsm
);
587 GLboolean
assemble_BRK(r700_AssemblerBase
*pAsm
);
588 GLboolean
assemble_COND(r700_AssemblerBase
*pAsm
);
589 GLboolean
assemble_ENDLOOP(r700_AssemblerBase
*pAsm
);
591 GLboolean
assemble_BGNSUB(r700_AssemblerBase
*pAsm
, GLint nILindex
);
592 GLboolean
assemble_ENDSUB(r700_AssemblerBase
*pAsm
);
593 GLboolean
assemble_RET(r700_AssemblerBase
*pAsm
);
594 GLboolean
assemble_CAL(r700_AssemblerBase
*pAsm
,
596 GLuint uiNumberInsts
,
597 struct prog_instruction
*pILInst
);
599 GLboolean
Process_Export(r700_AssemblerBase
* pAsm
,
601 GLuint export_starting_index
,
603 GLuint starting_register_number
,
604 GLboolean is_depth_export
);
605 GLboolean
Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase
*pAsm
,
606 BITS depth_channel_select
);
610 GLboolean
AssembleInstr(GLuint uiFirstInst
,
611 GLuint uiNumberInsts
,
612 struct prog_instruction
*pILInst
,
613 r700_AssemblerBase
*pR700AsmCode
);
614 GLboolean
Process_Fragment_Exports(r700_AssemblerBase
*pR700AsmCode
, GLbitfield OutputsWritten
);
615 GLboolean
Process_Vertex_Exports(r700_AssemblerBase
*pR700AsmCode
, GLbitfield OutputsWritten
);
617 GLboolean
RelocProgram(r700_AssemblerBase
* pAsm
);
618 GLboolean
InitShaderProgram(r700_AssemblerBase
* pAsm
);
620 int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt
, r700_AssemblerBase
* pAsm
, R700_Shader
* pShader
);
621 GLboolean
Clean_Up_Assembler(r700_AssemblerBase
*pR700AsmCode
);
623 #endif //_R700_ASSEMBLER_H_