2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
27 #ifndef _R700_ASSEMBLER_H_
28 #define _R700_ASSEMBLER_H_
30 #include "main/mtypes.h"
31 #include "shader/prog_instruction.h"
33 #include "r700_chip.h"
34 #include "r700_shaderinst.h"
35 #include "r700_shader.h"
37 typedef enum SHADER_PIPE_TYPE
43 typedef enum ConstantCycles
46 NUMBER_OF_COMPONENTS
= 4
49 typedef enum HARDWARE_LIMIT_VALUES
51 TEMPORARY_REGISTER_OFFSET
= SQ_ALU_SRC_GPR_BASE
,
52 MAX_TEMPORARY_REGISTERS
= SQ_ALU_SRC_GPR_SIZE
,
53 MAX_CONSTANT_REGISTERS
= SQ_ALU_SRC_CFILE_SIZE
,
54 CFILE_REGISTER_OFFSET
= SQ_ALU_SRC_CFILE_BASE
,
55 NUMBER_OF_INPUT_COLORS
= 2,
56 NUMBER_OF_OUTPUT_COLORS
= 8,
57 NUMBER_OF_TEXTURE_UNITS
= 16,
59 } HARDWARE_LIMIT_VALUES
;
61 typedef enum AddressMode
65 ADDR_RELATIVE_FLI_0
= 2,
66 NUMBER_OF_ADDR_MOD
= 3
69 typedef enum SrcRegisterType
71 SRC_REG_TEMPORARY
= 0,
74 SRC_REG_ALT_TEMPORARY
= 3,
76 NUMBER_OF_SRC_REG_TYPE
= 5
79 typedef enum DstRegisterType
81 DST_REG_TEMPORARY
= 0,
84 DST_REG_OUT_X_REPL
= 3,
85 DST_REG_ALT_TEMPORARY
= 4,
87 NUMBER_OF_DST_REG_TYPE
= 6
90 typedef unsigned int BITS
;
92 typedef struct PVSDSTtag
94 BITS opcode
:8; //(:6) //@@@ really should be 10 bits for OP2
96 BITS predicated
:1; //10 //8
97 BITS pred_inv
:1; //11 //8
100 BITS reg
:10; //24 //20
107 BITS op3
:1; // 29 Represents *_OP3_* ALU opcode
109 BITS dualop
:1; // 30 //26
111 BITS addrmode0
:1; //31 //29
112 BITS addrmode1
:1; //32
115 typedef struct PVSINSTtag
117 BITS literal_slots
:2;
118 BITS SaturateMode
:2;
122 typedef struct PVSSRCtag
126 BITS reg
:10; //14 (8)
130 BITS swizzlew
:3; //26
138 BITS addrmode1
:1; //32
141 typedef struct PVSMATHtag
148 BITS dstoff
:2; // 2 bits of dest offset into alt ram
152 BITS dstcomp
:2; // select dest component
156 typedef union PVSDWORDtag
166 typedef struct VAP_OUT_VTX_FMT_0tag
172 BITS pos_param
:1; // 4
187 BITS point_size
:1; // 15
189 BITS rta_index
:1; // shares same channel as kill_flag
191 BITS viewport_index
:1; // 19
193 BITS resvd1
:12; // 20
196 typedef struct VAP_OUT_VTX_FMT_1tag
210 typedef struct VAP_OUT_VTX_FMT_2tag
224 typedef struct OUT_FRAGMENT_FMT_0tag
237 BITS coverage_to_mask
:1;
241 } OUT_FRAGMENT_FMT_0
;
243 typedef enum CF_CLAUSE_TYPE
251 NUMBER_CF_CLAUSE_TYPES
256 MAX_BOOL_CONSTANTS
= 32,
257 MAX_INT_CONSTANTS
= 32,
258 MAX_FLOAT_CONSTANTS
= 256,
272 SAFEDIST_TEX
= 6, ///< safe distance for using result of texture lookup in alu or another tex lookup
273 SAFEDIST_ALU
= 6 ///< the same for alu->fc
276 typedef struct FC_LEVEL
278 R700ControlFlowGenericClause
* first
;
279 R700ControlFlowGenericClause
** mid
;
280 unsigned int unNumMid
;
285 int id
; ///< id of bool or int variable
288 typedef struct VTX_FETCH_METHOD
290 GLboolean bEnableMini
;
291 GLuint mega_fetch_remainder
;
294 typedef struct SUB_OFFSET
298 GLuint unStackDepthMax
;
299 TypedShaderList lstCFInstructions_local
;
302 typedef struct CALLER_POINTER
306 R700ControlFlowGenericClause
* cf_ptr
;
309 #define SQ_MAX_CALL_DEPTH 0x00000020
311 typedef struct CALL_LEVEL
313 unsigned int FCSP_BeforeEntry
;
317 TypedShaderList
* plstCFInstructions_local
;
/*
 * Loop-return bit flags. HAS_CURRENT_LOOPRET and HAS_LOOPRET are two distinct
 * single-bit values; LOOPRET_FLAGS is the mask containing both.
 * NOTE(review): where these flags are stored and tested is outside this view.
 */
#define HAS_CURRENT_LOOPRET 0x1L
#define HAS_LOOPRET         0x2L
/* Parenthesized so the mask stays intact when combined with ~, & or == at the use site. */
#define LOOPRET_FLAGS       (HAS_LOOPRET | HAS_CURRENT_LOOPRET)
324 typedef struct r700_AssemblerBase
326 R700ControlFlowSXClause
* cf_last_export_ptr
;
327 R700ControlFlowSXClause
* cf_current_export_clause_ptr
;
328 R700ControlFlowALUClause
* cf_current_alu_clause_ptr
;
329 R700ControlFlowGenericClause
* cf_current_tex_clause_ptr
;
330 R700ControlFlowGenericClause
* cf_current_vtx_clause_ptr
;
331 R700ControlFlowGenericClause
* cf_current_cf_clause_ptr
;
334 R700_Shader
* pR700Shader
;
336 // No clause has been created yet
337 CF_CLAUSE_TYPE cf_current_clause_type
;
341 GLuint number_of_exports
;
342 GLuint number_of_colorandz_exports
;
343 GLuint number_of_export_opcodes
;
350 unsigned int uLastPosUpdate
;
351 unsigned int last_cond_register
;
353 OUT_FRAGMENT_FMT_0 fp_stOutFmt0
;
357 unsigned int number_used_registers
;
358 unsigned int uUsedConsts
;
360 unsigned int flag_reg_index
;
363 unsigned int uiFP_AttributeMap
[FRAG_ATTRIB_MAX
];
364 unsigned int uiFP_OutputMap
[FRAG_RESULT_MAX
];
365 unsigned int uBoolConsts
;
366 unsigned int uIntConsts
;
368 unsigned int uConsts
;
371 unsigned char ucVP_AttributeMap
[VERT_ATTRIB_MAX
];
372 unsigned char ucVP_OutputMap
[VERT_RESULT_MAX
];
374 unsigned char * pucOutMask
;
376 //-----------------------------------------------------------------------------------
377 // flow control members
378 //-----------------------------------------------------------------------------------
380 FC_LEVEL fc_stack
[32];
382 //-----------------------------------------------------------------------------------
383 // ArgSubst used in Assemble_Source() function
384 //-----------------------------------------------------------------------------------
387 GLint hw_gpr
[ NUMBER_OF_CYCLES
][ NUMBER_OF_COMPONENTS
];
388 GLint hw_cfile_addr
[ NUMBER_OF_COMPONENTS
];
389 GLint hw_cfile_chan
[ NUMBER_OF_COMPONENTS
];
393 GLint color_export_register_number
[NUMBER_OF_OUTPUT_COLORS
];
394 GLint depth_export_register_number
;
396 GLint stencil_export_register_number
;
397 GLint coverage_to_mask_export_register_number
;
398 GLint mask_export_register_number
;
400 GLuint starting_export_register_number
;
401 GLuint starting_vfetch_register_number
;
402 GLuint starting_temp_register_number
;
404 GLuint uFirstHelpReg
;
406 GLboolean input_position_is_used
;
407 GLboolean input_normal_is_used
;
409 GLboolean input_color_is_used
[NUMBER_OF_INPUT_COLORS
];
411 GLboolean input_texture_unit_is_used
[NUMBER_OF_TEXTURE_UNITS
];
413 R700VertexGenericFetch
* vfetch_instruction_ptr_array
[VERT_ATTRIB_MAX
];
415 GLuint number_of_inputs
;
419 SHADER_PIPE_TYPE currentShaderType
;
420 struct prog_instruction
* pILInst
;
422 GLubyte SamplerUnits
[MAX_SAMPLERS
];
424 /* helper to decide which type of instruction to assemble */
426 /* we inserted helper instructions and need barrier on next TEX ins */
427 GLboolean need_tex_barrier
;
430 GLuint unSubArraySize
;
431 GLuint unSubArrayPointer
;
432 CALLER_POINTER
* callers
;
433 GLuint unCallerArraySize
;
434 GLuint unCallerArrayPointer
;
436 CALL_LEVEL CALLSTACK
[SQ_MAX_CALL_DEPTH
];
440 } r700_AssemblerBase
;
/*
 * Helpers operating on the PVSDST (destination) and PVSSRC (source) operand
 * bit-field descriptors. Each takes a pointer to the descriptor; only the
 * prototypes are visible in this header.
 * NOTE(review): the per-function notes below are inferred from names and
 * parameter lists - confirm against the implementation.
 */
/* Returns a BITS value for the destination address mode; presumably derived
 * from the addrmode0/addrmode1 bit-fields - TODO confirm. */
443 BITS
addrmode_PVSDST(PVSDST
* pPVSDST
);
/* Counterpart of addrmode_PVSDST taking the new mode as a BITS value. */
444 void setaddrmode_PVSDST(PVSDST
* pPVSDST
, BITS addrmode
);
/* NOTE(review): presumably clears/resets the destination write mask - confirm. */
445 void nomask_PVSDST(PVSDST
* pPVSDST
);
/* Source-operand equivalents of the address-mode getter/setter above. */
446 BITS
addrmode_PVSSRC(PVSSRC
* pPVSSRC
);
447 void setaddrmode_PVSSRC(PVSSRC
* pPVSSRC
, BITS addrmode
);
/* Swizzle helpers: one takes a single BITS selector, one takes none, and
 * swizzleagain takes one BITS selector per x/y/z/w component. */
448 void setswizzle_PVSSRC(PVSSRC
* pPVSSRC
, BITS swz
);
449 void noswizzle_PVSSRC(PVSSRC
* pPVSSRC
);
450 void swizzleagain_PVSSRC(PVSSRC
* pPVSSRC
, BITS x
, BITS y
, BITS z
, BITS w
);
/* NOTE(review): presumably set / clear / toggle the source negate state - confirm. */
451 void neg_PVSSRC(PVSSRC
* pPVSSRC
);
452 void noneg_PVSSRC(PVSSRC
* pPVSSRC
);
453 void flipneg_PVSSRC(PVSSRC
* pPVSSRC
);
/* Take a component index c; presumably force that component to constant
 * zero / one - TODO confirm the valid range of c. */
454 void zerocomp_PVSSRC(PVSSRC
* pPVSSRC
, int c
);
455 void onecomp_PVSSRC(PVSSRC
* pPVSSRC
, int c
);
/*
 * Query helpers. Only the prototypes are visible here.
 * NOTE(review): semantics are inferred from names - confirm against the
 * implementation.
 */
/* Inspect a vertex / fragment output-format descriptor and return a BITS value. */
456 BITS
is_misc_component_exported(VAP_OUT_VTX_FMT_0
* pOutVTXFmt0
);
457 BITS
is_depth_component_exported(OUT_FRAGMENT_FMT_0
* pFPOutFmt
) ;
/* Takes a PVSDWORD pointer named dest and returns a GLboolean. */
458 GLboolean
is_reduction_opcode(PVSDWORD
* dest
);
/* Takes a GL element type and channel count and returns a GLuint;
 * pClient_size is presumably an output parameter - TODO confirm. */
459 GLuint
GetSurfaceFormat(GLenum eType
, GLuint nChannels
, GLuint
* pClient_size
);
/* Returns an operand count as unsigned int for the given assembler state. */
461 unsigned int r700GetNumOperands(r700_AssemblerBase
* pAsm
);
/* Classify a gl_inst_opcode; each returns a GLboolean. */
463 GLboolean
IsTex(gl_inst_opcode Opcode
);
464 GLboolean
IsAlu(gl_inst_opcode Opcode
);
/*
 * Clause management and instruction insertion. All three take the assembler
 * state pAsm as first argument.
 * NOTE(review): return-value conventions are not visible here - confirm
 * against the implementation.
 */
/* Takes the CF_CLAUSE_TYPE about to be used; returns int. */
465 int check_current_clause(r700_AssemblerBase
* pAsm
,
466 CF_CLAUSE_TYPE new_clause_type
);
/* Takes a vertex-fetch instruction pointer; returns GLboolean. */
467 GLboolean
add_vfetch_instruction(r700_AssemblerBase
* pAsm
,
468 R700VertexInstruction
* vertex_instruction_ptr
);
/* Takes a texture instruction pointer; returns GLboolean. */
469 GLboolean
add_tex_instruction(r700_AssemblerBase
* pAsm
,
470 R700TextureInstruction
* tex_instruction_ptr
);
471 GLboolean
assemble_vfetch_instruction(r700_AssemblerBase
* pAsm
,
473 GLuint destination_register
,
474 GLuint number_of_elements
,
475 GLenum dataElementType
,
476 VTX_FETCH_METHOD
* pFetchMethod
);
477 GLboolean
assemble_vfetch_instruction2(r700_AssemblerBase
* pAsm
,
478 GLuint destination_register
,
484 VTX_FETCH_METHOD
* pFetchMethod
);
/*
 * Vertex-fetch cleanup, helper-register management and operand checks.
 * Every function takes the assembler state pAsm.
 * NOTE(review): semantics below are inferred from names - confirm against the
 * implementation.
 */
485 GLboolean
cleanup_vfetch_instructions(r700_AssemblerBase
* pAsm
);
/* Returns a GLuint; presumably the index of a scratch ("helper") register. */
486 GLuint
gethelpr(r700_AssemblerBase
* pAsm
);
/* NOTE(review): presumably releases the helper registers handed out by gethelpr(). */
487 void resethelpr(r700_AssemblerBase
* pAsm
);
488 void checkop_init(r700_AssemblerBase
* pAsm
);
/* Takes a source index src; returns GLboolean. */
489 GLboolean
mov_temp(r700_AssemblerBase
* pAsm
, int src
);
/* NOTE(review): the numeric suffix presumably is the operand count being checked. */
490 GLboolean
checkop1(r700_AssemblerBase
* pAsm
);
491 GLboolean
checkop2(r700_AssemblerBase
* pAsm
);
492 GLboolean
checkop3(r700_AssemblerBase
* pAsm
);
493 GLboolean
assemble_src(r700_AssemblerBase
*pAsm
,
/*
 * Destination / texture operand assembly and state initialisation. All take
 * the assembler state pAsm; all but initialize() return GLboolean.
 * NOTE(review): which pAsm fields are read or written is not visible here.
 */
496 GLboolean
assemble_dst(r700_AssemblerBase
*pAsm
);
497 GLboolean
tex_dst(r700_AssemblerBase
*pAsm
);
498 GLboolean
tex_src(r700_AssemblerBase
*pAsm
);
/* The GLboolean flag normalized is passed by the caller; its effect is not
 * visible from the prototype. */
499 GLboolean
assemble_tex_instruction(r700_AssemblerBase
*pAsm
, GLboolean normalized
);
500 void initialize(r700_AssemblerBase
*pAsm
);
501 GLboolean
assemble_alu_src(R700ALUInstruction
* alu_instruction_ptr
,
504 BITS scalar_channel_index
);
/*
 * Instruction insertion helpers taking the assembler state pAsm.
 * NOTE(review): ownership of the instruction pointers after the call is not
 * visible here - confirm against the implementation.
 */
/* Takes an ALU instruction pointer and the number of contiguous slots it needs. */
505 GLboolean
add_alu_instruction(r700_AssemblerBase
* pAsm
,
506 R700ALUInstruction
* alu_instruction_ptr
,
507 GLuint contiguous_slots_needed
);
509 GLboolean
add_cf_instruction(r700_AssemblerBase
* pAsm
);
510 void add_return_inst(r700_AssemblerBase
*pAsm
);
512 void get_src_properties(R700ALUInstruction
* alu_instruction_ptr
,
/*
 * Classify a source selector value sel; each returns int.
 * NOTE(review): presumably tests sel against the SQ_ALU_SRC_* ranges (constant
 * file, inline constant, GPR) - confirm against the implementation.
 */
518 int is_cfile(BITS sel
);
519 int is_const(BITS sel
);
520 int is_gpr(BITS sel
);
521 GLboolean
reserve_cfile(r700_AssemblerBase
* pAsm
,
/*
 * Register-read reservation, bank-swizzle cycle lookup and ALU instruction
 * checks. All return GLboolean.
 * NOTE(review): semantics are inferred from names and parameters - confirm
 * against the implementation.
 */
/* Takes a GPR selector, channel and cycle. */
524 GLboolean
reserve_gpr(r700_AssemblerBase
* pAsm
, GLuint sel
, GLuint chan
, GLuint cycle
);
/* Take a swizzle and selector; pCycle is presumably an output parameter. */
525 GLboolean
cycle_for_scalar_bank_swizzle(const int swiz
, const int sel
, GLuint
* pCycle
);
526 GLboolean
cycle_for_vector_bank_swizzle(const int swiz
, const int sel
, GLuint
* pCycle
);
/* Scalar / vector variants of a check on one ALU instruction. */
527 GLboolean
check_scalar(r700_AssemblerBase
* pAsm
,
528 R700ALUInstruction
* alu_instruction_ptr
);
529 GLboolean
check_vector(r700_AssemblerBase
* pAsm
,
530 R700ALUInstruction
* alu_instruction_ptr
);
531 GLboolean
assemble_alu_instruction(r700_AssemblerBase
*pAsm
);
532 GLboolean
next_ins(r700_AssemblerBase
*pAsm
);
534 /* TODO : merge next_ins/literal, assemble_alu_instruction/literal */
/*
 * Literal-taking variants of next_ins() and assemble_alu_instruction(): same
 * pAsm argument plus a GLfloat pointer pLiteral.
 * NOTE(review): how many floats pLiteral must point to is not visible here -
 * confirm against the implementation.
 */
535 GLboolean
next_ins_literal(r700_AssemblerBase
*pAsm
, GLfloat
* pLiteral
);
536 GLboolean
assemble_alu_instruction_literal(r700_AssemblerBase
*pAsm
, GLfloat
* pLiteral
);
/*
 * Flow-control helpers. All take the assembler state pAsm and return
 * GLboolean.
 * NOTE(review): semantics are inferred from names - confirm against the
 * implementation.
 */
/* The parameter deliberately shares the function's name (pops). */
538 GLboolean
pops(r700_AssemblerBase
*pAsm
, GLuint pops
);
/* Name is spelled "Offest" (sic); it is part of the public interface. */
539 GLboolean
jumpToOffest(r700_AssemblerBase
*pAsm
, GLuint pops
, GLint offset
);
540 GLboolean
setRetInLoopFlag(r700_AssemblerBase
*pAsm
, GLuint flagValue
);
541 GLboolean
testFlag(r700_AssemblerBase
*pAsm
);
/* unFCSP is presumably a flow-control stack position - TODO confirm. */
542 GLboolean
breakLoopOnFlag(r700_AssemblerBase
*pAsm
, GLuint unFCSP
);
543 GLboolean
returnOnFlag(r700_AssemblerBase
*pAsm
, GLuint unIF
);
/*
 * Per-opcode assembly entry points. Each returns GLboolean and, except for
 * assemble_BAD(), takes the assembler state pAsm.
 * NOTE(review): the suffix presumably names the program opcode being
 * translated; what GL_TRUE/GL_FALSE signify is not visible here - confirm
 * against the implementation.
 */
/* Shared entry point that additionally takes the hardware opcode as BITS. */
545 GLboolean
assemble_math_function(r700_AssemblerBase
* pAsm
, BITS opcode
);
546 GLboolean
assemble_ABS(r700_AssemblerBase
*pAsm
);
547 GLboolean
assemble_ADD(r700_AssemblerBase
*pAsm
);
548 GLboolean
assemble_ARL(r700_AssemblerBase
*pAsm
);
/* Unlike its siblings this takes only an opcode name string, no pAsm. */
549 GLboolean
assemble_BAD(char *opcode_str
);
550 GLboolean
assemble_CMP(r700_AssemblerBase
*pAsm
);
551 GLboolean
assemble_DOT(r700_AssemblerBase
*pAsm
);
552 GLboolean
assemble_DST(r700_AssemblerBase
*pAsm
);
553 GLboolean
assemble_EX2(r700_AssemblerBase
*pAsm
);
554 GLboolean
assemble_EXP(r700_AssemblerBase
*pAsm
);
555 GLboolean
assemble_FLR(r700_AssemblerBase
*pAsm
);
556 GLboolean
assemble_FLR_INT(r700_AssemblerBase
*pAsm
);
557 GLboolean
assemble_FRC(r700_AssemblerBase
*pAsm
);
/* Takes an extra GLuint opcode selecting the variant to emit. */
558 GLboolean
assemble_KIL(r700_AssemblerBase
*pAsm
, GLuint opcode
);
559 GLboolean
assemble_LG2(r700_AssemblerBase
*pAsm
);
560 GLboolean
assemble_LRP(r700_AssemblerBase
*pAsm
);
561 GLboolean
assemble_LOG(r700_AssemblerBase
*pAsm
);
562 GLboolean
assemble_MAD(r700_AssemblerBase
*pAsm
);
563 GLboolean
assemble_LIT(r700_AssemblerBase
*pAsm
);
564 GLboolean
assemble_MAX(r700_AssemblerBase
*pAsm
);
565 GLboolean
assemble_MIN(r700_AssemblerBase
*pAsm
);
566 GLboolean
assemble_MOV(r700_AssemblerBase
*pAsm
);
567 GLboolean
assemble_MUL(r700_AssemblerBase
*pAsm
);
568 GLboolean
assemble_POW(r700_AssemblerBase
*pAsm
);
569 GLboolean
assemble_RCP(r700_AssemblerBase
*pAsm
);
570 GLboolean
assemble_RSQ(r700_AssemblerBase
*pAsm
);
571 GLboolean
assemble_SCS(r700_AssemblerBase
*pAsm
);
572 GLboolean
assemble_SGE(r700_AssemblerBase
*pAsm
);
/*
 * More per-opcode assembly entry points; all take pAsm and return GLboolean.
 * The first three are shared entry points that also take the opcode as BITS.
 * NOTE(review): which opcodes route through the shared entry points is not
 * visible here - confirm against the implementation.
 */
574 GLboolean
assemble_LOGIC(r700_AssemblerBase
*pAsm
, BITS opcode
);
575 GLboolean
assemble_LOGIC_PRED(r700_AssemblerBase
*pAsm
, BITS opcode
);
576 GLboolean
assemble_TRIG(r700_AssemblerBase
*pAsm
, BITS opcode
);
578 GLboolean
assemble_SLT(r700_AssemblerBase
*pAsm
);
579 GLboolean
assemble_STP(r700_AssemblerBase
*pAsm
);
580 GLboolean
assemble_TEX(r700_AssemblerBase
*pAsm
);
581 GLboolean
assemble_XPD(r700_AssemblerBase
*pAsm
);
/*
 * Export and control-flow assembly entry points (IF/ELSE/ENDIF, loops,
 * subroutines). All take pAsm and return GLboolean.
 * NOTE(review): nesting and stack behaviour is not visible from the
 * prototypes - confirm against the implementation.
 */
582 GLboolean
assemble_EXPORT(r700_AssemblerBase
*pAsm
);
/* bHasElse is supplied by the caller; presumably tells whether an ELSE follows. */
583 GLboolean
assemble_IF(r700_AssemblerBase
*pAsm
, GLboolean bHasElse
);
584 GLboolean
assemble_ELSE(r700_AssemblerBase
*pAsm
);
585 GLboolean
assemble_ENDIF(r700_AssemblerBase
*pAsm
);
587 GLboolean
assemble_BGNLOOP(r700_AssemblerBase
*pAsm
);
588 GLboolean
assemble_BRK(r700_AssemblerBase
*pAsm
);
589 GLboolean
assemble_COND(r700_AssemblerBase
*pAsm
);
590 GLboolean
assemble_ENDLOOP(r700_AssemblerBase
*pAsm
);
/* nILindex is presumably the index of the BGNSUB instruction in the program - TODO confirm. */
592 GLboolean
assemble_BGNSUB(r700_AssemblerBase
*pAsm
, GLint nILindex
);
593 GLboolean
assemble_ENDSUB(r700_AssemblerBase
*pAsm
);
594 GLboolean
assemble_RET(r700_AssemblerBase
*pAsm
);
595 GLboolean
assemble_CAL(r700_AssemblerBase
*pAsm
,
597 GLuint uiNumberInsts
,
598 struct prog_instruction
*pILInst
);
600 GLboolean
Process_Export(r700_AssemblerBase
* pAsm
,
602 GLuint export_starting_index
,
604 GLuint starting_register_number
,
605 GLboolean is_depth_export
);
/*
 * Top-level driver entry points: export post-processing, instruction
 * assembly, program relocation and assembler set-up / tear-down.
 * NOTE(review): call order and ownership rules are not visible here - confirm
 * against the implementation.
 */
606 GLboolean
Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase
*pAsm
,
607 BITS depth_channel_select
);
/* Takes a first-instruction index, an instruction count, the prog_instruction
 * array and the assembler state. */
611 GLboolean
AssembleInstr(GLuint uiFirstInst
,
612 GLuint uiNumberInsts
,
613 struct prog_instruction
*pILInst
,
614 r700_AssemblerBase
*pR700AsmCode
);
/* Take a GLbitfield of written outputs for the fragment / vertex program. */
615 GLboolean
Process_Fragment_Exports(r700_AssemblerBase
*pR700AsmCode
, GLbitfield OutputsWritten
);
616 GLboolean
Process_Vertex_Exports(r700_AssemblerBase
*pR700AsmCode
, GLbitfield OutputsWritten
);
618 GLboolean
RelocProgram(r700_AssemblerBase
* pAsm
);
619 GLboolean
InitShaderProgram(r700_AssemblerBase
* pAsm
);
/* Takes the shader pipe type, the assembler state to set up and the target
 * R700_Shader; returns int (unlike the GLboolean siblings). */
621 int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt
, r700_AssemblerBase
* pAsm
, R700_Shader
* pShader
);
622 GLboolean
Clean_Up_Assembler(r700_AssemblerBase
*pR700AsmCode
);
624 #endif //_R700_ASSEMBLER_H_