c66db502a1704f98aa00127c85a29f1154a0737b
[mesa.git] / src / mesa / drivers / dri / r600 / r700_assembler.h
/*
 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
21
/*
 * Authors:
 *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
 */
26
27 #ifndef _R700_ASSEMBLER_H_
28 #define _R700_ASSEMBLER_H_
29
30 #include "main/mtypes.h"
31 #include "shader/prog_instruction.h"
32
33 #include "r700_chip.h"
34 #include "r700_shaderinst.h"
35 #include "r700_shader.h"
36
/*
 * Kind of shader the assembler is working on.  Stored in
 * r700_AssemblerBase::currentShaderType and passed to
 * Init_r700_AssemblerBase().
 */
typedef enum SHADER_PIPE_TYPE
{
    SPT_VP = 0, /* presumably "vertex program" (cf. the ucVP_* maps) */
    SPT_FP = 1  /* presumably "fragment program" (cf. the uiFP_* maps) */
} SHADER_PIPE_TYPE;
42
/*
 * Array dimensions used by the assembler state:
 * r700_AssemblerBase::hw_gpr is [NUMBER_OF_CYCLES][NUMBER_OF_COMPONENTS],
 * hw_cfile_addr and hw_cfile_chan are [NUMBER_OF_COMPONENTS].
 */
typedef enum ConstantCycles
{
    NUMBER_OF_CYCLES     = 3,
    NUMBER_OF_COMPONENTS = 4
} ConstantCycles;
48
49 typedef enum HARDWARE_LIMIT_VALUES
50 {
51 TEMPORARY_REGISTER_OFFSET = SQ_ALU_SRC_GPR_BASE,
52 MAX_TEMPORARY_REGISTERS = SQ_ALU_SRC_GPR_SIZE,
53 MAX_CONSTANT_REGISTERS = SQ_ALU_SRC_CFILE_SIZE,
54 CFILE_REGISTER_OFFSET = SQ_ALU_SRC_CFILE_BASE,
55 NUMBER_OF_INPUT_COLORS = 2,
56 NUMBER_OF_OUTPUT_COLORS = 8,
57 NUMBER_OF_TEXTURE_UNITS = 16,
58 MEGA_FETCH_BYTES = 32
59 } HARDWARE_LIMIT_VALUES;
60
/*
 * Register addressing modes.  NUMBER_OF_ADDR_MOD is the count of real modes,
 * not a mode itself.
 * NOTE(review): presumably the value carried by the addrmode0/addrmode1 bits
 * of PVSDST / PVSSRC -- confirm against the accessor implementations.
 */
typedef enum AddressMode
{
    ADDR_ABSOLUTE       = 0,
    ADDR_RELATIVE_A0    = 1,
    ADDR_RELATIVE_FLI_0 = 2,
    NUMBER_OF_ADDR_MOD  = 3
} AddressMode;
68
/*
 * Register file a source operand is read from.  NUMBER_OF_SRC_REG_TYPE is
 * the count of real types, not a type itself.
 * NOTE(review): presumably the value stored in PVSSRC::rtype -- confirm.
 */
typedef enum SrcRegisterType
{
    SRC_REG_TEMPORARY      = 0,
    SRC_REG_INPUT          = 1,
    SRC_REG_CONSTANT       = 2,
    SRC_REG_ALT_TEMPORARY  = 3,
    NUMBER_OF_SRC_REG_TYPE = 4
} SrcRegisterType;
77
/*
 * Register file a destination operand is written to.
 * NUMBER_OF_DST_REG_TYPE is the count of real types, not a type itself.
 * NOTE(review): presumably the value stored in PVSDST::rtype (3 bits, so
 * every value below fits) -- confirm.
 */
typedef enum DstRegisterType
{
    DST_REG_TEMPORARY      = 0,
    DST_REG_A0             = 1,
    DST_REG_OUT            = 2,
    DST_REG_OUT_X_REPL     = 3,
    DST_REG_ALT_TEMPORARY  = 4,
    DST_REG_INPUT          = 5,
    NUMBER_OF_DST_REG_TYPE = 6
} DstRegisterType;
88
/* Base type for every bit-field in this header. */
typedef unsigned int BITS;

/*
 * Packed description of an instruction destination operand.
 * The field widths add up to 32 bits; the trailing numbers in the comments
 * are the original author's running bit counts.
 */
typedef struct PVSDSTtag
{
    BITS opcode:8;      //(:6) //@@@ really should be 10 bits for OP2
    BITS math:1;
    BITS predicated:1;  //10 //8
    BITS pred_inv :1;   //11 //8

    BITS rtype:3;
    BITS reg:10;        //24 //20

    BITS writex:1;
    BITS writey:1;
    BITS writez:1;
    BITS writew:1;      //28

    BITS op3:1;         // 29 Represents *_OP3_* ALU opcode

    BITS dualop:1;      // 30 //26

    BITS addrmode0:1;   //31 //29
    BITS addrmode1:1;   //32
} PVSDST;

/*
 * Packed description of an instruction source operand: register type and
 * index, a 3-bit swizzle selector and a negate flag per component, and a
 * two-bit address mode split across addrmode0 / addrmode1.
 * The field widths add up to 32 bits.
 */
typedef struct PVSSRCtag
{
    BITS rtype:4;
    BITS addrmode0:1;
    BITS reg:10;        //15 (8)
    BITS swizzlex:3;
    BITS swizzley:3;
    BITS swizzlez:3;
    BITS swizzlew:3;    //27

    BITS negx:1;
    BITS negy:1;
    BITS negz:1;
    BITS negw:1;        //31
    //BITS addrsel:2;
    BITS addrmode1:1;   //32
} PVSSRC;

/*
 * Packed "math" operand view.  The field widths add up to 32 bits.
 */
typedef struct PVSMATHtag
{
    BITS rtype:4;
    BITS spare:1;
    BITS reg:8;
    BITS swizzlex:3;
    BITS swizzley:3;
    BITS dstoff:2;      // 2 bits of dest offset into alt ram
    BITS opcode:4;
    BITS negx:1;
    BITS negy:1;
    BITS dstcomp:2;     // select dest component
    BITS spare2:3;
} PVSMATH;

/*
 * One operand word, viewable as raw bits, as any of the packed operand
 * layouts above, or as a float.  r700_AssemblerBase keeps one of these for
 * the destination (D) and three for the sources (S[3]).
 */
typedef union PVSDWORDtag
{
    BITS    bits;
    PVSDST  dst;
    PVSSRC  src;
    PVSMATH math;
    float   f;
} PVSDWORD;
155
156 typedef struct VAP_OUT_VTX_FMT_0tag
157 {
158 BITS pos:1; // 0
159 BITS misc:1;
160 BITS clip_dist0:1;
161 BITS clip_dist1:1;
162 BITS pos_param:1; // 4
163
164 BITS color0:1; // 5
165 BITS color1:1;
166 BITS color2:1;
167 BITS color3:1;
168 BITS color4:1;
169 BITS color5:1;
170 BITS color6:1;
171 BITS color7:1;
172
173 BITS normal:1;
174
175 BITS depth:1; // 14
176
177 BITS point_size:1; // 15
178 BITS edge_flag:1;
179 BITS rta_index:1; // shares same channel as kill_flag
180 BITS kill_flag:1;
181 BITS viewport_index:1; // 19
182
183 BITS resvd1:12; // 20
184 } VAP_OUT_VTX_FMT_0;
185
186 typedef struct VAP_OUT_VTX_FMT_1tag
187 {
188 BITS tex0comp:3;
189 BITS tex1comp:3;
190 BITS tex2comp:3;
191 BITS tex3comp:3;
192 BITS tex4comp:3;
193 BITS tex5comp:3;
194 BITS tex6comp:3;
195 BITS tex7comp:3;
196
197 BITS resvd:8;
198 } VAP_OUT_VTX_FMT_1;
199
200 typedef struct VAP_OUT_VTX_FMT_2tag
201 {
202 BITS tex8comp :3;
203 BITS tex9comp :3;
204 BITS tex10comp:3;
205 BITS tex11comp:3;
206 BITS tex12comp:3;
207 BITS tex13comp:3;
208 BITS tex14comp:3;
209 BITS tex15comp:3;
210
211 BITS resvd:8;
212 } VAP_OUT_VTX_FMT_2;
213
214 typedef struct OUT_FRAGMENT_FMT_0tag
215 {
216 BITS color0:1;
217 BITS color1:1;
218 BITS color2:1;
219 BITS color3:1;
220 BITS color4:1;
221 BITS color5:1;
222 BITS color6:1;
223 BITS color7:1;
224
225 BITS depth:1;
226 BITS stencil_ref:1;
227 BITS coverage_to_mask:1;
228 BITS mask:1;
229
230 BITS resvd1:20;
231 } OUT_FRAGMENT_FMT_0;
232
/*
 * Kind of control-flow clause.  Values are implicit and therefore run
 * 0..5 in declaration order; NUMBER_CF_CLAUSE_TYPES (6) is the count.
 * Used for r700_AssemblerBase::cf_current_clause_type and as the argument
 * of check_current_clause().
 */
typedef enum CF_CLAUSE_TYPE
{
    CF_EXPORT_CLAUSE,
    CF_ALU_CLAUSE,
    CF_TEX_CLAUSE,
    CF_VTX_CLAUSE,
    CF_OTHER_CLAUSE,
    CF_EMPTY_CLAUSE,
    NUMBER_CF_CLAUSE_TYPES
} CF_CLAUSE_TYPE;
243
/*
 * Miscellaneous assembler constants, grouped in one anonymous enum:
 *  - MAX_*      : constant-count limits;
 *  - FC_*       : flow-control construct kinds (NOTE(review): presumably the
 *                 values of FC_LEVEL::type -- confirm);
 *  - COND_*     : condition kinds (NOTE(review): presumably the values of
 *                 FC_LEVEL::cond -- confirm);
 *  - SAFEDIST_* : instruction distances, see the per-value comments.
 */
enum
{
    MAX_BOOL_CONSTANTS  = 32,
    MAX_INT_CONSTANTS   = 32,
    MAX_FLOAT_CONSTANTS = 256,

    FC_NONE = 0,
    FC_IF   = 1,
    FC_LOOP = 2,
    FC_REP  = 3,

    COND_NONE = 0,
    COND_BOOL = 1,
    COND_PRED = 2,
    COND_ALU  = 3,

    SAFEDIST_TEX = 6, ///< safe distance for using result of texture lookup in alu or another tex lookup
    SAFEDIST_ALU = 6  ///< the same for alu->fc
};
263
/*
 * One level of flow-control nesting.  r700_AssemblerBase keeps an array of
 * 32 of these (fc_stack).
 * Ownership of the storage behind `mid` is not visible from this header.
 */
typedef struct FC_LEVEL
{
    unsigned int  first;    ///< first fc instruction on level (if, rep, loop)
    unsigned int* mid;      ///< middle instructions - else or all breaks on this level
    unsigned int  midLen;
    unsigned int  type;
    unsigned int  cond;
    unsigned int  inv;
    unsigned int  bpush;    ///< 1 if first instruction does branch stack push
    int           id;       ///< id of bool or int variable
} FC_LEVEL;
275
276 typedef struct VTX_FETCH_METHOD
277 {
278 GLboolean bEnableMini;
279 GLuint mega_fetch_remainder;
280 } VTX_FETCH_METHOD;
281
282 typedef struct r700_AssemblerBase
283 {
284 R700ControlFlowSXClause* cf_last_export_ptr;
285 R700ControlFlowSXClause* cf_current_export_clause_ptr;
286 R700ControlFlowALUClause* cf_current_alu_clause_ptr;
287 R700ControlFlowGenericClause* cf_current_tex_clause_ptr;
288 R700ControlFlowGenericClause* cf_current_vtx_clause_ptr;
289 R700ControlFlowGenericClause* cf_current_cf_clause_ptr;
290
291 //Result shader
292 R700_Shader * pR700Shader;
293
294 // No clause has been created yet
295 CF_CLAUSE_TYPE cf_current_clause_type;
296
297 GLuint number_of_exports;
298 GLuint number_of_colorandz_exports;
299 GLuint number_of_export_opcodes;
300
301 PVSDWORD D;
302 PVSDWORD S[3];
303
304 unsigned int uLastPosUpdate;
305
306 OUT_FRAGMENT_FMT_0 fp_stOutFmt0;
307
308 unsigned int uIIns;
309 unsigned int uOIns;
310 unsigned int number_used_registers;
311 unsigned int uUsedConsts;
312
313 // Fragment programs
314 unsigned int uiFP_AttributeMap[FRAG_ATTRIB_MAX];
315 unsigned int uiFP_OutputMap[FRAG_RESULT_MAX];
316 unsigned int uBoolConsts;
317 unsigned int uIntConsts;
318 unsigned int uInsts;
319 unsigned int uConsts;
320
321 // Vertex programs
322 unsigned char ucVP_AttributeMap[VERT_ATTRIB_MAX];
323 unsigned char ucVP_OutputMap[VERT_RESULT_MAX];
324
325 unsigned char * pucOutMask;
326
327 //-----------------------------------------------------------------------------------
328 // flow control members
329 //-----------------------------------------------------------------------------------
330 unsigned int FCSP;
331 FC_LEVEL fc_stack[32];
332
333 unsigned int branch_depth;
334 unsigned int max_branch_depth;
335
336 //-----------------------------------------------------------------------------------
337 // ArgSubst used in Assemble_Source() function
338 //-----------------------------------------------------------------------------------
339 int aArgSubst[4];
340
341 GLint hw_gpr[ NUMBER_OF_CYCLES ][ NUMBER_OF_COMPONENTS ];
342 GLint hw_cfile_addr[ NUMBER_OF_COMPONENTS ];
343 GLint hw_cfile_chan[ NUMBER_OF_COMPONENTS ];
344
345 GLuint uOutputs;
346
347 GLint color_export_register_number[NUMBER_OF_OUTPUT_COLORS];
348 GLint depth_export_register_number;
349
350 GLint stencil_export_register_number;
351 GLint coverage_to_mask_export_register_number;
352 GLint mask_export_register_number;
353
354 GLuint starting_export_register_number;
355 GLuint starting_vfetch_register_number;
356 GLuint starting_temp_register_number;
357 GLuint uHelpReg;
358 GLuint uFirstHelpReg;
359
360 GLboolean input_position_is_used;
361 GLboolean input_normal_is_used;
362
363 GLboolean input_color_is_used[NUMBER_OF_INPUT_COLORS];
364
365 GLboolean input_texture_unit_is_used[NUMBER_OF_TEXTURE_UNITS];
366
367 R700VertexGenericFetch* vfetch_instruction_ptr_array[VERT_ATTRIB_MAX];
368
369 GLuint number_of_inputs;
370
371 InstDeps *pInstDeps;
372
373 SHADER_PIPE_TYPE currentShaderType;
374 struct prog_instruction * pILInst;
375 GLuint uiCurInst;
376 GLboolean bR6xx;
377 /* helper to decide which type of instruction to assemble */
378 GLboolean is_tex;
379 /* we inserted helper intructions and need barrier on next TEX ins */
380 GLboolean need_tex_barrier;
381 } r700_AssemblerBase;
382
383 //Internal use
384 BITS addrmode_PVSDST(PVSDST * pPVSDST);
385 void setaddrmode_PVSDST(PVSDST * pPVSDST, BITS addrmode);
386 void nomask_PVSDST(PVSDST * pPVSDST);
387 BITS addrmode_PVSSRC(PVSSRC* pPVSSRC);
388 void setaddrmode_PVSSRC(PVSSRC* pPVSSRC, BITS addrmode);
389 void setswizzle_PVSSRC(PVSSRC* pPVSSRC, BITS swz);
390 void noswizzle_PVSSRC(PVSSRC* pPVSSRC);
391 void swizzleagain_PVSSRC(PVSSRC * pPVSSRC, BITS x, BITS y, BITS z, BITS w);
392 void neg_PVSSRC(PVSSRC* pPVSSRC);
393 void noneg_PVSSRC(PVSSRC* pPVSSRC);
394 void flipneg_PVSSRC(PVSSRC* pPVSSRC);
395 void zerocomp_PVSSRC(PVSSRC* pPVSSRC, int c);
396 void onecomp_PVSSRC(PVSSRC* pPVSSRC, int c);
397 BITS is_misc_component_exported(VAP_OUT_VTX_FMT_0* pOutVTXFmt0);
398 BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt) ;
399 GLboolean is_reduction_opcode(PVSDWORD * dest);
400 GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size);
401
402 unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm);
403
404 GLboolean IsTex(gl_inst_opcode Opcode);
405 GLboolean IsAlu(gl_inst_opcode Opcode);
406 int check_current_clause(r700_AssemblerBase* pAsm,
407 CF_CLAUSE_TYPE new_clause_type);
408 GLboolean add_vfetch_instruction(r700_AssemblerBase* pAsm,
409 R700VertexInstruction* vertex_instruction_ptr);
410 GLboolean add_tex_instruction(r700_AssemblerBase* pAsm,
411 R700TextureInstruction* tex_instruction_ptr);
412 GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
413 GLuint gl_client_id,
414 GLuint destination_register,
415 GLuint number_of_elements,
416 GLenum dataElementType,
417 VTX_FETCH_METHOD* pFetchMethod);
418 GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm,
419 GLuint destination_register,
420 GLenum type,
421 GLint size,
422 GLubyte element,
423 GLuint _signed,
424 GLboolean normalize,
425 VTX_FETCH_METHOD * pFetchMethod);
426 GLboolean cleanup_vfetch_instructions(r700_AssemblerBase* pAsm);
427 GLuint gethelpr(r700_AssemblerBase* pAsm);
428 void resethelpr(r700_AssemblerBase* pAsm);
429 void checkop_init(r700_AssemblerBase* pAsm);
430 GLboolean mov_temp(r700_AssemblerBase* pAsm, int src);
431 GLboolean checkop1(r700_AssemblerBase* pAsm);
432 GLboolean checkop2(r700_AssemblerBase* pAsm);
433 GLboolean checkop3(r700_AssemblerBase* pAsm);
434 GLboolean assemble_src(r700_AssemblerBase *pAsm,
435 int src,
436 int fld);
437 GLboolean assemble_dst(r700_AssemblerBase *pAsm);
438 GLboolean tex_dst(r700_AssemblerBase *pAsm);
439 GLboolean tex_src(r700_AssemblerBase *pAsm);
440 GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalized);
441 void initialize(r700_AssemblerBase *pAsm);
442 GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr,
443 int source_index,
444 PVSSRC* pSource,
445 BITS scalar_channel_index);
446 GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
447 R700ALUInstruction* alu_instruction_ptr,
448 GLuint contiguous_slots_needed);
449 void get_src_properties(R700ALUInstruction* alu_instruction_ptr,
450 int source_index,
451 BITS* psrc_sel,
452 BITS* psrc_rel,
453 BITS* psrc_chan,
454 BITS* psrc_neg);
455 int is_cfile(BITS sel);
456 int is_const(BITS sel);
457 int is_gpr(BITS sel);
458 GLboolean reserve_cfile(r700_AssemblerBase* pAsm,
459 GLuint sel,
460 GLuint chan);
461 GLboolean reserve_gpr(r700_AssemblerBase* pAsm, GLuint sel, GLuint chan, GLuint cycle);
462 GLboolean cycle_for_scalar_bank_swizzle(const int swiz, const int sel, GLuint* pCycle);
463 GLboolean cycle_for_vector_bank_swizzle(const int swiz, const int sel, GLuint* pCycle);
464 GLboolean check_scalar(r700_AssemblerBase* pAsm,
465 R700ALUInstruction* alu_instruction_ptr);
466 GLboolean check_vector(r700_AssemblerBase* pAsm,
467 R700ALUInstruction* alu_instruction_ptr);
468 GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm);
469 GLboolean next_ins(r700_AssemblerBase *pAsm);
470 GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode);
471 GLboolean assemble_ABS(r700_AssemblerBase *pAsm);
472 GLboolean assemble_ADD(r700_AssemblerBase *pAsm);
473 GLboolean assemble_ARL(r700_AssemblerBase *pAsm);
474 GLboolean assemble_BAD(char *opcode_str);
475 GLboolean assemble_CMP(r700_AssemblerBase *pAsm);
476 GLboolean assemble_COS(r700_AssemblerBase *pAsm);
477 GLboolean assemble_DOT(r700_AssemblerBase *pAsm);
478 GLboolean assemble_DST(r700_AssemblerBase *pAsm);
479 GLboolean assemble_EX2(r700_AssemblerBase *pAsm);
480 GLboolean assemble_EXP(r700_AssemblerBase *pAsm);
481 GLboolean assemble_FLR(r700_AssemblerBase *pAsm);
482 GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm);
483 GLboolean assemble_FRC(r700_AssemblerBase *pAsm);
484 GLboolean assemble_KIL(r700_AssemblerBase *pAsm);
485 GLboolean assemble_LG2(r700_AssemblerBase *pAsm);
486 GLboolean assemble_LRP(r700_AssemblerBase *pAsm);
487 GLboolean assemble_LOG(r700_AssemblerBase *pAsm);
488 GLboolean assemble_MAD(r700_AssemblerBase *pAsm);
489 GLboolean assemble_LIT(r700_AssemblerBase *pAsm);
490 GLboolean assemble_MAX(r700_AssemblerBase *pAsm);
491 GLboolean assemble_MIN(r700_AssemblerBase *pAsm);
492 GLboolean assemble_MOV(r700_AssemblerBase *pAsm);
493 GLboolean assemble_MUL(r700_AssemblerBase *pAsm);
494 GLboolean assemble_POW(r700_AssemblerBase *pAsm);
495 GLboolean assemble_RCP(r700_AssemblerBase *pAsm);
496 GLboolean assemble_RSQ(r700_AssemblerBase *pAsm);
497 GLboolean assemble_SIN(r700_AssemblerBase *pAsm);
498 GLboolean assemble_SCS(r700_AssemblerBase *pAsm);
499 GLboolean assemble_SGE(r700_AssemblerBase *pAsm);
500 GLboolean assemble_SLT(r700_AssemblerBase *pAsm);
501 GLboolean assemble_STP(r700_AssemblerBase *pAsm);
502 GLboolean assemble_TEX(r700_AssemblerBase *pAsm);
503 GLboolean assemble_XPD(r700_AssemblerBase *pAsm);
504 GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm);
505 GLboolean assemble_IF(r700_AssemblerBase *pAsm);
506 GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm);
507
508 GLboolean Process_Export(r700_AssemblerBase* pAsm,
509 GLuint type,
510 GLuint export_starting_index,
511 GLuint export_count,
512 GLuint starting_register_number,
513 GLboolean is_depth_export);
514 GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm,
515 BITS depth_channel_select);
516
517
518 //Interface
519 GLboolean AssembleInstr(GLuint uiNumberInsts,
520 struct prog_instruction *pILInst,
521 r700_AssemblerBase *pR700AsmCode);
522 GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten);
523 GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten);
524
525 int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader);
526 GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode);
527
528 #endif //_R700_ASSEMBLER_H_