b555ea683c216060fce22b50563f006953520529
[mesa.git] / src / mesa / drivers / dri / r600 / r700_assembler.c
1 /*
2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21
22 /*
23 * Authors:
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
25 */
26
27 #include <stdio.h>
28 #include <stdarg.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <math.h>
32
33 #include "main/mtypes.h"
34 #include "main/imports.h"
35 #include "program/prog_parameter.h"
36
37 #include "radeon_debug.h"
38 #include "r600_context.h"
39
40 #include "r700_assembler.h"
41
42 #define USE_CF_FOR_CONTINUE_BREAK 1
43 #define USE_CF_FOR_POP_AFTER 1
44
45 struct prog_instruction noise1_insts[12] = {
46 {OPCODE_BGNSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
47 {OPCODE_MOV , {{0, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 2, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
48 {OPCODE_MOV , {{8, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 4, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
49 {OPCODE_MOV , {{8, 0, 585, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 8, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
50 {OPCODE_SGT , {{0, 0, 585, 0, 0, 0}, {8, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 1, 1, 0, 8, 1672, 0}, 1, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
51 {OPCODE_IF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 7, 0, 0}, 0, 0, 0, 1, 0, 0, 0, 15, 0, 0, 0},
52 {OPCODE_MOV , {{0, 0, 1755, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
53 {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
54 {OPCODE_ENDIF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
55 {OPCODE_MOV , {{0, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
56 {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
57 {OPCODE_ENDSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}
58 };
/*
 * Constant table referenced by noise1_presub. Only the first row carries
 * data; the second row is zero (it was implicitly zero-initialised before
 * and is spelled out here).
 */
float noise1_const[2][4] = {
    {0.3f, 0.9f, 0.5f, 0.3f},
    {0.0f, 0.0f, 0.0f, 0.0f}
};
62
63 COMPILED_SUB noise1_presub = {
64 &(noise1_insts[0]),
65 12,
66 2,
67 1,
68 0,
69 &(noise1_const[0]),
70 SWIZZLE_X,
71 SWIZZLE_X,
72 SWIZZLE_X,
73 SWIZZLE_X,
74 {0,0,0},
75 0
76 };
77
78 BITS addrmode_PVSDST(PVSDST * pPVSDST)
79 {
80 return pPVSDST->addrmode0 | ((BITS)pPVSDST->addrmode1 << 1);
81 }
82
83 void setaddrmode_PVSDST(PVSDST * pPVSDST, BITS addrmode)
84 {
85 pPVSDST->addrmode0 = addrmode & 1;
86 pPVSDST->addrmode1 = (addrmode >> 1) & 1;
87 }
88
89 void nomask_PVSDST(PVSDST * pPVSDST)
90 {
91 pPVSDST->writex = pPVSDST->writey = pPVSDST->writez = pPVSDST->writew = 1;
92 }
93
94 BITS addrmode_PVSSRC(PVSSRC* pPVSSRC)
95 {
96 return pPVSSRC->addrmode0 | ((BITS)pPVSSRC->addrmode1 << 1);
97 }
98
99 void setaddrmode_PVSSRC(PVSSRC* pPVSSRC, BITS addrmode)
100 {
101 pPVSSRC->addrmode0 = addrmode & 1;
102 pPVSSRC->addrmode1 = (addrmode >> 1) & 1;
103 }
104
105
106 void setswizzle_PVSSRC(PVSSRC* pPVSSRC, BITS swz)
107 {
108 pPVSSRC->swizzlex =
109 pPVSSRC->swizzley =
110 pPVSSRC->swizzlez =
111 pPVSSRC->swizzlew = swz;
112 }
113
114 void noswizzle_PVSSRC(PVSSRC* pPVSSRC)
115 {
116 pPVSSRC->swizzlex = SQ_SEL_X;
117 pPVSSRC->swizzley = SQ_SEL_Y;
118 pPVSSRC->swizzlez = SQ_SEL_Z;
119 pPVSSRC->swizzlew = SQ_SEL_W;
120 }
121
122 void
123 swizzleagain_PVSSRC(PVSSRC * pPVSSRC, BITS x, BITS y, BITS z, BITS w)
124 {
125 switch (x)
126 {
127 case SQ_SEL_X: x = pPVSSRC->swizzlex;
128 break;
129 case SQ_SEL_Y: x = pPVSSRC->swizzley;
130 break;
131 case SQ_SEL_Z: x = pPVSSRC->swizzlez;
132 break;
133 case SQ_SEL_W: x = pPVSSRC->swizzlew;
134 break;
135 default:;
136 }
137
138 switch (y)
139 {
140 case SQ_SEL_X: y = pPVSSRC->swizzlex;
141 break;
142 case SQ_SEL_Y: y = pPVSSRC->swizzley;
143 break;
144 case SQ_SEL_Z: y = pPVSSRC->swizzlez;
145 break;
146 case SQ_SEL_W: y = pPVSSRC->swizzlew;
147 break;
148 default:;
149 }
150
151 switch (z)
152 {
153 case SQ_SEL_X: z = pPVSSRC->swizzlex;
154 break;
155 case SQ_SEL_Y: z = pPVSSRC->swizzley;
156 break;
157 case SQ_SEL_Z: z = pPVSSRC->swizzlez;
158 break;
159 case SQ_SEL_W: z = pPVSSRC->swizzlew;
160 break;
161 default:;
162 }
163
164 switch (w)
165 {
166 case SQ_SEL_X: w = pPVSSRC->swizzlex;
167 break;
168 case SQ_SEL_Y: w = pPVSSRC->swizzley;
169 break;
170 case SQ_SEL_Z: w = pPVSSRC->swizzlez;
171 break;
172 case SQ_SEL_W: w = pPVSSRC->swizzlew;
173 break;
174 default:;
175 }
176
177 pPVSSRC->swizzlex = x;
178 pPVSSRC->swizzley = y;
179 pPVSSRC->swizzlez = z;
180 pPVSSRC->swizzlew = w;
181 }
182
183 void neg_PVSSRC(PVSSRC* pPVSSRC)
184 {
185 pPVSSRC->negx = 1;
186 pPVSSRC->negy = 1;
187 pPVSSRC->negz = 1;
188 pPVSSRC->negw = 1;
189 }
190
191 void noneg_PVSSRC(PVSSRC* pPVSSRC)
192 {
193 pPVSSRC->negx = 0;
194 pPVSSRC->negy = 0;
195 pPVSSRC->negz = 0;
196 pPVSSRC->negw = 0;
197 }
198
199 // negate argument (for SUB instead of ADD and alike)
200 void flipneg_PVSSRC(PVSSRC* pPVSSRC)
201 {
202 pPVSSRC->negx = !pPVSSRC->negx;
203 pPVSSRC->negy = !pPVSSRC->negy;
204 pPVSSRC->negz = !pPVSSRC->negz;
205 pPVSSRC->negw = !pPVSSRC->negw;
206 }
207
208 void zerocomp_PVSSRC(PVSSRC* pPVSSRC, int c)
209 {
210 switch (c)
211 {
212 case 0: pPVSSRC->swizzlex = SQ_SEL_0; pPVSSRC->negx = 0; break;
213 case 1: pPVSSRC->swizzley = SQ_SEL_0; pPVSSRC->negy = 0; break;
214 case 2: pPVSSRC->swizzlez = SQ_SEL_0; pPVSSRC->negz = 0; break;
215 case 3: pPVSSRC->swizzlew = SQ_SEL_0; pPVSSRC->negw = 0; break;
216 default:;
217 }
218 }
219
220 void onecomp_PVSSRC(PVSSRC* pPVSSRC, int c)
221 {
222 switch (c)
223 {
224 case 0: pPVSSRC->swizzlex = SQ_SEL_1; pPVSSRC->negx = 0; break;
225 case 1: pPVSSRC->swizzley = SQ_SEL_1; pPVSSRC->negy = 0; break;
226 case 2: pPVSSRC->swizzlez = SQ_SEL_1; pPVSSRC->negz = 0; break;
227 case 3: pPVSSRC->swizzlew = SQ_SEL_1; pPVSSRC->negw = 0; break;
228 default:;
229 }
230 }
231
232 BITS is_misc_component_exported(VAP_OUT_VTX_FMT_0* pOutVTXFmt0)
233 {
234 return (pOutVTXFmt0->point_size |
235 pOutVTXFmt0->edge_flag |
236 pOutVTXFmt0->rta_index |
237 pOutVTXFmt0->kill_flag |
238 pOutVTXFmt0->viewport_index);
239 }
240
241 BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt)
242 {
243 return (pFPOutFmt->depth |
244 pFPOutFmt->stencil_ref |
245 pFPOutFmt->mask |
246 pFPOutFmt->coverage_to_mask);
247 }
248
249 GLboolean is_reduction_opcode(PVSDWORD* dest)
250 {
251 if (dest->dst.op3 == 0)
252 {
253 if ( (dest->dst.opcode == SQ_OP2_INST_DOT4 || dest->dst.opcode == SQ_OP2_INST_DOT4_IEEE || dest->dst.opcode == SQ_OP2_INST_CUBE) )
254 {
255 return GL_TRUE;
256 }
257 }
258 return GL_FALSE;
259 }
260
261 GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size)
262 {
263 GLuint format = FMT_INVALID;
264 GLuint uiElemSize = 0;
265
266 switch (eType)
267 {
268 case GL_BYTE:
269 case GL_UNSIGNED_BYTE:
270 uiElemSize = 1;
271 switch(nChannels)
272 {
273 case 1:
274 format = FMT_8; break;
275 case 2:
276 format = FMT_8_8; break;
277 case 3:
278 format = FMT_8_8_8; break;
279 case 4:
280 format = FMT_8_8_8_8; break;
281 default:
282 break;
283 }
284 break;
285
286 case GL_UNSIGNED_SHORT:
287 case GL_SHORT:
288 uiElemSize = 2;
289 switch(nChannels)
290 {
291 case 1:
292 format = FMT_16; break;
293 case 2:
294 format = FMT_16_16; break;
295 case 3:
296 /* 3 comp GL_SHORT vertex format doesnt work on r700
297 4 somehow works, test - sauerbraten */
298 format = FMT_16_16_16_16; break;
299 case 4:
300 format = FMT_16_16_16_16; break;
301 default:
302 break;
303 }
304 break;
305
306 case GL_UNSIGNED_INT:
307 case GL_INT:
308 uiElemSize = 4;
309 switch(nChannels)
310 {
311 case 1:
312 format = FMT_32; break;
313 case 2:
314 format = FMT_32_32; break;
315 case 3:
316 format = FMT_32_32_32; break;
317 case 4:
318 format = FMT_32_32_32_32; break;
319 default:
320 break;
321 }
322 break;
323
324 case GL_FLOAT:
325 uiElemSize = 4;
326 switch(nChannels)
327 {
328 case 1:
329 format = FMT_32_FLOAT; break;
330 case 2:
331 format = FMT_32_32_FLOAT; break;
332 case 3:
333 format = FMT_32_32_32_FLOAT; break;
334 case 4:
335 format = FMT_32_32_32_32_FLOAT; break;
336 default:
337 break;
338 }
339 break;
340 case GL_DOUBLE:
341 uiElemSize = 8;
342 switch(nChannels)
343 {
344 case 1:
345 format = FMT_32_FLOAT; break;
346 case 2:
347 format = FMT_32_32_FLOAT; break;
348 case 3:
349 format = FMT_32_32_32_FLOAT; break;
350 case 4:
351 format = FMT_32_32_32_32_FLOAT; break;
352 default:
353 break;
354 }
355 break;
356 default:
357 ;
358 //GL_ASSERT_NO_CASE();
359 }
360
361 if(NULL != pClient_size)
362 {
363 *pClient_size = uiElemSize * nChannels;
364 }
365
366 return(format);
367 }
368
369 unsigned int r700GetNumOperands(GLuint opcode, GLuint nIsOp3)
370 {
371 if(nIsOp3 > 0)
372 {
373 return 3;
374 }
375
376 switch (opcode)
377 {
378 case SQ_OP2_INST_ADD:
379 case SQ_OP2_INST_KILLE:
380 case SQ_OP2_INST_KILLGT:
381 case SQ_OP2_INST_KILLGE:
382 case SQ_OP2_INST_KILLNE:
383 case SQ_OP2_INST_MUL:
384 case SQ_OP2_INST_MAX:
385 case SQ_OP2_INST_MIN:
386 //case SQ_OP2_INST_MAX_DX10:
387 //case SQ_OP2_INST_MIN_DX10:
388 case SQ_OP2_INST_SETE:
389 case SQ_OP2_INST_SETNE:
390 case SQ_OP2_INST_SETGT:
391 case SQ_OP2_INST_SETGE:
392 case SQ_OP2_INST_PRED_SETE:
393 case SQ_OP2_INST_PRED_SETGT:
394 case SQ_OP2_INST_PRED_SETGE:
395 case SQ_OP2_INST_PRED_SETNE:
396 case SQ_OP2_INST_DOT4:
397 case SQ_OP2_INST_DOT4_IEEE:
398 case SQ_OP2_INST_CUBE:
399 return 2;
400
401 case SQ_OP2_INST_MOV:
402 case SQ_OP2_INST_MOVA_FLOOR:
403 case SQ_OP2_INST_FRACT:
404 case SQ_OP2_INST_FLOOR:
405 case SQ_OP2_INST_TRUNC:
406 case SQ_OP2_INST_EXP_IEEE:
407 case SQ_OP2_INST_LOG_CLAMPED:
408 case SQ_OP2_INST_LOG_IEEE:
409 case SQ_OP2_INST_RECIP_IEEE:
410 case SQ_OP2_INST_RECIPSQRT_IEEE:
411 case SQ_OP2_INST_FLT_TO_INT:
412 case SQ_OP2_INST_SIN:
413 case SQ_OP2_INST_COS:
414 return 1;
415
416 default: radeon_error(
417 "Need instruction operand number for %x.\n", opcode);
418 };
419
420 return 3;
421 }
422
423 int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader)
424 {
425 GLuint i;
426
427 Init_R700_Shader(pShader);
428 pAsm->pR700Shader = pShader;
429 pAsm->currentShaderType = spt;
430
431 pAsm->cf_last_export_ptr = NULL;
432
433 pAsm->cf_current_export_clause_ptr = NULL;
434 pAsm->cf_current_alu_clause_ptr = NULL;
435 pAsm->cf_current_tex_clause_ptr = NULL;
436 pAsm->cf_current_vtx_clause_ptr = NULL;
437 pAsm->cf_current_cf_clause_ptr = NULL;
438
439 // No clause has been created yet
440 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
441
442 pAsm->number_of_colorandz_exports = 0;
443 pAsm->number_of_exports = 0;
444 pAsm->number_of_export_opcodes = 0;
445
446 pAsm->alu_x_opcode = 0;
447
448 pAsm->D2.bits = 0;
449
450 pAsm->D.bits = 0;
451 pAsm->S[0].bits = 0;
452 pAsm->S[1].bits = 0;
453 pAsm->S[2].bits = 0;
454
455 pAsm->uLastPosUpdate = 0;
456
457 *(BITS *) &pAsm->fp_stOutFmt0 = 0;
458
459 pAsm->uIIns = 0;
460 pAsm->uOIns = 0;
461 pAsm->number_used_registers = 0;
462 pAsm->uUsedConsts = 256;
463
464
465 // Fragment programs
466 pAsm->uBoolConsts = 0;
467 pAsm->uIntConsts = 0;
468 pAsm->uInsts = 0;
469 pAsm->uConsts = 0;
470
471 pAsm->FCSP = 0;
472 pAsm->fc_stack[0].type = FC_NONE;
473
474 pAsm->aArgSubst[0] =
475 pAsm->aArgSubst[1] =
476 pAsm->aArgSubst[2] =
477 pAsm->aArgSubst[3] = (-1);
478
479 pAsm->uOutputs = 0;
480
481 for (i=0; i<NUMBER_OF_OUTPUT_COLORS; i++)
482 {
483 pAsm->color_export_register_number[i] = (-1);
484 }
485
486
487 pAsm->depth_export_register_number = (-1);
488 pAsm->stencil_export_register_number = (-1);
489 pAsm->coverage_to_mask_export_register_number = (-1);
490 pAsm->mask_export_register_number = (-1);
491
492 pAsm->starting_export_register_number = 0;
493 pAsm->starting_vfetch_register_number = 0;
494 pAsm->starting_temp_register_number = 0;
495 pAsm->uFirstHelpReg = 0;
496
497 pAsm->input_position_is_used = GL_FALSE;
498 pAsm->input_normal_is_used = GL_FALSE;
499
500 for (i=0; i<NUMBER_OF_INPUT_COLORS; i++)
501 {
502 pAsm->input_color_is_used[ i ] = GL_FALSE;
503 }
504
505 for (i=0; i<NUMBER_OF_TEXTURE_UNITS; i++)
506 {
507 pAsm->input_texture_unit_is_used[ i ] = GL_FALSE;
508 }
509
510 for (i=0; i<VERT_ATTRIB_MAX; i++)
511 {
512 pAsm->vfetch_instruction_ptr_array[ i ] = NULL;
513 }
514
515 pAsm->number_of_inputs = 0;
516
517 pAsm->is_tex = GL_FALSE;
518 pAsm->need_tex_barrier = GL_FALSE;
519
520 pAsm->subs = NULL;
521 pAsm->unSubArraySize = 0;
522 pAsm->unSubArrayPointer = 0;
523 pAsm->callers = NULL;
524 pAsm->unCallerArraySize = 0;
525 pAsm->unCallerArrayPointer = 0;
526
527 pAsm->CALLSP = 0;
528 pAsm->CALLSTACK[0].FCSP_BeforeEntry = 0;
529 pAsm->CALLSTACK[0].plstCFInstructions_local
530 = &(pAsm->pR700Shader->lstCFInstructions);
531
532 pAsm->CALLSTACK[0].max = 0;
533 pAsm->CALLSTACK[0].current = 0;
534
535 SetActiveCFlist(pAsm->pR700Shader, pAsm->CALLSTACK[0].plstCFInstructions_local);
536
537 pAsm->unCFflags = 0;
538
539 pAsm->presubs = NULL;
540 pAsm->unPresubArraySize = 0;
541 pAsm->unNumPresub = 0;
542 pAsm->unCurNumILInsts = 0;
543
544 pAsm->unVetTexBits = 0;
545
546 return 0;
547 }
548
549 GLboolean IsTex(gl_inst_opcode Opcode)
550 {
551 if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) ||
552 (OPCODE_DDX==Opcode) || (OPCODE_DDY==Opcode) )
553 {
554 return GL_TRUE;
555 }
556 return GL_FALSE;
557 }
558
559 GLboolean IsAlu(gl_inst_opcode Opcode)
560 {
561 //TODO : more for fc and ex for higher spec.
562 if( IsTex(Opcode) )
563 {
564 return GL_FALSE;
565 }
566 return GL_TRUE;
567 }
568
569 int check_current_clause(r700_AssemblerBase* pAsm,
570 CF_CLAUSE_TYPE new_clause_type)
571 {
572 if (pAsm->cf_current_clause_type != new_clause_type)
573 { //Close last open clause
574 switch (pAsm->cf_current_clause_type)
575 {
576 case CF_ALU_CLAUSE:
577 if ( pAsm->cf_current_alu_clause_ptr != NULL)
578 {
579 pAsm->cf_current_alu_clause_ptr = NULL;
580 }
581 break;
582 case CF_VTX_CLAUSE:
583 if ( pAsm->cf_current_vtx_clause_ptr != NULL)
584 {
585 pAsm->cf_current_vtx_clause_ptr = NULL;
586 }
587 break;
588 case CF_TEX_CLAUSE:
589 if ( pAsm->cf_current_tex_clause_ptr != NULL)
590 {
591 pAsm->cf_current_tex_clause_ptr = NULL;
592 }
593 break;
594 case CF_EXPORT_CLAUSE:
595 if ( pAsm->cf_current_export_clause_ptr != NULL)
596 {
597 pAsm->cf_current_export_clause_ptr = NULL;
598 }
599 break;
600 case CF_OTHER_CLAUSE:
601 if ( pAsm->cf_current_cf_clause_ptr != NULL)
602 {
603 pAsm->cf_current_cf_clause_ptr = NULL;
604 }
605 break;
606 case CF_EMPTY_CLAUSE:
607 break;
608 default:
609 radeon_error(
610 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type);
611 return GL_FALSE;
612 }
613
614 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
615
616 // Create new clause
617 switch (new_clause_type)
618 {
619 case CF_ALU_CLAUSE:
620 pAsm->cf_current_clause_type = CF_ALU_CLAUSE;
621 break;
622 case CF_VTX_CLAUSE:
623 pAsm->cf_current_clause_type = CF_VTX_CLAUSE;
624 break;
625 case CF_TEX_CLAUSE:
626 pAsm->cf_current_clause_type = CF_TEX_CLAUSE;
627 break;
628 case CF_EXPORT_CLAUSE:
629 {
630 R700ControlFlowSXClause* pR700ControlFlowSXClause
631 = (R700ControlFlowSXClause*) CALLOC_STRUCT(R700ControlFlowSXClause);
632
633 // Add new export instruction to control flow program
634 if (pR700ControlFlowSXClause != 0)
635 {
636 pAsm->cf_current_export_clause_ptr = pR700ControlFlowSXClause;
637 Init_R700ControlFlowSXClause(pR700ControlFlowSXClause);
638 AddCFInstruction( pAsm->pR700Shader,
639 (R700ControlFlowInstruction *)pR700ControlFlowSXClause );
640 }
641 else
642 {
643 radeon_error(
644 "Error allocating new EXPORT CF instruction in check_current_clause. \n");
645 return GL_FALSE;
646 }
647 pAsm->cf_current_clause_type = CF_EXPORT_CLAUSE;
648 }
649 break;
650 case CF_EMPTY_CLAUSE:
651 break;
652 case CF_OTHER_CLAUSE:
653 pAsm->cf_current_clause_type = CF_OTHER_CLAUSE;
654 break;
655 default:
656 radeon_error(
657 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type);
658 return GL_FALSE;
659 }
660 }
661
662 return GL_TRUE;
663 }
664
665 GLboolean add_cf_instruction(r700_AssemblerBase* pAsm)
666 {
667 if(GL_FALSE == check_current_clause(pAsm, CF_OTHER_CLAUSE))
668 {
669 return GL_FALSE;
670 }
671
672 pAsm->cf_current_cf_clause_ptr =
673 (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
674
675 if (pAsm->cf_current_cf_clause_ptr != NULL)
676 {
677 Init_R700ControlFlowGenericClause(pAsm->cf_current_cf_clause_ptr);
678 AddCFInstruction( pAsm->pR700Shader,
679 (R700ControlFlowInstruction *)pAsm->cf_current_cf_clause_ptr );
680 }
681 else
682 {
683 radeon_error("Could not allocate a new VFetch CF instruction.\n");
684 return GL_FALSE;
685 }
686
687 return GL_TRUE;
688 }
689
690 GLboolean add_vfetch_instruction(r700_AssemblerBase* pAsm,
691 R700VertexInstruction* vertex_instruction_ptr)
692 {
693 if( GL_FALSE == check_current_clause(pAsm, CF_VTX_CLAUSE) )
694 {
695 return GL_FALSE;
696 }
697
698 if( pAsm->cf_current_vtx_clause_ptr == NULL ||
699 ( (pAsm->cf_current_vtx_clause_ptr != NULL) &&
700 (pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_vtx_clause_ptr->m_ShaderInstType)-1)
701 ) )
702 {
703 // Create new Vfetch control flow instruction for this new clause
704 pAsm->cf_current_vtx_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
705
706 if (pAsm->cf_current_vtx_clause_ptr != NULL)
707 {
708 Init_R700ControlFlowGenericClause(pAsm->cf_current_vtx_clause_ptr);
709 AddCFInstruction( pAsm->pR700Shader,
710 (R700ControlFlowInstruction *)pAsm->cf_current_vtx_clause_ptr );
711 }
712 else
713 {
714 radeon_error("Could not allocate a new VFetch CF instruction.\n");
715 return GL_FALSE;
716 }
717
718 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.pop_count = 0x0;
719 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_const = 0x0;
720 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
721 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count = 0x0;
722 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.end_of_program = 0x0;
723 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
724 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_VTX;
725 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
726 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.barrier = 0x1;
727
728 LinkVertexInstruction(pAsm->cf_current_vtx_clause_ptr, vertex_instruction_ptr );
729 }
730 else
731 {
732 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count++;
733 }
734
735 AddVTXInstruction(pAsm->pR700Shader, vertex_instruction_ptr);
736
737 return GL_TRUE;
738 }
739
740 GLboolean add_tex_instruction(r700_AssemblerBase* pAsm,
741 R700TextureInstruction* tex_instruction_ptr)
742 {
743 if ( GL_FALSE == check_current_clause(pAsm, CF_TEX_CLAUSE) )
744 {
745 return GL_FALSE;
746 }
747
748 if ( pAsm->cf_current_tex_clause_ptr == NULL ||
749 ( (pAsm->cf_current_tex_clause_ptr != NULL) &&
750 (pAsm->cf_current_tex_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_tex_clause_ptr->m_ShaderInstType)-1)
751 ) )
752 {
753 // new tex cf instruction for this new clause
754 pAsm->cf_current_tex_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
755
756 if (pAsm->cf_current_tex_clause_ptr != NULL)
757 {
758 Init_R700ControlFlowGenericClause(pAsm->cf_current_tex_clause_ptr);
759 AddCFInstruction( pAsm->pR700Shader,
760 (R700ControlFlowInstruction *)pAsm->cf_current_tex_clause_ptr );
761 }
762 else
763 {
764 radeon_error("Could not allocate a new TEX CF instruction.\n");
765 return GL_FALSE;
766 }
767
768 pAsm->cf_current_tex_clause_ptr->m_Word1.f.pop_count = 0x0;
769 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_const = 0x0;
770 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
771
772 pAsm->cf_current_tex_clause_ptr->m_Word1.f.end_of_program = 0x0;
773 pAsm->cf_current_tex_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
774 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_TEX;
775 pAsm->cf_current_tex_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
776 pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x0; //0x1;
777 }
778 else
779 {
780 pAsm->cf_current_tex_clause_ptr->m_Word1.f.count++;
781 }
782
783 // If this clause constains any TEX instruction that is dependent on a previous instruction,
784 // set the barrier bit
785 if( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) || pAsm->need_tex_barrier == GL_TRUE )
786 {
787 pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x1;
788 }
789
790 if(NULL == pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction)
791 {
792 pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction = tex_instruction_ptr;
793 tex_instruction_ptr->m_pLinkedGenericClause = pAsm->cf_current_tex_clause_ptr;
794 }
795
796 AddTEXInstruction(pAsm->pR700Shader, tex_instruction_ptr);
797
798 return GL_TRUE;
799 }
800
801 GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
802 GLuint gl_client_id,
803 GLuint destination_register,
804 GLuint number_of_elements,
805 GLenum dataElementType,
806 VTX_FETCH_METHOD* pFetchMethod)
807 {
808 GLuint client_size_inbyte;
809 GLuint data_format;
810 GLuint mega_fetch_count;
811 GLuint is_mega_fetch_flag;
812
813 R700VertexGenericFetch* vfetch_instruction_ptr;
814 R700VertexGenericFetch* assembled_vfetch_instruction_ptr = pAsm->vfetch_instruction_ptr_array[ gl_client_id ];
815
816 if (assembled_vfetch_instruction_ptr == NULL)
817 {
818 vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
819 if (vfetch_instruction_ptr == NULL)
820 {
821 return GL_FALSE;
822 }
823 Init_R700VertexGenericFetch(vfetch_instruction_ptr);
824 }
825 else
826 {
827 vfetch_instruction_ptr = assembled_vfetch_instruction_ptr;
828 }
829
830 data_format = GetSurfaceFormat(dataElementType, number_of_elements, &client_size_inbyte);
831
832 if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
833 {
834 //TODO : mini fetch
835 mega_fetch_count = 0;
836 is_mega_fetch_flag = 0;
837 }
838 else
839 {
840 mega_fetch_count = MEGA_FETCH_BYTES - 1;
841 is_mega_fetch_flag = 0x1;
842 pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
843 }
844
845 vfetch_instruction_ptr->m_Word0.f.vtx_inst = SQ_VTX_INST_FETCH;
846 vfetch_instruction_ptr->m_Word0.f.fetch_type = SQ_VTX_FETCH_VERTEX_DATA;
847 vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
848
849 vfetch_instruction_ptr->m_Word0.f.buffer_id = gl_client_id;
850 vfetch_instruction_ptr->m_Word0.f.src_gpr = 0x0;
851 vfetch_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
852 vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X;
853 vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count;
854
855 vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (number_of_elements < 1) ? SQ_SEL_0 : SQ_SEL_X;
856 vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (number_of_elements < 2) ? SQ_SEL_0 : SQ_SEL_Y;
857 vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (number_of_elements < 3) ? SQ_SEL_0 : SQ_SEL_Z;
858 vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (number_of_elements < 4) ? SQ_SEL_1 : SQ_SEL_W;
859
860 vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1;
861
862 // Destination register
863 vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register;
864 vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE;
865
866 vfetch_instruction_ptr->m_Word2.f.offset = 0;
867 vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0;
868
869 vfetch_instruction_ptr->m_Word2.f.mega_fetch = is_mega_fetch_flag;
870
871 if (assembled_vfetch_instruction_ptr == NULL)
872 {
873 if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) )
874 {
875 return GL_FALSE;
876 }
877
878 if (pAsm->vfetch_instruction_ptr_array[ gl_client_id ] != NULL)
879 {
880 return GL_FALSE;
881 }
882 else
883 {
884 pAsm->vfetch_instruction_ptr_array[ gl_client_id ] = vfetch_instruction_ptr;
885 }
886 }
887
888 return GL_TRUE;
889 }
890
891 GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm,
892 GLuint destination_register,
893 GLenum type,
894 GLint size,
895 GLubyte element,
896 GLuint _signed,
897 GLboolean normalize,
898 GLenum format,
899 VTX_FETCH_METHOD * pFetchMethod)
900 {
901 GLuint client_size_inbyte;
902 GLuint data_format;
903 GLuint mega_fetch_count;
904 GLuint is_mega_fetch_flag;
905
906 R700VertexGenericFetch* vfetch_instruction_ptr;
907 R700VertexGenericFetch* assembled_vfetch_instruction_ptr
908 = pAsm->vfetch_instruction_ptr_array[element];
909
910 if (assembled_vfetch_instruction_ptr == NULL)
911 {
912 vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
913 if (vfetch_instruction_ptr == NULL)
914 {
915 return GL_FALSE;
916 }
917 Init_R700VertexGenericFetch(vfetch_instruction_ptr);
918 }
919 else
920 {
921 vfetch_instruction_ptr = assembled_vfetch_instruction_ptr;
922 }
923
924 data_format = GetSurfaceFormat(type, size, &client_size_inbyte);
925
926 if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
927 {
928 //TODO : mini fetch
929 mega_fetch_count = 0;
930 is_mega_fetch_flag = 0;
931 }
932 else
933 {
934 mega_fetch_count = MEGA_FETCH_BYTES - 1;
935 is_mega_fetch_flag = 0x1;
936 pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
937 }
938
939 vfetch_instruction_ptr->m_Word0.f.vtx_inst = SQ_VTX_INST_FETCH;
940 vfetch_instruction_ptr->m_Word0.f.fetch_type = SQ_VTX_FETCH_VERTEX_DATA;
941 vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
942
943 vfetch_instruction_ptr->m_Word0.f.buffer_id = element;
944 vfetch_instruction_ptr->m_Word0.f.src_gpr = 0x0;
945 vfetch_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
946 vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X;
947 vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count;
948
949 if(format == GL_BGRA)
950 {
951 vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_Z;
952 vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
953 vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_X;
954 vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
955 }
956 else
957 {
958 vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_X;
959 vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
960 vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_Z;
961 vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
962
963 }
964
965 vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1;
966 vfetch_instruction_ptr->m_Word1.f.data_format = data_format;
967 vfetch_instruction_ptr->m_Word2.f.endian_swap = SQ_ENDIAN_NONE;
968
969 if(1 == _signed)
970 {
971 vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_SIGNED;
972 }
973 else
974 {
975 vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_UNSIGNED;
976 }
977
978 if(GL_TRUE == normalize)
979 {
980 vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_NORM;
981 }
982 else
983 {
984 vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_INT;
985 }
986
987 // Destination register
988 vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register;
989 vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE;
990
991 vfetch_instruction_ptr->m_Word2.f.offset = 0;
992 vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0;
993
994 vfetch_instruction_ptr->m_Word2.f.mega_fetch = is_mega_fetch_flag;
995
996 if (assembled_vfetch_instruction_ptr == NULL)
997 {
998 if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) )
999 {
1000 return GL_FALSE;
1001 }
1002
1003 if (pAsm->vfetch_instruction_ptr_array[element] != NULL)
1004 {
1005 return GL_FALSE;
1006 }
1007 else
1008 {
1009 pAsm->vfetch_instruction_ptr_array[element] = vfetch_instruction_ptr;
1010 }
1011 }
1012
1013 return GL_TRUE;
1014 }
1015
1016 GLboolean cleanup_vfetch_instructions(r700_AssemblerBase* pAsm)
1017 {
1018 GLint i;
1019 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
1020 pAsm->cf_current_vtx_clause_ptr = NULL;
1021
1022 for (i=0; i<VERT_ATTRIB_MAX; i++)
1023 {
1024 pAsm->vfetch_instruction_ptr_array[ i ] = NULL;
1025 }
1026
1027 cleanup_vfetch_shaderinst(pAsm->pR700Shader);
1028
1029 return GL_TRUE;
1030 }
1031
1032 GLuint gethelpr(r700_AssemblerBase* pAsm)
1033 {
1034 GLuint r = pAsm->uHelpReg;
1035 pAsm->uHelpReg++;
1036 if (pAsm->uHelpReg > pAsm->number_used_registers)
1037 {
1038 pAsm->number_used_registers = pAsm->uHelpReg;
1039 }
1040 return r;
1041 }
1042 void resethelpr(r700_AssemblerBase* pAsm)
1043 {
1044 pAsm->uHelpReg = pAsm->uFirstHelpReg;
1045 }
1046
1047 void checkop_init(r700_AssemblerBase* pAsm)
1048 {
1049 resethelpr(pAsm);
1050 pAsm->aArgSubst[0] =
1051 pAsm->aArgSubst[1] =
1052 pAsm->aArgSubst[2] =
1053 pAsm->aArgSubst[3] = -1;
1054 }
1055
1056 static GLboolean next_ins(r700_AssemblerBase *pAsm)
1057 {
1058 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1059
1060 if (GL_TRUE == pAsm->is_tex)
1061 {
1062 if (pILInst->TexSrcTarget == TEXTURE_RECT_INDEX)
1063 {
1064 if (GL_FALSE == assemble_tex_instruction(pAsm, GL_FALSE))
1065 {
1066 radeon_error("Error assembling TEX instruction\n");
1067 return GL_FALSE;
1068 }
1069 }
1070 else
1071 {
1072 if (GL_FALSE == assemble_tex_instruction(pAsm, GL_TRUE))
1073 {
1074 radeon_error("Error assembling TEX instruction\n");
1075 return GL_FALSE;
1076 }
1077 }
1078 }
1079 else
1080 { //ALU
1081 if (GL_FALSE == assemble_alu_instruction(pAsm))
1082 {
1083 radeon_error("Error assembling ALU instruction\n");
1084 return GL_FALSE;
1085 }
1086 }
1087
1088 if (pAsm->D.dst.rtype == DST_REG_OUT)
1089 {
1090 assert(pAsm->D.dst.reg >= pAsm->starting_export_register_number);
1091
1092 if (pAsm->D.dst.op3)
1093 {
1094 // There is no mask for OP3 instructions, so all channels are written
1095 pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] = 0xF;
1096 }
1097 else
1098 {
1099 pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number]
1100 |= (unsigned char)pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask;
1101 }
1102 }
1103
1104 //reset for next inst.
1105 pAsm->D.bits = 0;
1106 pAsm->D2.bits = 0;
1107 pAsm->S[0].bits = 0;
1108 pAsm->S[1].bits = 0;
1109 pAsm->S[2].bits = 0;
1110 pAsm->is_tex = GL_FALSE;
1111 pAsm->need_tex_barrier = GL_FALSE;
1112 pAsm->D2.bits = 0;
1113 pAsm->C[0].bits = pAsm->C[1].bits = pAsm->C[2].bits = pAsm->C[3].bits = 0;
1114 return GL_TRUE;
1115 }
1116
1117 GLboolean mov_temp(r700_AssemblerBase* pAsm, int src)
1118 {
1119 GLuint tmp = gethelpr(pAsm);
1120
1121 //mov src to temp helper gpr.
1122 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
1123
1124 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1125
1126 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1127 pAsm->D.dst.reg = tmp;
1128
1129 nomask_PVSDST(&(pAsm->D.dst));
1130
1131 if( GL_FALSE == assemble_src(pAsm, src, 0) )
1132 {
1133 return GL_FALSE;
1134 }
1135
1136 noswizzle_PVSSRC(&(pAsm->S[0].src));
1137 noneg_PVSSRC(&(pAsm->S[0].src));
1138
1139 if( GL_FALSE == next_ins(pAsm) )
1140 {
1141 return GL_FALSE;
1142 }
1143
1144 pAsm->aArgSubst[1 + src] = tmp;
1145
1146 return GL_TRUE;
1147 }
1148
1149 GLboolean checkop1(r700_AssemblerBase* pAsm)
1150 {
1151 checkop_init(pAsm);
1152 return GL_TRUE;
1153 }
1154
1155 GLboolean checkop2(r700_AssemblerBase* pAsm)
1156 {
1157 GLboolean bSrcConst[2];
1158 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1159
1160 checkop_init(pAsm);
1161
1162 if( (pILInst->SrcReg[0].File == PROGRAM_UNIFORM) ||
1163 (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
1164 (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
1165 (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) ||
1166 (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
1167 {
1168 bSrcConst[0] = GL_TRUE;
1169 }
1170 else
1171 {
1172 bSrcConst[0] = GL_FALSE;
1173 }
1174 if( (pILInst->SrcReg[1].File == PROGRAM_UNIFORM) ||
1175 (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
1176 (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
1177 (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) ||
1178 (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
1179 {
1180 bSrcConst[1] = GL_TRUE;
1181 }
1182 else
1183 {
1184 bSrcConst[1] = GL_FALSE;
1185 }
1186
1187 if( (bSrcConst[0] == GL_TRUE) && (bSrcConst[1] == GL_TRUE) )
1188 {
1189 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index)
1190 {
1191 if( GL_FALSE == mov_temp(pAsm, 1) )
1192 {
1193 return GL_FALSE;
1194 }
1195 }
1196 }
1197
1198 return GL_TRUE;
1199 }
1200
1201 GLboolean checkop3(r700_AssemblerBase* pAsm)
1202 {
1203 GLboolean bSrcConst[3];
1204 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1205
1206 checkop_init(pAsm);
1207
1208 if( (pILInst->SrcReg[0].File == PROGRAM_UNIFORM) ||
1209 (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
1210 (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
1211 (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) ||
1212 (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
1213 {
1214 bSrcConst[0] = GL_TRUE;
1215 }
1216 else
1217 {
1218 bSrcConst[0] = GL_FALSE;
1219 }
1220 if( (pILInst->SrcReg[1].File == PROGRAM_UNIFORM) ||
1221 (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
1222 (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
1223 (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) ||
1224 (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
1225 {
1226 bSrcConst[1] = GL_TRUE;
1227 }
1228 else
1229 {
1230 bSrcConst[1] = GL_FALSE;
1231 }
1232 if( (pILInst->SrcReg[2].File == PROGRAM_UNIFORM) ||
1233 (pILInst->SrcReg[2].File == PROGRAM_CONSTANT) ||
1234 (pILInst->SrcReg[2].File == PROGRAM_LOCAL_PARAM) ||
1235 (pILInst->SrcReg[2].File == PROGRAM_ENV_PARAM) ||
1236 (pILInst->SrcReg[2].File == PROGRAM_STATE_VAR) )
1237 {
1238 bSrcConst[2] = GL_TRUE;
1239 }
1240 else
1241 {
1242 bSrcConst[2] = GL_FALSE;
1243 }
1244
1245 if( (GL_TRUE == bSrcConst[0]) &&
1246 (GL_TRUE == bSrcConst[1]) &&
1247 (GL_TRUE == bSrcConst[2]) )
1248 {
1249 if( GL_FALSE == mov_temp(pAsm, 1) )
1250 {
1251 return GL_FALSE;
1252 }
1253 if( GL_FALSE == mov_temp(pAsm, 2) )
1254 {
1255 return GL_FALSE;
1256 }
1257
1258 return GL_TRUE;
1259 }
1260 else if( (GL_TRUE == bSrcConst[0]) &&
1261 (GL_TRUE == bSrcConst[1]) )
1262 {
1263 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index)
1264 {
1265 if( GL_FALSE == mov_temp(pAsm, 1) )
1266 {
1267 return GL_FALSE;
1268 }
1269 }
1270
1271 return GL_TRUE;
1272 }
1273 else if ( (GL_TRUE == bSrcConst[0]) &&
1274 (GL_TRUE == bSrcConst[2]) )
1275 {
1276 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[2].Index)
1277 {
1278 if( GL_FALSE == mov_temp(pAsm, 2) )
1279 {
1280 return GL_FALSE;
1281 }
1282 }
1283
1284 return GL_TRUE;
1285 }
1286 else if( (GL_TRUE == bSrcConst[1]) &&
1287 (GL_TRUE == bSrcConst[2]) )
1288 {
1289 if(pILInst->SrcReg[1].Index != pILInst->SrcReg[2].Index)
1290 {
1291 if( GL_FALSE == mov_temp(pAsm, 2) )
1292 {
1293 return GL_FALSE;
1294 }
1295 }
1296
1297 return GL_TRUE;
1298 }
1299
1300 return GL_TRUE;
1301 }
1302
1303 GLboolean assemble_src(r700_AssemblerBase *pAsm,
1304 int src,
1305 int fld)
1306 {
1307 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1308
1309 if (fld == -1)
1310 {
1311 fld = src;
1312 }
1313
1314 if(pAsm->aArgSubst[1+src] >= 0)
1315 {
1316 assert(fld >= 0);
1317 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1318 pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY;
1319 pAsm->S[fld].src.reg = pAsm->aArgSubst[1+src];
1320 }
1321 else
1322 {
1323 switch (pILInst->SrcReg[src].File)
1324 {
1325 case PROGRAM_TEMPORARY:
1326 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1327 pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY;
1328 pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index + pAsm->starting_temp_register_number;
1329 break;
1330 case PROGRAM_CONSTANT:
1331 case PROGRAM_LOCAL_PARAM:
1332 case PROGRAM_ENV_PARAM:
1333 case PROGRAM_STATE_VAR:
1334 case PROGRAM_UNIFORM:
1335 if (1 == pILInst->SrcReg[src].RelAddr)
1336 {
1337 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_RELATIVE_A0);
1338 }
1339 else
1340 {
1341 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1342 }
1343
1344 pAsm->S[fld].src.rtype = SRC_REG_CONSTANT;
1345 if(pILInst->SrcReg[src].Index < 0)
1346 {
1347 WARN_ONCE("Negative register offsets not supported yet!\n");
1348 pAsm->S[fld].src.reg = 0;
1349 }
1350 else
1351 {
1352 pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index;
1353 }
1354 break;
1355 case PROGRAM_INPUT:
1356 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1357 pAsm->S[fld].src.rtype = SRC_REG_INPUT;
1358 switch (pAsm->currentShaderType)
1359 {
1360 case SPT_FP:
1361 pAsm->S[fld].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[src].Index];
1362 break;
1363 case SPT_VP:
1364 pAsm->S[fld].src.reg = pAsm->ucVP_AttributeMap[pILInst->SrcReg[src].Index];
1365 break;
1366 }
1367 break;
1368 default:
1369 radeon_error("Invalid source argument type : %d \n", pILInst->SrcReg[src].File);
1370 return GL_FALSE;
1371 }
1372 }
1373
1374 pAsm->S[fld].src.swizzlex = pILInst->SrcReg[src].Swizzle & 0x7;
1375 pAsm->S[fld].src.swizzley = (pILInst->SrcReg[src].Swizzle >> 3) & 0x7;
1376 pAsm->S[fld].src.swizzlez = (pILInst->SrcReg[src].Swizzle >> 6) & 0x7;
1377 pAsm->S[fld].src.swizzlew = (pILInst->SrcReg[src].Swizzle >> 9) & 0x7;
1378
1379 pAsm->S[fld].src.negx = pILInst->SrcReg[src].Negate & 0x1;
1380 pAsm->S[fld].src.negy = (pILInst->SrcReg[src].Negate >> 1) & 0x1;
1381 pAsm->S[fld].src.negz = (pILInst->SrcReg[src].Negate >> 2) & 0x1;
1382 pAsm->S[fld].src.negw = (pILInst->SrcReg[src].Negate >> 3) & 0x1;
1383
1384 return GL_TRUE;
1385 }
1386
1387 GLboolean assemble_dst(r700_AssemblerBase *pAsm)
1388 {
1389 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1390 switch (pILInst->DstReg.File)
1391 {
1392 case PROGRAM_TEMPORARY:
1393 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1394 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1395 pAsm->D.dst.reg = pILInst->DstReg.Index + pAsm->starting_temp_register_number;
1396 break;
1397 case PROGRAM_ADDRESS:
1398 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1399 pAsm->D.dst.rtype = DST_REG_A0;
1400 pAsm->D.dst.reg = 0;
1401 break;
1402 case PROGRAM_OUTPUT:
1403 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1404 pAsm->D.dst.rtype = DST_REG_OUT;
1405 switch (pAsm->currentShaderType)
1406 {
1407 case SPT_FP:
1408 pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index];
1409 break;
1410 case SPT_VP:
1411 pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index];
1412 break;
1413 }
1414 break;
1415 default:
1416 radeon_error("Invalid destination output argument type\n");
1417 return GL_FALSE;
1418 }
1419
1420 pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1;
1421 pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1;
1422 pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
1423 pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
1424
1425 if(pILInst->SaturateMode == SATURATE_ZERO_ONE)
1426 {
1427 pAsm->D2.dst2.SaturateMode = 1;
1428 }
1429 else
1430 {
1431 pAsm->D2.dst2.SaturateMode = 0;
1432 }
1433
1434 return GL_TRUE;
1435 }
1436
1437 GLboolean tex_dst(r700_AssemblerBase *pAsm)
1438 {
1439 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1440
1441 if(PROGRAM_TEMPORARY == pILInst->DstReg.File)
1442 {
1443 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1444 pAsm->D.dst.reg = pAsm->pILInst[pAsm->uiCurInst].DstReg.Index + pAsm->starting_temp_register_number;
1445
1446 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1447 }
1448 else if(PROGRAM_OUTPUT == pILInst->DstReg.File)
1449 {
1450 pAsm->D.dst.rtype = DST_REG_OUT;
1451 switch (pAsm->currentShaderType)
1452 {
1453 case SPT_FP:
1454 pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index];
1455 break;
1456 case SPT_VP:
1457 pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index];
1458 break;
1459 }
1460
1461 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1462 }
1463 else
1464 {
1465 radeon_error("Invalid destination output argument type\n");
1466 return GL_FALSE;
1467 }
1468
1469 pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1;
1470 pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1;
1471 pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
1472 pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
1473
1474 return GL_TRUE;
1475 }
1476
1477 GLboolean tex_src(r700_AssemblerBase *pAsm)
1478 {
1479 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1480
1481 GLboolean bValidTexCoord = GL_FALSE;
1482
1483 if(pAsm->aArgSubst[1] >= 0)
1484 {
1485 bValidTexCoord = GL_TRUE;
1486 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
1487 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
1488 pAsm->S[0].src.reg = pAsm->aArgSubst[1];
1489 }
1490 else
1491 {
1492 switch (pILInst->SrcReg[0].File) {
1493 case PROGRAM_UNIFORM:
1494 case PROGRAM_CONSTANT:
1495 case PROGRAM_LOCAL_PARAM:
1496 case PROGRAM_ENV_PARAM:
1497 case PROGRAM_STATE_VAR:
1498 break;
1499 case PROGRAM_TEMPORARY:
1500 bValidTexCoord = GL_TRUE;
1501 pAsm->S[0].src.reg = pILInst->SrcReg[0].Index +
1502 pAsm->starting_temp_register_number;
1503 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
1504 break;
1505 case PROGRAM_INPUT:
1506 if(SPT_VP == pAsm->currentShaderType)
1507 {
1508 switch (pILInst->SrcReg[0].Index)
1509 {
1510 case VERT_ATTRIB_TEX0:
1511 case VERT_ATTRIB_TEX1:
1512 case VERT_ATTRIB_TEX2:
1513 case VERT_ATTRIB_TEX3:
1514 case VERT_ATTRIB_TEX4:
1515 case VERT_ATTRIB_TEX5:
1516 case VERT_ATTRIB_TEX6:
1517 case VERT_ATTRIB_TEX7:
1518 bValidTexCoord = GL_TRUE;
1519 pAsm->S[0].src.reg =
1520 pAsm->ucVP_AttributeMap[pILInst->SrcReg[0].Index];
1521 pAsm->S[0].src.rtype = SRC_REG_INPUT;
1522 break;
1523 }
1524 }
1525 else
1526 {
1527 switch (pILInst->SrcReg[0].Index)
1528 {
1529 case FRAG_ATTRIB_WPOS:
1530 case FRAG_ATTRIB_COL0:
1531 case FRAG_ATTRIB_COL1:
1532 case FRAG_ATTRIB_FOGC:
1533 case FRAG_ATTRIB_TEX0:
1534 case FRAG_ATTRIB_TEX1:
1535 case FRAG_ATTRIB_TEX2:
1536 case FRAG_ATTRIB_TEX3:
1537 case FRAG_ATTRIB_TEX4:
1538 case FRAG_ATTRIB_TEX5:
1539 case FRAG_ATTRIB_TEX6:
1540 case FRAG_ATTRIB_TEX7:
1541 bValidTexCoord = GL_TRUE;
1542 pAsm->S[0].src.reg =
1543 pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
1544 pAsm->S[0].src.rtype = SRC_REG_INPUT;
1545 break;
1546 case FRAG_ATTRIB_FACE:
1547 fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n");
1548 break;
1549 case FRAG_ATTRIB_PNTC:
1550 fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n");
1551 break;
1552 }
1553
1554 if( (pILInst->SrcReg[0].Index >= FRAG_ATTRIB_VAR0) ||
1555 (pILInst->SrcReg[0].Index < FRAG_ATTRIB_MAX) )
1556 {
1557 bValidTexCoord = GL_TRUE;
1558 pAsm->S[0].src.reg =
1559 pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
1560 pAsm->S[0].src.rtype = SRC_REG_INPUT;
1561 }
1562 }
1563
1564 break;
1565 }
1566 }
1567
1568 if(GL_TRUE == bValidTexCoord)
1569 {
1570 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
1571 }
1572 else
1573 {
1574 radeon_error("Invalid source texcoord for TEX instruction\n");
1575 return GL_FALSE;
1576 }
1577
1578 pAsm->S[0].src.swizzlex = pILInst->SrcReg[0].Swizzle & 0x7;
1579 pAsm->S[0].src.swizzley = (pILInst->SrcReg[0].Swizzle >> 3) & 0x7;
1580 pAsm->S[0].src.swizzlez = (pILInst->SrcReg[0].Swizzle >> 6) & 0x7;
1581 pAsm->S[0].src.swizzlew = (pILInst->SrcReg[0].Swizzle >> 9) & 0x7;
1582
1583 pAsm->S[0].src.negx = pILInst->SrcReg[0].Negate & 0x1;
1584 pAsm->S[0].src.negy = (pILInst->SrcReg[0].Negate >> 1) & 0x1;
1585 pAsm->S[0].src.negz = (pILInst->SrcReg[0].Negate >> 2) & 0x1;
1586 pAsm->S[0].src.negw = (pILInst->SrcReg[0].Negate >> 3) & 0x1;
1587
1588 return GL_TRUE;
1589 }
1590
1591 GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalized)
1592 {
1593 PVSSRC * texture_coordinate_source;
1594 PVSSRC * texture_unit_source;
1595
1596 R700TextureInstruction* tex_instruction_ptr = (R700TextureInstruction*) CALLOC_STRUCT(R700TextureInstruction);
1597 if (tex_instruction_ptr == NULL)
1598 {
1599 return GL_FALSE;
1600 }
1601 Init_R700TextureInstruction(tex_instruction_ptr);
1602
1603 texture_coordinate_source = &(pAsm->S[0].src);
1604 texture_unit_source = &(pAsm->S[1].src);
1605
1606 tex_instruction_ptr->m_Word0.f.tex_inst = pAsm->D.dst.opcode;
1607 tex_instruction_ptr->m_Word0.f.bc_frac_mode = 0x0;
1608 tex_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
1609 tex_instruction_ptr->m_Word0.f.alt_const = 0;
1610
1611 if(SPT_VP == pAsm->currentShaderType)
1612 {
1613 tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg + VERT_ATTRIB_MAX;
1614 pAsm->unVetTexBits |= 1 << texture_unit_source->reg;
1615 }
1616 else
1617 {
1618 tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg;
1619 }
1620
1621 tex_instruction_ptr->m_Word1.f.lod_bias = 0x0;
1622 if (normalized) {
1623 tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_NORMALIZED;
1624 tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_NORMALIZED;
1625 tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_NORMALIZED;
1626 tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_NORMALIZED;
1627 } else {
1628 /* XXX: UNNORMALIZED tex coords have limited wrap modes */
1629 tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_UNNORMALIZED;
1630 tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_UNNORMALIZED;
1631 tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_UNNORMALIZED;
1632 tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_UNNORMALIZED;
1633 }
1634
1635 tex_instruction_ptr->m_Word2.f.offset_x = 0x0;
1636 tex_instruction_ptr->m_Word2.f.offset_y = 0x0;
1637 tex_instruction_ptr->m_Word2.f.offset_z = 0x0;
1638 tex_instruction_ptr->m_Word2.f.sampler_id = texture_unit_source->reg;
1639
1640 // dst
1641 if ( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
1642 (pAsm->D.dst.rtype == DST_REG_OUT) )
1643 {
1644 tex_instruction_ptr->m_Word0.f.src_gpr = texture_coordinate_source->reg;
1645 tex_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
1646
1647 tex_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
1648 tex_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE;
1649
1650 tex_instruction_ptr->m_Word1.f.dst_sel_x = (pAsm->D.dst.writex ? texture_unit_source->swizzlex : SQ_SEL_MASK);
1651 tex_instruction_ptr->m_Word1.f.dst_sel_y = (pAsm->D.dst.writey ? texture_unit_source->swizzley : SQ_SEL_MASK);
1652 tex_instruction_ptr->m_Word1.f.dst_sel_z = (pAsm->D.dst.writez ? texture_unit_source->swizzlez : SQ_SEL_MASK);
1653 tex_instruction_ptr->m_Word1.f.dst_sel_w = (pAsm->D.dst.writew ? texture_unit_source->swizzlew : SQ_SEL_MASK);
1654
1655
1656 tex_instruction_ptr->m_Word2.f.src_sel_x = texture_coordinate_source->swizzlex;
1657 tex_instruction_ptr->m_Word2.f.src_sel_y = texture_coordinate_source->swizzley;
1658 tex_instruction_ptr->m_Word2.f.src_sel_z = texture_coordinate_source->swizzlez;
1659 tex_instruction_ptr->m_Word2.f.src_sel_w = texture_coordinate_source->swizzlew;
1660 }
1661 else
1662 {
1663 radeon_error("Only temp destination registers supported for TEX dest regs.\n");
1664 return GL_FALSE;
1665 }
1666
1667 if( GL_FALSE == add_tex_instruction(pAsm, tex_instruction_ptr) )
1668 {
1669 return GL_FALSE;
1670 }
1671
1672 return GL_TRUE;
1673 }
1674
1675 void initialize(r700_AssemblerBase *pAsm)
1676 {
1677 GLuint cycle, component;
1678
1679 for (cycle=0; cycle<NUMBER_OF_CYCLES; cycle++)
1680 {
1681 for (component=0; component<NUMBER_OF_COMPONENTS; component++)
1682 {
1683 pAsm->hw_gpr[cycle][component] = (-1);
1684 }
1685 }
1686 for (component=0; component<NUMBER_OF_COMPONENTS; component++)
1687 {
1688 pAsm->hw_cfile_addr[component] = (-1);
1689 pAsm->hw_cfile_chan[component] = (-1);
1690 }
1691 }
1692
1693 GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr,
1694 int source_index,
1695 PVSSRC* pSource,
1696 BITS scalar_channel_index)
1697 {
1698 BITS src_sel;
1699 BITS src_rel;
1700 BITS src_chan;
1701 BITS src_neg;
1702
1703 //--------------------------------------------------------------------------
1704 // Source for operands src0, src1.
1705 // Values [0,127] correspond to GPR[0..127].
1706 // Values [256,511] correspond to cfile constants c[0..255].
1707
1708 //--------------------------------------------------------------------------
1709 // Other special values are shown in the list below.
1710
1711 // 248 SQ_ALU_SRC_0: special constant 0.0.
1712 // 249 SQ_ALU_SRC_1: special constant 1.0 float.
1713
1714 // 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
1715 // 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
1716
1717 // 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
1718 // 253 SQ_ALU_SRC_LITERAL: literal constant.
1719
1720 // 254 SQ_ALU_SRC_PV: previous vector result.
1721 // 255 SQ_ALU_SRC_PS: previous scalar result.
1722 //--------------------------------------------------------------------------
1723
1724 BITS channel_swizzle;
1725 switch (scalar_channel_index)
1726 {
1727 case 0: channel_swizzle = pSource->swizzlex; break;
1728 case 1: channel_swizzle = pSource->swizzley; break;
1729 case 2: channel_swizzle = pSource->swizzlez; break;
1730 case 3: channel_swizzle = pSource->swizzlew; break;
1731 default: channel_swizzle = SQ_SEL_MASK; break;
1732 }
1733
1734 if(channel_swizzle == SQ_SEL_0)
1735 {
1736 src_sel = SQ_ALU_SRC_0;
1737 }
1738 else if (channel_swizzle == SQ_SEL_1)
1739 {
1740 src_sel = SQ_ALU_SRC_1;
1741 }
1742 else
1743 {
1744 if ( (pSource->rtype == SRC_REG_TEMPORARY) ||
1745 (pSource->rtype == SRC_REG_INPUT)
1746 )
1747 {
1748 src_sel = pSource->reg;
1749 }
1750 else if (pSource->rtype == SRC_REG_CONSTANT)
1751 {
1752 src_sel = pSource->reg + CFILE_REGISTER_OFFSET;
1753 }
1754 else if (pSource->rtype == SRC_REC_LITERAL)
1755 {
1756 src_sel = SQ_ALU_SRC_LITERAL;
1757 }
1758 else
1759 {
1760 radeon_error("Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.\n",
1761 source_index, pSource->rtype);
1762 return GL_FALSE;
1763 }
1764 }
1765
1766 if( ADDR_ABSOLUTE == addrmode_PVSSRC(pSource) )
1767 {
1768 src_rel = SQ_ABSOLUTE;
1769 }
1770 else
1771 {
1772 src_rel = SQ_RELATIVE;
1773 }
1774
1775 switch (channel_swizzle)
1776 {
1777 case SQ_SEL_X:
1778 src_chan = SQ_CHAN_X;
1779 break;
1780 case SQ_SEL_Y:
1781 src_chan = SQ_CHAN_Y;
1782 break;
1783 case SQ_SEL_Z:
1784 src_chan = SQ_CHAN_Z;
1785 break;
1786 case SQ_SEL_W:
1787 src_chan = SQ_CHAN_W;
1788 break;
1789 case SQ_SEL_0:
1790 case SQ_SEL_1:
1791 // Does not matter since src_sel controls
1792 src_chan = SQ_CHAN_X;
1793 break;
1794 default:
1795 radeon_error("Unknown source select value (%d) in assemble_alu_src().\n", channel_swizzle);
1796 return GL_FALSE;
1797 break;
1798 }
1799
1800 switch (scalar_channel_index)
1801 {
1802 case 0: src_neg = pSource->negx; break;
1803 case 1: src_neg = pSource->negy; break;
1804 case 2: src_neg = pSource->negz; break;
1805 case 3: src_neg = pSource->negw; break;
1806 default: src_neg = 0; break;
1807 }
1808
1809 switch (source_index)
1810 {
1811 case 0:
1812 assert(alu_instruction_ptr);
1813 alu_instruction_ptr->m_Word0.f.src0_sel = src_sel;
1814 alu_instruction_ptr->m_Word0.f.src0_rel = src_rel;
1815 alu_instruction_ptr->m_Word0.f.src0_chan = src_chan;
1816 alu_instruction_ptr->m_Word0.f.src0_neg = src_neg;
1817 break;
1818 case 1:
1819 assert(alu_instruction_ptr);
1820 alu_instruction_ptr->m_Word0.f.src1_sel = src_sel;
1821 alu_instruction_ptr->m_Word0.f.src1_rel = src_rel;
1822 alu_instruction_ptr->m_Word0.f.src1_chan = src_chan;
1823 alu_instruction_ptr->m_Word0.f.src1_neg = src_neg;
1824 break;
1825 case 2:
1826 assert(alu_instruction_ptr);
1827 alu_instruction_ptr->m_Word1_OP3.f.src2_sel = src_sel;
1828 alu_instruction_ptr->m_Word1_OP3.f.src2_rel = src_rel;
1829 alu_instruction_ptr->m_Word1_OP3.f.src2_chan = src_chan;
1830 alu_instruction_ptr->m_Word1_OP3.f.src2_neg = src_neg;
1831 break;
1832 default:
1833 radeon_error("Only three sources allowed in ALU opcodes.\n");
1834 return GL_FALSE;
1835 break;
1836 }
1837
1838 return GL_TRUE;
1839 }
1840
1841 GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
1842 R700ALUInstruction* alu_instruction_ptr,
1843 GLuint contiguous_slots_needed)
1844 {
1845 if( GL_FALSE == check_current_clause(pAsm, CF_ALU_CLAUSE) )
1846 {
1847 return GL_FALSE;
1848 }
1849
1850 if ( pAsm->alu_x_opcode != 0 ||
1851 pAsm->cf_current_alu_clause_ptr == NULL ||
1852 ( (pAsm->cf_current_alu_clause_ptr != NULL) &&
1853 (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-contiguous_slots_needed-1) )
1854 ) )
1855 {
1856
1857 //new cf inst for this clause
1858 pAsm->cf_current_alu_clause_ptr = (R700ControlFlowALUClause*) CALLOC_STRUCT(R700ControlFlowALUClause);
1859
1860 // link the new cf to cf segment
1861 if(NULL != pAsm->cf_current_alu_clause_ptr)
1862 {
1863 Init_R700ControlFlowALUClause(pAsm->cf_current_alu_clause_ptr);
1864 AddCFInstruction( pAsm->pR700Shader,
1865 (R700ControlFlowInstruction *)pAsm->cf_current_alu_clause_ptr );
1866 }
1867 else
1868 {
1869 radeon_error("Could not allocate a new ALU CF instruction.\n");
1870 return GL_FALSE;
1871 }
1872
1873 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank0 = 0x0;
1874 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank1 = 0x0;
1875 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_mode0 = SQ_CF_KCACHE_NOP;
1876
1877 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_mode1 = SQ_CF_KCACHE_NOP;
1878 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr0 = 0x0;
1879 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr1 = 0x0;
1880
1881 pAsm->cf_current_alu_clause_ptr->m_Word1.f.count = 0x0;
1882
1883 if(pAsm->alu_x_opcode != 0)
1884 {
1885 pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = pAsm->alu_x_opcode;
1886 pAsm->alu_x_opcode = 0;
1887 }
1888 else
1889 {
1890 pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ALU;
1891 }
1892
1893 pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
1894
1895 pAsm->cf_current_alu_clause_ptr->m_Word1.f.barrier = 0x1;
1896 }
1897 else
1898 {
1899 pAsm->cf_current_alu_clause_ptr->m_Word1.f.count += (GetInstructionSize(alu_instruction_ptr->m_ShaderInstType) / 2);
1900 }
1901
1902 // If this clause constains any instruction that is forward dependent on a TEX instruction,
1903 // set the whole_quad_mode for this clause
1904 if ( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) )
1905 {
1906 pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x1;
1907 }
1908
1909 if (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-1) )
1910 {
1911 alu_instruction_ptr->m_Word0.f.last = 1;
1912 }
1913
1914 if(NULL == pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction)
1915 {
1916 pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction = alu_instruction_ptr;
1917 alu_instruction_ptr->m_pLinkedALUClause = pAsm->cf_current_alu_clause_ptr;
1918 }
1919
1920 AddALUInstruction(pAsm->pR700Shader, alu_instruction_ptr);
1921
1922 return GL_TRUE;
1923 }
1924
1925 void get_src_properties(R700ALUInstruction* alu_instruction_ptr,
1926 int source_index,
1927 BITS* psrc_sel,
1928 BITS* psrc_rel,
1929 BITS* psrc_chan,
1930 BITS* psrc_neg)
1931 {
1932 switch (source_index)
1933 {
1934 case 0:
1935 *psrc_sel = alu_instruction_ptr->m_Word0.f.src0_sel ;
1936 *psrc_rel = alu_instruction_ptr->m_Word0.f.src0_rel ;
1937 *psrc_chan = alu_instruction_ptr->m_Word0.f.src0_chan;
1938 *psrc_neg = alu_instruction_ptr->m_Word0.f.src0_neg ;
1939 break;
1940
1941 case 1:
1942 *psrc_sel = alu_instruction_ptr->m_Word0.f.src1_sel ;
1943 *psrc_rel = alu_instruction_ptr->m_Word0.f.src1_rel ;
1944 *psrc_chan = alu_instruction_ptr->m_Word0.f.src1_chan;
1945 *psrc_neg = alu_instruction_ptr->m_Word0.f.src1_neg ;
1946 break;
1947
1948 case 2:
1949 *psrc_sel = alu_instruction_ptr->m_Word1_OP3.f.src2_sel;
1950 *psrc_rel = alu_instruction_ptr->m_Word1_OP3.f.src2_rel;
1951 *psrc_chan = alu_instruction_ptr->m_Word1_OP3.f.src2_chan;
1952 *psrc_neg = alu_instruction_ptr->m_Word1_OP3.f.src2_neg;
1953 break;
1954 }
1955 }
1956
1957 int is_cfile(BITS sel)
1958 {
1959 if (sel > 255 && sel < 512)
1960 {
1961 return 1;
1962 }
1963 return 0;
1964 }
1965
1966 int is_const(BITS sel)
1967 {
1968 if (is_cfile(sel))
1969 {
1970 return 1;
1971 }
1972 else if(sel >= SQ_ALU_SRC_0 && sel <= SQ_ALU_SRC_LITERAL)
1973 {
1974 return 1;
1975 }
1976 return 0;
1977 }
1978
1979 int is_gpr(BITS sel)
1980 {
1981 if (sel >= 0 && sel < 128)
1982 {
1983 return 1;
1984 }
1985 return 0;
1986 }
1987
1988 const GLuint BANK_SWIZZLE_VEC[8] = {SQ_ALU_VEC_210, //000
1989 SQ_ALU_VEC_120, //001
1990 SQ_ALU_VEC_102, //010
1991
1992 SQ_ALU_VEC_201, //011
1993 SQ_ALU_VEC_012, //100
1994 SQ_ALU_VEC_021, //101
1995
1996 SQ_ALU_VEC_012, //110
1997 SQ_ALU_VEC_012}; //111
1998
1999 const GLuint BANK_SWIZZLE_SCL[8] = {SQ_ALU_SCL_210, //000
2000 SQ_ALU_SCL_122, //001
2001 SQ_ALU_SCL_122, //010
2002
2003 SQ_ALU_SCL_221, //011
2004 SQ_ALU_SCL_212, //100
2005 SQ_ALU_SCL_122, //101
2006
2007 SQ_ALU_SCL_122, //110
2008 SQ_ALU_SCL_122}; //111
2009
2010 GLboolean reserve_cfile(r700_AssemblerBase* pAsm,
2011 GLuint sel,
2012 GLuint chan)
2013 {
2014 int res_match = (-1);
2015 int res_empty = (-1);
2016
2017 GLint res;
2018
2019 for (res=3; res>=0; res--)
2020 {
2021 if(pAsm->hw_cfile_addr[ res] < 0)
2022 {
2023 res_empty = res;
2024 }
2025 else if( (pAsm->hw_cfile_addr[res] == (int)sel)
2026 &&
2027 (pAsm->hw_cfile_chan[ res ] == (int) chan) )
2028 {
2029 res_match = res;
2030 }
2031 }
2032
2033 if(res_match >= 0)
2034 {
2035 // Read for this scalar component already reserved, nothing to do here.
2036 ;
2037 }
2038 else if(res_empty >= 0)
2039 {
2040 pAsm->hw_cfile_addr[ res_empty ] = sel;
2041 pAsm->hw_cfile_chan[ res_empty ] = chan;
2042 }
2043 else
2044 {
2045 radeon_error("All cfile read ports are used, cannot reference C$sel, channel $chan.\n");
2046 return GL_FALSE;
2047 }
2048 return GL_TRUE;
2049 }
2050
2051 GLboolean reserve_gpr(r700_AssemblerBase* pAsm, GLuint sel, GLuint chan, GLuint cycle)
2052 {
2053 if(pAsm->hw_gpr[cycle][chan] < 0)
2054 {
2055 pAsm->hw_gpr[cycle][chan] = sel;
2056 }
2057 else if(pAsm->hw_gpr[cycle][chan] != (int)sel)
2058 {
2059 radeon_error("Another scalar operation has already used GPR read port for given channel\n");
2060 return GL_FALSE;
2061 }
2062
2063 return GL_TRUE;
2064 }
2065
2066 GLboolean cycle_for_scalar_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
2067 {
2068 switch (swiz)
2069 {
2070 case SQ_ALU_SCL_210:
2071 {
2072 int table[3] = {2, 1, 0};
2073 *pCycle = table[sel];
2074 return GL_TRUE;
2075 }
2076 break;
2077 case SQ_ALU_SCL_122:
2078 {
2079 int table[3] = {1, 2, 2};
2080 *pCycle = table[sel];
2081 return GL_TRUE;
2082 }
2083 break;
2084 case SQ_ALU_SCL_212:
2085 {
2086 int table[3] = {2, 1, 2};
2087 *pCycle = table[sel];
2088 return GL_TRUE;
2089 }
2090 break;
2091 case SQ_ALU_SCL_221:
2092 {
2093 int table[3] = {2, 2, 1};
2094 *pCycle = table[sel];
2095 return GL_TRUE;
2096 }
2097 break;
2098 default:
2099 radeon_error("Bad Scalar bank swizzle value\n");
2100 break;
2101 }
2102
2103 return GL_FALSE;
2104 }
2105
2106 GLboolean cycle_for_vector_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
2107 {
2108 switch (swiz)
2109 {
2110 case SQ_ALU_VEC_012:
2111 {
2112 int table[3] = {0, 1, 2};
2113 *pCycle = table[sel];
2114 }
2115 break;
2116 case SQ_ALU_VEC_021:
2117 {
2118 int table[3] = {0, 2, 1};
2119 *pCycle = table[sel];
2120 }
2121 break;
2122 case SQ_ALU_VEC_120:
2123 {
2124 int table[3] = {1, 2, 0};
2125 *pCycle = table[sel];
2126 }
2127 break;
2128 case SQ_ALU_VEC_102:
2129 {
2130 int table[3] = {1, 0, 2};
2131 *pCycle = table[sel];
2132 }
2133 break;
2134 case SQ_ALU_VEC_201:
2135 {
2136 int table[3] = {2, 0, 1};
2137 *pCycle = table[sel];
2138 }
2139 break;
2140 case SQ_ALU_VEC_210:
2141 {
2142 int table[3] = {2, 1, 0};
2143 *pCycle = table[sel];
2144 }
2145 break;
2146 default:
2147 radeon_error("Bad Vec bank swizzle value\n");
2148 return GL_FALSE;
2149 break;
2150 }
2151
2152 return GL_TRUE;
2153 }
2154
2155 GLboolean check_scalar(r700_AssemblerBase* pAsm,
2156 R700ALUInstruction* alu_instruction_ptr)
2157 {
2158 GLuint cycle;
2159 GLuint bank_swizzle;
2160 GLuint const_count = 0;
2161
2162 BITS sel;
2163 BITS chan;
2164 BITS rel;
2165 BITS neg;
2166
2167 GLuint src;
2168
2169 BITS src_sel [3] = {0,0,0};
2170 BITS src_chan[3] = {0,0,0};
2171 BITS src_rel [3] = {0,0,0};
2172 BITS src_neg [3] = {0,0,0};
2173
2174 GLuint swizzle_key;
2175
2176 GLuint number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
2177
2178 for (src=0; src<number_of_operands; src++)
2179 {
2180 get_src_properties(alu_instruction_ptr,
2181 src,
2182 &(src_sel[src]),
2183 &(src_rel[src]),
2184 &(src_chan[src]),
2185 &(src_neg[src]) );
2186 }
2187
2188
2189 swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) +
2190 (is_const( src_sel[1] ) ? 2 : 0) +
2191 (is_const( src_sel[2] ) ? 1 : 0) );
2192
2193 alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_SCL[ swizzle_key ];
2194
2195 for (src=0; src<number_of_operands; src++)
2196 {
2197 sel = src_sel [src];
2198 chan = src_chan[src];
2199 rel = src_rel [src];
2200 neg = src_neg [src];
2201
2202 if (is_const( sel ))
2203 {
2204 // Any constant, including literal and inline constants
2205 const_count++;
2206
2207 if (is_cfile( sel ))
2208 {
2209 reserve_cfile(pAsm, sel, chan);
2210 }
2211
2212 }
2213 }
2214
2215 for (src=0; src<number_of_operands; src++)
2216 {
2217 sel = src_sel [src];
2218 chan = src_chan[src];
2219 rel = src_rel [src];
2220 neg = src_neg [src];
2221
2222 if( is_gpr(sel) )
2223 {
2224 bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
2225
2226 if( GL_FALSE == cycle_for_scalar_bank_swizzle(bank_swizzle, src, &cycle) )
2227 {
2228 return GL_FALSE;
2229 }
2230
2231 if(cycle < const_count)
2232 {
2233 if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
2234 {
2235 return GL_FALSE;
2236 }
2237 }
2238 }
2239 }
2240
2241 return GL_TRUE;
2242 }
2243
2244 GLboolean check_vector(r700_AssemblerBase* pAsm,
2245 R700ALUInstruction* alu_instruction_ptr)
2246 {
2247 GLuint cycle;
2248 GLuint bank_swizzle;
2249 GLuint const_count = 0;
2250
2251 GLuint src;
2252
2253 BITS sel;
2254 BITS chan;
2255 BITS rel;
2256 BITS neg;
2257
2258 BITS src_sel [3] = {0,0,0};
2259 BITS src_chan[3] = {0,0,0};
2260 BITS src_rel [3] = {0,0,0};
2261 BITS src_neg [3] = {0,0,0};
2262
2263 GLuint swizzle_key;
2264
2265 GLuint number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
2266
2267 for (src=0; src<number_of_operands; src++)
2268 {
2269 get_src_properties(alu_instruction_ptr,
2270 src,
2271 &(src_sel[src]),
2272 &(src_rel[src]),
2273 &(src_chan[src]),
2274 &(src_neg[src]) );
2275 }
2276
2277
2278 swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) +
2279 (is_const( src_sel[1] ) ? 2 : 0) +
2280 (is_const( src_sel[2] ) ? 1 : 0)
2281 );
2282
2283 alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_VEC[swizzle_key];
2284
2285 for (src=0; src<number_of_operands; src++)
2286 {
2287 sel = src_sel [src];
2288 chan = src_chan[src];
2289 rel = src_rel [src];
2290 neg = src_neg [src];
2291
2292
2293 bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
2294
2295 if( is_gpr(sel) )
2296 {
2297 if( GL_FALSE == cycle_for_vector_bank_swizzle(bank_swizzle, src, &cycle) )
2298 {
2299 return GL_FALSE;
2300 }
2301
2302 if ( (src == 1) &&
2303 (sel == src_sel[0]) &&
2304 (chan == src_chan[0]) )
2305 {
2306 }
2307 else
2308 {
2309 if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
2310 {
2311 return GL_FALSE;
2312 }
2313 }
2314 }
2315 else if( is_const(sel) )
2316 {
2317 const_count++;
2318
2319 if( is_cfile(sel) )
2320 {
2321 if( GL_FALSE == reserve_cfile(pAsm, sel, chan) )
2322 {
2323 return GL_FALSE;
2324 }
2325 }
2326 }
2327 }
2328
2329 return GL_TRUE;
2330 }
2331
2332 GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
2333 {
2334 R700ALUInstruction * alu_instruction_ptr = NULL;
2335 R700ALUInstructionHalfLiteral * alu_instruction_ptr_hl;
2336 R700ALUInstructionFullLiteral * alu_instruction_ptr_fl;
2337
2338 GLuint number_of_scalar_operations;
2339 GLboolean is_single_scalar_operation;
2340 GLuint scalar_channel_index;
2341
2342 PVSSRC * pcurrent_source;
2343 int current_source_index;
2344 GLuint contiguous_slots_needed;
2345
2346 GLuint uNumSrc = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
2347 //GLuint channel_swizzle, j;
2348 //GLuint chan_counter[4] = {0, 0, 0, 0};
2349 //PVSSRC * pSource[3];
2350 GLboolean bSplitInst = GL_FALSE;
2351
2352 if (1 == pAsm->D.dst.math)
2353 {
2354 is_single_scalar_operation = GL_TRUE;
2355 number_of_scalar_operations = 1;
2356 }
2357 else
2358 {
2359 is_single_scalar_operation = GL_FALSE;
2360 number_of_scalar_operations = 4;
2361
2362 /* current assembler doesn't do more than 1 register per source */
2363 #if 0
2364 /* check read port, only very preliminary algorithm, not count in
2365 src0/1 same comp case and prev slot repeat case; also not count relative
2366 addressing. TODO: improve performance. */
2367 for(j=0; j<uNumSrc; j++)
2368 {
2369 pSource[j] = &(pAsm->S[j].src);
2370 }
2371 for(scalar_channel_index=0; scalar_channel_index<4; scalar_channel_index++)
2372 {
2373 for(j=0; j<uNumSrc; j++)
2374 {
2375 switch (scalar_channel_index)
2376 {
2377 case 0: channel_swizzle = pSource[j]->swizzlex; break;
2378 case 1: channel_swizzle = pSource[j]->swizzley; break;
2379 case 2: channel_swizzle = pSource[j]->swizzlez; break;
2380 case 3: channel_swizzle = pSource[j]->swizzlew; break;
2381 default: channel_swizzle = SQ_SEL_MASK; break;
2382 }
2383 if ( ((pSource[j]->rtype == SRC_REG_TEMPORARY) ||
2384 (pSource[j]->rtype == SRC_REG_INPUT))
2385 && (channel_swizzle <= SQ_SEL_W) )
2386 {
2387 chan_counter[channel_swizzle]++;
2388 }
2389 }
2390 }
2391 if( (chan_counter[SQ_SEL_X] > 3)
2392 || (chan_counter[SQ_SEL_Y] > 3)
2393 || (chan_counter[SQ_SEL_Z] > 3)
2394 || (chan_counter[SQ_SEL_W] > 3) ) /* each chan bank has only 3 ports. */
2395 {
2396 bSplitInst = GL_TRUE;
2397 }
2398 #endif
2399 }
2400
2401 contiguous_slots_needed = 0;
2402
2403 if(!is_single_scalar_operation)
2404 {
2405 contiguous_slots_needed = 4;
2406 }
2407
2408 contiguous_slots_needed += pAsm->D2.dst2.literal_slots;
2409
2410 initialize(pAsm);
2411
2412 for (scalar_channel_index=0;
2413 scalar_channel_index < number_of_scalar_operations;
2414 scalar_channel_index++)
2415 {
2416 if(scalar_channel_index == (number_of_scalar_operations-1))
2417 {
2418 switch(pAsm->D2.dst2.literal_slots)
2419 {
2420 case 0:
2421 alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
2422 Init_R700ALUInstruction(alu_instruction_ptr);
2423 break;
2424 case 1:
2425 alu_instruction_ptr_hl = (R700ALUInstructionHalfLiteral*) CALLOC_STRUCT(R700ALUInstructionHalfLiteral);
2426 Init_R700ALUInstructionHalfLiteral(alu_instruction_ptr_hl, pAsm->C[0].f, pAsm->C[1].f);
2427 alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_hl;
2428 break;
2429 case 2:
2430 alu_instruction_ptr_fl = (R700ALUInstructionFullLiteral*) CALLOC_STRUCT(R700ALUInstructionFullLiteral);
2431 Init_R700ALUInstructionFullLiteral(alu_instruction_ptr_fl,pAsm->C[0].f, pAsm->C[1].f, pAsm->C[2].f, pAsm->C[3].f);
2432 alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_fl;
2433 break;
2434 };
2435 }
2436 else
2437 {
2438 alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
2439 Init_R700ALUInstruction(alu_instruction_ptr);
2440 }
2441
2442 //src 0
2443 current_source_index = 0;
2444 pcurrent_source = &(pAsm->S[0].src);
2445
2446 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2447 current_source_index,
2448 pcurrent_source,
2449 scalar_channel_index) )
2450 {
2451 return GL_FALSE;
2452 }
2453
2454 if (uNumSrc > 1)
2455 {
2456 // Process source 1
2457 current_source_index = 1;
2458 pcurrent_source = &(pAsm->S[current_source_index].src);
2459
2460 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2461 current_source_index,
2462 pcurrent_source,
2463 scalar_channel_index) )
2464 {
2465 return GL_FALSE;
2466 }
2467 }
2468
2469 //other bits
2470 alu_instruction_ptr->m_Word0.f.index_mode = pAsm->D2.dst2.index_mode;
2471
2472 if( (is_single_scalar_operation == GL_TRUE)
2473 || (GL_TRUE == bSplitInst) )
2474 {
2475 alu_instruction_ptr->m_Word0.f.last = 1;
2476 }
2477 else
2478 {
2479 alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ? 1 : 0;
2480 }
2481
2482 alu_instruction_ptr->m_Word0.f.pred_sel = (pAsm->D.dst.pred_inv > 0) ? 1 : 0;
2483 if(1 == pAsm->D.dst.predicated)
2484 {
2485 alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x1;
2486 alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1;
2487 }
2488 else
2489 {
2490 alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2491 alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2492 }
2493
2494 // dst
2495 if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
2496 (pAsm->D.dst.rtype == DST_REG_OUT) )
2497 {
2498 alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
2499 }
2500 else
2501 {
2502 radeon_error("Only temp destination registers supported for ALU dest regs.\n");
2503 return GL_FALSE;
2504 }
2505
2506 alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; //D.rtype
2507
2508 if ( is_single_scalar_operation == GL_TRUE )
2509 {
2510 // Override scalar_channel_index since only one scalar value will be written
2511 if(pAsm->D.dst.writex)
2512 {
2513 scalar_channel_index = 0;
2514 }
2515 else if(pAsm->D.dst.writey)
2516 {
2517 scalar_channel_index = 1;
2518 }
2519 else if(pAsm->D.dst.writez)
2520 {
2521 scalar_channel_index = 2;
2522 }
2523 else if(pAsm->D.dst.writew)
2524 {
2525 scalar_channel_index = 3;
2526 }
2527 }
2528
2529 alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index;
2530
2531 alu_instruction_ptr->m_Word1.f.clamp = pAsm->D2.dst2.SaturateMode;
2532
2533 if (pAsm->D.dst.op3)
2534 {
2535 //op3
2536
2537 alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode;
2538
2539 //There's 3rd src for op3
2540 current_source_index = 2;
2541 pcurrent_source = &(pAsm->S[current_source_index].src);
2542
2543 if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2544 current_source_index,
2545 pcurrent_source,
2546 scalar_channel_index) )
2547 {
2548 return GL_FALSE;
2549 }
2550 }
2551 else
2552 {
2553 //op2
2554 if (pAsm->bR6xx)
2555 {
2556 alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode;
2557
2558 alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = pAsm->S[0].src.abs;
2559 alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = pAsm->S[1].src.abs;
2560
2561 //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
2562 //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0;
2563 switch (scalar_channel_index)
2564 {
2565 case 0:
2566 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writex;
2567 break;
2568 case 1:
2569 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writey;
2570 break;
2571 case 2:
2572 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writez;
2573 break;
2574 case 3:
2575 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writew;
2576 break;
2577 default:
2578 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1; //SQ_SEL_MASK;
2579 break;
2580 }
2581 alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF;
2582 }
2583 else
2584 {
2585 alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode;
2586
2587 alu_instruction_ptr->m_Word1_OP2.f.src0_abs = pAsm->S[0].src.abs;
2588 alu_instruction_ptr->m_Word1_OP2.f.src1_abs = pAsm->S[1].src.abs;
2589
2590 //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2591 //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2592 switch (scalar_channel_index)
2593 {
2594 case 0:
2595 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writex;
2596 break;
2597 case 1:
2598 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writey;
2599 break;
2600 case 2:
2601 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writez;
2602 break;
2603 case 3:
2604 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writew;
2605 break;
2606 default:
2607 alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1; //SQ_SEL_MASK;
2608 break;
2609 }
2610 alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF;
2611 }
2612 }
2613
2614 if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) )
2615 {
2616 return GL_FALSE;
2617 }
2618
2619 /*
2620 * Judge the type of current instruction, is it vector or scalar
2621 * instruction.
2622 */
2623 if (is_single_scalar_operation)
2624 {
2625 if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) )
2626 {
2627 return GL_FALSE;
2628 }
2629 }
2630 else
2631 {
2632 if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) )
2633 {
2634 return GL_FALSE;
2635 }
2636 }
2637
2638 contiguous_slots_needed -= 1;
2639 }
2640
2641 return GL_TRUE;
2642 }
2643
2644 GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode)
2645 {
2646 BITS tmp;
2647
2648 checkop1(pAsm);
2649
2650 tmp = gethelpr(pAsm);
2651
2652 // opcode tmp.x, a.x
2653 // MOV dst, tmp.x
2654
2655 pAsm->D.dst.opcode = opcode;
2656 pAsm->D.dst.math = 1;
2657
2658 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2659 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2660 pAsm->D.dst.reg = tmp;
2661 pAsm->D.dst.writex = 1;
2662
2663 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2664 {
2665 return GL_FALSE;
2666 }
2667
2668 if ( GL_FALSE == next_ins(pAsm) )
2669 {
2670 return GL_FALSE;
2671 }
2672
2673 // Now replicate result to all necessary channels in destination
2674 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
2675
2676 if( GL_FALSE == assemble_dst(pAsm) )
2677 {
2678 return GL_FALSE;
2679 }
2680
2681 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2682 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
2683 pAsm->S[0].src.reg = tmp;
2684
2685 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
2686 noneg_PVSSRC(&(pAsm->S[0].src));
2687
2688 if( GL_FALSE == next_ins(pAsm) )
2689 {
2690 return GL_FALSE;
2691 }
2692
2693 return GL_TRUE;
2694 }
2695
2696 GLboolean assemble_ABS(r700_AssemblerBase *pAsm)
2697 {
2698 checkop1(pAsm);
2699
2700 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
2701
2702 if( GL_FALSE == assemble_dst(pAsm) )
2703 {
2704 return GL_FALSE;
2705 }
2706 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2707 {
2708 return GL_FALSE;
2709 }
2710
2711 pAsm->S[1].bits = pAsm->S[0].bits;
2712 flipneg_PVSSRC(&(pAsm->S[1].src));
2713
2714 if ( GL_FALSE == next_ins(pAsm) )
2715 {
2716 return GL_FALSE;
2717 }
2718
2719 return GL_TRUE;
2720 }
2721
2722 GLboolean assemble_ADD(r700_AssemblerBase *pAsm)
2723 {
2724 if( GL_FALSE == checkop2(pAsm) )
2725 {
2726 return GL_FALSE;
2727 }
2728
2729 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
2730
2731 if( GL_FALSE == assemble_dst(pAsm) )
2732 {
2733 return GL_FALSE;
2734 }
2735
2736 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2737 {
2738 return GL_FALSE;
2739 }
2740
2741 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
2742 {
2743 return GL_FALSE;
2744 }
2745
2746 if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_SUB)
2747 {
2748 flipneg_PVSSRC(&(pAsm->S[1].src));
2749 }
2750
2751 if( GL_FALSE == next_ins(pAsm) )
2752 {
2753 return GL_FALSE;
2754 }
2755
2756 return GL_TRUE;
2757 }
2758
2759 GLboolean assemble_ARL(r700_AssemblerBase *pAsm)
2760 { /* TODO: ar values dont' persist between clauses */
2761 if( GL_FALSE == checkop1(pAsm) )
2762 {
2763 return GL_FALSE;
2764 }
2765
2766 pAsm->D.dst.opcode = SQ_OP2_INST_MOVA_FLOOR;
2767 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2768 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2769 pAsm->D.dst.reg = 0;
2770 pAsm->D.dst.writex = 0;
2771 pAsm->D.dst.writey = 0;
2772 pAsm->D.dst.writez = 0;
2773 pAsm->D.dst.writew = 0;
2774
2775 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2776 {
2777 return GL_FALSE;
2778 }
2779
2780 if( GL_FALSE == next_ins(pAsm) )
2781 {
2782 return GL_FALSE;
2783 }
2784
2785 return GL_TRUE;
2786 }
2787
2788 GLboolean assemble_BAD(char *opcode_str)
2789 {
2790 radeon_error("Not yet implemented instruction (%s)\n", opcode_str);
2791 return GL_FALSE;
2792 }
2793
2794 GLboolean assemble_CMP(r700_AssemblerBase *pAsm)
2795 {
2796 int tmp;
2797
2798 if( GL_FALSE == checkop3(pAsm) )
2799 {
2800 return GL_FALSE;
2801 }
2802
2803 pAsm->D.dst.opcode = SQ_OP3_INST_CNDGE;
2804 pAsm->D.dst.op3 = 1;
2805
2806 tmp = (-1);
2807
2808 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
2809 {
2810 //OP3 has no support for write mask
2811 tmp = gethelpr(pAsm);
2812
2813 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2814 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2815 pAsm->D.dst.reg = tmp;
2816
2817 nomask_PVSDST(&(pAsm->D.dst));
2818 }
2819 else
2820 {
2821 if( GL_FALSE == assemble_dst(pAsm) )
2822 {
2823 return GL_FALSE;
2824 }
2825 }
2826
2827 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2828 {
2829 return GL_FALSE;
2830 }
2831
2832 if( GL_FALSE == assemble_src(pAsm, 2, 1) )
2833 {
2834 return GL_FALSE;
2835 }
2836
2837 if( GL_FALSE == assemble_src(pAsm, 1, 2) )
2838 {
2839 return GL_FALSE;
2840 }
2841
2842 if ( GL_FALSE == next_ins(pAsm) )
2843 {
2844 return GL_FALSE;
2845 }
2846
2847 if (0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
2848 {
2849 if( GL_FALSE == assemble_dst(pAsm) )
2850 {
2851 return GL_FALSE;
2852 }
2853
2854 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
2855
2856 //tmp for source
2857 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2858 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
2859 pAsm->S[0].src.reg = tmp;
2860
2861 noneg_PVSSRC(&(pAsm->S[0].src));
2862 noswizzle_PVSSRC(&(pAsm->S[0].src));
2863
2864 if( GL_FALSE == next_ins(pAsm) )
2865 {
2866 return GL_FALSE;
2867 }
2868 }
2869
2870 return GL_TRUE;
2871 }
2872
2873 GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode)
2874 {
2875 /*
2876 * r600 - trunc to -PI..PI range
2877 * r700 - normalize by dividing by 2PI
2878 * see fdo bug 27901
2879 */
2880
2881 int tmp;
2882 checkop1(pAsm);
2883
2884 tmp = gethelpr(pAsm);
2885
2886 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
2887 pAsm->D.dst.op3 = 1;
2888
2889 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2890 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2891 pAsm->D.dst.reg = tmp;
2892
2893 assemble_src(pAsm, 0, -1);
2894
2895 pAsm->S[1].src.rtype = SRC_REC_LITERAL;
2896 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
2897
2898 pAsm->S[2].src.rtype = SRC_REC_LITERAL;
2899 setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
2900
2901 pAsm->D2.dst2.literal_slots = 1;
2902 pAsm->C[0].f = 1/(3.1415926535 * 2);
2903 pAsm->C[1].f = 0.5f;
2904
2905 if ( GL_FALSE == next_ins(pAsm) )
2906 {
2907 return GL_FALSE;
2908 }
2909
2910 pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
2911
2912 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2913 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2914 pAsm->D.dst.reg = tmp;
2915 pAsm->D.dst.writex = 1;
2916
2917 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2918 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
2919 pAsm->S[0].src.reg = tmp;
2920 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
2921
2922 if(( GL_FALSE == next_ins(pAsm) ))
2923 {
2924 return GL_FALSE;
2925 }
2926 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
2927 pAsm->D.dst.op3 = 1;
2928
2929 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2930 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2931 pAsm->D.dst.reg = tmp;
2932
2933 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2934 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
2935 pAsm->S[0].src.reg = tmp;
2936 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
2937
2938 pAsm->S[1].src.rtype = SRC_REC_LITERAL;
2939 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
2940
2941 pAsm->S[2].src.rtype = SRC_REC_LITERAL;
2942 setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
2943
2944 pAsm->D2.dst2.literal_slots = 1;
2945
2946 if (pAsm->bR6xx)
2947 {
2948 pAsm->C[0].f = 3.1415926535897f * 2.0f;
2949 pAsm->C[1].f = -3.1415926535897f;
2950 }
2951 else
2952 {
2953 pAsm->C[0].f = 1.0f;
2954 pAsm->C[1].f = -0.5f;
2955 }
2956
2957 if(( GL_FALSE == next_ins(pAsm) ))
2958 {
2959 return GL_FALSE;
2960 }
2961
2962 pAsm->D.dst.opcode = opcode;
2963 pAsm->D.dst.math = 1;
2964
2965 assemble_dst(pAsm);
2966
2967 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2968 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
2969 pAsm->S[0].src.reg = tmp;
2970 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
2971 noneg_PVSSRC(&(pAsm->S[0].src));
2972
2973 next_ins(pAsm);
2974
2975 //TODO - replicate if more channels set in WriteMask
2976 return GL_TRUE;
2977
2978 }
2979
2980 GLboolean assemble_DOT(r700_AssemblerBase *pAsm)
2981 {
2982 if( GL_FALSE == checkop2(pAsm) )
2983 {
2984 return GL_FALSE;
2985 }
2986
2987 pAsm->D.dst.opcode = SQ_OP2_INST_DOT4;
2988
2989 if( GL_FALSE == assemble_dst(pAsm) )
2990 {
2991 return GL_FALSE;
2992 }
2993
2994 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2995 {
2996 return GL_FALSE;
2997 }
2998
2999 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3000 {
3001 return GL_FALSE;
3002 }
3003
3004 if(OPCODE_DP3 == pAsm->pILInst[pAsm->uiCurInst].Opcode)
3005 {
3006 zerocomp_PVSSRC(&(pAsm->S[0].src), 3);
3007 zerocomp_PVSSRC(&(pAsm->S[1].src), 3);
3008 }
3009 else if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_DPH)
3010 {
3011 onecomp_PVSSRC(&(pAsm->S[0].src), 3);
3012 }
3013
3014 if ( GL_FALSE == next_ins(pAsm) )
3015 {
3016 return GL_FALSE;
3017 }
3018
3019 return GL_TRUE;
3020 }
3021
3022 GLboolean assemble_DST(r700_AssemblerBase *pAsm)
3023 {
3024 if( GL_FALSE == checkop2(pAsm) )
3025 {
3026 return GL_FALSE;
3027 }
3028
3029 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
3030
3031 if( GL_FALSE == assemble_dst(pAsm) )
3032 {
3033 return GL_FALSE;
3034 }
3035
3036 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3037 {
3038 return GL_FALSE;
3039 }
3040
3041 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3042 {
3043 return GL_FALSE;
3044 }
3045
3046 onecomp_PVSSRC(&(pAsm->S[0].src), 0);
3047 onecomp_PVSSRC(&(pAsm->S[0].src), 3);
3048
3049 onecomp_PVSSRC(&(pAsm->S[1].src), 0);
3050 onecomp_PVSSRC(&(pAsm->S[1].src), 2);
3051
3052 if ( GL_FALSE == next_ins(pAsm) )
3053 {
3054 return GL_FALSE;
3055 }
3056
3057 return GL_TRUE;
3058 }
3059
3060 GLboolean assemble_EX2(r700_AssemblerBase *pAsm)
3061 {
3062 return assemble_math_function(pAsm, SQ_OP2_INST_EXP_IEEE);
3063 }
3064
3065 GLboolean assemble_EXP(r700_AssemblerBase *pAsm)
3066 {
3067 BITS tmp;
3068
3069 checkop1(pAsm);
3070
3071 tmp = gethelpr(pAsm);
3072
3073 // FLOOR tmp.x, a.x
3074 // EX2 dst.x tmp.x
3075
3076 if (pAsm->pILInst->DstReg.WriteMask & 0x1) {
3077 pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
3078
3079 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3080 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3081 pAsm->D.dst.reg = tmp;
3082 pAsm->D.dst.writex = 1;
3083
3084 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3085 {
3086 return GL_FALSE;
3087 }
3088
3089 if( GL_FALSE == next_ins(pAsm) )
3090 {
3091 return GL_FALSE;
3092 }
3093
3094 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3095 pAsm->D.dst.math = 1;
3096
3097 if( GL_FALSE == assemble_dst(pAsm) )
3098 {
3099 return GL_FALSE;
3100 }
3101
3102 pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
3103
3104 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3105 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3106 pAsm->S[0].src.reg = tmp;
3107
3108 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3109 noneg_PVSSRC(&(pAsm->S[0].src));
3110
3111 if( GL_FALSE == next_ins(pAsm) )
3112 {
3113 return GL_FALSE;
3114 }
3115 }
3116
3117 // FRACT dst.y a.x
3118
3119 if ((pAsm->pILInst->DstReg.WriteMask >> 1) & 0x1) {
3120 pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
3121
3122 if( GL_FALSE == assemble_dst(pAsm) )
3123 {
3124 return GL_FALSE;
3125 }
3126
3127 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3128 {
3129 return GL_FALSE;
3130 }
3131
3132 pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
3133
3134 if( GL_FALSE == next_ins(pAsm) )
3135 {
3136 return GL_FALSE;
3137 }
3138 }
3139
3140 // EX2 dst.z, a.x
3141
3142 if ((pAsm->pILInst->DstReg.WriteMask >> 2) & 0x1) {
3143 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3144 pAsm->D.dst.math = 1;
3145
3146 if( GL_FALSE == assemble_dst(pAsm) )
3147 {
3148 return GL_FALSE;
3149 }
3150
3151 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3152 {
3153 return GL_FALSE;
3154 }
3155
3156 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0;
3157
3158 if( GL_FALSE == next_ins(pAsm) )
3159 {
3160 return GL_FALSE;
3161 }
3162 }
3163
3164 // MOV dst.w 1.0
3165
3166 if ((pAsm->pILInst->DstReg.WriteMask >> 3) & 0x1) {
3167 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3168
3169 if( GL_FALSE == assemble_dst(pAsm) )
3170 {
3171 return GL_FALSE;
3172 }
3173
3174 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0;
3175
3176 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3177 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3178 pAsm->S[0].src.reg = tmp;
3179
3180 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1);
3181 noneg_PVSSRC(&(pAsm->S[0].src));
3182
3183 if( GL_FALSE == next_ins(pAsm) )
3184 {
3185 return GL_FALSE;
3186 }
3187 }
3188
3189 return GL_TRUE;
3190 }
3191
3192 GLboolean assemble_FLR(r700_AssemblerBase *pAsm)
3193 {
3194 checkop1(pAsm);
3195
3196 pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
3197
3198 if ( GL_FALSE == assemble_dst(pAsm) )
3199 {
3200 return GL_FALSE;
3201 }
3202
3203 if ( GL_FALSE == assemble_src(pAsm, 0, -1) )
3204 {
3205 return GL_FALSE;
3206 }
3207
3208 if ( GL_FALSE == next_ins(pAsm) )
3209 {
3210 return GL_FALSE;
3211 }
3212
3213 return GL_TRUE;
3214 }
3215
3216 GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm)
3217 {
3218 return assemble_math_function(pAsm, SQ_OP2_INST_FLT_TO_INT);
3219 }
3220
3221 GLboolean assemble_FRC(r700_AssemblerBase *pAsm)
3222 {
3223 checkop1(pAsm);
3224
3225 pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
3226
3227 if ( GL_FALSE == assemble_dst(pAsm) )
3228 {
3229 return GL_FALSE;
3230 }
3231
3232 if ( GL_FALSE == assemble_src(pAsm, 0, -1) )
3233 {
3234 return GL_FALSE;
3235 }
3236
3237 if ( GL_FALSE == next_ins(pAsm) )
3238 {
3239 return GL_FALSE;
3240 }
3241
3242 return GL_TRUE;
3243 }
3244
3245 GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode)
3246 {
3247 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
3248
3249 if(pILInst->Opcode == OPCODE_KIL)
3250 checkop1(pAsm);
3251
3252 pAsm->D.dst.opcode = opcode;
3253 //pAsm->D.dst.math = 1;
3254
3255 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3256 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3257 pAsm->D.dst.reg = 0;
3258 pAsm->D.dst.writex = 0;
3259 pAsm->D.dst.writey = 0;
3260 pAsm->D.dst.writez = 0;
3261 pAsm->D.dst.writew = 0;
3262
3263 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3264 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3265 pAsm->S[0].src.reg = 0;
3266 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0);
3267 noneg_PVSSRC(&(pAsm->S[0].src));
3268
3269 if(pILInst->Opcode == OPCODE_KIL_NV)
3270 {
3271 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
3272 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
3273 pAsm->S[1].src.reg = 0;
3274 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_1);
3275 neg_PVSSRC(&(pAsm->S[1].src));
3276 }
3277 else
3278 {
3279 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
3280 {
3281 return GL_FALSE;
3282 }
3283
3284 }
3285
3286 if ( GL_FALSE == next_ins(pAsm) )
3287 {
3288 return GL_FALSE;
3289 }
3290
3291 /* Doc says KILL has to be last(end) ALU clause */
3292 pAsm->pR700Shader->killIsUsed = GL_TRUE;
3293 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
3294
3295 return GL_TRUE;
3296 }
3297
3298 GLboolean assemble_LG2(r700_AssemblerBase *pAsm)
3299 {
3300 return assemble_math_function(pAsm, SQ_OP2_INST_LOG_IEEE);
3301 }
3302
3303 GLboolean assemble_LRP(r700_AssemblerBase *pAsm)
3304 {
3305 BITS tmp;
3306
3307 if( GL_FALSE == checkop3(pAsm) )
3308 {
3309 return GL_FALSE;
3310 }
3311
3312 tmp = gethelpr(pAsm);
3313
3314 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
3315
3316 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3317 pAsm->D.dst.reg = tmp;
3318 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3319 nomask_PVSDST(&(pAsm->D.dst));
3320
3321
3322 if( GL_FALSE == assemble_src(pAsm, 1, 0) )
3323 {
3324 return GL_FALSE;
3325 }
3326
3327 if ( GL_FALSE == assemble_src(pAsm, 2, 1) )
3328 {
3329 return GL_FALSE;
3330 }
3331
3332 neg_PVSSRC(&(pAsm->S[1].src));
3333
3334 if( GL_FALSE == next_ins(pAsm) )
3335 {
3336 return GL_FALSE;
3337 }
3338
3339 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
3340 pAsm->D.dst.op3 = 1;
3341
3342 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3343 pAsm->D.dst.reg = tmp;
3344 nomask_PVSDST(&(pAsm->D.dst));
3345 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3346
3347 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3348 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3349 pAsm->S[0].src.reg = tmp;
3350 noswizzle_PVSSRC(&(pAsm->S[0].src));
3351
3352
3353 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
3354 {
3355 return GL_FALSE;
3356 }
3357
3358 if( GL_FALSE == assemble_src(pAsm, 2, -1) )
3359 {
3360 return GL_FALSE;
3361 }
3362
3363 if( GL_FALSE == next_ins(pAsm) )
3364 {
3365 return GL_FALSE;
3366 }
3367
3368 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3369
3370 if( GL_FALSE == assemble_dst(pAsm) )
3371 {
3372 return GL_FALSE;
3373 }
3374
3375 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3376 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3377 pAsm->S[0].src.reg = tmp;
3378 noswizzle_PVSSRC(&(pAsm->S[0].src));
3379
3380 if( GL_FALSE == next_ins(pAsm) )
3381 {
3382 return GL_FALSE;
3383 }
3384
3385 return GL_TRUE;
3386 }
3387
3388 GLboolean assemble_LOG(r700_AssemblerBase *pAsm)
3389 {
3390 BITS tmp1, tmp2, tmp3;
3391
3392 checkop1(pAsm);
3393
3394 tmp1 = gethelpr(pAsm);
3395 tmp2 = gethelpr(pAsm);
3396 tmp3 = gethelpr(pAsm);
3397
3398 // FIXME: The hardware can do fabs() directly on input
3399 // elements, but the compiler doesn't have the
3400 // capability to use that.
3401
3402 // MAX tmp1.x, a.x, -a.x (fabs(a.x))
3403
3404 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
3405
3406 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3407 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3408 pAsm->D.dst.reg = tmp1;
3409 pAsm->D.dst.writex = 1;
3410
3411 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3412 {
3413 return GL_FALSE;
3414 }
3415
3416 pAsm->S[1].bits = pAsm->S[0].bits;
3417 flipneg_PVSSRC(&(pAsm->S[1].src));
3418
3419 if ( GL_FALSE == next_ins(pAsm) )
3420 {
3421 return GL_FALSE;
3422 }
3423
3424 // Entire algo:
3425 //
3426 // LG2 tmp2.x, tmp1.x
3427 // FLOOR tmp3.x, tmp2.x
3428 // MOV dst.x, tmp3.x
3429 // ADD tmp3.x, tmp2.x, -tmp3.x
3430 // EX2 dst.y, tmp3.x
3431 // MOV dst.z, tmp2.x
3432 // MOV dst.w, 1.0
3433
3434 // LG2 tmp2.x, tmp1.x
3435 // FLOOR tmp3.x, tmp2.x
3436
3437 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
3438 pAsm->D.dst.math = 1;
3439
3440 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3441 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3442 pAsm->D.dst.reg = tmp2;
3443 pAsm->D.dst.writex = 1;
3444
3445 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3446 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3447 pAsm->S[0].src.reg = tmp1;
3448
3449 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3450 noneg_PVSSRC(&(pAsm->S[0].src));
3451
3452 if( GL_FALSE == next_ins(pAsm) )
3453 {
3454 return GL_FALSE;
3455 }
3456
3457 pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
3458
3459 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3460 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3461 pAsm->D.dst.reg = tmp3;
3462 pAsm->D.dst.writex = 1;
3463
3464 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3465 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3466 pAsm->S[0].src.reg = tmp2;
3467
3468 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3469 noneg_PVSSRC(&(pAsm->S[0].src));
3470
3471 if( GL_FALSE == next_ins(pAsm) )
3472 {
3473 return GL_FALSE;
3474 }
3475
3476 // MOV dst.x, tmp3.x
3477
3478 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3479
3480 if( GL_FALSE == assemble_dst(pAsm) )
3481 {
3482 return GL_FALSE;
3483 }
3484
3485 pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
3486
3487 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3488 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3489 pAsm->S[0].src.reg = tmp3;
3490
3491 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3492 noneg_PVSSRC(&(pAsm->S[0].src));
3493
3494 if( GL_FALSE == next_ins(pAsm) )
3495 {
3496 return GL_FALSE;
3497 }
3498
3499 // ADD tmp3.x, tmp2.x, -tmp3.x
3500 // EX2 dst.y, tmp3.x
3501
3502 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
3503
3504 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3505 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3506 pAsm->D.dst.reg = tmp3;
3507 pAsm->D.dst.writex = 1;
3508
3509 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3510 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3511 pAsm->S[0].src.reg = tmp2;
3512
3513 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3514 noneg_PVSSRC(&(pAsm->S[0].src));
3515
3516 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
3517 pAsm->S[1].src.rtype = DST_REG_TEMPORARY;
3518 pAsm->S[1].src.reg = tmp3;
3519
3520 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
3521 neg_PVSSRC(&(pAsm->S[1].src));
3522
3523 if( GL_FALSE == next_ins(pAsm) )
3524 {
3525 return GL_FALSE;
3526 }
3527
3528 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3529 pAsm->D.dst.math = 1;
3530
3531 if( GL_FALSE == assemble_dst(pAsm) )
3532 {
3533 return GL_FALSE;
3534 }
3535
3536 pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
3537
3538 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3539 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3540 pAsm->S[0].src.reg = tmp3;
3541
3542 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3543 noneg_PVSSRC(&(pAsm->S[0].src));
3544
3545 if( GL_FALSE == next_ins(pAsm) )
3546 {
3547 return GL_FALSE;
3548 }
3549
3550 // MOV dst.z, tmp2.x
3551
3552 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3553
3554 if( GL_FALSE == assemble_dst(pAsm) )
3555 {
3556 return GL_FALSE;
3557 }
3558
3559 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0;
3560
3561 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3562 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3563 pAsm->S[0].src.reg = tmp2;
3564
3565 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3566 noneg_PVSSRC(&(pAsm->S[0].src));
3567
3568 if( GL_FALSE == next_ins(pAsm) )
3569 {
3570 return GL_FALSE;
3571 }
3572
3573 // MOV dst.w 1.0
3574
3575 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3576
3577 if( GL_FALSE == assemble_dst(pAsm) )
3578 {
3579 return GL_FALSE;
3580 }
3581
3582 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0;
3583
3584 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3585 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3586 pAsm->S[0].src.reg = tmp1;
3587
3588 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1);
3589 noneg_PVSSRC(&(pAsm->S[0].src));
3590
3591 if( GL_FALSE == next_ins(pAsm) )
3592 {
3593 return GL_FALSE;
3594 }
3595
3596 return GL_TRUE;
3597 }
3598
3599 GLboolean assemble_MAD(struct r700_AssemblerBase *pAsm)
3600 {
3601 int tmp, ii;
3602 GLboolean bReplaceDst = GL_FALSE;
3603 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
3604
3605 if( GL_FALSE == checkop3(pAsm) )
3606 {
3607 return GL_FALSE;
3608 }
3609
3610 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
3611 pAsm->D.dst.op3 = 1;
3612
3613 tmp = (-1);
3614
3615 if(PROGRAM_TEMPORARY == pILInst->DstReg.File)
3616 { /* TODO : more investigation on MAD src and dst using same register */
3617 for(ii=0; ii<3; ii++)
3618 {
3619 if( (PROGRAM_TEMPORARY == pILInst->SrcReg[ii].File)
3620 && (pILInst->DstReg.Index == pILInst->SrcReg[ii].Index) )
3621 {
3622 bReplaceDst = GL_TRUE;
3623 break;
3624 }
3625 }
3626 }
3627 if(0xF != pILInst->DstReg.WriteMask)
3628 { /* OP3 has no support for write mask */
3629 bReplaceDst = GL_TRUE;
3630 }
3631
3632 if(GL_TRUE == bReplaceDst)
3633 {
3634 tmp = gethelpr(pAsm);
3635
3636 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3637 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3638 pAsm->D.dst.reg = tmp;
3639
3640 nomask_PVSDST(&(pAsm->D.dst));
3641 }
3642 else
3643 {
3644 if( GL_FALSE == assemble_dst(pAsm) )
3645 {
3646 return GL_FALSE;
3647 }
3648 }
3649
3650 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3651 {
3652 return GL_FALSE;
3653 }
3654
3655 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3656 {
3657 return GL_FALSE;
3658 }
3659
3660 if( GL_FALSE == assemble_src(pAsm, 2, -1) )
3661 {
3662 return GL_FALSE;
3663 }
3664
3665 if ( GL_FALSE == next_ins(pAsm) )
3666 {
3667 return GL_FALSE;
3668 }
3669
3670 if (GL_TRUE == bReplaceDst)
3671 {
3672 if( GL_FALSE == assemble_dst(pAsm) )
3673 {
3674 return GL_FALSE;
3675 }
3676
3677 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3678
3679 //tmp for source
3680 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3681 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3682 pAsm->S[0].src.reg = tmp;
3683
3684 noneg_PVSSRC(&(pAsm->S[0].src));
3685 noswizzle_PVSSRC(&(pAsm->S[0].src));
3686
3687 if( GL_FALSE == next_ins(pAsm) )
3688 {
3689 return GL_FALSE;
3690 }
3691 }
3692
3693 return GL_TRUE;
3694 }
3695
3696 /* LIT dst, src */
3697 GLboolean assemble_LIT(r700_AssemblerBase *pAsm)
3698 {
3699 unsigned int dstReg;
3700 unsigned int dstType;
3701 unsigned int srcReg;
3702 unsigned int srcType;
3703 checkop1(pAsm);
3704 int tmp = gethelpr(pAsm);
3705
3706 if( GL_FALSE == assemble_dst(pAsm) )
3707 {
3708 return GL_FALSE;
3709 }
3710 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3711 {
3712 return GL_FALSE;
3713 }
3714 dstReg = pAsm->D.dst.reg;
3715 dstType = pAsm->D.dst.rtype;
3716 srcReg = pAsm->S[0].src.reg;
3717 srcType = pAsm->S[0].src.rtype;
3718
3719 /* dst.xw, <- 1.0 */
3720 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3721 pAsm->D.dst.rtype = dstType;
3722 pAsm->D.dst.reg = dstReg;
3723 pAsm->D.dst.writex = 1;
3724 pAsm->D.dst.writey = 0;
3725 pAsm->D.dst.writez = 0;
3726 pAsm->D.dst.writew = 1;
3727 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3728 pAsm->S[0].src.reg = tmp;
3729 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3730 noneg_PVSSRC(&(pAsm->S[0].src));
3731 pAsm->S[0].src.swizzlex = SQ_SEL_1;
3732 pAsm->S[0].src.swizzley = SQ_SEL_1;
3733 pAsm->S[0].src.swizzlez = SQ_SEL_1;
3734 pAsm->S[0].src.swizzlew = SQ_SEL_1;
3735 if( GL_FALSE == next_ins(pAsm) )
3736 {
3737 return GL_FALSE;
3738 }
3739
3740 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3741 {
3742 return GL_FALSE;
3743 }
3744
3745 /* dst.y = max(src.x, 0.0) */
3746 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
3747 pAsm->D.dst.rtype = dstType;
3748 pAsm->D.dst.reg = dstReg;
3749 pAsm->D.dst.writex = 0;
3750 pAsm->D.dst.writey = 1;
3751 pAsm->D.dst.writez = 0;
3752 pAsm->D.dst.writew = 0;
3753 pAsm->S[0].src.rtype = srcType;
3754 pAsm->S[0].src.reg = srcReg;
3755 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3756 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
3757 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
3758 pAsm->S[1].src.reg = tmp;
3759 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
3760 noneg_PVSSRC(&(pAsm->S[1].src));
3761 pAsm->S[1].src.swizzlex = SQ_SEL_0;
3762 pAsm->S[1].src.swizzley = SQ_SEL_0;
3763 pAsm->S[1].src.swizzlez = SQ_SEL_0;
3764 pAsm->S[1].src.swizzlew = SQ_SEL_0;
3765 if( GL_FALSE == next_ins(pAsm) )
3766 {
3767 return GL_FALSE;
3768 }
3769
3770 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3771 {
3772 return GL_FALSE;
3773 }
3774
3775 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y);
3776
3777 /* dst.z = log(src.y) */
3778 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_CLAMPED;
3779 pAsm->D.dst.math = 1;
3780 pAsm->D.dst.rtype = dstType;
3781 pAsm->D.dst.reg = dstReg;
3782 pAsm->D.dst.writex = 0;
3783 pAsm->D.dst.writey = 0;
3784 pAsm->D.dst.writez = 1;
3785 pAsm->D.dst.writew = 0;
3786 pAsm->S[0].src.rtype = srcType;
3787 pAsm->S[0].src.reg = srcReg;
3788 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3789 if( GL_FALSE == next_ins(pAsm) )
3790 {
3791 return GL_FALSE;
3792 }
3793
3794 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3795 {
3796 return GL_FALSE;
3797 }
3798
3799 if( GL_FALSE == assemble_src(pAsm, 0, 2) )
3800 {
3801 return GL_FALSE;
3802 }
3803
3804 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W);
3805
3806 swizzleagain_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
3807
3808 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
3809 pAsm->D.dst.opcode = SQ_OP3_INST_MUL_LIT;
3810 pAsm->D.dst.math = 1;
3811 pAsm->D.dst.op3 = 1;
3812 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3813 pAsm->D.dst.reg = tmp;
3814 pAsm->D.dst.writex = 1;
3815 pAsm->D.dst.writey = 0;
3816 pAsm->D.dst.writez = 0;
3817 pAsm->D.dst.writew = 0;
3818
3819 pAsm->S[0].src.rtype = srcType;
3820 pAsm->S[0].src.reg = srcReg;
3821 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3822
3823 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
3824 pAsm->S[1].src.reg = dstReg;
3825 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
3826 noneg_PVSSRC(&(pAsm->S[1].src));
3827 pAsm->S[1].src.swizzlex = SQ_SEL_Z;
3828 pAsm->S[1].src.swizzley = SQ_SEL_Z;
3829 pAsm->S[1].src.swizzlez = SQ_SEL_Z;
3830 pAsm->S[1].src.swizzlew = SQ_SEL_Z;
3831
3832 pAsm->S[2].src.rtype = srcType;
3833 pAsm->S[2].src.reg = srcReg;
3834 setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
3835
3836 if( GL_FALSE == next_ins(pAsm) )
3837 {
3838 return GL_FALSE;
3839 }
3840
3841 /* dst.z = exp(tmp.x) */
3842 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3843 pAsm->D.dst.math = 1;
3844 pAsm->D.dst.rtype = dstType;
3845 pAsm->D.dst.reg = dstReg;
3846 pAsm->D.dst.writex = 0;
3847 pAsm->D.dst.writey = 0;
3848 pAsm->D.dst.writez = 1;
3849 pAsm->D.dst.writew = 0;
3850
3851 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3852 pAsm->S[0].src.reg = tmp;
3853 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3854 noneg_PVSSRC(&(pAsm->S[0].src));
3855 pAsm->S[0].src.swizzlex = SQ_SEL_X;
3856 pAsm->S[0].src.swizzley = SQ_SEL_X;
3857 pAsm->S[0].src.swizzlez = SQ_SEL_X;
3858 pAsm->S[0].src.swizzlew = SQ_SEL_X;
3859
3860 if( GL_FALSE == next_ins(pAsm) )
3861 {
3862 return GL_FALSE;
3863 }
3864
3865 return GL_TRUE;
3866 }
3867
3868 GLboolean assemble_MAX(r700_AssemblerBase *pAsm)
3869 {
3870 if( GL_FALSE == checkop2(pAsm) )
3871 {
3872 return GL_FALSE;
3873 }
3874
3875 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
3876
3877 if( GL_FALSE == assemble_dst(pAsm) )
3878 {
3879 return GL_FALSE;
3880 }
3881
3882 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3883 {
3884 return GL_FALSE;
3885 }
3886
3887 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3888 {
3889 return GL_FALSE;
3890 }
3891
3892 if( GL_FALSE == next_ins(pAsm) )
3893 {
3894 return GL_FALSE;
3895 }
3896
3897 return GL_TRUE;
3898 }
3899
3900 GLboolean assemble_MIN(r700_AssemblerBase *pAsm)
3901 {
3902 if( GL_FALSE == checkop2(pAsm) )
3903 {
3904 return GL_FALSE;
3905 }
3906
3907 pAsm->D.dst.opcode = SQ_OP2_INST_MIN;
3908
3909 if( GL_FALSE == assemble_dst(pAsm) )
3910 {
3911 return GL_FALSE;
3912 }
3913
3914 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3915 {
3916 return GL_FALSE;
3917 }
3918
3919 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3920 {
3921 return GL_FALSE;
3922 }
3923
3924 if( GL_FALSE == next_ins(pAsm) )
3925 {
3926 return GL_FALSE;
3927 }
3928
3929 return GL_TRUE;
3930 }
3931
3932 GLboolean assemble_MOV(r700_AssemblerBase *pAsm)
3933 {
3934 checkop1(pAsm);
3935
3936 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3937
3938 if (GL_FALSE == assemble_dst(pAsm))
3939 {
3940 return GL_FALSE;
3941 }
3942
3943 if (GL_FALSE == assemble_src(pAsm, 0, -1))
3944 {
3945 return GL_FALSE;
3946 }
3947
3948 if ( GL_FALSE == next_ins(pAsm) )
3949 {
3950 return GL_FALSE;
3951 }
3952
3953 return GL_TRUE;
3954 }
3955
3956 GLboolean assemble_MUL(r700_AssemblerBase *pAsm)
3957 {
3958 if( GL_FALSE == checkop2(pAsm) )
3959 {
3960 return GL_FALSE;
3961 }
3962
3963 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
3964
3965 if( GL_FALSE == assemble_dst(pAsm) )
3966 {
3967 return GL_FALSE;
3968 }
3969
3970 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3971 {
3972 return GL_FALSE;
3973 }
3974
3975 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3976 {
3977 return GL_FALSE;
3978 }
3979
3980 if( GL_FALSE == next_ins(pAsm) )
3981 {
3982 return GL_FALSE;
3983 }
3984
3985 return GL_TRUE;
3986 }
3987
3988 GLboolean assemble_POW(r700_AssemblerBase *pAsm)
3989 {
3990 BITS tmp;
3991
3992 checkop1(pAsm);
3993
3994 tmp = gethelpr(pAsm);
3995
3996 // LG2 tmp.x, a.swizzle
3997 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
3998 pAsm->D.dst.math = 1;
3999
4000 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4001 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4002 pAsm->D.dst.reg = tmp;
4003 nomask_PVSDST(&(pAsm->D.dst));
4004
4005 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4006 {
4007 return GL_FALSE;
4008 }
4009
4010 if( GL_FALSE == next_ins(pAsm) )
4011 {
4012 return GL_FALSE;
4013 }
4014
4015 // MUL tmp.x, tmp.x, b.swizzle
4016 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
4017
4018 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4019 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4020 pAsm->D.dst.reg = tmp;
4021 nomask_PVSDST(&(pAsm->D.dst));
4022
4023 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4024 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4025 pAsm->S[0].src.reg = tmp;
4026 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4027 noneg_PVSSRC(&(pAsm->S[0].src));
4028
4029 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4030 {
4031 return GL_FALSE;
4032 }
4033
4034 if( GL_FALSE == next_ins(pAsm) )
4035 {
4036 return GL_FALSE;
4037 }
4038
4039 // EX2 dst.mask, tmp.x
4040 // EX2 tmp.x, tmp.x
4041 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
4042 pAsm->D.dst.math = 1;
4043
4044 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4045 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4046 pAsm->D.dst.reg = tmp;
4047 nomask_PVSDST(&(pAsm->D.dst));
4048
4049 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4050 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4051 pAsm->S[0].src.reg = tmp;
4052 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4053 noneg_PVSSRC(&(pAsm->S[0].src));
4054
4055 if( GL_FALSE == next_ins(pAsm) )
4056 {
4057 return GL_FALSE;
4058 }
4059
4060 // Now replicate result to all necessary channels in destination
4061 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4062
4063 if( GL_FALSE == assemble_dst(pAsm) )
4064 {
4065 return GL_FALSE;
4066 }
4067
4068 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4069 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
4070 pAsm->S[0].src.reg = tmp;
4071
4072 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4073 noneg_PVSSRC(&(pAsm->S[0].src));
4074
4075 if( GL_FALSE == next_ins(pAsm) )
4076 {
4077 return GL_FALSE;
4078 }
4079
4080 return GL_TRUE;
4081 }
4082
4083 GLboolean assemble_RCP(r700_AssemblerBase *pAsm)
4084 {
4085 return assemble_math_function(pAsm, SQ_OP2_INST_RECIP_IEEE);
4086 }
4087
4088 GLboolean assemble_RSQ(r700_AssemblerBase *pAsm)
4089 {
4090 return assemble_math_function(pAsm, SQ_OP2_INST_RECIPSQRT_IEEE);
4091 }
4092
4093 GLboolean assemble_SCS(r700_AssemblerBase *pAsm)
4094 {
4095 BITS tmp;
4096
4097 checkop1(pAsm);
4098
4099 tmp = gethelpr(pAsm);
4100
4101 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
4102 pAsm->D.dst.op3 = 1;
4103
4104 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4105 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4106 pAsm->D.dst.reg = tmp;
4107
4108 assemble_src(pAsm, 0, -1);
4109
4110 pAsm->S[1].src.rtype = SRC_REC_LITERAL;
4111 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
4112
4113 pAsm->S[2].src.rtype = SRC_REC_LITERAL;
4114 setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
4115
4116 pAsm->D2.dst2.literal_slots = 1;
4117 pAsm->C[0].f = 1/(3.1415926535 * 2);
4118 pAsm->C[1].f = 0.5F;
4119
4120 if ( GL_FALSE == next_ins(pAsm) )
4121 {
4122 return GL_FALSE;
4123 }
4124
4125 pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
4126
4127 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4128 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4129 pAsm->D.dst.reg = tmp;
4130 pAsm->D.dst.writex = 1;
4131
4132 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4133 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4134 pAsm->S[0].src.reg = tmp;
4135 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4136
4137 if(( GL_FALSE == next_ins(pAsm) ))
4138 {
4139 return GL_FALSE;
4140 }
4141 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
4142 pAsm->D.dst.op3 = 1;
4143
4144 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4145 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4146 pAsm->D.dst.reg = tmp;
4147
4148 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4149 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4150 pAsm->S[0].src.reg = tmp;
4151 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4152
4153 pAsm->S[1].src.rtype = SRC_REC_LITERAL;
4154 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
4155
4156 pAsm->S[2].src.rtype = SRC_REC_LITERAL;
4157 setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
4158
4159 pAsm->D2.dst2.literal_slots = 1;
4160
4161 if(pAsm->bR6xx) {
4162 pAsm->C[0].f = 3.1415926535897f * 2.0f;
4163 pAsm->C[1].f = -3.1415926535897f;
4164 } else {
4165 pAsm->C[0].f = 1.0f;
4166 pAsm->C[1].f = -0.5f;
4167 }
4168
4169 if(( GL_FALSE == next_ins(pAsm) ))
4170 {
4171 return GL_FALSE;
4172 }
4173
4174 // COS dst.x, a.x
4175 pAsm->D.dst.opcode = SQ_OP2_INST_COS;
4176 pAsm->D.dst.math = 1;
4177
4178 assemble_dst(pAsm);
4179 /* mask y */
4180 pAsm->D.dst.writey = 0;
4181
4182 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4183 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4184 pAsm->S[0].src.reg = tmp;
4185 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4186 noneg_PVSSRC(&(pAsm->S[0].src));
4187
4188 if ( GL_FALSE == next_ins(pAsm) )
4189 {
4190 return GL_FALSE;
4191 }
4192
4193 // SIN dst.y, a.x
4194 pAsm->D.dst.opcode = SQ_OP2_INST_SIN;
4195 pAsm->D.dst.math = 1;
4196
4197 assemble_dst(pAsm);
4198 /* mask x */
4199 pAsm->D.dst.writex = 0;
4200
4201 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4202 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4203 pAsm->S[0].src.reg = tmp;
4204 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4205 noneg_PVSSRC(&(pAsm->S[0].src));
4206
4207 if( GL_FALSE == next_ins(pAsm) )
4208 {
4209 return GL_FALSE;
4210 }
4211
4212 return GL_TRUE;
4213 }
4214
4215 GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode)
4216 {
4217 if( GL_FALSE == checkop2(pAsm) )
4218 {
4219 return GL_FALSE;
4220 }
4221
4222 pAsm->D.dst.opcode = opcode;
4223 //pAsm->D.dst.math = 1;
4224
4225 if( GL_FALSE == assemble_dst(pAsm) )
4226 {
4227 return GL_FALSE;
4228 }
4229
4230 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4231 {
4232 return GL_FALSE;
4233 }
4234
4235 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4236 {
4237 return GL_FALSE;
4238 }
4239
4240 if( GL_FALSE == next_ins(pAsm) )
4241 {
4242 return GL_FALSE;
4243 }
4244
4245 return GL_TRUE;
4246 }
4247
4248 GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode)
4249 {
4250 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
4251
4252 pAsm->D.dst.opcode = opcode;
4253 pAsm->D.dst.math = 1;
4254 pAsm->D.dst.predicated = 1;
4255
4256 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4257 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4258 pAsm->D.dst.reg = pAsm->uHelpReg;
4259 pAsm->D.dst.writex = 1;
4260 pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
4261
4262 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4263 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4264 pAsm->S[0].src.reg = pAsm->last_cond_register + pAsm->starting_temp_register_number;
4265 pAsm->S[0].src.swizzlex = pILInst->DstReg.CondSwizzle & 0x7;
4266 noneg_PVSSRC(&(pAsm->S[0].src));
4267
4268 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4269 pAsm->S[1].src.reg = pAsm->uHelpReg;
4270 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4271 noneg_PVSSRC(&(pAsm->S[1].src));
4272 pAsm->S[1].src.swizzlex = SQ_SEL_0;
4273 pAsm->S[1].src.swizzley = SQ_SEL_0;
4274 pAsm->S[1].src.swizzlez = SQ_SEL_0;
4275 pAsm->S[1].src.swizzlew = SQ_SEL_0;
4276
4277 if( GL_FALSE == next_ins(pAsm) )
4278 {
4279 return GL_FALSE;
4280 }
4281
4282 return GL_TRUE;
4283 }
4284
4285 GLboolean assemble_SGE(r700_AssemblerBase *pAsm)
4286 {
4287 if( GL_FALSE == checkop2(pAsm) )
4288 {
4289 return GL_FALSE;
4290 }
4291
4292 pAsm->D.dst.opcode = SQ_OP2_INST_SETGE;
4293
4294 if( GL_FALSE == assemble_dst(pAsm) )
4295 {
4296 return GL_FALSE;
4297 }
4298
4299 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4300 {
4301 return GL_FALSE;
4302 }
4303
4304 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4305 {
4306 return GL_FALSE;
4307 }
4308
4309 if( GL_FALSE == next_ins(pAsm) )
4310 {
4311 return GL_FALSE;
4312 }
4313
4314 return GL_TRUE;
4315 }
4316
4317 GLboolean assemble_SLT(r700_AssemblerBase *pAsm)
4318 {
4319 if( GL_FALSE == checkop2(pAsm) )
4320 {
4321 return GL_FALSE;
4322 }
4323
4324 pAsm->D.dst.opcode = SQ_OP2_INST_SETGT;
4325
4326 if( GL_FALSE == assemble_dst(pAsm) )
4327 {
4328 return GL_FALSE;
4329 }
4330
4331 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
4332 {
4333 return GL_FALSE;
4334 }
4335
4336 if( GL_FALSE == assemble_src(pAsm, 1, 0) )
4337 {
4338 return GL_FALSE;
4339 }
4340
4341 if( GL_FALSE == next_ins(pAsm) )
4342 {
4343 return GL_FALSE;
4344 }
4345
4346 return GL_TRUE;
4347 }
4348
4349 GLboolean assemble_STP(r700_AssemblerBase *pAsm)
4350 {
4351 return GL_TRUE;
4352 }
4353
4354 GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
4355 {
4356 GLboolean src_const;
4357 GLboolean need_barrier = GL_FALSE;
4358
4359 checkop1(pAsm);
4360
4361 switch (pAsm->pILInst[pAsm->uiCurInst].SrcReg[0].File)
4362 {
4363 case PROGRAM_UNIFORM:
4364 case PROGRAM_CONSTANT:
4365 case PROGRAM_LOCAL_PARAM:
4366 case PROGRAM_ENV_PARAM:
4367 case PROGRAM_STATE_VAR:
4368 src_const = GL_TRUE;
4369 break;
4370 case PROGRAM_TEMPORARY:
4371 case PROGRAM_INPUT:
4372 default:
4373 src_const = GL_FALSE;
4374 break;
4375 }
4376
4377 if (GL_TRUE == src_const)
4378 {
4379 if ( GL_FALSE == mov_temp(pAsm, 0) )
4380 return GL_FALSE;
4381 need_barrier = GL_TRUE;
4382 }
4383
4384 if (pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP)
4385 {
4386 GLuint tmp = gethelpr(pAsm);
4387 pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE;
4388 pAsm->D.dst.math = 1;
4389 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4390 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4391 pAsm->D.dst.reg = tmp;
4392 pAsm->D.dst.writew = 1;
4393
4394 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4395 {
4396 return GL_FALSE;
4397 }
4398 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W);
4399 if( GL_FALSE == next_ins(pAsm) )
4400 {
4401 return GL_FALSE;
4402 }
4403
4404 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
4405 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4406 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4407 pAsm->D.dst.reg = tmp;
4408 pAsm->D.dst.writex = 1;
4409 pAsm->D.dst.writey = 1;
4410 pAsm->D.dst.writez = 1;
4411 pAsm->D.dst.writew = 0;
4412
4413 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4414 {
4415 return GL_FALSE;
4416 }
4417 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4418 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4419 pAsm->S[1].src.reg = tmp;
4420 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_W);
4421
4422 if( GL_FALSE == next_ins(pAsm) )
4423 {
4424 return GL_FALSE;
4425 }
4426
4427 pAsm->aArgSubst[1] = tmp;
4428 need_barrier = GL_TRUE;
4429 }
4430
4431 if (pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX )
4432 {
4433 GLuint tmp1 = gethelpr(pAsm);
4434 GLuint tmp2 = gethelpr(pAsm);
4435
4436 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
4437 pAsm->D.dst.opcode = SQ_OP2_INST_CUBE;
4438 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4439 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4440 pAsm->D.dst.reg = tmp1;
4441 nomask_PVSDST(&(pAsm->D.dst));
4442
4443 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4444 {
4445 return GL_FALSE;
4446 }
4447
4448 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
4449 {
4450 return GL_FALSE;
4451 }
4452
4453 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y);
4454 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_X, SQ_SEL_Z, SQ_SEL_Z);
4455
4456 if( GL_FALSE == next_ins(pAsm) )
4457 {
4458 return GL_FALSE;
4459 }
4460
4461 /* tmp1.z = RCP_e(|tmp1.z|) */
4462 pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE;
4463 pAsm->D.dst.math = 1;
4464 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4465 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4466 pAsm->D.dst.reg = tmp1;
4467 pAsm->D.dst.writez = 1;
4468
4469 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4470 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4471 pAsm->S[0].src.reg = tmp1;
4472 pAsm->S[0].src.swizzlex = SQ_SEL_Z;
4473 pAsm->S[0].src.abs = 1;
4474
4475 next_ins(pAsm);
4476
4477 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
4478 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
4479 * muladd has no writemask, have to use another temp
4480 */
4481 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
4482 pAsm->D.dst.op3 = 1;
4483 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4484 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4485 pAsm->D.dst.reg = tmp2;
4486
4487 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4488 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4489 pAsm->S[0].src.reg = tmp1;
4490 noswizzle_PVSSRC(&(pAsm->S[0].src));
4491 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4492 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4493 pAsm->S[1].src.reg = tmp1;
4494 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z);
4495 setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
4496 /* immediate c 1.5 */
4497 pAsm->D2.dst2.literal_slots = 1;
4498 pAsm->C[0].f = 1.5F;
4499 pAsm->S[2].src.rtype = SRC_REC_LITERAL;
4500 pAsm->S[2].src.reg = tmp1;
4501 setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X);
4502
4503 next_ins(pAsm);
4504
4505 /* tmp1.xy = temp2.xy */
4506 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4507 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4508 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4509 pAsm->D.dst.reg = tmp1;
4510 pAsm->D.dst.writex = 1;
4511 pAsm->D.dst.writey = 1;
4512 pAsm->D.dst.writez = 0;
4513 pAsm->D.dst.writew = 0;
4514
4515 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4516 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4517 pAsm->S[0].src.reg = tmp2;
4518 noswizzle_PVSSRC(&(pAsm->S[0].src));
4519
4520 next_ins(pAsm);
4521 pAsm->aArgSubst[1] = tmp1;
4522 need_barrier = GL_TRUE;
4523
4524 }
4525
4526 switch(pAsm->pILInst[pAsm->uiCurInst].Opcode)
4527 {
4528 case OPCODE_DDX:
4529 /* will these need WQM(1) on CF inst ? */
4530 pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_H;
4531 break;
4532 case OPCODE_DDY:
4533 pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_V;
4534 break;
4535 case OPCODE_TXB:
4536 pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_L;
4537 break;
4538 default:
4539 if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1)
4540 pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_C;
4541 else
4542 pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;
4543 }
4544
4545 pAsm->is_tex = GL_TRUE;
4546 if ( GL_TRUE == need_barrier )
4547
4548 pAsm->is_tex = GL_TRUE;
4549 if ( GL_TRUE == need_barrier )
4550 {
4551 pAsm->need_tex_barrier = GL_TRUE;
4552 }
4553 // Set src1 to tex unit id
4554 pAsm->S[1].src.reg = pAsm->SamplerUnits[pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit];
4555 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4556
4557 //No sw info from mesa compiler, so hard code here.
4558 pAsm->S[1].src.swizzlex = SQ_SEL_X;
4559 pAsm->S[1].src.swizzley = SQ_SEL_Y;
4560 pAsm->S[1].src.swizzlez = SQ_SEL_Z;
4561 pAsm->S[1].src.swizzlew = SQ_SEL_W;
4562
4563 if( GL_FALSE == tex_dst(pAsm) )
4564 {
4565 return GL_FALSE;
4566 }
4567
4568 if( GL_FALSE == tex_src(pAsm) )
4569 {
4570 return GL_FALSE;
4571 }
4572
4573 if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP)
4574 {
4575 /* hopefully did swizzles before */
4576 noswizzle_PVSSRC(&(pAsm->S[0].src));
4577 }
4578
4579 if(pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX)
4580 {
4581 /* SAMPLE dst, tmp.yxwy, CUBE */
4582 pAsm->S[0].src.swizzlex = SQ_SEL_Y;
4583 pAsm->S[0].src.swizzley = SQ_SEL_X;
4584 pAsm->S[0].src.swizzlez = SQ_SEL_W;
4585 pAsm->S[0].src.swizzlew = SQ_SEL_Y;
4586 }
4587
4588 if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1)
4589 {
4590 /* compare value goes to w chan ? */
4591 pAsm->S[0].src.swizzlew = SQ_SEL_Z;
4592 }
4593
4594 if ( GL_FALSE == next_ins(pAsm) )
4595 {
4596 return GL_FALSE;
4597 }
4598
4599 /* add ARB shadow ambient but clamp to 0..1 */
4600 if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1)
4601 {
4602 /* ADD_SAT dst, dst, ambient[texunit] */
4603 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
4604
4605 if( GL_FALSE == assemble_dst(pAsm) )
4606 {
4607 return GL_FALSE;
4608 }
4609 pAsm->D2.dst2.SaturateMode = 1;
4610
4611 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4612 pAsm->S[0].src.reg = pAsm->D.dst.reg;
4613 noswizzle_PVSSRC(&(pAsm->S[0].src));
4614 noneg_PVSSRC(&(pAsm->S[0].src));
4615
4616 pAsm->S[1].src.rtype = SRC_REG_CONSTANT;
4617 pAsm->S[1].src.reg = pAsm->shadow_regs[pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit];
4618 noswizzle_PVSSRC(&(pAsm->S[1].src));
4619 noneg_PVSSRC(&(pAsm->S[1].src));
4620
4621 if( GL_FALSE == next_ins(pAsm) )
4622 {
4623 return GL_FALSE;
4624 }
4625
4626 }
4627
4628 return GL_TRUE;
4629 }
4630
4631 GLboolean assemble_XPD(r700_AssemblerBase *pAsm)
4632 {
4633 BITS tmp1;
4634 BITS tmp2 = 0;
4635
4636 if( GL_FALSE == checkop2(pAsm) )
4637 {
4638 return GL_FALSE;
4639 }
4640
4641 tmp1 = gethelpr(pAsm);
4642
4643 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
4644
4645 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4646 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4647 pAsm->D.dst.reg = tmp1;
4648 nomask_PVSDST(&(pAsm->D.dst));
4649
4650 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4651 {
4652 return GL_FALSE;
4653 }
4654
4655 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4656 {
4657 return GL_FALSE;
4658 }
4659
4660 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
4661 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
4662
4663 if( GL_FALSE == next_ins(pAsm) )
4664 {
4665 return GL_FALSE;
4666 }
4667
4668 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
4669 pAsm->D.dst.op3 = 1;
4670
4671 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
4672 {
4673 tmp2 = gethelpr(pAsm);
4674
4675 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4676 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4677 pAsm->D.dst.reg = tmp2;
4678
4679 nomask_PVSDST(&(pAsm->D.dst));
4680 }
4681 else
4682 {
4683 if( GL_FALSE == assemble_dst(pAsm) )
4684 {
4685 return GL_FALSE;
4686 }
4687 }
4688
4689 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4690 {
4691 return GL_FALSE;
4692 }
4693
4694 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4695 {
4696 return GL_FALSE;
4697 }
4698
4699 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
4700 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
4701
4702 // result1 + (neg) result0
4703 setaddrmode_PVSSRC(&(pAsm->S[2].src),ADDR_ABSOLUTE);
4704 pAsm->S[2].src.rtype = SRC_REG_TEMPORARY;
4705 pAsm->S[2].src.reg = tmp1;
4706
4707 neg_PVSSRC(&(pAsm->S[2].src));
4708 noswizzle_PVSSRC(&(pAsm->S[2].src));
4709
4710 if( GL_FALSE == next_ins(pAsm) )
4711 {
4712 return GL_FALSE;
4713 }
4714
4715
4716 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
4717 {
4718 if( GL_FALSE == assemble_dst(pAsm) )
4719 {
4720 return GL_FALSE;
4721 }
4722
4723 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4724
4725 // Use tmp as source
4726 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4727 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4728 pAsm->S[0].src.reg = tmp2;
4729
4730 noneg_PVSSRC(&(pAsm->S[0].src));
4731 noswizzle_PVSSRC(&(pAsm->S[0].src));
4732
4733 if( GL_FALSE == next_ins(pAsm) )
4734 {
4735 return GL_FALSE;
4736 }
4737 }
4738
4739 return GL_TRUE;
4740 }
4741
4742 GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm)
4743 {
4744 return GL_TRUE;
4745 }
4746
4747 static inline void decreaseCurrent(r700_AssemblerBase *pAsm, GLuint uReason)
4748 {
4749 switch (uReason)
4750 {
4751 case FC_PUSH_VPM:
4752 pAsm->CALLSTACK[pAsm->CALLSP].current--;
4753 break;
4754 case FC_PUSH_WQM:
4755 pAsm->CALLSTACK[pAsm->CALLSP].current -= 4;
4756 break;
4757 case FC_LOOP:
4758 pAsm->CALLSTACK[pAsm->CALLSP].current -= 4;
4759 break;
4760 case FC_REP:
4761 /* TODO : for 16 vp asic, should -= 2; */
4762 pAsm->CALLSTACK[pAsm->CALLSP].current -= 1;
4763 break;
4764 };
4765 }
4766
4767 static inline void checkStackDepth(r700_AssemblerBase *pAsm, GLuint uReason, GLboolean bCheckMaxOnly)
4768 {
4769 if(GL_TRUE == bCheckMaxOnly)
4770 {
4771 switch (uReason)
4772 {
4773 case FC_PUSH_VPM:
4774 if((pAsm->CALLSTACK[pAsm->CALLSP].current + 1)
4775 > pAsm->CALLSTACK[pAsm->CALLSP].max)
4776 {
4777 pAsm->CALLSTACK[pAsm->CALLSP].max =
4778 pAsm->CALLSTACK[pAsm->CALLSP].current + 1;
4779 }
4780 break;
4781 case FC_PUSH_WQM:
4782 if((pAsm->CALLSTACK[pAsm->CALLSP].current + 4)
4783 > pAsm->CALLSTACK[pAsm->CALLSP].max)
4784 {
4785 pAsm->CALLSTACK[pAsm->CALLSP].max =
4786 pAsm->CALLSTACK[pAsm->CALLSP].current + 4;
4787 }
4788 break;
4789 }
4790 return;
4791 }
4792
4793 switch (uReason)
4794 {
4795 case FC_PUSH_VPM:
4796 pAsm->CALLSTACK[pAsm->CALLSP].current++;
4797 break;
4798 case FC_PUSH_WQM:
4799 pAsm->CALLSTACK[pAsm->CALLSP].current += 4;
4800 break;
4801 case FC_LOOP:
4802 pAsm->CALLSTACK[pAsm->CALLSP].current += 4;
4803 break;
4804 case FC_REP:
4805 /* TODO : for 16 vp asic, should += 2; */
4806 pAsm->CALLSTACK[pAsm->CALLSP].current += 1;
4807 break;
4808 };
4809
4810 if(pAsm->CALLSTACK[pAsm->CALLSP].current
4811 > pAsm->CALLSTACK[pAsm->CALLSP].max)
4812 {
4813 pAsm->CALLSTACK[pAsm->CALLSP].max =
4814 pAsm->CALLSTACK[pAsm->CALLSP].current;
4815 }
4816 }
4817
4818 GLboolean jumpToOffest(r700_AssemblerBase *pAsm, GLuint pops, GLint offset)
4819 {
4820 if(GL_FALSE == add_cf_instruction(pAsm) )
4821 {
4822 return GL_FALSE;
4823 }
4824
4825 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops;
4826 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
4827 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
4828
4829 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
4830 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
4831 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP;
4832 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
4833
4834 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
4835
4836 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + offset;
4837
4838 return GL_TRUE;
4839 }
4840
4841 GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops)
4842 {
4843 if(GL_FALSE == add_cf_instruction(pAsm) )
4844 {
4845 return GL_FALSE;
4846 }
4847
4848 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops;
4849 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
4850 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
4851
4852 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
4853 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
4854 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
4855
4856 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
4857
4858 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
4859 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
4860
4861 return GL_TRUE;
4862 }
4863
4864 GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse)
4865 {
4866 pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
4867
4868 assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE);
4869
4870
4871 if(GL_FALSE == add_cf_instruction(pAsm) )
4872 {
4873 return GL_FALSE;
4874 }
4875
4876 if(GL_TRUE != bHasElse)
4877 {
4878 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
4879 }
4880 else
4881 {
4882 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
4883 }
4884 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
4885 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
4886
4887 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
4888 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
4889 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP;
4890 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
4891
4892 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
4893
4894 pAsm->FCSP++;
4895 pAsm->fc_stack[pAsm->FCSP].type = FC_IF;
4896 pAsm->fc_stack[pAsm->FCSP].mid = NULL;
4897 pAsm->fc_stack[pAsm->FCSP].midLen= 0;
4898 pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr;
4899
4900 #ifndef USE_CF_FOR_POP_AFTER
4901 if(GL_TRUE != bHasElse)
4902 {
4903 pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER;
4904 }
4905 #endif /* USE_CF_FOR_POP_AFTER */
4906
4907 checkStackDepth(pAsm, FC_PUSH_VPM, GL_FALSE);
4908
4909 return GL_TRUE;
4910 }
4911
4912 GLboolean assemble_ELSE(r700_AssemblerBase *pAsm)
4913 {
4914 if(GL_FALSE == add_cf_instruction(pAsm) )
4915 {
4916 return GL_FALSE;
4917 }
4918
4919 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; ///
4920 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
4921 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
4922
4923 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
4924 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
4925 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ELSE;
4926 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
4927
4928 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
4929
4930 pAsm->fc_stack[pAsm->FCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( (void *)pAsm->fc_stack[pAsm->FCSP].mid,
4931 0,
4932 sizeof(R700ControlFlowGenericClause *) );
4933 pAsm->fc_stack[pAsm->FCSP].mid[0] = pAsm->cf_current_cf_clause_ptr;
4934 //pAsm->fc_stack[pAsm->FCSP].unNumMid = 1;
4935
4936 #ifndef USE_CF_FOR_POP_AFTER
4937 pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER;
4938 #endif /* USE_CF_FOR_POP_AFTER */
4939
4940 pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode - 1;
4941
4942 return GL_TRUE;
4943 }
4944
4945 GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm)
4946 {
4947 #ifdef USE_CF_FOR_POP_AFTER
4948 pops(pAsm, 1);
4949 #endif /* USE_CF_FOR_POP_AFTER */
4950
4951 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
4952
4953 if(NULL == pAsm->fc_stack[pAsm->FCSP].mid)
4954 {
4955 /* no else in between */
4956 pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode;
4957 }
4958 else
4959 {
4960 pAsm->fc_stack[pAsm->FCSP].mid[0]->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode;
4961 }
4962
4963 if(NULL != pAsm->fc_stack[pAsm->FCSP].mid)
4964 {
4965 FREE(pAsm->fc_stack[pAsm->FCSP].mid);
4966 }
4967
4968 if(pAsm->fc_stack[pAsm->FCSP].type != FC_IF)
4969 {
4970 radeon_error("if/endif in shader code are not paired. \n");
4971 return GL_FALSE;
4972 }
4973
4974 pAsm->FCSP--;
4975
4976 decreaseCurrent(pAsm, FC_PUSH_VPM);
4977
4978 return GL_TRUE;
4979 }
4980
4981 GLboolean assemble_BGNLOOP(r700_AssemblerBase *pAsm)
4982 {
4983 if(GL_FALSE == add_cf_instruction(pAsm) )
4984 {
4985 return GL_FALSE;
4986 }
4987
4988
4989 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
4990 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
4991 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
4992
4993 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
4994 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
4995 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_START_NO_AL;
4996 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
4997
4998 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
4999
5000 pAsm->FCSP++;
5001 pAsm->fc_stack[pAsm->FCSP].type = FC_LOOP;
5002 pAsm->fc_stack[pAsm->FCSP].mid = NULL;
5003 pAsm->fc_stack[pAsm->FCSP].unNumMid = 0;
5004 pAsm->fc_stack[pAsm->FCSP].midLen = 0;
5005 pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr;
5006
5007 checkStackDepth(pAsm, FC_LOOP, GL_FALSE);
5008
5009 return GL_TRUE;
5010 }
5011
5012 GLboolean assemble_BRK(r700_AssemblerBase *pAsm)
5013 {
5014 #ifdef USE_CF_FOR_CONTINUE_BREAK
5015
5016 pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
5017
5018 assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE);
5019
5020 unsigned int unFCSP;
5021 for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--)
5022 {
5023 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
5024 {
5025 break;
5026 }
5027 }
5028 if(0 == FC_LOOP)
5029 {
5030 radeon_error("Break is not inside loop/endloop pair.\n");
5031 return GL_FALSE;
5032 }
5033
5034 if(GL_FALSE == add_cf_instruction(pAsm) )
5035 {
5036 return GL_FALSE;
5037 }
5038
5039
5040 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
5041 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5042 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5043
5044 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5045 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5046 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK;
5047
5048 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5049
5050 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5051
5052 pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
5053 (void *)pAsm->fc_stack[unFCSP].mid,
5054 sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
5055 sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
5056 pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
5057 pAsm->fc_stack[unFCSP].unNumMid++;
5058
5059 if(GL_FALSE == add_cf_instruction(pAsm) )
5060 {
5061 return GL_FALSE;
5062 }
5063
5064 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
5065 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5066 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5067
5068 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5069 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5070 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
5071
5072 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5073
5074 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5075 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
5076
5077 checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
5078
5079 #endif //USE_CF_FOR_CONTINUE_BREAK
5080 return GL_TRUE;
5081 }
5082
5083 GLboolean assemble_CONT(r700_AssemblerBase *pAsm)
5084 {
5085 #ifdef USE_CF_FOR_CONTINUE_BREAK
5086 pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
5087
5088 assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE);
5089
5090 unsigned int unFCSP;
5091 for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--)
5092 {
5093 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
5094 {
5095 break;
5096 }
5097 }
5098 if(0 == FC_LOOP)
5099 {
5100 radeon_error("Continue is not inside loop/endloop pair.\n");
5101 return GL_FALSE;
5102 }
5103
5104 if(GL_FALSE == add_cf_instruction(pAsm) )
5105 {
5106 return GL_FALSE;
5107 }
5108
5109
5110 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
5111 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5112 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5113
5114 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5115 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5116 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_CONTINUE;
5117
5118 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5119
5120 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5121
5122 pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
5123 (void *)pAsm->fc_stack[unFCSP].mid,
5124 sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
5125 sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
5126 pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
5127 pAsm->fc_stack[unFCSP].unNumMid++;
5128
5129 if(GL_FALSE == add_cf_instruction(pAsm) )
5130 {
5131 return GL_FALSE;
5132 }
5133
5134 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
5135 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5136 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5137
5138 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5139 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5140 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
5141
5142 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5143
5144 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5145 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
5146
5147 checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
5148
5149 #endif /* USE_CF_FOR_CONTINUE_BREAK */
5150
5151 return GL_TRUE;
5152 }
5153
5154 GLboolean assemble_ENDLOOP(r700_AssemblerBase *pAsm)
5155 {
5156 GLuint i;
5157
5158 if(GL_FALSE == add_cf_instruction(pAsm) )
5159 {
5160 return GL_FALSE;
5161 }
5162
5163
5164 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
5165 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5166 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5167
5168 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5169 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5170 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_END;
5171 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5172
5173 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5174
5175 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->fc_stack[pAsm->FCSP].first->m_uIndex + 1;
5176 pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
5177
5178 #ifdef USE_CF_FOR_CONTINUE_BREAK
5179 for(i=0; i<pAsm->fc_stack[pAsm->FCSP].unNumMid; i++)
5180 {
5181 pAsm->fc_stack[pAsm->FCSP].mid[i]->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex;
5182 }
5183 if(NULL != pAsm->fc_stack[pAsm->FCSP].mid)
5184 {
5185 FREE(pAsm->fc_stack[pAsm->FCSP].mid);
5186 }
5187 #endif
5188
5189 if(pAsm->fc_stack[pAsm->FCSP].type != FC_LOOP)
5190 {
5191 radeon_error("loop/endloop in shader code are not paired. \n");
5192 return GL_FALSE;
5193 }
5194
5195 GLuint unFCSP;
5196 GLuint unIF = 0;
5197 if((pAsm->unCFflags & HAS_CURRENT_LOOPRET) > 0)
5198 {
5199 for(unFCSP=(pAsm->FCSP-1); unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--)
5200 {
5201 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
5202 {
5203 breakLoopOnFlag(pAsm, unFCSP);
5204 break;
5205 }
5206 else if(FC_IF == pAsm->fc_stack[unFCSP].type)
5207 {
5208 unIF++;
5209 }
5210 }
5211 if(unFCSP <= pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry)
5212 {
5213 #ifdef USE_CF_FOR_POP_AFTER
5214 returnOnFlag(pAsm, unIF);
5215 #else
5216 returnOnFlag(pAsm, 0);
5217 #endif /* USE_CF_FOR_POP_AFTER */
5218 pAsm->unCFflags &= ~HAS_CURRENT_LOOPRET;
5219 }
5220 }
5221
5222 pAsm->FCSP--;
5223
5224 decreaseCurrent(pAsm, FC_LOOP);
5225
5226 return GL_TRUE;
5227 }
5228
5229 void add_return_inst(r700_AssemblerBase *pAsm)
5230 {
5231 if(GL_FALSE == add_cf_instruction(pAsm) )
5232 {
5233 return;
5234 }
5235 //pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
5236 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
5237 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5238 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5239
5240 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5241 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5242 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_RETURN;
5243 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5244
5245 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5246 }
5247
5248 GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex, GLuint uiIL_Shift)
5249 {
5250 /* Put in sub */
5251 if( (pAsm->unSubArrayPointer + 1) > pAsm->unSubArraySize )
5252 {
5253 pAsm->subs = (SUB_OFFSET*)_mesa_realloc( (void *)pAsm->subs,
5254 sizeof(SUB_OFFSET) * pAsm->unSubArraySize,
5255 sizeof(SUB_OFFSET) * (pAsm->unSubArraySize + 10) );
5256 if(NULL == pAsm->subs)
5257 {
5258 return GL_FALSE;
5259 }
5260 pAsm->unSubArraySize += 10;
5261 }
5262
5263 pAsm->subs[pAsm->unSubArrayPointer].subIL_Offset = nILindex + uiIL_Shift;
5264 pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pHead=NULL;
5265 pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pTail=NULL;
5266 pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.uNumOfNode=0;
5267
5268 pAsm->CALLSP++;
5269 pAsm->CALLSTACK[pAsm->CALLSP].subDescIndex = pAsm->unSubArrayPointer;
5270 pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry = pAsm->FCSP;
5271 pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local
5272 = &(pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local);
5273 pAsm->CALLSTACK[pAsm->CALLSP].max = 0;
5274 pAsm->CALLSTACK[pAsm->CALLSP].current = 0;
5275 SetActiveCFlist(pAsm->pR700Shader,
5276 pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local);
5277
5278 pAsm->unSubArrayPointer++;
5279
5280 /* start sub */
5281 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
5282
5283 pAsm->FCSP++;
5284 pAsm->fc_stack[pAsm->FCSP].type = FC_REP;
5285
5286 checkStackDepth(pAsm, FC_REP, GL_FALSE);
5287
5288 return GL_TRUE;
5289 }
5290
5291 GLboolean assemble_ENDSUB(r700_AssemblerBase *pAsm)
5292 {
5293 if(pAsm->fc_stack[pAsm->FCSP].type != FC_REP)
5294 {
5295 radeon_error("BGNSUB/ENDSUB in shader code are not paired. \n");
5296 return GL_FALSE;
5297 }
5298
5299 /* copy max to sub structure */
5300 pAsm->subs[pAsm->CALLSTACK[pAsm->CALLSP].subDescIndex].unStackDepthMax
5301 = pAsm->CALLSTACK[pAsm->CALLSP].max;
5302
5303 decreaseCurrent(pAsm, FC_REP);
5304
5305 pAsm->CALLSP--;
5306 SetActiveCFlist(pAsm->pR700Shader,
5307 pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local);
5308
5309 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
5310
5311 pAsm->FCSP--;
5312
5313 return GL_TRUE;
5314 }
5315
5316 GLboolean assemble_RET(r700_AssemblerBase *pAsm)
5317 {
5318 GLuint unIF = 0;
5319
5320 if(pAsm->CALLSP > 0)
5321 { /* in sub */
5322 GLuint unFCSP;
5323 for(unFCSP=pAsm->FCSP; unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--)
5324 {
5325 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
5326 {
5327 setRetInLoopFlag(pAsm, SQ_SEL_1);
5328 breakLoopOnFlag(pAsm, unFCSP);
5329 pAsm->unCFflags |= LOOPRET_FLAGS;
5330
5331 return GL_TRUE;
5332 }
5333 else if(FC_IF == pAsm->fc_stack[unFCSP].type)
5334 {
5335 unIF++;
5336 }
5337 }
5338 }
5339
5340 #ifdef USE_CF_FOR_POP_AFTER
5341 if(unIF > 0)
5342 {
5343 pops(pAsm, unIF);
5344 }
5345 #endif /* USE_CF_FOR_POP_AFTER */
5346
5347 add_return_inst(pAsm);
5348
5349 return GL_TRUE;
5350 }
5351
5352 GLboolean assemble_CAL(r700_AssemblerBase *pAsm,
5353 GLint nILindex,
5354 GLuint uiIL_Shift,
5355 GLuint uiNumberInsts,
5356 struct prog_instruction *pILInst,
5357 PRESUB_DESC * pPresubDesc)
5358 {
5359 GLint uiIL_Offset;
5360
5361 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
5362
5363 if(GL_FALSE == add_cf_instruction(pAsm) )
5364 {
5365 return GL_FALSE;
5366 }
5367
5368 pAsm->cf_current_cf_clause_ptr->m_Word1.f.call_count = 1;
5369 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
5370 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5371 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5372
5373 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5374 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5375 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_CALL;
5376 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5377
5378 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5379
5380 /* Put in caller */
5381 if( (pAsm->unCallerArrayPointer + 1) > pAsm->unCallerArraySize )
5382 {
5383 pAsm->callers = (CALLER_POINTER*)_mesa_realloc( (void *)pAsm->callers,
5384 sizeof(CALLER_POINTER) * pAsm->unCallerArraySize,
5385 sizeof(CALLER_POINTER) * (pAsm->unCallerArraySize + 10) );
5386 if(NULL == pAsm->callers)
5387 {
5388 return GL_FALSE;
5389 }
5390 pAsm->unCallerArraySize += 10;
5391 }
5392
5393 uiIL_Offset = nILindex + uiIL_Shift;
5394 pAsm->callers[pAsm->unCallerArrayPointer].subIL_Offset = uiIL_Offset;
5395 pAsm->callers[pAsm->unCallerArrayPointer].cf_ptr = pAsm->cf_current_cf_clause_ptr;
5396
5397 pAsm->callers[pAsm->unCallerArrayPointer].finale_cf_ptr = NULL;
5398 pAsm->callers[pAsm->unCallerArrayPointer].prelude_cf_ptr = NULL;
5399
5400 pAsm->unCallerArrayPointer++;
5401
5402 int j;
5403 GLuint max;
5404 GLuint unSubID;
5405 GLboolean bRet;
5406 for(j=0; j<pAsm->unSubArrayPointer; j++)
5407 {
5408 if(uiIL_Offset == pAsm->subs[j].subIL_Offset)
5409 { /* compiled before */
5410
5411 max = pAsm->subs[j].unStackDepthMax
5412 + pAsm->CALLSTACK[pAsm->CALLSP].current;
5413 if(max > pAsm->CALLSTACK[pAsm->CALLSP].max)
5414 {
5415 pAsm->CALLSTACK[pAsm->CALLSP].max = max;
5416 }
5417
5418 pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = j;
5419 return GL_TRUE;
5420 }
5421 }
5422
5423 pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = pAsm->unSubArrayPointer;
5424 unSubID = pAsm->unSubArrayPointer;
5425
5426 bRet = AssembleInstr(nILindex, uiIL_Shift, uiNumberInsts, pILInst, pAsm);
5427
5428 if(GL_TRUE == bRet)
5429 {
5430 max = pAsm->subs[unSubID].unStackDepthMax
5431 + pAsm->CALLSTACK[pAsm->CALLSP].current;
5432 if(max > pAsm->CALLSTACK[pAsm->CALLSP].max)
5433 {
5434 pAsm->CALLSTACK[pAsm->CALLSP].max = max;
5435 }
5436
5437 pAsm->subs[unSubID].pPresubDesc = pPresubDesc;
5438 }
5439
5440 return bRet;
5441 }
5442
5443 GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue)
5444 {
5445 /*GLfloat fLiteral[2] = {0.1, 0.0};*/
5446
5447 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
5448 pAsm->D.dst.op3 = 0;
5449 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5450 pAsm->D.dst.reg = pAsm->flag_reg_index;
5451 pAsm->D.dst.writex = 1;
5452 pAsm->D.dst.writey = 0;
5453 pAsm->D.dst.writez = 0;
5454 pAsm->D.dst.writew = 0;
5455 pAsm->D2.dst2.literal_slots = 1;
5456 pAsm->D2.dst2.SaturateMode = SATURATE_OFF;
5457 pAsm->D.dst.predicated = 0;
5458 /* in reloc where dislink flag init inst, only one slot alu inst is handled. */
5459 pAsm->D.dst.math = 1; /* TODO : not math really, but one channel op, more generic alu assembler needed */
5460 pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! */
5461 #if 0
5462 pAsm->S[0].src.rtype = SRC_REC_LITERAL;
5463 //pAsm->S[0].src.reg = 0;
5464 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5465 noneg_PVSSRC(&(pAsm->S[0].src));
5466 pAsm->S[0].src.swizzlex = SQ_SEL_X;
5467 pAsm->S[0].src.swizzley = SQ_SEL_Y;
5468 pAsm->S[0].src.swizzlez = SQ_SEL_Z;
5469 pAsm->S[0].src.swizzlew = SQ_SEL_W;
5470
5471 if( GL_FALSE == next_ins_literal(pAsm, &(fLiteral[0])) )
5472 {
5473 return GL_FALSE;
5474 }
5475 #else
5476 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
5477 pAsm->S[0].src.reg = 0;
5478 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5479 noneg_PVSSRC(&(pAsm->S[0].src));
5480 pAsm->S[0].src.swizzlex = flagValue;
5481 pAsm->S[0].src.swizzley = flagValue;
5482 pAsm->S[0].src.swizzlez = flagValue;
5483 pAsm->S[0].src.swizzlew = flagValue;
5484
5485 if( GL_FALSE == next_ins(pAsm) )
5486 {
5487 return GL_FALSE;
5488 }
5489 #endif
5490
5491 return GL_TRUE;
5492 }
5493
5494 GLboolean testFlag(r700_AssemblerBase *pAsm)
5495 {
5496 /*GLfloat fLiteral[2] = {0.1, 0.0};*/
5497
5498 //Test flag
5499 GLuint tmp = gethelpr(pAsm);
5500 pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
5501
5502 pAsm->D.dst.opcode = SQ_OP2_INST_PRED_SETE;
5503 pAsm->D.dst.math = 1;
5504 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5505 pAsm->D.dst.reg = tmp;
5506 pAsm->D.dst.writex = 1;
5507 pAsm->D.dst.writey = 0;
5508 pAsm->D.dst.writez = 0;
5509 pAsm->D.dst.writew = 0;
5510 pAsm->D2.dst2.literal_slots = 1;
5511 pAsm->D2.dst2.SaturateMode = SATURATE_OFF;
5512 pAsm->D.dst.predicated = 1;
5513 pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! */
5514
5515 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
5516 pAsm->S[0].src.reg = pAsm->flag_reg_index;
5517 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5518 noneg_PVSSRC(&(pAsm->S[0].src));
5519 pAsm->S[0].src.swizzlex = SQ_SEL_X;
5520 pAsm->S[0].src.swizzley = SQ_SEL_Y;
5521 pAsm->S[0].src.swizzlez = SQ_SEL_Z;
5522 pAsm->S[0].src.swizzlew = SQ_SEL_W;
5523 #if 0
5524 pAsm->S[1].src.rtype = SRC_REC_LITERAL;
5525 //pAsm->S[1].src.reg = 0;
5526 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
5527 noneg_PVSSRC(&(pAsm->S[1].src));
5528 pAsm->S[1].src.swizzlex = SQ_SEL_X;
5529 pAsm->S[1].src.swizzley = SQ_SEL_Y;
5530 pAsm->S[1].src.swizzlez = SQ_SEL_Z;
5531 pAsm->S[1].src.swizzlew = SQ_SEL_W;
5532
5533 if( GL_FALSE == next_ins_literal(pAsm, &(fLiteral[0])) )
5534 {
5535 return GL_FALSE;
5536 }
5537 #else
5538 pAsm->S[1].src.rtype = DST_REG_TEMPORARY;
5539 pAsm->S[1].src.reg = 0;
5540 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
5541 noneg_PVSSRC(&(pAsm->S[1].src));
5542 pAsm->S[1].src.swizzlex = SQ_SEL_1;
5543 pAsm->S[1].src.swizzley = SQ_SEL_1;
5544 pAsm->S[1].src.swizzlez = SQ_SEL_1;
5545 pAsm->S[1].src.swizzlew = SQ_SEL_1;
5546
5547 if( GL_FALSE == next_ins(pAsm) )
5548 {
5549 return GL_FALSE;
5550 }
5551 #endif
5552
5553 checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
5554
5555 return GL_TRUE;
5556 }
5557
5558 GLboolean returnOnFlag(r700_AssemblerBase *pAsm, GLuint unIF)
5559 {
5560 testFlag(pAsm);
5561 jumpToOffest(pAsm, 1, 4);
5562 setRetInLoopFlag(pAsm, SQ_SEL_0);
5563 pops(pAsm, unIF + 1);
5564 add_return_inst(pAsm);
5565
5566 return GL_TRUE;
5567 }
5568
5569 GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP)
5570 {
5571 testFlag(pAsm);
5572
5573 //break
5574 if(GL_FALSE == add_cf_instruction(pAsm) )
5575 {
5576 return GL_FALSE;
5577 }
5578
5579 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
5580 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5581 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5582
5583 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5584 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5585 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK;
5586 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5587
5588 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5589
5590 pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
5591 (void *)pAsm->fc_stack[unFCSP].mid,
5592 sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
5593 sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
5594 pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
5595 pAsm->fc_stack[unFCSP].unNumMid++;
5596
5597 pops(pAsm, 1);
5598
5599 return GL_TRUE;
5600 }
5601
5602 GLboolean AssembleInstr(GLuint uiFirstInst,
5603 GLuint uiIL_Shift,
5604 GLuint uiNumberInsts,
5605 struct prog_instruction *pILInst,
5606 r700_AssemblerBase *pR700AsmCode)
5607 {
5608 GLuint i;
5609
5610 pR700AsmCode->pILInst = pILInst;
5611 for(i=uiFirstInst; i<uiNumberInsts; i++)
5612 {
5613 pR700AsmCode->uiCurInst = i;
5614
5615 #ifndef USE_CF_FOR_CONTINUE_BREAK
5616 if(OPCODE_BRK == pILInst[i+1].Opcode)
5617 {
5618 switch(pILInst[i].Opcode)
5619 {
5620 case OPCODE_SLE:
5621 pILInst[i].Opcode = OPCODE_SGT;
5622 break;
5623 case OPCODE_SLT:
5624 pILInst[i].Opcode = OPCODE_SGE;
5625 break;
5626 case OPCODE_SGE:
5627 pILInst[i].Opcode = OPCODE_SLT;
5628 break;
5629 case OPCODE_SGT:
5630 pILInst[i].Opcode = OPCODE_SLE;
5631 break;
5632 case OPCODE_SEQ:
5633 pILInst[i].Opcode = OPCODE_SNE;
5634 break;
5635 case OPCODE_SNE:
5636 pILInst[i].Opcode = OPCODE_SEQ;
5637 break;
5638 default:
5639 break;
5640 }
5641 }
5642 #endif
5643 if(pILInst[i].CondUpdate == 1)
5644 {
5645 /* remember dest register used for cond evaluation */
5646 /* XXX also handle PROGRAM_OUTPUT registers here? */
5647 pR700AsmCode->last_cond_register = pILInst[i].DstReg.Index;
5648 }
5649
5650 switch (pILInst[i].Opcode)
5651 {
5652 case OPCODE_ABS:
5653 if ( GL_FALSE == assemble_ABS(pR700AsmCode) )
5654 return GL_FALSE;
5655 break;
5656 case OPCODE_ADD:
5657 case OPCODE_SUB:
5658 if ( GL_FALSE == assemble_ADD(pR700AsmCode) )
5659 return GL_FALSE;
5660 break;
5661
5662 case OPCODE_ARL:
5663 if ( GL_FALSE == assemble_ARL(pR700AsmCode) )
5664 return GL_FALSE;
5665 break;
5666 case OPCODE_ARR:
5667 radeon_error("Not yet implemented instruction OPCODE_ARR \n");
5668 //if ( GL_FALSE == assemble_BAD("ARR") )
5669 return GL_FALSE;
5670 break;
5671
5672 case OPCODE_CMP:
5673 if ( GL_FALSE == assemble_CMP(pR700AsmCode) )
5674 return GL_FALSE;
5675 break;
5676 case OPCODE_COS:
5677 if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_COS) )
5678 return GL_FALSE;
5679 break;
5680
5681 case OPCODE_DP3:
5682 case OPCODE_DP4:
5683 case OPCODE_DPH:
5684 if ( GL_FALSE == assemble_DOT(pR700AsmCode) )
5685 return GL_FALSE;
5686 break;
5687
5688 case OPCODE_DST:
5689 if ( GL_FALSE == assemble_DST(pR700AsmCode) )
5690 return GL_FALSE;
5691 break;
5692
5693 case OPCODE_EX2:
5694 if ( GL_FALSE == assemble_EX2(pR700AsmCode) )
5695 return GL_FALSE;
5696 break;
5697 case OPCODE_EXP:
5698 if ( GL_FALSE == assemble_EXP(pR700AsmCode) )
5699 return GL_FALSE;
5700 break;
5701
5702 case OPCODE_FLR:
5703 if ( GL_FALSE == assemble_FLR(pR700AsmCode) )
5704 return GL_FALSE;
5705 break;
5706 //case OP_FLR_INT: ;
5707
5708 // if ( GL_FALSE == assemble_FLR_INT() )
5709 // return GL_FALSE;
5710 // break;
5711
5712 case OPCODE_FRC:
5713 if ( GL_FALSE == assemble_FRC(pR700AsmCode) )
5714 return GL_FALSE;
5715 break;
5716
5717 case OPCODE_KIL:
5718 case OPCODE_KIL_NV:
5719 if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGT) )
5720 return GL_FALSE;
5721 break;
5722 case OPCODE_LG2:
5723 if ( GL_FALSE == assemble_LG2(pR700AsmCode) )
5724 return GL_FALSE;
5725 break;
5726 case OPCODE_LIT:
5727 if ( GL_FALSE == assemble_LIT(pR700AsmCode) )
5728 return GL_FALSE;
5729 break;
5730 case OPCODE_LRP:
5731 if ( GL_FALSE == assemble_LRP(pR700AsmCode) )
5732 return GL_FALSE;
5733 break;
5734 case OPCODE_LOG:
5735 if ( GL_FALSE == assemble_LOG(pR700AsmCode) )
5736 return GL_FALSE;
5737 break;
5738
5739 case OPCODE_MAD:
5740 if ( GL_FALSE == assemble_MAD(pR700AsmCode) )
5741 return GL_FALSE;
5742 break;
5743 case OPCODE_MAX:
5744 if ( GL_FALSE == assemble_MAX(pR700AsmCode) )
5745 return GL_FALSE;
5746 break;
5747 case OPCODE_MIN:
5748 if ( GL_FALSE == assemble_MIN(pR700AsmCode) )
5749 return GL_FALSE;
5750 break;
5751
5752 case OPCODE_MOV:
5753 if ( GL_FALSE == assemble_MOV(pR700AsmCode) )
5754 return GL_FALSE;
5755 break;
5756 case OPCODE_MUL:
5757 if ( GL_FALSE == assemble_MUL(pR700AsmCode) )
5758 return GL_FALSE;
5759 break;
5760
5761 case OPCODE_NOISE1:
5762 {
5763 callPreSub(pR700AsmCode,
5764 GLSL_NOISE1,
5765 &noise1_presub,
5766 pILInst->DstReg.Index + pR700AsmCode->starting_temp_register_number,
5767 1);
5768 radeon_error("noise1: not yet supported shader instruction\n");
5769 };
5770 break;
5771 case OPCODE_NOISE2:
5772 radeon_error("noise2: not yet supported shader instruction\n");
5773 break;
5774 case OPCODE_NOISE3:
5775 radeon_error("noise3: not yet supported shader instruction\n");
5776 break;
5777 case OPCODE_NOISE4:
5778 radeon_error("noise4: not yet supported shader instruction\n");
5779 break;
5780
5781 case OPCODE_POW:
5782 if ( GL_FALSE == assemble_POW(pR700AsmCode) )
5783 return GL_FALSE;
5784 break;
5785 case OPCODE_RCP:
5786 if ( GL_FALSE == assemble_RCP(pR700AsmCode) )
5787 return GL_FALSE;
5788 break;
5789 case OPCODE_RSQ:
5790 if ( GL_FALSE == assemble_RSQ(pR700AsmCode) )
5791 return GL_FALSE;
5792 break;
5793 case OPCODE_SIN:
5794 if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_SIN) )
5795 return GL_FALSE;
5796 break;
5797 case OPCODE_SCS:
5798 if ( GL_FALSE == assemble_SCS(pR700AsmCode) )
5799 return GL_FALSE;
5800 break;
5801
5802 case OPCODE_SEQ:
5803 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETE) )
5804 {
5805 return GL_FALSE;
5806 }
5807 break;
5808
5809 case OPCODE_SGT:
5810 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) )
5811 {
5812 return GL_FALSE;
5813 }
5814 break;
5815
5816 case OPCODE_SGE:
5817 if ( GL_FALSE == assemble_SGE(pR700AsmCode) )
5818 {
5819 return GL_FALSE;
5820 }
5821 break;
5822
5823 /* NO LT, LE, TODO : use GE => LE, GT => LT : reverse 2 src order would be simpliest. Or use SQ_CF_COND_FALSE for SQ_CF_COND_ACTIVE.*/
5824 case OPCODE_SLT:
5825 {
5826 struct prog_src_register SrcRegSave[2];
5827 SrcRegSave[0] = pILInst[i].SrcReg[0];
5828 SrcRegSave[1] = pILInst[i].SrcReg[1];
5829 pILInst[i].SrcReg[0] = SrcRegSave[1];
5830 pILInst[i].SrcReg[1] = SrcRegSave[0];
5831 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) )
5832 {
5833 pILInst[i].SrcReg[0] = SrcRegSave[0];
5834 pILInst[i].SrcReg[1] = SrcRegSave[1];
5835 return GL_FALSE;
5836 }
5837 pILInst[i].SrcReg[0] = SrcRegSave[0];
5838 pILInst[i].SrcReg[1] = SrcRegSave[1];
5839 }
5840 break;
5841
5842 case OPCODE_SLE:
5843 {
5844 struct prog_src_register SrcRegSave[2];
5845 SrcRegSave[0] = pILInst[i].SrcReg[0];
5846 SrcRegSave[1] = pILInst[i].SrcReg[1];
5847 pILInst[i].SrcReg[0] = SrcRegSave[1];
5848 pILInst[i].SrcReg[1] = SrcRegSave[0];
5849 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGE) )
5850 {
5851 pILInst[i].SrcReg[0] = SrcRegSave[0];
5852 pILInst[i].SrcReg[1] = SrcRegSave[1];
5853 return GL_FALSE;
5854 }
5855 pILInst[i].SrcReg[0] = SrcRegSave[0];
5856 pILInst[i].SrcReg[1] = SrcRegSave[1];
5857 }
5858 break;
5859
5860 case OPCODE_SNE:
5861 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETNE) )
5862 {
5863 return GL_FALSE;
5864 }
5865 break;
5866
5867 //case OP_STP:
5868 // if ( GL_FALSE == assemble_STP(pR700AsmCode) )
5869 // return GL_FALSE;
5870 // break;
5871
5872 case OPCODE_SWZ:
5873 if ( GL_FALSE == assemble_MOV(pR700AsmCode) )
5874 {
5875 return GL_FALSE;
5876 }
5877 else
5878 {
5879 if( (i+1)<uiNumberInsts )
5880 {
5881 if(OPCODE_END != pILInst[i+1].Opcode)
5882 {
5883 if( GL_TRUE == IsTex(pILInst[i+1].Opcode) )
5884 {
5885 pR700AsmCode->pInstDeps[i+1].nDstDep = i+1; //=1?
5886 }
5887 }
5888 }
5889 }
5890 break;
5891 case OPCODE_DDX:
5892 case OPCODE_DDY:
5893 case OPCODE_TEX:
5894 case OPCODE_TXB:
5895 case OPCODE_TXP:
5896 if ( GL_FALSE == assemble_TEX(pR700AsmCode) )
5897 return GL_FALSE;
5898 break;
5899
5900 case OPCODE_TRUNC:
5901 if ( GL_FALSE == assemble_math_function(pR700AsmCode, SQ_OP2_INST_TRUNC) )
5902 return GL_FALSE;
5903 break;
5904
5905 case OPCODE_XPD:
5906 if ( GL_FALSE == assemble_XPD(pR700AsmCode) )
5907 return GL_FALSE;
5908 break;
5909
5910 case OPCODE_IF:
5911 {
5912 GLboolean bHasElse = GL_FALSE;
5913
5914 if(pILInst[pILInst[i].BranchTarget].Opcode == OPCODE_ELSE)
5915 {
5916 bHasElse = GL_TRUE;
5917 }
5918
5919 if ( GL_FALSE == assemble_IF(pR700AsmCode, bHasElse) )
5920 {
5921 return GL_FALSE;
5922 }
5923 }
5924 break;
5925
5926 case OPCODE_ELSE :
5927 if ( GL_FALSE == assemble_ELSE(pR700AsmCode) )
5928 return GL_FALSE;
5929 break;
5930
5931 case OPCODE_ENDIF:
5932 if ( GL_FALSE == assemble_ENDIF(pR700AsmCode) )
5933 return GL_FALSE;
5934 break;
5935
5936 case OPCODE_BGNLOOP:
5937 if( GL_FALSE == assemble_BGNLOOP(pR700AsmCode) )
5938 {
5939 return GL_FALSE;
5940 }
5941 break;
5942
5943 case OPCODE_BRK:
5944 if( GL_FALSE == assemble_BRK(pR700AsmCode) )
5945 {
5946 return GL_FALSE;
5947 }
5948 break;
5949
5950 case OPCODE_CONT:
5951 if( GL_FALSE == assemble_CONT(pR700AsmCode) )
5952 {
5953 return GL_FALSE;
5954 }
5955 break;
5956
5957 case OPCODE_ENDLOOP:
5958 if( GL_FALSE == assemble_ENDLOOP(pR700AsmCode) )
5959 {
5960 return GL_FALSE;
5961 }
5962 break;
5963
5964 case OPCODE_BGNSUB:
5965 if( GL_FALSE == assemble_BGNSUB(pR700AsmCode, i, uiIL_Shift) )
5966 {
5967 return GL_FALSE;
5968 }
5969 break;
5970
5971 case OPCODE_RET:
5972 if( GL_FALSE == assemble_RET(pR700AsmCode) )
5973 {
5974 return GL_FALSE;
5975 }
5976 break;
5977
5978 case OPCODE_CAL:
5979 if( GL_FALSE == assemble_CAL(pR700AsmCode,
5980 pILInst[i].BranchTarget,
5981 uiIL_Shift,
5982 uiNumberInsts,
5983 pILInst,
5984 NULL) )
5985 {
5986 return GL_FALSE;
5987 }
5988 break;
5989
5990 //case OPCODE_EXPORT:
5991 // if ( GL_FALSE == assemble_EXPORT() )
5992 // return GL_FALSE;
5993 // break;
5994
5995 case OPCODE_ENDSUB:
5996 return assemble_ENDSUB(pR700AsmCode);
5997
5998 case OPCODE_END:
5999 //pR700AsmCode->uiCurInst = i;
6000 //This is to remaind that if in later exoort there is depth/stencil
6001 //export, we need a mov to re-arrange DST channel, where using a
6002 //psuedo inst, we will use this end inst to do it.
6003 return GL_TRUE;
6004
6005 default:
6006 radeon_error("internal: unknown instruction\n");
6007 return GL_FALSE;
6008 }
6009 }
6010
6011 return GL_TRUE;
6012 }
6013
6014 GLboolean InitShaderProgram(r700_AssemblerBase * pAsm)
6015 {
6016 setRetInLoopFlag(pAsm, SQ_SEL_0);
6017 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
6018 return GL_TRUE;
6019 }
6020
6021 GLboolean RelocProgram(r700_AssemblerBase * pAsm, struct gl_program * pILProg)
6022 {
6023 GLuint i;
6024 GLuint unCFoffset;
6025 TypedShaderList * plstCFmain;
6026 TypedShaderList * plstCFsub;
6027
6028 R700ShaderInstruction * pInst;
6029 R700ControlFlowGenericClause * pCFInst;
6030
6031 R700ControlFlowALUClause * pCF_ALU;
6032 R700ALUInstruction * pALU;
6033 GLuint unConstOffset = 0;
6034 GLuint unRegOffset;
6035 GLuint unMinRegIndex;
6036
6037 plstCFmain = pAsm->CALLSTACK[0].plstCFInstructions_local;
6038
6039 /* remove flags init if they are not used */
6040 if((pAsm->unCFflags & HAS_LOOPRET) == 0)
6041 {
6042 R700ControlFlowALUClause * pCF_ALU;
6043 pInst = plstCFmain->pHead;
6044 while(pInst)
6045 {
6046 if(SIT_CF_ALU == pInst->m_ShaderInstType)
6047 {
6048 pCF_ALU = (R700ControlFlowALUClause *)pInst;
6049 if(0 == pCF_ALU->m_Word1.f.count)
6050 {
6051 pCF_ALU->m_Word1.f.cf_inst = SQ_CF_INST_NOP;
6052 }
6053 else
6054 {
6055 R700ALUInstruction * pALU = pCF_ALU->m_pLinkedALUInstruction;
6056
6057 pALU->m_pLinkedALUClause = NULL;
6058 pALU = (R700ALUInstruction *)(pALU->pNextInst);
6059 pALU->m_pLinkedALUClause = pCF_ALU;
6060 pCF_ALU->m_pLinkedALUInstruction = pALU;
6061
6062 pCF_ALU->m_Word1.f.count--;
6063 }
6064 break;
6065 }
6066 pInst = pInst->pNextInst;
6067 };
6068 }
6069
6070 if(pAsm->CALLSTACK[0].max > 0)
6071 {
6072 pAsm->pR700Shader->uStackSize = ((pAsm->CALLSTACK[0].max + 3)>>2) + 2;
6073 }
6074
6075 if(0 == pAsm->unSubArrayPointer)
6076 {
6077 return GL_TRUE;
6078 }
6079
6080 unCFoffset = plstCFmain->uNumOfNode;
6081
6082 if(NULL != pILProg->Parameters)
6083 {
6084 unConstOffset = pILProg->Parameters->NumParameters;
6085 }
6086
6087 /* Reloc subs */
6088 for(i=0; i<pAsm->unSubArrayPointer; i++)
6089 {
6090 pAsm->subs[i].unCFoffset = unCFoffset;
6091 plstCFsub = &(pAsm->subs[i].lstCFInstructions_local);
6092
6093 pInst = plstCFsub->pHead;
6094
6095 /* reloc instructions */
6096 while(pInst)
6097 {
6098 if(SIT_CF_GENERIC == pInst->m_ShaderInstType)
6099 {
6100 pCFInst = (R700ControlFlowGenericClause *)pInst;
6101
6102 switch (pCFInst->m_Word1.f.cf_inst)
6103 {
6104 case SQ_CF_INST_POP:
6105 case SQ_CF_INST_JUMP:
6106 case SQ_CF_INST_ELSE:
6107 case SQ_CF_INST_LOOP_END:
6108 case SQ_CF_INST_LOOP_START:
6109 case SQ_CF_INST_LOOP_START_NO_AL:
6110 case SQ_CF_INST_LOOP_CONTINUE:
6111 case SQ_CF_INST_LOOP_BREAK:
6112 pCFInst->m_Word0.f.addr += unCFoffset;
6113 break;
6114 default:
6115 break;
6116 }
6117 }
6118
6119 pInst->m_uIndex += unCFoffset;
6120
6121 pInst = pInst->pNextInst;
6122 };
6123
6124 if(NULL != pAsm->subs[i].pPresubDesc)
6125 {
6126 GLuint uNumSrc;
6127
6128 unMinRegIndex = pAsm->subs[i].pPresubDesc->pCompiledSub->MinRegIndex;
6129 unRegOffset = pAsm->subs[i].pPresubDesc->maxStartReg;
6130 unConstOffset += pAsm->subs[i].pPresubDesc->unConstantsStart;
6131
6132 pInst = plstCFsub->pHead;
6133 while(pInst)
6134 {
6135 if(SIT_CF_ALU == pInst->m_ShaderInstType)
6136 {
6137 pCF_ALU = (R700ControlFlowALUClause *)pInst;
6138
6139 pALU = pCF_ALU->m_pLinkedALUInstruction;
6140 for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++)
6141 {
6142 pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex;
6143
6144 if(pALU->m_Word0.f.src0_sel < SQ_ALU_SRC_GPR_SIZE)
6145 {
6146 pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex;
6147 }
6148 else if(pALU->m_Word0.f.src0_sel >= SQ_ALU_SRC_CFILE_BASE)
6149 {
6150 pALU->m_Word0.f.src0_sel += unConstOffset;
6151 }
6152
6153 if( ((pALU->m_Word1.val >> SQ_ALU_WORD1_OP3_ALU_INST_SHIFT) & 0x0000001F)
6154 >= SQ_OP3_INST_MUL_LIT )
6155 { /* op3 : 3 srcs */
6156 if(pALU->m_Word1_OP3.f.src2_sel < SQ_ALU_SRC_GPR_SIZE)
6157 {
6158 pALU->m_Word1_OP3.f.src2_sel = pALU->m_Word1_OP3.f.src2_sel + unRegOffset - unMinRegIndex;
6159 }
6160 else if(pALU->m_Word1_OP3.f.src2_sel >= SQ_ALU_SRC_CFILE_BASE)
6161 {
6162 pALU->m_Word1_OP3.f.src2_sel += unConstOffset;
6163 }
6164 if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE)
6165 {
6166 pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex;
6167 }
6168 else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE)
6169 {
6170 pALU->m_Word0.f.src1_sel += unConstOffset;
6171 }
6172 }
6173 else
6174 {
6175 if(pAsm->bR6xx)
6176 {
6177 uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f6.alu_inst, 0);
6178 }
6179 else
6180 {
6181 uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f.alu_inst, 0);
6182 }
6183 if(2 == uNumSrc)
6184 { /* 2 srcs */
6185 if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE)
6186 {
6187 pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex;
6188 }
6189 else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE)
6190 {
6191 pALU->m_Word0.f.src1_sel += unConstOffset;
6192 }
6193 }
6194 }
6195 pALU = (R700ALUInstruction*)(pALU->pNextInst);
6196 }
6197 }
6198 pInst = pInst->pNextInst;
6199 };
6200 }
6201
6202 /* Put sub into main */
6203 plstCFmain->pTail->pNextInst = plstCFsub->pHead;
6204 plstCFmain->pTail = plstCFsub->pTail;
6205 plstCFmain->uNumOfNode += plstCFsub->uNumOfNode;
6206
6207 unCFoffset += plstCFsub->uNumOfNode;
6208 }
6209
6210 /* reloc callers */
6211 for(i=0; i<pAsm->unCallerArrayPointer; i++)
6212 {
6213 pAsm->callers[i].cf_ptr->m_Word0.f.addr
6214 = pAsm->subs[pAsm->callers[i].subDescIndex].unCFoffset;
6215
6216 if(NULL != pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc)
6217 {
6218 unMinRegIndex = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->pCompiledSub->MinRegIndex;
6219 unRegOffset = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->maxStartReg;
6220
6221 if(NULL != pAsm->callers[i].prelude_cf_ptr)
6222 {
6223 pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].prelude_cf_ptr);
6224 pALU = pCF_ALU->m_pLinkedALUInstruction;
6225 for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++)
6226 {
6227 pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex;
6228 pALU = (R700ALUInstruction*)(pALU->pNextInst);
6229 }
6230 }
6231 if(NULL != pAsm->callers[i].finale_cf_ptr)
6232 {
6233 pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].finale_cf_ptr);
6234 pALU = pCF_ALU->m_pLinkedALUInstruction;
6235 for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++)
6236 {
6237 pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex;
6238 pALU = (R700ALUInstruction*)(pALU->pNextInst);
6239 }
6240 }
6241 }
6242 }
6243
6244 return GL_TRUE;
6245 }
6246
6247 GLboolean callPreSub(r700_AssemblerBase* pAsm,
6248 LOADABLE_SCRIPT_SIGNITURE scriptSigniture,
6249 COMPILED_SUB * pCompiledSub,
6250 GLshort uOutReg,
6251 GLshort uNumValidSrc)
6252 {
6253 /* save assemble context */
6254 GLuint starting_temp_register_number_save;
6255 GLuint number_used_registers_save;
6256 GLuint uFirstHelpReg_save;
6257 GLuint uHelpReg_save;
6258 GLuint uiCurInst_save;
6259 struct prog_instruction *pILInst_save;
6260 PRESUB_DESC * pPresubDesc;
6261 GLboolean bRet;
6262 int i;
6263
6264 R700ControlFlowGenericClause* prelude_cf_ptr = NULL;
6265
6266 /* copy srcs to presub inputs */
6267 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
6268 for(i=0; i<uNumValidSrc; i++)
6269 {
6270 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
6271 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
6272 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
6273 pAsm->D.dst.reg = pCompiledSub->srcRegIndex[i];
6274 pAsm->D.dst.writex = 1;
6275 pAsm->D.dst.writey = 1;
6276 pAsm->D.dst.writez = 1;
6277 pAsm->D.dst.writew = 1;
6278
6279 if( GL_FALSE == assemble_src(pAsm, i, 0) )
6280 {
6281 return GL_FALSE;
6282 }
6283
6284 next_ins(pAsm);
6285 }
6286 if(uNumValidSrc > 0)
6287 {
6288 prelude_cf_ptr = (R700ControlFlowGenericClause*) pAsm->cf_current_alu_clause_ptr;
6289 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
6290 }
6291
6292 /* browse thro existing presubs. */
6293 for(i=0; i<pAsm->unNumPresub; i++)
6294 {
6295 if(pAsm->presubs[i].sptSigniture == scriptSigniture)
6296 {
6297 break;
6298 }
6299 }
6300
6301 if(i == pAsm->unNumPresub)
6302 { /* not loaded yet */
6303 /* save assemble context */
6304 number_used_registers_save = pAsm->number_used_registers;
6305 uFirstHelpReg_save = pAsm->uFirstHelpReg;
6306 uHelpReg_save = pAsm->uHelpReg;
6307 starting_temp_register_number_save = pAsm->starting_temp_register_number;
6308 pILInst_save = pAsm->pILInst;
6309 uiCurInst_save = pAsm->uiCurInst;
6310
6311 /* alloc in presub */
6312 if( (pAsm->unNumPresub + 1) > pAsm->unPresubArraySize )
6313 {
6314 pAsm->presubs = (PRESUB_DESC*)_mesa_realloc( (void *)pAsm->presubs,
6315 sizeof(PRESUB_DESC) * pAsm->unPresubArraySize,
6316 sizeof(PRESUB_DESC) * (pAsm->unPresubArraySize + 4) );
6317 if(NULL == pAsm->presubs)
6318 {
6319 radeon_error("No memeory to allocate built in shader function description structures. \n");
6320 return GL_FALSE;
6321 }
6322 pAsm->unPresubArraySize += 4;
6323 }
6324
6325 pPresubDesc = &(pAsm->presubs[i]);
6326 pPresubDesc->sptSigniture = scriptSigniture;
6327
6328 /* constants offsets need to be final resolved at reloc. */
6329 if(0 == pAsm->unNumPresub)
6330 {
6331 pPresubDesc->unConstantsStart = 0;
6332 }
6333 else
6334 {
6335 pPresubDesc->unConstantsStart = pAsm->presubs[i-1].unConstantsStart
6336 + pAsm->presubs[i-1].pCompiledSub->NumParameters;
6337 }
6338
6339 pPresubDesc->pCompiledSub = pCompiledSub;
6340
6341 pPresubDesc->subIL_Shift = pAsm->unCurNumILInsts;
6342 pPresubDesc->maxStartReg = uFirstHelpReg_save;
6343 pAsm->unCurNumILInsts += pCompiledSub->NumInstructions;
6344
6345 pAsm->unNumPresub++;
6346
6347 /* setup new assemble context */
6348 pAsm->starting_temp_register_number = 0;
6349 pAsm->number_used_registers = pCompiledSub->NumTemporaries;
6350 pAsm->uFirstHelpReg = pAsm->number_used_registers;
6351 pAsm->uHelpReg = pAsm->uFirstHelpReg;
6352
6353 bRet = assemble_CAL(pAsm,
6354 0,
6355 pPresubDesc->subIL_Shift,
6356 pCompiledSub->NumInstructions,
6357 pCompiledSub->Instructions,
6358 pPresubDesc);
6359
6360
6361 pPresubDesc->number_used_registers = pAsm->number_used_registers;
6362
6363 /* restore assemble context */
6364 pAsm->number_used_registers = number_used_registers_save;
6365 pAsm->uFirstHelpReg = uFirstHelpReg_save;
6366 pAsm->uHelpReg = uHelpReg_save;
6367 pAsm->starting_temp_register_number = starting_temp_register_number_save;
6368 pAsm->pILInst = pILInst_save;
6369 pAsm->uiCurInst = uiCurInst_save;
6370 }
6371 else
6372 { /* was loaded */
6373 pPresubDesc = &(pAsm->presubs[i]);
6374
6375 bRet = assemble_CAL(pAsm,
6376 0,
6377 pPresubDesc->subIL_Shift,
6378 pCompiledSub->NumInstructions,
6379 pCompiledSub->Instructions,
6380 pPresubDesc);
6381 }
6382
6383 if(GL_FALSE == bRet)
6384 {
6385 radeon_error("Shader presub assemble failed. \n");
6386 }
6387 else
6388 {
6389 /* copy presub output to real dst */
6390 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
6391 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
6392
6393 if( GL_FALSE == assemble_dst(pAsm) )
6394 {
6395 return GL_FALSE;
6396 }
6397
6398 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
6399 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
6400 pAsm->S[0].src.reg = pCompiledSub->dstRegIndex;
6401 pAsm->S[0].src.swizzlex = pCompiledSub->outputSwizzleX;
6402 pAsm->S[0].src.swizzley = pCompiledSub->outputSwizzleY;
6403 pAsm->S[0].src.swizzlez = pCompiledSub->outputSwizzleZ;
6404 pAsm->S[0].src.swizzlew = pCompiledSub->outputSwizzleW;
6405
6406 next_ins(pAsm);
6407
6408 pAsm->callers[pAsm->unCallerArrayPointer - 1].finale_cf_ptr = (R700ControlFlowGenericClause*) pAsm->cf_current_alu_clause_ptr;
6409 pAsm->callers[pAsm->unCallerArrayPointer - 1].prelude_cf_ptr = prelude_cf_ptr;
6410 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
6411 }
6412
6413 if( (pPresubDesc->number_used_registers + pAsm->uFirstHelpReg) > pAsm->number_used_registers )
6414 {
6415 pAsm->number_used_registers = pPresubDesc->number_used_registers + pAsm->uFirstHelpReg;
6416 }
6417 if(pAsm->uFirstHelpReg > pPresubDesc->maxStartReg)
6418 {
6419 pPresubDesc->maxStartReg = pAsm->uFirstHelpReg;
6420 }
6421
6422 return bRet;
6423 }
6424
6425 GLboolean Process_Export(r700_AssemblerBase* pAsm,
6426 GLuint type,
6427 GLuint export_starting_index,
6428 GLuint export_count,
6429 GLuint starting_register_number,
6430 GLboolean is_depth_export)
6431 {
6432 unsigned char ucWriteMask;
6433
6434 check_current_clause(pAsm, CF_EMPTY_CLAUSE);
6435 check_current_clause(pAsm, CF_EXPORT_CLAUSE); //alloc the cf_current_export_clause_ptr
6436
6437 pAsm->cf_current_export_clause_ptr->m_Word0.f.type = type;
6438
6439 switch (type)
6440 {
6441 case SQ_EXPORT_PIXEL:
6442 if(GL_TRUE == is_depth_export)
6443 {
6444 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_PIXEL_Z;
6445 }
6446 else
6447 {
6448 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_PIXEL_MRT0 + export_starting_index;
6449 }
6450 break;
6451
6452 case SQ_EXPORT_POS:
6453 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_POS_0 + export_starting_index;
6454 break;
6455
6456 case SQ_EXPORT_PARAM:
6457 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = 0x0 + export_starting_index;
6458 break;
6459
6460 default:
6461 radeon_error("Unknown export type: %d\n", type);
6462 return GL_FALSE;
6463 break;
6464 }
6465
6466 pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_gpr = starting_register_number;
6467
6468 pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_rel = SQ_ABSOLUTE;
6469 pAsm->cf_current_export_clause_ptr->m_Word0.f.index_gpr = 0x0;
6470 pAsm->cf_current_export_clause_ptr->m_Word0.f.elem_size = 0x3;
6471
6472 pAsm->cf_current_export_clause_ptr->m_Word1.f.burst_count = (export_count - 1);
6473 pAsm->cf_current_export_clause_ptr->m_Word1.f.end_of_program = 0x0;
6474 pAsm->cf_current_export_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
6475 pAsm->cf_current_export_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT; // _DONE
6476 pAsm->cf_current_export_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
6477 pAsm->cf_current_export_clause_ptr->m_Word1.f.barrier = 0x1;
6478
6479 if (export_count == 1)
6480 {
6481 assert(starting_register_number >= pAsm->starting_export_register_number);
6482
6483 ucWriteMask = pAsm->pucOutMask[starting_register_number - pAsm->starting_export_register_number];
6484 /* exports Z as a float into Red channel */
6485 if (GL_TRUE == is_depth_export)
6486 ucWriteMask = 0x1;
6487
6488 if( (ucWriteMask & 0x1) != 0)
6489 {
6490 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X;
6491 }
6492 else
6493 {
6494 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_MASK;
6495 }
6496 if( ((ucWriteMask>>1) & 0x1) != 0)
6497 {
6498 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y;
6499 }
6500 else
6501 {
6502 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_MASK;
6503 }
6504 if( ((ucWriteMask>>2) & 0x1) != 0)
6505 {
6506 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z;
6507 }
6508 else
6509 {
6510 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_MASK;
6511 }
6512 if( ((ucWriteMask>>3) & 0x1) != 0)
6513 {
6514 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W;
6515 }
6516 else
6517 {
6518 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_MASK;
6519 }
6520 }
6521 else
6522 {
6523 // This should only be used if all components for all registers have been written
6524 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X;
6525 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y;
6526 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z;
6527 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W;
6528 }
6529
6530 pAsm->cf_last_export_ptr = pAsm->cf_current_export_clause_ptr;
6531
6532 return GL_TRUE;
6533 }
6534
6535 GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm, BITS depth_channel_select)
6536 {
6537 gl_inst_opcode Opcode_save = pAsm->pILInst[pAsm->uiCurInst].Opcode; //Should be OPCODE_END
6538 pAsm->pILInst[pAsm->uiCurInst].Opcode = OPCODE_MOV;
6539
6540 // MOV depth_export_register.hw_depth_channel, depth_export_register.depth_channel_select
6541
6542 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
6543
6544 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
6545 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
6546 pAsm->D.dst.reg = pAsm->depth_export_register_number;
6547
6548 pAsm->D.dst.writex = 1; // depth goes in R channel for HW
6549
6550 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
6551 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
6552 pAsm->S[0].src.reg = pAsm->depth_export_register_number;
6553
6554 setswizzle_PVSSRC(&(pAsm->S[0].src), depth_channel_select);
6555
6556 noneg_PVSSRC(&(pAsm->S[0].src));
6557
6558 if( GL_FALSE == next_ins(pAsm) )
6559 {
6560 return GL_FALSE;
6561 }
6562
6563 pAsm->pILInst[pAsm->uiCurInst].Opcode = Opcode_save;
6564
6565 return GL_TRUE;
6566 }
6567
6568 GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode,
6569 GLbitfield OutputsWritten)
6570 {
6571 unsigned int unBit;
6572 GLuint export_count = 0;
6573 unsigned int i;
6574
6575 if(pR700AsmCode->depth_export_register_number >= 0)
6576 {
6577 if( GL_FALSE == Move_Depth_Exports_To_Correct_Channels(pR700AsmCode, SQ_SEL_Z) ) // depth
6578 {
6579 return GL_FALSE;
6580 }
6581 }
6582
6583 for (i = 0; i < FRAG_RESULT_MAX; ++i)
6584 {
6585 unBit = 1 << i;
6586
6587 if (OutputsWritten & unBit)
6588 {
6589 GLboolean is_depth = i == FRAG_RESULT_DEPTH ? GL_TRUE : GL_FALSE;
6590 if (!Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, pR700AsmCode->uiFP_OutputMap[i], is_depth))
6591 return GL_FALSE;
6592 ++export_count;
6593 }
6594 }
6595
6596 /* Need to export something, otherwise we'll hang
6597 * results are undefined anyway */
6598 if(export_count == 0)
6599 {
6600 Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, pR700AsmCode->starting_export_register_number, GL_FALSE);
6601 }
6602
6603 if(pR700AsmCode->cf_last_export_ptr != NULL)
6604 {
6605 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
6606 pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
6607 }
6608
6609 return GL_TRUE;
6610 }
6611
6612 GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode,
6613 GLbitfield OutputsWritten)
6614 {
6615 unsigned int unBit;
6616 unsigned int i;
6617
6618 GLuint export_starting_index = 0;
6619 GLuint export_count = pR700AsmCode->number_of_exports;
6620
6621 unBit = 1 << VERT_RESULT_HPOS;
6622 if(OutputsWritten & unBit)
6623 {
6624 if( GL_FALSE == Process_Export(pR700AsmCode,
6625 SQ_EXPORT_POS,
6626 export_starting_index,
6627 1,
6628 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_HPOS],
6629 GL_FALSE) )
6630 {
6631 return GL_FALSE;
6632 }
6633 export_starting_index++;
6634 export_count--;
6635 }
6636
6637 unBit = 1 << VERT_RESULT_PSIZ;
6638 if(OutputsWritten & unBit)
6639 {
6640 if( GL_FALSE == Process_Export(pR700AsmCode,
6641 SQ_EXPORT_POS,
6642 export_starting_index,
6643 1,
6644 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_PSIZ],
6645 GL_FALSE) )
6646 {
6647 return GL_FALSE;
6648 }
6649 export_count--;
6650 }
6651
6652 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
6653
6654
6655 pR700AsmCode->number_of_exports = export_count;
6656 export_starting_index = 0;
6657
6658 unBit = 1 << VERT_RESULT_COL0;
6659 if(OutputsWritten & unBit)
6660 {
6661 if( GL_FALSE == Process_Export(pR700AsmCode,
6662 SQ_EXPORT_PARAM,
6663 export_starting_index,
6664 1,
6665 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL0],
6666 GL_FALSE) )
6667 {
6668 return GL_FALSE;
6669 }
6670
6671 export_starting_index++;
6672 }
6673
6674 unBit = 1 << VERT_RESULT_COL1;
6675 if(OutputsWritten & unBit)
6676 {
6677 if( GL_FALSE == Process_Export(pR700AsmCode,
6678 SQ_EXPORT_PARAM,
6679 export_starting_index,
6680 1,
6681 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL1],
6682 GL_FALSE) )
6683 {
6684 return GL_FALSE;
6685 }
6686
6687 export_starting_index++;
6688 }
6689
6690 unBit = 1 << VERT_RESULT_FOGC;
6691 if(OutputsWritten & unBit)
6692 {
6693 if( GL_FALSE == Process_Export(pR700AsmCode,
6694 SQ_EXPORT_PARAM,
6695 export_starting_index,
6696 1,
6697 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_FOGC],
6698 GL_FALSE) )
6699 {
6700 return GL_FALSE;
6701 }
6702
6703 export_starting_index++;
6704 }
6705
6706 for(i=0; i<8; i++)
6707 {
6708 unBit = 1 << (VERT_RESULT_TEX0 + i);
6709 if(OutputsWritten & unBit)
6710 {
6711 if( GL_FALSE == Process_Export(pR700AsmCode,
6712 SQ_EXPORT_PARAM,
6713 export_starting_index,
6714 1,
6715 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_TEX0 + i],
6716 GL_FALSE) )
6717 {
6718 return GL_FALSE;
6719 }
6720
6721 export_starting_index++;
6722 }
6723 }
6724
6725 for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
6726 {
6727 unBit = 1 << i;
6728 if(OutputsWritten & unBit)
6729 {
6730 if( GL_FALSE == Process_Export(pR700AsmCode,
6731 SQ_EXPORT_PARAM,
6732 export_starting_index,
6733 1,
6734 pR700AsmCode->ucVP_OutputMap[i],
6735 GL_FALSE) )
6736 {
6737 return GL_FALSE;
6738 }
6739
6740 export_starting_index++;
6741 }
6742 }
6743
6744 // At least one param should be exported
6745 if (export_count)
6746 {
6747 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
6748 }
6749 else
6750 {
6751 if( GL_FALSE == Process_Export(pR700AsmCode,
6752 SQ_EXPORT_PARAM,
6753 0,
6754 1,
6755 pR700AsmCode->starting_export_register_number,
6756 GL_FALSE) )
6757 {
6758 return GL_FALSE;
6759 }
6760
6761 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_0;
6762 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_0;
6763 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_0;
6764 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_1;
6765 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
6766 }
6767
6768 pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
6769
6770 return GL_TRUE;
6771 }
6772
6773 GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode)
6774 {
6775 FREE(pR700AsmCode->pucOutMask);
6776 FREE(pR700AsmCode->pInstDeps);
6777
6778 if(NULL != pR700AsmCode->subs)
6779 {
6780 FREE(pR700AsmCode->subs);
6781 }
6782 if(NULL != pR700AsmCode->callers)
6783 {
6784 FREE(pR700AsmCode->callers);
6785 }
6786
6787 if(NULL != pR700AsmCode->presubs)
6788 {
6789 FREE(pR700AsmCode->presubs);
6790 }
6791
6792 return GL_TRUE;
6793 }
6794